{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predictive power score (`ppscore`) demo\n",
    "\n",
    "Exercises the four entry points of `microwave.data_analysis.ppscore` used below (`score`, `predictors`, `matrix`, `mutual_predictors`) on a small random integer table. Column `E` is an exact copy of `B`, so the pair `B`/`E` is a known perfect mutual predictor.\n",
    "\n",
    "Saved outputs were cleared: they came from an unseeded run and contained an absolute local path. Use **Restart Kernel and Run All** to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "# The project package is imported from the directory above this notebook.\n",
    "# Move there only when the package directory is not already visible, so that\n",
    "# re-running this cell does not climb one more level each time (which a bare\n",
    "# `%cd ..` does) and no absolute local path is echoed into the saved output.\n",
    "# NOTE(review): assumes a `microwave/` package directory at the repository\n",
    "# root; if the layout differs this falls back to the old always-cd behaviour.\n",
    "if not Path('microwave').is_dir():\n",
    "    os.chdir('..')\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "\n",
    "import microwave.data_analysis.ppscore as pps\n",
    "\n",
    "SEED = 42  # arbitrary value; fixes the toy data and the tree fits below"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Toy data\n",
    "\n",
    "Fifteen rows of integers in `{0, 1, 2}` for columns `A`-`D`, plus `E` as a copy of `B`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed the legacy global NumPy generator that `np.random.randint` draws from,\n",
    "# so a fresh Run All always builds the same table. Any code downstream that\n",
    "# draws from the same global state without its own seed also becomes\n",
    "# repeatable under Run All.\n",
    "np.random.seed(SEED)\n",
    "\n",
    "df = pd.DataFrame(np.random.randint(0, 3, size=(15, 4)), columns=list('ABCD'))\n",
    "df['E'] = df['B']  # exact duplicate of B: the expected perfect mutual predictor\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Score for a single pair\n",
    "\n",
    "One call for the pair (`A`, `B`), scored with mean squared error on a decision-tree regressor. In the earlier saved run this returned a one-row frame with columns `ppscore`, `case`, `metric`, `perfect_score`, `naive_score`, `model_score`, `model`.\n",
    "\n",
    "The arguments appear to be feature first, target second (the `naive_score` of that run matched the `y = B` rows of the matrix below); confirm against the module."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# random_state pins the tree's internal feature permutation / tie-breaking.\n",
    "pps.score(\n",
    "    df['A'],\n",
    "    df['B'],\n",
    "    metric=mean_squared_error,\n",
    "    model=DecisionTreeRegressor(random_state=SEED),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predictors of one target\n",
    "\n",
    "Scores every column of `df` against `A`. In the earlier saved run the result had one row per column (`x`), including `A` itself with `ppscore` 1.0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pps.predictors(\n",
    "    df,\n",
    "    df['A'],\n",
    "    metric=mean_squared_error,\n",
    "    model=DecisionTreeRegressor(random_state=SEED),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Full matrix\n",
    "\n",
    "Scores for ordered pairs of columns (`x`, `y`). The earlier saved run listed 20 rows for the 5 columns, i.e. every pair of distinct columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pps_mat = pps.matrix(\n",
    "    df,\n",
    "    metric=mean_squared_error,\n",
    "    model=DecisionTreeRegressor(random_state=SEED),\n",
    ")\n",
    "pps_mat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Heatmap\n",
    "\n",
    "Pivot the long table to a `y`-by-`x` grid of `ppscore` values. Pairs with `x == y` were absent from the matrix in the earlier run, so the diagonal is expected to be empty."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `values=` already restricts the pivot to the ppscore column, so no column\n",
    "# pre-selection is needed.\n",
    "mat = pps_mat.pivot(index='y', columns='x', values='ppscore')\n",
    "\n",
    "ax = sns.heatmap(mat, annot=True, fmt='.2f')\n",
    "ax.set(title='ppscore by pair', xlabel='x', ylabel='y');  # `;` hides the repr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mutual predictors\n",
    "\n",
    "Presumably the columns whose scores in `pps_mat` pass `threshold` in both directions; confirm the exact rule in the module. The earlier saved run returned `['B', 'E']`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pps.mutual_predictors(pps_mat, threshold=0.9)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv_microwave (3.13.2)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}