Files
microwave/notebooks/demo_univariate_aggregates.ipynb
2025-02-27 13:46:56 +01:00

1235 lines
44 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c:\\Users\\Edouard\\Documents\\Git\\microwave\n"
]
}
],
"source": [
"%cd ..\n",
"import microwave.data_analysis.univariate as univariate\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['size', 'non-null', 'nunique', 'sum', 'min', 'max', 'first', 'last', 'mean', 'median', 'mode', 'gmean', 'hmean', 'Pmean', 'geothmetic meandian', 'variance', 'std', 'mad', 'skewness', 'excesskurtosis', 'range', 'Prange', 'n_outliers', 'P75', 'P25', 'P10', 'P90', 'PN', 'skewtest', 'kurtosistest', 'normaltest', 'jarque_bera', 'shapiro', 'anderson', 'energy', 'rms', 'entropy', 'autocorrelation'])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"univariate.AGGFUNCCODES.keys()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"0 0 1 2 1 1\n",
"1 1 1 1 2 1\n",
"2 0 2 0 2 2\n",
"3 2 0 2 1 0\n",
"4 2 2 1 2 2\n",
".. .. .. .. .. ..\n",
"995 0 1 2 2 1\n",
"996 1 2 1 1 2\n",
"997 1 0 0 1 0\n",
"998 2 0 1 1 0\n",
"999 2 2 0 1 2\n",
"\n",
"[1000 rows x 5 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.randint(0,3,size=(1000, 4)), columns=list('ABCD'))\n",
"df['E'] = df['B']\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>size</th>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>non-null</th>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nunique</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sum</th>\n",
" <td>1040</td>\n",
" <td>1026</td>\n",
" <td>1002</td>\n",
" <td>989</td>\n",
" <td>1026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>first</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>last</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.04</td>\n",
" <td>1.026</td>\n",
" <td>1.002</td>\n",
" <td>0.989</td>\n",
" <td>1.026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>median</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mode</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>gmean</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hmean</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Pmean</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>geothmetic meandian</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>variance</th>\n",
" <td>0.6604</td>\n",
" <td>0.649324</td>\n",
" <td>0.661996</td>\n",
" <td>0.634879</td>\n",
" <td>0.649324</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.81265</td>\n",
" <td>0.805806</td>\n",
" <td>0.813631</td>\n",
" <td>0.796793</td>\n",
" <td>0.805806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mad</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>skewness</th>\n",
" <td>-0.073251</td>\n",
" <td>-0.04714</td>\n",
" <td>-0.003661</td>\n",
" <td>0.019674</td>\n",
" <td>-0.04714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>excesskurtosis</th>\n",
" <td>1.517782</td>\n",
" <td>1.541503</td>\n",
" <td>1.510592</td>\n",
" <td>1.575346</td>\n",
" <td>1.541503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>range</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Prange</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>n_outliers</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P75</th>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P25</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P10</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P90</th>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PN</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>skewtest_a</th>\n",
" <td>-0.951391</td>\n",
" <td>-0.612722</td>\n",
" <td>-0.047614</td>\n",
" <td>0.255835</td>\n",
" <td>-0.612722</td>\n",
" </tr>\n",
" <tr>\n",
" <th>skewtest_b</th>\n",
" <td>0.341406</td>\n",
" <td>0.54006</td>\n",
" <td>0.962024</td>\n",
" <td>0.798078</td>\n",
" <td>0.54006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>kurtosistest_a</th>\n",
" <td>87.592119</td>\n",
" <td>92.396965</td>\n",
" <td>86.38827</td>\n",
" <td>103.150756</td>\n",
" <td>92.396965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>kurtosistest_b</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>normaltest_a</th>\n",
" <td>7673.284425</td>\n",
" <td>8537.574579</td>\n",
" <td>7462.935452</td>\n",
" <td>10640.143829</td>\n",
" <td>8537.574579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>normaltest_b</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>jarque_bera_a</th>\n",
" <td>92.4347</td>\n",
" <td>89.004259</td>\n",
" <td>92.432906</td>\n",
" <td>84.632762</td>\n",
" <td>89.004259</td>\n",
" </tr>\n",
" <tr>\n",
" <th>jarque_bera_b</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>shapiro_a</th>\n",
" <td>0.793814</td>\n",
" <td>0.79688</td>\n",
" <td>0.79431</td>\n",
" <td>0.80017</td>\n",
" <td>0.79688</td>\n",
" </tr>\n",
" <tr>\n",
" <th>shapiro_b</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>anderson_a</th>\n",
" <td>81.552961</td>\n",
" <td>80.265394</td>\n",
" <td>81.257078</td>\n",
" <td>79.026048</td>\n",
" <td>80.265394</td>\n",
" </tr>\n",
" <tr>\n",
" <th>anderson_b</th>\n",
" <td>[0.574, 0.653, 0.784, 0.914, 1.088]</td>\n",
" <td>[0.574, 0.653, 0.784, 0.914, 1.088]</td>\n",
" <td>[0.574, 0.653, 0.784, 0.914, 1.088]</td>\n",
" <td>[0.574, 0.653, 0.784, 0.914, 1.088]</td>\n",
" <td>[0.574, 0.653, 0.784, 0.914, 1.088]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>anderson_c</th>\n",
" <td>[15.0, 10.0, 5.0, 2.5, 1.0]</td>\n",
" <td>[15.0, 10.0, 5.0, 2.5, 1.0]</td>\n",
" <td>[15.0, 10.0, 5.0, 2.5, 1.0]</td>\n",
" <td>[15.0, 10.0, 5.0, 2.5, 1.0]</td>\n",
" <td>[15.0, 10.0, 5.0, 2.5, 1.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>energy</th>\n",
" <td>1742</td>\n",
" <td>1702</td>\n",
" <td>1666</td>\n",
" <td>1613</td>\n",
" <td>1702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rms</th>\n",
" <td>1.319848</td>\n",
" <td>1.304607</td>\n",
" <td>1.290736</td>\n",
" <td>1.270039</td>\n",
" <td>1.304607</td>\n",
" </tr>\n",
" <tr>\n",
" <th>entropy</th>\n",
" <td>1.583147</td>\n",
" <td>1.583318</td>\n",
" <td>1.584888</td>\n",
" <td>1.581618</td>\n",
" <td>1.583318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>autocorrelation</th>\n",
" <td>-0.008494</td>\n",
" <td>-0.001003</td>\n",
" <td>0.001508</td>\n",
" <td>-0.015942</td>\n",
" <td>-0.001003</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A \\\n",
"size 1000 \n",
"non-null 1000 \n",
"nunique 3 \n",
"sum 1040 \n",
"min 0 \n",
"max 2 \n",
"first 0 \n",
"last 2 \n",
"mean 1.04 \n",
"median 1.0 \n",
"mode 2 \n",
"gmean 0.0 \n",
"hmean 0.0 \n",
"Pmean 0.0 \n",
"geothmetic meandian 0.0 \n",
"variance 0.6604 \n",
"std 0.81265 \n",
"mad 1.0 \n",
"skewness -0.073251 \n",
"excesskurtosis 1.517782 \n",
"range 2 \n",
"Prange 0.0 \n",
"n_outliers 0 \n",
"P75 2.0 \n",
"P25 0.0 \n",
"P10 0.0 \n",
"P90 2.0 \n",
"PN NaN \n",
"skewtest_a -0.951391 \n",
"skewtest_b 0.341406 \n",
"kurtosistest_a 87.592119 \n",
"kurtosistest_b 0.0 \n",
"normaltest_a 7673.284425 \n",
"normaltest_b 0.0 \n",
"jarque_bera_a 92.4347 \n",
"jarque_bera_b 0.0 \n",
"shapiro_a 0.793814 \n",
"shapiro_b 0.0 \n",
"anderson_a 81.552961 \n",
"anderson_b [0.574, 0.653, 0.784, 0.914, 1.088] \n",
"anderson_c [15.0, 10.0, 5.0, 2.5, 1.0] \n",
"energy 1742 \n",
"rms 1.319848 \n",
"entropy 1.583147 \n",
"autocorrelation -0.008494 \n",
"\n",
" B \\\n",
"size 1000 \n",
"non-null 1000 \n",
"nunique 3 \n",
"sum 1026 \n",
"min 0 \n",
"max 2 \n",
"first 1 \n",
"last 2 \n",
"mean 1.026 \n",
"median 1.0 \n",
"mode 1 \n",
"gmean 0.0 \n",
"hmean 0.0 \n",
"Pmean 0.0 \n",
"geothmetic meandian 0.0 \n",
"variance 0.649324 \n",
"std 0.805806 \n",
"mad 1.0 \n",
"skewness -0.04714 \n",
"excesskurtosis 1.541503 \n",
"range 2 \n",
"Prange 0.0 \n",
"n_outliers 0 \n",
"P75 2.0 \n",
"P25 0.0 \n",
"P10 0.0 \n",
"P90 2.0 \n",
"PN NaN \n",
"skewtest_a -0.612722 \n",
"skewtest_b 0.54006 \n",
"kurtosistest_a 92.396965 \n",
"kurtosistest_b 0.0 \n",
"normaltest_a 8537.574579 \n",
"normaltest_b 0.0 \n",
"jarque_bera_a 89.004259 \n",
"jarque_bera_b 0.0 \n",
"shapiro_a 0.79688 \n",
"shapiro_b 0.0 \n",
"anderson_a 80.265394 \n",
"anderson_b [0.574, 0.653, 0.784, 0.914, 1.088] \n",
"anderson_c [15.0, 10.0, 5.0, 2.5, 1.0] \n",
"energy 1702 \n",
"rms 1.304607 \n",
"entropy 1.583318 \n",
"autocorrelation -0.001003 \n",
"\n",
" C \\\n",
"size 1000 \n",
"non-null 1000 \n",
"nunique 3 \n",
"sum 1002 \n",
"min 0 \n",
"max 2 \n",
"first 2 \n",
"last 0 \n",
"mean 1.002 \n",
"median 1.0 \n",
"mode 1 \n",
"gmean 0.0 \n",
"hmean 0.0 \n",
"Pmean 0.0 \n",
"geothmetic meandian 0.0 \n",
"variance 0.661996 \n",
"std 0.813631 \n",
"mad 1.0 \n",
"skewness -0.003661 \n",
"excesskurtosis 1.510592 \n",
"range 2 \n",
"Prange 0.0 \n",
"n_outliers 0 \n",
"P75 2.0 \n",
"P25 0.0 \n",
"P10 0.0 \n",
"P90 2.0 \n",
"PN NaN \n",
"skewtest_a -0.047614 \n",
"skewtest_b 0.962024 \n",
"kurtosistest_a 86.38827 \n",
"kurtosistest_b 0.0 \n",
"normaltest_a 7462.935452 \n",
"normaltest_b 0.0 \n",
"jarque_bera_a 92.432906 \n",
"jarque_bera_b 0.0 \n",
"shapiro_a 0.79431 \n",
"shapiro_b 0.0 \n",
"anderson_a 81.257078 \n",
"anderson_b [0.574, 0.653, 0.784, 0.914, 1.088] \n",
"anderson_c [15.0, 10.0, 5.0, 2.5, 1.0] \n",
"energy 1666 \n",
"rms 1.290736 \n",
"entropy 1.584888 \n",
"autocorrelation 0.001508 \n",
"\n",
" D \\\n",
"size 1000 \n",
"non-null 1000 \n",
"nunique 3 \n",
"sum 989 \n",
"min 0 \n",
"max 2 \n",
"first 1 \n",
"last 1 \n",
"mean 0.989 \n",
"median 1.0 \n",
"mode 1 \n",
"gmean 0.0 \n",
"hmean 0.0 \n",
"Pmean 0.0 \n",
"geothmetic meandian 0.0 \n",
"variance 0.634879 \n",
"std 0.796793 \n",
"mad 1.0 \n",
"skewness 0.019674 \n",
"excesskurtosis 1.575346 \n",
"range 2 \n",
"Prange 0.0 \n",
"n_outliers 0 \n",
"P75 2.0 \n",
"P25 0.0 \n",
"P10 0.0 \n",
"P90 2.0 \n",
"PN NaN \n",
"skewtest_a 0.255835 \n",
"skewtest_b 0.798078 \n",
"kurtosistest_a 103.150756 \n",
"kurtosistest_b 0.0 \n",
"normaltest_a 10640.143829 \n",
"normaltest_b 0.0 \n",
"jarque_bera_a 84.632762 \n",
"jarque_bera_b 0.0 \n",
"shapiro_a 0.80017 \n",
"shapiro_b 0.0 \n",
"anderson_a 79.026048 \n",
"anderson_b [0.574, 0.653, 0.784, 0.914, 1.088] \n",
"anderson_c [15.0, 10.0, 5.0, 2.5, 1.0] \n",
"energy 1613 \n",
"rms 1.270039 \n",
"entropy 1.581618 \n",
"autocorrelation -0.015942 \n",
"\n",
" E \n",
"size 1000 \n",
"non-null 1000 \n",
"nunique 3 \n",
"sum 1026 \n",
"min 0 \n",
"max 2 \n",
"first 1 \n",
"last 2 \n",
"mean 1.026 \n",
"median 1.0 \n",
"mode 1 \n",
"gmean 0.0 \n",
"hmean 0.0 \n",
"Pmean 0.0 \n",
"geothmetic meandian 0.0 \n",
"variance 0.649324 \n",
"std 0.805806 \n",
"mad 1.0 \n",
"skewness -0.04714 \n",
"excesskurtosis 1.541503 \n",
"range 2 \n",
"Prange 0.0 \n",
"n_outliers 0 \n",
"P75 2.0 \n",
"P25 0.0 \n",
"P10 0.0 \n",
"P90 2.0 \n",
"PN NaN \n",
"skewtest_a -0.612722 \n",
"skewtest_b 0.54006 \n",
"kurtosistest_a 92.396965 \n",
"kurtosistest_b 0.0 \n",
"normaltest_a 8537.574579 \n",
"normaltest_b 0.0 \n",
"jarque_bera_a 89.004259 \n",
"jarque_bera_b 0.0 \n",
"shapiro_a 0.79688 \n",
"shapiro_b 0.0 \n",
"anderson_a 80.265394 \n",
"anderson_b [0.574, 0.653, 0.784, 0.914, 1.088] \n",
"anderson_c [15.0, 10.0, 5.0, 2.5, 1.0] \n",
"energy 1702 \n",
"rms 1.304607 \n",
"entropy 1.583318 \n",
"autocorrelation -0.001003 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"univariate.build_univariate_statistics(df, agg=\"all\", n_jobs=-1).T"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.04</td>\n",
" <td>1.026</td>\n",
" <td>1.002</td>\n",
" <td>0.989</td>\n",
" <td>1.026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>median</th>\n",
" <td>1.00</td>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"mean 1.04 1.026 1.002 0.989 1.026\n",
"median 1.00 1.000 1.000 1.000 1.000"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"univariate.build_univariate_statistics(df, agg=[\"mean\", \"median\"], n_jobs=1).T"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>somename</th>\n",
" <td>1.04</td>\n",
" <td>1.026</td>\n",
" <td>1.002</td>\n",
" <td>0.989</td>\n",
" <td>1.026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>median</th>\n",
" <td>1.00</td>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"somename 1.04 1.026 1.002 0.989 1.026\n",
"median 1.00 1.000 1.000 1.000 1.000"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"univariate.build_univariate_statistics(df, agg=[{'func':\"mean\", 'name':\"somename\"}, \"median\"], n_jobs=1).T"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>func_0</th>\n",
" <td>1.04</td>\n",
" <td>1.026</td>\n",
" <td>1.002</td>\n",
" <td>0.989</td>\n",
" <td>1.026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.04</td>\n",
" <td>1.026</td>\n",
" <td>1.002</td>\n",
" <td>0.989</td>\n",
" <td>1.026</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"func_0 1.04 1.026 1.002 0.989 1.026\n",
"mean 1.04 1.026 1.002 0.989 1.026"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"univariate.build_univariate_statistics(df, agg=[np.mean, \"mean\"], n_jobs=1).T"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>skewtest_a</th>\n",
" <td>-0.951391</td>\n",
" <td>-0.612722</td>\n",
" <td>-0.047614</td>\n",
" <td>0.255835</td>\n",
" <td>-0.612722</td>\n",
" </tr>\n",
" <tr>\n",
" <th>skewtest_b</th>\n",
" <td>0.341406</td>\n",
" <td>0.540060</td>\n",
" <td>0.962024</td>\n",
" <td>0.798078</td>\n",
" <td>0.540060</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.040000</td>\n",
" <td>1.026000</td>\n",
" <td>1.002000</td>\n",
" <td>0.989000</td>\n",
" <td>1.026000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"skewtest_a -0.951391 -0.612722 -0.047614 0.255835 -0.612722\n",
"skewtest_b 0.341406 0.540060 0.962024 0.798078 0.540060\n",
"mean 1.040000 1.026000 1.002000 0.989000 1.026000"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"univariate.build_univariate_statistics(df, agg=[\"skewtest\", \"mean\"], n_jobs=1).T"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>skewtest_stat</th>\n",
" <td>-0.951391</td>\n",
" <td>-0.612722</td>\n",
" <td>-0.047614</td>\n",
" <td>0.255835</td>\n",
" <td>-0.612722</td>\n",
" </tr>\n",
" <tr>\n",
" <th>skewtest_p</th>\n",
" <td>0.341406</td>\n",
" <td>0.540060</td>\n",
" <td>0.962024</td>\n",
" <td>0.798078</td>\n",
" <td>0.540060</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.040000</td>\n",
" <td>1.026000</td>\n",
" <td>1.002000</td>\n",
" <td>0.989000</td>\n",
" <td>1.026000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"skewtest_stat -0.951391 -0.612722 -0.047614 0.255835 -0.612722\n",
"skewtest_p 0.341406 0.540060 0.962024 0.798078 0.540060\n",
"mean 1.040000 1.026000 1.002000 0.989000 1.026000"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"univariate.build_univariate_statistics(df, agg=[{'func':\"skewtest\", \"ret_names\":[\"stat\", \"p\"]}, \"mean\"], n_jobs=1).T"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv_microwave (3.13.2)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}