{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import string" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD0
00222b
11211c
21011d
30010d
42122c
50000d
60222a
70200a
80100a
90221b
102201c
112111a
120102a
132101c
141001b
\n", "
" ], "text/plain": [ " A B C D 0\n", "0 0 2 2 2 b\n", "1 1 2 1 1 c\n", "2 1 0 1 1 d\n", "3 0 0 1 0 d\n", "4 2 1 2 2 c\n", "5 0 0 0 0 d\n", "6 0 2 2 2 a\n", "7 0 2 0 0 a\n", "8 0 1 0 0 a\n", "9 0 2 2 1 b\n", "10 2 2 0 1 c\n", "11 2 1 1 1 a\n", "12 0 1 0 2 a\n", "13 2 1 0 1 c\n", "14 1 0 0 1 b" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(np.random.randint(0,3,size=(15, 4)), columns=list('ABCD'))\n", "df = pd.concat([df, pd.DataFrame(np.random.choice(list(string.ascii_letters)[:4], size=15, replace=True))], axis=1)\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c:\\Users\\Edouard\\Documents\\Git\\microwave\n" ] } ], "source": [ "%cd ..\n", "import microwave.data_processing as dp" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD0_encoded_a0_encoded_b0_encoded_c0_encoded_d
00.02.02.02.00.01.00.00.0
11.02.01.01.00.00.01.00.0
21.00.01.01.00.00.00.01.0
30.00.01.00.00.00.00.01.0
42.01.02.02.00.00.01.00.0
50.00.00.00.00.00.00.01.0
60.02.02.02.01.00.00.00.0
70.02.00.00.01.00.00.00.0
80.01.00.00.01.00.00.00.0
90.02.02.01.00.01.00.00.0
102.02.00.01.00.00.01.00.0
112.01.01.01.01.00.00.00.0
120.01.00.02.01.00.00.00.0
132.01.00.01.00.00.01.00.0
141.00.00.01.00.01.00.00.0
\n", "
" ], "text/plain": [ " A B C D 0_encoded_a 0_encoded_b 0_encoded_c 0_encoded_d\n", "0 0.0 2.0 2.0 2.0 0.0 1.0 0.0 0.0\n", "1 1.0 2.0 1.0 1.0 0.0 0.0 1.0 0.0\n", "2 1.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0\n", "3 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0\n", "4 2.0 1.0 2.0 2.0 0.0 0.0 1.0 0.0\n", "5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0\n", "6 0.0 2.0 2.0 2.0 1.0 0.0 0.0 0.0\n", "7 0.0 2.0 0.0 0.0 1.0 0.0 0.0 0.0\n", "8 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0\n", "9 0.0 2.0 2.0 1.0 0.0 1.0 0.0 0.0\n", "10 2.0 2.0 0.0 1.0 0.0 0.0 1.0 0.0\n", "11 2.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0\n", "12 0.0 1.0 0.0 2.0 1.0 0.0 0.0 0.0\n", "13 2.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0\n", "14 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df, trans = dp.df_to_numeric(df)\n", "new_df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0,\n", " OneHotEncoder(sparse_output=False),\n", " ['0_encoded_a', '0_encoded_b', '0_encoded_c', '0_encoded_d']),\n", " ('A',\n", " FunctionTransformer(func=. at 0x000001B4F3F920C0>),\n", " ['column']),\n", " ('B',\n", " FunctionTransformer(func=. at 0x000001B4F3F920C0>),\n", " ['column']),\n", " ('C',\n", " FunctionTransformer(func=. at 0x000001B4F3F920C0>),\n", " ['column']),\n", " ('D',\n", " FunctionTransformer(func=. at 0x000001B4F3F920C0>),\n", " ['column']),\n", " ('0_encoded_a',\n", " FunctionTransformer(func=. at 0x000001B4F3F920C0>),\n", " ['column']),\n", " ('0_encoded_b',\n", " FunctionTransformer(func=. at 0x000001B4F3F920C0>),\n", " ['column']),\n", " ('0_encoded_c',\n", " FunctionTransformer(func=. at 0x000001B4F3F920C0>),\n", " ['column']),\n", " ('0_encoded_d',\n", " FunctionTransformer(func=. 
at 0x000001B4F3F920C0>),\n", " ['column'])]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the transforms recorded by the df_to_numeric call above. In the stored output\n", "# each entry looks like (source column, transformer, output column names)\n", "# -- TODO confirm the tuple layout against dp.df_to_numeric.\n", "trans.transforms" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\Edouard\\Documents\\Git\\microwave\\.venv_microwave\\Lib\\site-packages\\sklearn\\preprocessing\\_label.py:110: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", " y = column_or_1d(y, warn=True)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD0_encoded
00.02.02.02.01.0
11.02.01.01.02.0
21.00.01.01.03.0
30.00.01.00.03.0
42.01.02.02.02.0
50.00.00.00.03.0
60.02.02.02.00.0
70.02.00.00.00.0
80.01.00.00.00.0
90.02.02.01.01.0
102.02.00.01.02.0
112.01.01.01.00.0
120.01.00.02.00.0
132.01.00.01.02.0
141.00.00.01.01.0
\n", "
" ], "text/plain": [ " A B C D 0_encoded\n", "0 0.0 2.0 2.0 2.0 1.0\n", "1 1.0 2.0 1.0 1.0 2.0\n", "2 1.0 0.0 1.0 1.0 3.0\n", "3 0.0 0.0 1.0 0.0 3.0\n", "4 2.0 1.0 2.0 2.0 2.0\n", "5 0.0 0.0 0.0 0.0 3.0\n", "6 0.0 2.0 2.0 2.0 0.0\n", "7 0.0 2.0 0.0 0.0 0.0\n", "8 0.0 1.0 0.0 0.0 0.0\n", "9 0.0 2.0 2.0 1.0 1.0\n", "10 2.0 2.0 0.0 1.0 2.0\n", "11 2.0 1.0 1.0 1.0 0.0\n", "12 0.0 1.0 0.0 2.0 0.0\n", "13 2.0 1.0 0.0 1.0 2.0\n", "14 1.0 0.0 0.0 1.0 1.0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df, trans = dp.df_to_numeric(df, encoding=\"label\")\n", "new_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv_microwave (3.13.2)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.2" } }, "nbformat": 4, "nbformat_minor": 2 }