{ "cells": [
 { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2024-04-16T17:40:39.605207Z", "start_time": "2024-04-16T17:40:37.592172Z" } }, "source": [
  "# All imports used by the data-preparation cells live here: stdlib, third-party, then project-local.\n",
  "import os\n",
  "\n",
  "import numpy as np\n",
  "import pandas as pd\n",
  "from sklearn.model_selection import train_test_split\n",
  "from sklearn.preprocessing import StandardScaler\n",
  "\n",
  "from data import load_data, categorize_metadata, process_experiments"
 ], "outputs": [], "execution_count": 1 },
 { "metadata": { "ExecuteTime": { "end_time": "2024-04-16T17:40:40.100841Z", "start_time": "2024-04-16T17:40:39.605207Z" } }, "cell_type": "code", "source": [
  "# Load the train and test splits from ../data/<split>, then split each metadata\n",
  "# table into the target (truth_*) and the remaining metadata_* columns.\n",
  "# NOTE(review): the second argument to load_data is an empty string - confirm its meaning in data.py.\n",
  "metadata_train, experiments_train = load_data(os.path.join(\"..\", \"data\", \"train\"), \"\")\n",
  "truth_train, metadata_train = categorize_metadata(metadata_train)\n",
  "metadata_test, experiments_test = load_data(os.path.join(\"..\", \"data\", \"test\"), \"\")\n",
  "truth_test, metadata_test = categorize_metadata(metadata_test)"
 ], "id": "f48d3dca7499a5f8", "outputs": [], "execution_count": 2 },
 { "metadata": { "ExecuteTime": { "end_time": "2024-04-16T17:40:43.055129Z", "start_time": "2024-04-16T17:40:40.100841Z" } }, "cell_type": "code", "source": [
  "# One set of preprocessing settings shared by both splits, so train and test\n",
  "# features cannot drift apart when a value is tuned.\n",
  "PROCESS_KWARGS = dict(baseline_lam=10, baseline_p=1e-2, smooth_window_length=7, smooth_polyorder=3)\n",
  "\n",
  "# Column-wise concatenation of metadata and processed experiments gives the\n",
  "# unscaled feature frames; they are kept under *_raw names so the scaling cell\n",
  "# below can be re-run safely.\n",
  "processed_train = process_experiments(experiments_train, **PROCESS_KWARGS)\n",
  "X_train_raw = pd.concat([metadata_train, processed_train], axis=1)\n",
  "processed_test = process_experiments(experiments_test, **PROCESS_KWARGS)\n",
  "X_test_raw = pd.concat([metadata_test, processed_test], axis=1)"
 ], "id": "8fb458c0b78c9aa7", "outputs": [], "execution_count": 3 },
 { "metadata": { "ExecuteTime": { "end_time": "2024-04-16T17:40:43.127855Z", "start_time": "2024-04-16T17:40:43.055129Z" } }, "cell_type": "code", "source": [
  "# Fit the scaler on the training split only, then apply that same transform to\n",
  "# both splits. Reading from the *_raw frames (instead of overwriting X_train and\n",
  "# X_test from themselves) keeps this cell idempotent: re-running it no longer\n",
  "# refits on already-scaled data or scales X_test a second time.\n",
  "scaler = StandardScaler().fit(X_train_raw)\n",
  "X_train = scaler.transform(X_train_raw)\n",
  "X_test = scaler.transform(X_test_raw)"
 ], "id":
"2572f4508200d308", "outputs": [], "execution_count": 4 }, { "metadata": { "ExecuteTime": { "end_time": "2024-04-16T17:42:11.247830Z", "start_time": "2024-04-16T17:40:43.127855Z" } }, "cell_type": "code", "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.model_selection import GridSearchCV\n", "param_grid = {\n", " 'n_estimators': range(1, 201, 50),\n", " 'max_depth': range(10, 11, 10)\n", "}\n", "\n", "clf = RandomForestClassifier()\n", "\n", "grid_clf = GridSearchCV(clf, param_grid, cv=20)\n", "grid_clf.fit(X_train, truth_train.to_numpy().ravel())" ], "id": "80a355d2740ebf4a", "outputs": [ { "data": { "text/plain": [ "GridSearchCV(cv=20, estimator=RandomForestClassifier(),\n", " param_grid={'max_depth': range(10, 11, 10),\n", " 'n_estimators': range(1, 201, 50)})" ], "text/html": [ "
GridSearchCV(cv=20, estimator=RandomForestClassifier(),\n",
" param_grid={'max_depth': range(10, 11, 10),\n",
" 'n_estimators': range(1, 201, 50)})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=20, estimator=RandomForestClassifier(),\n",
" param_grid={'max_depth': range(10, 11, 10),\n",
" 'n_estimators': range(1, 201, 50)})RandomForestClassifier()
RandomForestClassifier()