Better random forests
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -57,5 +57,5 @@ def load_raw_data() -> tuple[pd.DataFrame, list[pd.DataFrame]]:
|
||||
|
||||
def load_data(name: str, path: str = "data") -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load the metadata and experiments tables of one dataset.

    The files are expected at ``<path>/<name>/metadata.csv`` and
    ``<path>/<name>/experiments.csv``.

    Args:
        name: Name of the dataset sub-directory inside ``path``.
        path: Directory that contains the dataset sub-directories.

    Returns:
        A ``(metadata, experiments)`` tuple. ``metadata`` is read with pandas'
        default dtype inference; every column of ``experiments`` is parsed as
        ``float``.
    """
    dataset_dir = os.path.join(path, name)
    metadata = pd.read_csv(os.path.join(dataset_dir, "metadata.csv"))
    # Read the experiments file once, forcing float columns; an earlier
    # un-typed read of the same file was redundant because its result was
    # immediately overwritten.
    experiments = pd.read_csv(os.path.join(dataset_dir, "experiments.csv"), dtype=float)
    return metadata, experiments
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -80,9 +80,26 @@ def categorize_metadata(metadata: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFr
|
||||
return truth, encoded
|
||||
|
||||
|
||||
def process_experiments(experiments: pd.DataFrame, baseline_lam: int = 10, baseline_p: float = 1e-2,
                        smooth_window_length: int = 7, smooth_polyorder: int = 3) -> pd.DataFrame:
    """Run the experiments table through the three preprocessing steps.

    The steps are applied in this order, each one receiving the output of the
    previous one: ``adjust_all_baselines``, ``scale_experiments``,
    ``smooth_experiments``.

    Args:
        experiments: Raw experiments table.
        baseline_lam: Forwarded to ``adjust_all_baselines`` as ``lam``.
        baseline_p: Forwarded to ``adjust_all_baselines`` as ``p``.
        smooth_window_length: Forwarded to ``smooth_experiments`` as
            ``window_length``.
        smooth_polyorder: Forwarded to ``smooth_experiments`` as
            ``polyorder``.

    Returns:
        The result of the final ``smooth_experiments`` call.
    """
    experiments = adjust_all_baselines(experiments, lam=baseline_lam, p=baseline_p)
    experiments = scale_experiments(experiments)
    experiments = smooth_experiments(experiments, window_length=smooth_window_length, polyorder=smooth_polyorder)
    return experiments
|
||||
|
||||
|
||||
def process_train_test(params: dict, experiments_train: pd.DataFrame, metadata_train: pd.DataFrame,
                       experiments_test: pd.DataFrame = None, metadata_test: pd.DataFrame = None,
                       scale: bool=True) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Build the train (and optionally test) feature matrices.

    Each experiments table is passed through ``process_experiments`` with
    ``**params`` and concatenated column-wise (``axis=1``) to the right of its
    metadata table.

    Args:
        params: Keyword arguments forwarded to ``process_experiments``.
        experiments_train: Experiments table of the training set.
        metadata_train: Metadata table of the training set.
        experiments_test: Experiments table of the test set; when ``None`` no
            test matrix is built.
        metadata_test: Metadata table of the test set; only used when
            ``experiments_test`` is given.
        scale: When true, a ``StandardScaler`` is fitted on the training
            matrix only and then applied to both matrices.

    Returns:
        ``(X_train, X_test)``; ``X_test`` is ``None`` when no test experiments
        were supplied. With ``scale=True`` the values are whatever
        ``StandardScaler.transform`` returns.
        NOTE(review): that is presumably a NumPy array rather than a
        DataFrame, so the return annotation may be too narrow - confirm.
    """
    def _assemble(metadata, experiments):
        # Metadata columns first, processed experiment columns after them.
        processed = process_experiments(experiments, **params)
        return pd.concat([metadata, processed], axis=1)

    X_train = _assemble(metadata_train, experiments_train)
    X_test = None if experiments_test is None else _assemble(metadata_test, experiments_test)

    if not scale:
        return X_train, X_test

    # Fit on the training matrix only, then reuse the same fitted scaler for
    # the test matrix.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    if X_test is not None:
        X_test = scaler.transform(X_test)
    return X_train, X_test
|
||||
|
||||
Reference in New Issue
Block a user