RamanClassifier/data/data_processing.py
from pybaselines import Baseline
from scipy.signal import savgol_filter
from sklearn.preprocessing import StandardScaler

def calculate_baseline(measure, lam=10, p=1e-2):
    # First half of `measure` holds wavenumbers, second half intensities.
    half = len(measure) // 2
    baseline_fitter = Baseline(x_data=measure[:half])
    baseline, _params = baseline_fitter.iasls(measure[half:], lam=lam, p=p)
    return baseline

def adjust_baseline(measure, lam=10, p=1e-2):
    # Subtract the fitted IAsLS baseline from the intensity half.
    baseline = calculate_baseline(measure, lam=lam, p=p)
    return measure[len(measure) // 2:] - baseline

def adjust_all_baselines(measures, lam=10, p=1e-2):
    result = measures.copy(deep=True)
    # Enumerate for positional indexing, so a non-default DataFrame
    # index cannot silently break the row assignment.
    for i, (_, row) in enumerate(measures.iterrows()):
        result.iloc[i, len(row) // 2:] = adjust_baseline(row, lam=lam, p=p)
    return result

def scale_experiments(experiments):
    result = experiments.copy(deep=True)
    half = len(experiments.columns) // 2
    # Standardize each spectrum individually: StandardScaler works
    # column-wise, so transpose so that each column is one spectrum.
    scaled = StandardScaler().fit_transform(experiments.iloc[:, half:].T).T
    result.iloc[:, half:] = scaled
    return result

def apply_smoothing(experiment, window_length=7, polyorder=3):
    # Savitzky-Golay smoothing of the intensity half.
    return savgol_filter(experiment[len(experiment) // 2:], window_length, polyorder)

def smooth_experiments(experiments, window_length=7, polyorder=3):
    result = experiments.copy(deep=True)
    for i, (_, row) in enumerate(experiments.iterrows()):
        result.iloc[i, len(row) // 2:] = apply_smoothing(
            row, window_length=window_length, polyorder=polyorder)
    return result

def process_experiments(experiments, scale_features=True):
    # Full pipeline: baseline correction, per-spectrum scaling, smoothing.
    baselined_experiments = adjust_all_baselines(experiments)
    scaled_experiments = scale_experiments(baselined_experiments)
    smoothed_experiments = smooth_experiments(scaled_experiments)
    if scale_features:
        # Standardize each feature across experiments; note that
        # fit_transform returns a NumPy array, not a DataFrame.
        return StandardScaler().fit_transform(smoothed_experiments)
    return smoothed_experiments
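
For reference, a minimal usage sketch under the layout the functions above appear to assume (each row concatenates a wavenumber axis with the matching intensities); the data here is purely synthetic and illustrative:

import numpy as np
import pandas as pd

# Hypothetical toy data: 5 spectra sampled at 100 wavenumber points.
rng = np.random.default_rng(0)
wavenumbers = np.linspace(400, 1800, 100)
intensities = rng.random((5, 100)) + np.linspace(0, 2, 100)  # noise on a sloped baseline

# One row per experiment: [wavenumbers..., intensities...].
experiments = pd.DataFrame(
    [np.concatenate([wavenumbers, spectrum]) for spectrum in intensities])

features = process_experiments(experiments)  # NumPy array, feature-scaled
spectra = process_experiments(experiments, scale_features=False)  # DataFrame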