First data dump and metadata handling

This commit is contained in:
2024-02-26 19:16:07 +01:00
parent f466cd0a2d
commit d7ec6835c8
32 changed files with 712983 additions and 0 deletions

29
data_raw/metadata.csv Normal file
View File

@@ -0,0 +1,29 @@
file,strain,replica,phase,objective,substrate,laser_power[%],grating,exposition[sec],confocalhigh,accumulations
A390SampleSpectraLiquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,A390,1.0,liquid,50,alu,100,1800,20,True,2
A390SampleSpectraliquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt,A390,3.0,liquid,50,alu,100,1800,20,True,2
A390SampleSpectraliquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,A390,3.0,liquid,50,alu,100,1800,20,True,2
A390SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt,A390,2.0,solid,50,alu,100,1800,20,True,2
A390SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,A390,2.0,solid,50,alu,100,1800,20,True,2
A390SampleSpectraSolid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,A390,3.0,solid,50,alu,100,1800,20,True,2
A390SampleSpectraSolid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round211 (2).txt,A390,3.0,solid,50,alu,100,1800,20,True,2
A390SampleSpectraSolid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round211.txt,A390,3.0,solid,50,alu,100,1800,20,True,2
CHA0Liquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,CHA0,1.0,liquid,50,alu,100,1800,20,True,2
CHA0SampleSpectraLiquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round (2).txt,CHA0,1.0,liquid,50,alu,100,1800,20,True,2
CHA0SampleSpectraLiquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round.txt,CHA0,1.0,liquid,50,alu,100,1800,20,True,2
CHA0SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,CHA0,3.0,liquid,50,alu,100,1800,20,True,2
CHA0SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2.txt,CHA0,3.0,liquid,50,alu,100,1800,20,True,2
CHA0SampleSpectraSolid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2 (2).txt,CHA0,1.0,solid,50,alu,100,1800,20,True,2
CHA0SampleSpectraSolid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2.txt,CHA0,1.0,solid,50,alu,100,1800,20,True,2
CHA0SampleSpectraSolid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt,CHA0,,solid,50,alu,100,1800,20,True,2
CHA0SampleSpectraSolid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu(1).txt,CHA0,,solid,50,alu,100,1800,20,True,2
CHA0SampleSpectraSolid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,CHA0,,solid,50,alu,100,1800,20,True,2
CHA0SampleSpectra_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,CHA0,,,50,alu,100,1800,20,True,2
F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt,F113,3.0,liquid,50,alu,100,1800,20,True,2
F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,F113,3.0,liquid,50,alu,100,1800,20,True,2
F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2_otherdrop1 (2).txt,F113,3.0,liquid,50,alu,100,1800,20,True,2
F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2_otherdrop1.txt,F113,3.0,liquid,50,alu,100,1800,20,True,2
F113SampleSpectraLiquid_50x_dried3_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2_otherdrop.txt,F113,,liquid,50,alu,100,1800,20,True,2
F113SampleSpectraLiquid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,F113,,liquid,50,alu,100,1800,20,True,2
F113SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt,F113,2.0,solid,50,alu,100,1800,20,True,2
F113SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2 (2).txt,F113,2.0,solid,50,alu,100,1800,20,True,2
F113SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2.txt,F113,2.0,solid,50,alu,100,1800,20,True,2
1 file strain replica phase objective substrate laser_power[%] grating exposition[sec] confocalhigh accumulations
2 A390SampleSpectraLiquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt A390 1.0 liquid 50 alu 100 1800 20 True 2
3 A390SampleSpectraliquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt A390 3.0 liquid 50 alu 100 1800 20 True 2
4 A390SampleSpectraliquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt A390 3.0 liquid 50 alu 100 1800 20 True 2
5 A390SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt A390 2.0 solid 50 alu 100 1800 20 True 2
6 A390SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt A390 2.0 solid 50 alu 100 1800 20 True 2
7 A390SampleSpectraSolid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt A390 3.0 solid 50 alu 100 1800 20 True 2
8 A390SampleSpectraSolid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round211 (2).txt A390 3.0 solid 50 alu 100 1800 20 True 2
9 A390SampleSpectraSolid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round211.txt A390 3.0 solid 50 alu 100 1800 20 True 2
10 CHA0Liquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt CHA0 1.0 liquid 50 alu 100 1800 20 True 2
11 CHA0SampleSpectraLiquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round (2).txt CHA0 1.0 liquid 50 alu 100 1800 20 True 2
12 CHA0SampleSpectraLiquid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round.txt CHA0 1.0 liquid 50 alu 100 1800 20 True 2
13 CHA0SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt CHA0 3.0 liquid 50 alu 100 1800 20 True 2
14 CHA0SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2.txt CHA0 3.0 liquid 50 alu 100 1800 20 True 2
15 CHA0SampleSpectraSolid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2 (2).txt CHA0 1.0 solid 50 alu 100 1800 20 True 2
16 CHA0SampleSpectraSolid1_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2.txt CHA0 1.0 solid 50 alu 100 1800 20 True 2
17 CHA0SampleSpectraSolid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt CHA0 solid 50 alu 100 1800 20 True 2
18 CHA0SampleSpectraSolid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu(1).txt CHA0 solid 50 alu 100 1800 20 True 2
19 CHA0SampleSpectraSolid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt CHA0 solid 50 alu 100 1800 20 True 2
20 CHA0SampleSpectra_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt CHA0 50 alu 100 1800 20 True 2
21 F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu (2).txt F113 3.0 liquid 50 alu 100 1800 20 True 2
22 F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt F113 3.0 liquid 50 alu 100 1800 20 True 2
23 F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2_otherdrop1 (2).txt F113 3.0 liquid 50 alu 100 1800 20 True 2
24 F113SampleSpectraLiquid3_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2_otherdrop1.txt F113 3.0 liquid 50 alu 100 1800 20 True 2
25 F113SampleSpectraLiquid_50x_dried3_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2_otherdrop.txt F113 liquid 50 alu 100 1800 20 True 2
26 F113SampleSpectraLiquid_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt F113 liquid 50 alu 100 1800 20 True 2
27 F113SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt F113 2.0 solid 50 alu 100 1800 20 True 2
28 F113SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2 (2).txt F113 2.0 solid 50 alu 100 1800 20 True 2
29 F113SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu_round2.txt F113 2.0 solid 50 alu 100 1800 20 True 2

View File

@@ -0,0 +1,21 @@
import os
import pandas as pd
# Column order of the generated table; "file" is turned into the index.
COLUMNS = [
    "file",
    "strain",
    "replica",
    "phase",
    "objective",
    "substrate",
    "laser_power[%]",
    "grating",
    "exposition[sec]",
    "confocalhigh",
    "accumulations",
]


def parse_filename(name):
    """Extract the metadata encoded in one spectrum file name.

    The name is split on "_" and the fields are read by position, e.g.
    ``A390SampleSpectraSolid2_50x_dried_drop_alu_100percent_1800gr_20sec_confocalhigh_2accu.txt``.

    Args:
        name: File name (not a path) of a ``.txt`` spectrum file.

    Returns:
        A dict with one entry per name in ``COLUMNS``. ``replica`` is
        ``None`` when the first token does not end in a decimal digit.

    Raises:
        ValueError: If the name has too few "_"-separated tokens or one of
            the numeric fields cannot be converted to ``int``.
    """
    tokens = name.split("_")
    head = tokens[0]

    # isdecimal() (not isnumeric()) so that int() below cannot fail on
    # characters such as superscript digits.
    has_replica = head[-1:].isdecimal()
    replica = int(head[-1]) if has_replica else None

    # Drop the optional "SampleSpectra" marker and the 4-character strain
    # prefix; what remains is the phase, optionally followed by the replica.
    phase = head.replace("SampleSpectra", "")[4:]
    if has_replica:
        # Only strip the last character when it really is the replica digit;
        # otherwise the phase would lose its final letter.
        phase = phase[:-1]

    try:
        # The last token may carry a copy marker and the extension
        # ("2accu (2).txt", "2accu(1).txt"), so isolate its first word.
        accu_word = tokens[9].replace(".", " ").replace("(", " ").split()[0]
        return {
            "file": name,
            "strain": name[:4].upper(),
            "replica": replica,
            "phase": phase.casefold(),
            "objective": int(tokens[1].replace("x", "")),
            "substrate": tokens[4].casefold(),
            "laser_power[%]": int(tokens[5].replace("percent", "")),
            "grating": int(tokens[6].replace("gr", "")),
            "exposition[sec]": int(tokens[7].replace("sec", "")),
            "confocalhigh": tokens[8] == "confocalhigh",
            "accumulations": int(accu_word.replace("accu", "")),
        }
    except (IndexError, ValueError) as err:
        raise ValueError(f"cannot parse metadata from file name {name!r}") from err


def build_metadata(filenames):
    """Return a DataFrame indexed by "file" with one row per file name.

    Args:
        filenames: Iterable of spectrum file names; may be empty, in which
            case an empty table with the expected columns is returned.
    """
    rows = [parse_filename(name) for name in filenames]
    return pd.DataFrame(rows, columns=COLUMNS).set_index("file")


# Describe every .txt file in the current working directory, store the table
# next to the data and echo it for a quick visual check.
df = build_metadata(name for name in os.listdir() if name.endswith(".txt"))
df.to_csv("metadata.csv")
print(df)