"""Build ``metadata.csv`` from the names of the ``.txt`` files in the CWD.

Every file name is split on ``"_"`` and the pieces are decoded by position:

* token 0   -> ``replica`` (trailing decimal digit, if any) and ``phase``
  (token 0 without ``"SampleSpectra"``, without its first four characters
  and without the replica digit, case-folded)
* token 1   -> ``objective``        (integer left after removing ``"x"``)
* tokens 2-3 are not used
* token 4   -> ``substrate``        (case-folded)
* token 5   -> ``laser_power[%]``   (integer left after removing ``"percent"``)
* token 6   -> ``grating``          (integer left after removing ``"gr"``)
* token 7   -> ``exposition[sec]``  (integer left after removing ``"sec"``)
* token 8   -> ``confocalhigh``     (``True`` iff the token is exactly
  ``"confocalhigh"``)
* token 9   -> ``accumulations``    (integer left after removing ``"accu"``
  from the text preceding the first ``"."``, ``"("`` or whitespace)

``strain`` is the upper-cased first four characters of the whole file name.
"""
import os
from typing import Any, Dict, Iterable

import pandas as pd

# Column order of the resulting table; "file" becomes the index.
COLUMNS = [
    "file",
    "strain",
    "replica",
    "phase",
    "objective",
    "substrate",
    "laser_power[%]",
    "grating",
    "exposition[sec]",
    "confocalhigh",
    "accumulations",
]


def parse_filename(name: str) -> Dict[str, Any]:
    """Decode the metadata fields encoded in a single file name.

    Args:
        name: File name following the underscore-separated layout described
            in the module docstring.

    Returns:
        A dict with one entry per name in ``COLUMNS``. ``replica`` is an
        ``int`` when token 0 ends in a decimal digit and ``None`` otherwise.

    Raises:
        ValueError: If the name has too few tokens or a numeric field cannot
            be converted to ``int``. The message names the offending file.
    """
    tokens = name.split("_")
    head = tokens[0]
    try:
        # isdecimal() accepts exactly the characters int() can convert;
        # isnumeric() also accepts e.g. "²" or "½", which int() rejects.
        last_char = head[-1:]
        replica = int(last_char) if last_char.isdecimal() else None

        phase = head.replace("SampleSpectra", "")[4:].casefold()
        if replica is not None:
            # The trailing digit is the replica number, not part of the phase.
            phase = phase[:-1]

        # Token 9 still carries the extension and possibly a "(n)" suffix;
        # keep only the text before the first ".", "(" or whitespace.
        accu_token = tokens[9].replace(".", " ").replace("(", " ").split()[0]

        return {
            "file": name,
            "strain": name[:4].upper(),
            "replica": replica,
            "phase": phase,
            "objective": int(tokens[1].replace("x", "")),
            "substrate": tokens[4].casefold(),
            "laser_power[%]": int(tokens[5].replace("percent", "")),
            "grating": int(tokens[6].replace("gr", "")),
            "exposition[sec]": int(tokens[7].replace("sec", "")),
            "confocalhigh": tokens[8] == "confocalhigh",
            "accumulations": int(accu_token.replace("accu", "")),
        }
    except (IndexError, ValueError) as err:
        raise ValueError(
            f"cannot parse metadata from file name {name!r}"
        ) from err


def build_metadata(filenames: Iterable[str]) -> pd.DataFrame:
    """Return the metadata table for *filenames*, indexed by file name.

    Rows keep the order of *filenames*. An empty input yields an empty frame
    that still has every metadata column, so it can be written out as a
    header-only CSV instead of failing.
    """
    rows = [parse_filename(name) for name in filenames]
    return pd.DataFrame(rows, columns=COLUMNS).set_index("file")


def main() -> None:
    """Scan the current directory, write ``metadata.csv`` and print the table."""
    txt_files = [entry for entry in os.listdir() if entry.endswith(".txt")]
    df = build_metadata(txt_files)
    df.to_csv("metadata.csv")
    print(df)


if __name__ == "__main__":
    main()