import pandas as pd
[docs]
def cleanup_table(input_data: pd.DataFrame, path: str):
    """Sanitize a two-column (name / mz) table so imzML Scout can use it for bulk export.

    The table is passed through three steps, in this order:

    1. ``check_headers(input_data, path)`` - header validation.
       NOTE(review): ``check_headers`` is not defined in this part of the file;
       per the original notes it ensures the expected headers are present.
    2. ``check_column_order()`` - puts the columns in the order [name, mz].
    3. ``name_cleanup()`` - replaces characters in the names that would prevent
       files from being saved.

    This lets users supply 'messy' Excel sheets without imzML Scout failing.

    :param input_data: Pandas dataframe holding the mz values and names (either column order)
    :param path: Path to the Excel sheet the data came from; forwarded to ``check_headers``
        (per the original notes, in case the sheet has to be re-read to deal with headers)
    :return: Sanitized pandas dataframe of names and mz values, as returned by ``name_cleanup()``.
    """
    with_headers = check_headers(input_data, path)
    ordered = check_column_order(with_headers)
    return name_cleanup(ordered)
[docs]
def name_cleanup(input_data: pd.DataFrame):
    """Replace file-system-unfriendly characters in the first (name) column with '_'.

    The characters ``/``, ``.`` and ``'`` are each replaced by an underscore so the
    names can safely be used when saving files. The first column of ``input_data``
    is overwritten in place and the same dataframe is returned.

    :param input_data: Pandas dataframe of form [name, mz]; the first column must hold strings
    :return: The same pandas dataframe with the trouble characters in column 0 replaced by '_'.
    """
    names = input_data.iloc[:, 0]
    repl_chars = ("/", ".", "'")
    for char in repl_chars:
        # regex=False is essential: with regex matching (the default before
        # pandas 2.0) "." is a wildcard and would turn every character into "_".
        names = names.str.replace(char, "_", regex=False)
    input_data.iloc[:, 0] = names
    return input_data
[docs]
def check_column_order(input_data: pd.DataFrame):
    """Ensure the table's columns are in the order imzML Scout assumes: (name, mz).

    The first column is treated as the mz column - and the first two columns are
    swapped - when its dtype is ``float64``, or when it has any other numeric
    dtype (e.g. integer-valued mz read as ``int64``) while the second column is
    not numeric. When a swap happens only the first two columns are kept.
    Dataframes with fewer than two columns are returned unchanged.

    :param input_data: Pandas dataframe containing mz and names of targets
    :return: dataframe with columns ordered [name, mz]
    """
    headers = list(input_data)
    if len(headers) < 2:
        # Nothing to reorder; avoids an IndexError on malformed input.
        return input_data

    first_dtype = input_data.dtypes.iloc[0]
    second_dtype = input_data.dtypes.iloc[1]

    def _is_number(dtype):
        # Booleans count as numeric in pandas but are never mz values.
        return pd.api.types.is_numeric_dtype(dtype) and not pd.api.types.is_bool_dtype(dtype)

    mz_first = first_dtype == "float64" or (
        _is_number(first_dtype) and not _is_number(second_dtype)
    )
    if mz_first:
        # mz in column 0, reorder
        input_data = input_data[[headers[1], headers[0]]]
    return input_data
# path_file="/Users/josephmonaghan/Downloads/pos_analyte.xlsx"
# data=pd.read_excel(path_file)
# data=cleanup_table(data,path_file)
# print(data)