# Source code for imzml_writer.analyte_list_cleanup

import pandas as pd


def cleanup_table(input_data: pd.DataFrame, path: str):
    """Sanitize an analyte table (columns mz and name, in either order) for imzML Scout.

    The table is passed through three steps, in this order:

    1. check_headers() - makes sure a header row is accounted for.
    2. check_column_order() - makes sure the columns are ordered (name, mz).
    3. name_cleanup() - replaces characters in the names that would prevent
       files from being saved.

    This allows users to specify 'messy' excel sheets for bulk export without
    imzML Scout failing.

    :param input_data: Pandas dataframe of input mz and name
    :param path: Path to the corresponding excel sheet, used if the sheet needs to be reread to omit headers
    :return: Sanitized pandas dataframe of mz and names compatible with image/csv export of imzML Scout.
    """
    with_headers = check_headers(input_data, path)
    ordered = check_column_order(with_headers)
    return name_cleanup(ordered)
def name_cleanup(input_data: pd.DataFrame) -> pd.DataFrame:
    """Replace 'dangerous' characters in the first (name) column with "_".

    The characters "/", "." and "'" are each replaced by an underscore so the
    names can be stored safely. The replacement is written back into the first
    column of ``input_data``, so the dataframe passed in is modified and then
    returned.

    :param input_data: Pandas dataframe of form [name, mz]
    :return: The same pandas dataframe with the trouble characters replaced.
    """
    names = input_data.iloc[:, 0]
    for char in ("/", ".", "'"):
        # regex=False is required: pandas < 2.0 treats the pattern as a regular
        # expression by default, where "." matches *every* character and would
        # turn whole names into underscores.
        names = names.str.replace(char, "_", regex=False)
    input_data.iloc[:, 0] = names
    return input_data
def check_headers(input_data: pd.DataFrame, path: str) -> pd.DataFrame:
    """Detect a missing header row and, if needed, reread the sheet without one.

    A header row is taken to be missing when any column label is convertible
    to an integer with ``int()`` (i.e. an mz value ended up in the header).
    In that case the sheet at ``path`` is reread with ``header=None`` so the
    first row is kept as data; the columns of the returned dataframe are then
    labelled with pandas' default integer positions. Otherwise ``input_data``
    is returned unchanged.

    :param input_data: Pandas dataframe with columns of mz and names
    :param path: Absolute or relative path to the excel sheet, specified as a string
    :return: pandas dataframe of mz and names, reread without a header row if needed
    """

    def _is_number_like(label) -> bool:
        # Only the failures int() itself can raise are treated as "not a number";
        # anything else (e.g. KeyboardInterrupt) must propagate.
        try:
            int(label)
        except (TypeError, ValueError, OverflowError):
            return False
        return True

    if any(_is_number_like(head) for head in input_data.columns):
        return pd.read_excel(path, header=None)
    return input_data
def check_column_order(input_data: pd.DataFrame) -> pd.DataFrame:
    """Ensure the first two columns are in the order assumed by imzML_Scout (name, mz).

    The first two columns are swapped when the first one holds the mz values,
    which is taken to be the case when either:

    * the first column has dtype ``float64``, or
    * the first column is numeric and the second one is not (this covers
      sheets whose mz values are all whole numbers and are read as integers).

    When a swap happens only the first two columns are returned. Dataframes
    with fewer than two columns are returned unchanged.

    :param input_data: Pandas dataframe containing mz and names of targets
    :return: dataframe with columns ordered [name, mz]
    """
    if input_data.shape[1] < 2:
        # Nothing to reorder; avoids indexing a second column that does not exist.
        return input_data

    first_dtype = input_data.dtypes.iloc[0]
    second_dtype = input_data.dtypes.iloc[1]
    is_numeric = pd.api.types.is_numeric_dtype

    mz_in_first_column = first_dtype == "float64" or (
        is_numeric(first_dtype) and not is_numeric(second_dtype)
    )
    if mz_in_first_column:
        # Positional selection, so duplicate column labels cannot confuse the swap.
        input_data = input_data.iloc[:, [1, 0]]
    return input_data
# Example usage:
# path_file="/Users/josephmonaghan/Downloads/pos_analyte.xlsx"
# data=pd.read_excel(path_file)
# data=cleanup_table(data,path_file)
# print(data)