Mercurial > repos > recetox > table_scipy_interpolate
diff table_pandas_transform.py @ 0:0112f08c95ed draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author | recetox |
---|---|
date | Wed, 29 Jan 2025 15:36:02 +0000 (3 months ago) |
parents | |
children |
line wrap: on
line diff
"""Apply an element-wise mathematical transformation to selected table columns."""
import argparse
import logging
from typing import Callable, List, Tuple


import numpy as np
import pandas as pd
from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction


# Define the available transformations.
# NOTE: "log" and "ln" are both the natural logarithm (np.log); "log10" is base 10.
TRANSFORMATIONS = {
    "log": np.log,
    "log10": np.log10,
    "ln": np.log,
    "sqrt": np.sqrt,
    "exp": np.exp,
    "abs": np.abs,
    "floor": np.floor,
    "ceil": np.ceil,
}


def _validate_column_indices(columns: List[int], n_columns: int) -> None:
    """Raise IndexError unless every index lies in ``[0, n_columns)``."""
    for column_index in columns:
        # Negative values are rejected explicitly: positional indexing would
        # otherwise wrap around and silently pick a column from the end.
        if not 0 <= column_index < n_columns:
            raise IndexError(
                f"column index {column_index} is out of bounds "
                f"for a table with {n_columns} columns"
            )


def apply_transformation(
    df: pd.DataFrame, columns: List[int], transformation: str
) -> pd.DataFrame:
    """
    Apply the specified transformation to the given columns of the dataframe.

    The transformation name and all column indices are checked before any
    column is modified, so an invalid argument never leaves the dataframe
    partially transformed. The columns are overwritten in place and the same
    dataframe object is returned.

    Parameters:
    df (pd.DataFrame): The input dataframe (modified in place).
    columns (List[int]): The 0-based indices of the columns to transform.
        Negative indices are rejected.
    transformation (str): The transformation to apply; a key of TRANSFORMATIONS.

    Returns:
    pd.DataFrame: The dataframe with the transformation applied.

    Raises:
    KeyError: If the transformation is not a key of TRANSFORMATIONS.
    IndexError: If any column index is negative or past the last column.
    """
    # Resolve the function on its own so that only an unknown transformation
    # name is reported as such.
    try:
        transform_func = TRANSFORMATIONS[transformation]
    except KeyError as e:
        logging.error(f"Invalid transformation: {e}")
        raise

    # Validate every index up front, before the first column is overwritten.
    try:
        _validate_column_indices(columns, len(df.columns))
    except IndexError as e:
        logging.error(f"Invalid column index: {e}")
        raise

    try:
        for column_index in columns:
            column_name = df.columns[column_index]
            df[column_name] = transform_func(df[column_name])
    except Exception as e:
        # Typically a non-numeric column; log for the tool's stderr and re-raise.
        logging.error(f"Error applying transformation: {e}")
        raise
    return df


def main(
    input_dataset: pd.DataFrame,
    columns: List[int],
    transformation: str,
    output_dataset: Tuple[Callable[[pd.DataFrame, str], None], str],
) -> None:
    """
    Apply the transformation to the dataset and write the result.

    Parameters:
    input_dataset (pd.DataFrame): The input dataset.
    columns (List[int]): The 0-based indices of the columns to transform.
    transformation (str): The transformation to apply.
    output_dataset (Tuple[Callable[[pd.DataFrame, str], None], str]): A pair
        of a writer function and the path it is called with.

    Raises:
    Exception: Any error from the transformation or the writer is logged and
        re-raised unchanged.
    """
    try:
        transformed = apply_transformation(input_dataset, columns, transformation)
        write_func, file_path = output_dataset
        write_func(transformed, file_path)
    except Exception as e:
        logging.error(f"Error in main function: {e}")
        raise


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        description="Apply mathematical transformations to dataframe columns."
    )
    parser.add_argument(
        "--input_dataset",
        nargs=2,
        action=LoadDataAction,
        required=True,
        help="Path to the input dataset and its file extension (csv, tsv, parquet)",
    )
    # The help text promises 1-based indices while main() documents 0-based
    # ones; presumably SplitColumnIndicesAction converts - confirm in utils.
    parser.add_argument(
        "--columns",
        action=SplitColumnIndicesAction,
        required=True,
        help="Comma-separated list of 1-based indices of the columns to apply the transformation on",
    )
    parser.add_argument(
        "--transformation",
        type=str,
        choices=TRANSFORMATIONS.keys(),
        required=True,
        help="Transformation to apply",
    )
    parser.add_argument(
        "--output_dataset",
        nargs=2,
        action=StoreOutputAction,
        required=True,
        help="Path to the output dataset and its file extension (csv, tsv, parquet)",
    )

    args = parser.parse_args()
    main(args.input_dataset, args.columns, args.transformation, args.output_dataset)