Mercurial > repos > recetox > table_scipy_interpolate
comparison table_pandas_transform.py @ 0:0112f08c95ed draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
| author | recetox |
|---|---|
| date | Wed, 29 Jan 2025 15:36:02 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0112f08c95ed |
|---|---|
| 1 import argparse | |
| 2 import logging | |
| 3 from typing import Callable, List, Tuple | |
| 4 | |
| 5 | |
| 6 import numpy as np | |
| 7 import pandas as pd | |
| 8 from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction | |
| 9 | |
| 10 | |
# Define the available transformations.
# Maps each transformation name to the NumPy function applied to a column.
# "log" and "ln" are aliases: both map to the natural logarithm (np.log).
TRANSFORMATIONS = {
    "log": np.log,
    "log10": np.log10,
    "ln": np.log,
    "sqrt": np.sqrt,
    "exp": np.exp,
    "abs": np.abs,
    "floor": np.floor,
    "ceil": np.ceil,
}


def apply_transformation(
    df: pd.DataFrame, columns: List[int], transformation: str
) -> pd.DataFrame:
    """
    Apply the specified transformation to the given columns of the dataframe.

    The dataframe is modified in place and the same object is returned.
    The transformation name and all column indices are validated before any
    column is changed, so an invalid name or index leaves `df` untouched.

    Parameters:
    df (pd.DataFrame): The input dataframe (mutated in place).
    columns (List[int]): The 0-based indices of the columns to transform.
    transformation (str): The transformation to apply; a key of TRANSFORMATIONS.

    Returns:
    pd.DataFrame: The dataframe with the transformation applied.

    Raises:
    KeyError: If `transformation` is not a key of TRANSFORMATIONS.
    IndexError: If a column index cannot be resolved to a column of `df`.
    Exception: Any error raised while transforming a column is logged and
        re-raised unchanged.
    """
    # Stage 1: look up the function. Only this lookup is reported as an
    # invalid transformation; a KeyError raised later is a different problem.
    try:
        transform_func = TRANSFORMATIONS[transformation]
    except KeyError as e:
        logging.error(f"Invalid transformation: {e}")
        raise

    # Stage 2: resolve every index to a column label up front, so a bad index
    # is detected before any column has been overwritten.
    try:
        column_names = [df.columns[column_index] for column_index in columns]
    except IndexError as e:
        logging.error(f"Invalid column index: {e}")
        raise

    # Stage 3: transform the selected columns in place.
    try:
        for column_name in column_names:
            df[column_name] = transform_func(df[column_name])
    except Exception as e:
        logging.error(f"Error applying transformation: {e}")
        raise

    return df
| 53 | |
| 54 | |
def main(
    input_dataset: pd.DataFrame,
    columns: List[int],
    transformation: str,
    output_dataset: Tuple[Callable[[pd.DataFrame, str], None], str],
) -> None:
    """
    Transform the selected columns of a dataframe and hand the result to a writer.

    Parameters:
    input_dataset (pd.DataFrame): The already-loaded input dataframe.
    columns (List[int]): The 0-based indices of the columns to transform.
    transformation (str): The transformation to apply.
    output_dataset (Tuple[Callable[[pd.DataFrame, str], None], str]): A pair of
        (writer callable, destination path); the writer is called with the
        transformed dataframe and the path.

    Raises:
    Exception: Any failure while transforming or writing is logged and
        re-raised unchanged.
    """
    try:
        transformed = apply_transformation(input_dataset, columns, transformation)
        # The pair is unpacked only after the transformation has succeeded.
        writer, destination = output_dataset
        writer(transformed, destination)
    except Exception as error:
        logging.error(f"Error in main function: {error}")
        raise
| 77 | |
| 78 | |
if __name__ == "__main__":
    # Script entry point: configure logging, parse the command line, run main().
    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser(
        description="Apply mathematical transformations to dataframe columns."
    )

    # The custom actions come from the project-local `utils` module (not shown
    # here). NOTE(review): presumably they turn the raw arguments into the
    # objects main() expects (dataframe, 0-based indices, writer/path pair) —
    # confirm against utils.
    cli.add_argument(
        "--input_dataset",
        action=LoadDataAction,
        nargs=2,
        required=True,
        help="Path to the input dataset and its file extension (csv, tsv, parquet)",
    )
    cli.add_argument(
        "--columns",
        action=SplitColumnIndicesAction,
        required=True,
        help="Comma-separated list of 1-based indices of the columns to apply the transformation on",
    )
    # Only names present in TRANSFORMATIONS are accepted.
    cli.add_argument(
        "--transformation",
        type=str,
        choices=TRANSFORMATIONS.keys(),
        required=True,
        help="Transformation to apply",
    )
    cli.add_argument(
        "--output_dataset",
        action=StoreOutputAction,
        nargs=2,
        required=True,
        help="Path to the output dataset and its file extension (csv, tsv, parquet)",
    )

    options = cli.parse_args()
    main(
        options.input_dataset,
        options.columns,
        options.transformation,
        options.output_dataset,
    )
