Mercurial > repos > recetox > table_pandas_rename_columns_regex
comparison table_pandas_transform.py @ 0:505a8e975968 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author | recetox |
---|---|
date | Wed, 29 Jan 2025 15:35:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:505a8e975968 |
---|---|
1 import argparse | |
2 import logging | |
3 from typing import Callable, List, Tuple | |
4 | |
5 | |
6 import numpy as np | |
7 import pandas as pd | |
8 from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction | |
9 | |
10 | |
# Registry of supported element-wise transformations.
# Keys are the names accepted on the command line; values are the NumPy
# functions that implement them. Insertion order is kept as-is because it
# is the order in which the choices are presented to the user.
TRANSFORMATIONS = {
    # Logarithms ("log" and "ln" are both the natural logarithm).
    "log": np.log,
    "log10": np.log10,
    "ln": np.log,
    # Roots and exponentials.
    "sqrt": np.sqrt,
    "exp": np.exp,
    # Magnitude and rounding.
    "abs": np.abs,
    "floor": np.floor,
    "ceil": np.ceil,
}
22 | |
23 | |
def apply_transformation(
    df: pd.DataFrame, columns: List[int], transformation: str
) -> pd.DataFrame:
    """
    Apply the specified transformation to the given columns of the dataframe.

    Columns are read and written strictly by position, so a table that
    contains duplicate column names only has the requested columns changed.
    The dataframe is modified in place and the same object is returned.

    Parameters:
    df (pd.DataFrame): The input dataframe (modified in place).
    columns (List[int]): The 0-based indices of the columns to transform.
        Negative indices count from the last column, as in normal Python
        indexing.
    transformation (str): The transformation to apply; must be a key of
        TRANSFORMATIONS.

    Returns:
    pd.DataFrame: The dataframe with the transformation applied.

    Raises:
    KeyError: If the transformation name is not a key of TRANSFORMATIONS.
    IndexError: If a column index is outside the dataframe's column range.
    Exception: Any error raised by the transformation itself is logged and
        re-raised unchanged.
    """
    # Resolve the function on its own so that only an unknown transformation
    # name is ever reported as "Invalid transformation".
    try:
        transform_func = TRANSFORMATIONS[transformation]
    except KeyError as e:
        logging.error(f"Invalid transformation: {e}")
        raise

    n_columns = df.shape[1]
    try:
        for column_index in columns:
            if not -n_columns <= column_index < n_columns:
                raise IndexError(
                    f"index {column_index} is out of bounds for a table "
                    f"with {n_columns} columns"
                )
            # Normalise negative indices to a plain 0-based position.
            position = column_index % n_columns
            # Positional read/write: looking the column up by its label
            # would select every column sharing that label when names are
            # duplicated, transforming columns that were not requested.
            df.isetitem(position, transform_func(df.iloc[:, position]))
    except IndexError as e:
        logging.error(f"Invalid column index: {e}")
        raise
    except Exception as e:
        logging.error(f"Error applying transformation: {e}")
        raise
    return df
53 | |
54 | |
def main(
    input_dataset: pd.DataFrame,
    columns: List[int],
    transformation: str,
    output_dataset: Tuple[Callable[[pd.DataFrame, str], None], str],
) -> None:
    """
    Transform the selected columns of a dataset and write out the result.

    Parameters:
    input_dataset (pd.DataFrame): The already-loaded input table.
    columns (List[int]): The 0-based indices of the columns to transform.
    transformation (str): The name of the transformation to apply.
    output_dataset (Tuple[Callable[[pd.DataFrame, str], None], str]): A pair
        of (writer function, destination path); the writer is called with
        the transformed table and the path.

    Any exception is logged and then re-raised to the caller.
    """
    try:
        transformed = apply_transformation(input_dataset, columns, transformation)
        # The output argument bundles the writer together with its target.
        writer, destination = output_dataset
        writer(transformed, destination)
    except Exception as error:
        logging.error(f"Error in main function: {error}")
        raise
77 | |
78 | |
if __name__ == "__main__":
    # Command-line entry point: configure logging, declare the four required
    # options, then pass the parsed values to main().
    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser(
        description="Apply mathematical transformations to dataframe columns."
    )

    # Input table: two values (path and extension) handled by LoadDataAction.
    cli.add_argument(
        "--input_dataset",
        nargs=2,
        action=LoadDataAction,
        required=True,
        help="Path to the input dataset and its file extension (csv, tsv, parquet)",
    )

    # Column selection, parsed by SplitColumnIndicesAction.
    cli.add_argument(
        "--columns",
        action=SplitColumnIndicesAction,
        required=True,
        help="Comma-separated list of 1-based indices of the columns to apply the transformation on",
    )

    # Transformation name, restricted to the keys of TRANSFORMATIONS.
    cli.add_argument(
        "--transformation",
        type=str,
        choices=TRANSFORMATIONS.keys(),
        required=True,
        help="Transformation to apply",
    )

    # Output table: two values (path and extension) handled by StoreOutputAction.
    cli.add_argument(
        "--output_dataset",
        nargs=2,
        action=StoreOutputAction,
        required=True,
        help="Path to the output dataset and its file extension (csv, tsv, parquet)",
    )

    options = cli.parse_args()
    main(
        options.input_dataset,
        options.columns,
        options.transformation,
        options.output_dataset,
    )