comparison table_pandas_transform.py @ 0:0112f08c95ed draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author recetox
date Wed, 29 Jan 2025 15:36:02 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0112f08c95ed
1 import argparse
2 import logging
3 from typing import Callable, List, Tuple
4
5
6 import numpy as np
7 import pandas as pd
8 from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction
9
10
# Available transformations, keyed by the name accepted on the command line.
# "log" and "ln" are both the natural logarithm.
TRANSFORMATIONS = {
    "log": np.log,
    "log10": np.log10,
    "ln": np.log,
    "sqrt": np.sqrt,
    "exp": np.exp,
    "abs": np.abs,
    "floor": np.floor,
    "ceil": np.ceil,
}


def apply_transformation(
    df: pd.DataFrame, columns: List[int], transformation: str
) -> pd.DataFrame:
    """
    Apply the specified transformation to the given columns of the dataframe.

    Columns are read and written strictly by position, so only the requested
    columns change even when several columns share the same label.

    Parameters:
    df (pd.DataFrame): The input dataframe. It is modified in place.
    columns (List[int]): The 0-based indices of the columns to transform.
        Negative indices count from the last column.
    transformation (str): The transformation to apply; a key of TRANSFORMATIONS.

    Returns:
    pd.DataFrame: The same dataframe object with the transformation applied.

    Raises:
    KeyError: If ``transformation`` is not a key of TRANSFORMATIONS.
    IndexError: If a column index is out of range for ``df``.
    """
    # Look the function up on its own so that only an unknown transformation
    # name is reported as "Invalid transformation".
    try:
        transform_func = TRANSFORMATIONS[transformation]
    except KeyError as e:
        logging.error(f"Invalid transformation: {e}")
        raise

    column_count = df.shape[1]
    try:
        for column_index in columns:
            # Indexing a range validates the bounds (IndexError) and turns a
            # negative index into its non-negative position.
            position = range(column_count)[column_index]
            # isetitem replaces the column at that position; assigning via the
            # label would overwrite every column carrying the same label.
            df.isetitem(position, transform_func(df.iloc[:, position]))
    except IndexError as e:
        logging.error(f"Invalid column index: {e}")
        raise
    except Exception as e:
        logging.error(f"Error applying transformation: {e}")
        raise
    return df
53
54
def main(
    input_dataset: pd.DataFrame,
    columns: List[int],
    transformation: str,
    output_dataset: Tuple[Callable[[pd.DataFrame, str], None], str],
) -> None:
    """
    Transform the selected columns of a dataset and write the result out.

    Parameters:
    input_dataset (pd.DataFrame): The already loaded input dataset.
    columns (List[int]): The 0-based indices of the columns to transform.
    transformation (str): The transformation to apply.
    output_dataset (Tuple[Callable[[pd.DataFrame, str], None], str]): A pair of
        the writer callable and the path that is handed to it.

    Any exception raised along the way is logged and then re-raised.
    """
    try:
        transformed = apply_transformation(input_dataset, columns, transformation)
        # The pair is unpacked only after the transformation has succeeded.
        writer, destination = output_dataset
        writer(transformed, destination)
    except Exception as error:
        logging.error(f"Error in main function: {error}")
        raise
77
78
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Command-line interface; the custom actions are imported from utils.
    cli = argparse.ArgumentParser(
        description="Apply mathematical transformations to dataframe columns."
    )

    # Two values per dataset option: a path and a file extension.
    cli.add_argument(
        "--input_dataset",
        action=LoadDataAction,
        nargs=2,
        required=True,
        help="Path to the input dataset and its file extension (csv, tsv, parquet)",
    )
    cli.add_argument(
        "--columns",
        action=SplitColumnIndicesAction,
        required=True,
        help="Comma-separated list of 1-based indices of the columns to apply the transformation on",
    )
    # Only names registered in TRANSFORMATIONS are accepted.
    cli.add_argument(
        "--transformation",
        type=str,
        choices=TRANSFORMATIONS.keys(),
        required=True,
        help="Transformation to apply",
    )
    cli.add_argument(
        "--output_dataset",
        action=StoreOutputAction,
        nargs=2,
        required=True,
        help="Path to the output dataset and its file extension (csv, tsv, parquet)",
    )

    parsed = cli.parse_args()
    main(
        input_dataset=parsed.input_dataset,
        columns=parsed.columns,
        transformation=parsed.transformation,
        output_dataset=parsed.output_dataset,
    )