comparison table_pandas_arithmetics.py @ 0:e6d5fee8c7a6 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author recetox
date Wed, 29 Jan 2025 15:35:42 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e6d5fee8c7a6
1 import argparse
2 import logging
3 from typing import List, Tuple
4
5
6 import numpy as np
7 import pandas as pd
8 from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction
9
10
# Maps each supported operation name to the NumPy ufunc that implements it.
OPERATIONS = {
    "mul": np.multiply,
    "sub": np.subtract,
    "div": np.divide,
    "add": np.add,
    "pow": np.power,
}


def perform_operation(df: pd.DataFrame, column_indices: List[int], operation: str, operand: float) -> pd.DataFrame:
    """
    Apply an arithmetic operation to selected columns of a dataframe, in place.

    Columns are addressed strictly by position. Looking a column up by its
    label would also touch every other column carrying the same label, so a
    dataframe with duplicate column names would be modified in columns that
    were never requested.

    Parameters:
    df (pd.DataFrame): The input dataframe. It is modified in place.
    column_indices (list): The 0-based indices of the columns to perform the operation on.
    Negative indices count from the last column.
    operation (str): The arithmetic operation to perform; one of the keys of OPERATIONS.
    operand (float): The operand for the arithmetic operation.

    Returns:
    pd.DataFrame: The same dataframe object, with the operation applied.

    Raises:
    KeyError: If operation is not a key of OPERATIONS.
    IndexError: If a column index is out of range for the dataframe.
    """
    # Resolve the ufunc once; an unknown operation fails before any column is touched.
    apply_operation = OPERATIONS[operation]

    # Indexing a range normalises negative indices and raises IndexError for
    # out-of-range ones, mirroring what indexing df.columns would do.
    valid_positions = range(df.shape[1])

    for column_index in column_indices:
        position = valid_positions[column_index]
        updated = apply_operation(df.iloc[:, position], operand)
        # isetitem replaces the column at this position with a new array, so a
        # dtype change (e.g. int -> float after division) is handled cleanly.
        df.isetitem(position, updated)
    return df
38
39
def main(input_dataset: pd.DataFrame, column_indices: List[int], operation: str, operand: float, output_dataset: Tuple[callable, str]):
    """
    Run the arithmetic operation on the dataframe and write out the result.

    Any exception raised while computing or writing is logged and then
    re-raised unchanged.

    Parameters:
    input_dataset (pd.DataFrame): The dataframe to operate on; it is handed
    directly to perform_operation.
    column_indices (list): The 0-based indices of the columns to perform the operation on.
    operation (str): The arithmetic operation to perform.
    operand (float): The operand for the arithmetic operation.
    output_dataset (tuple): A pair of (writer callable, output file path); the
    writer is called with the resulting dataframe and the path.
    """
    try:
        result = perform_operation(input_dataset, column_indices, operation, operand)
        # Unpacked only after the computation succeeded, as a (writer, path) pair.
        writer, destination = output_dataset
        writer(result, destination)
    except Exception as err:
        logging.error(f"Error in main function: {err}")
        raise
58
59
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser(
        description="Perform arithmetic operations on dataframe columns."
    )

    # Declarative table of the command-line options, in registration order.
    # Every option is mandatory, so required=True is applied in the loop below.
    # The custom actions come from the project-local utils module; presumably
    # they load the input, parse the column list, and prepare the output
    # writer -- confirm against utils.
    option_specs = [
        (
            "--input_dataset",
            {
                "nargs": 2,
                "action": LoadDataAction,
                "help": "Path to the input dataset and its file extension (csv, tsv, parquet)",
            },
        ),
        (
            "--columns",
            {
                "action": SplitColumnIndicesAction,
                "help": "Comma-separated list of 1-based indices of the columns to perform the operation on",
            },
        ),
        (
            "--operation",
            {
                "type": str,
                "choices": OPERATIONS.keys(),
                "help": "Arithmetic operation to perform",
            },
        ),
        (
            "--operand",
            {
                "type": float,
                "help": "Operand for the arithmetic operation",
            },
        ),
        (
            "--output_dataset",
            {
                "nargs": 2,
                "action": StoreOutputAction,
                "help": "Path to the output dataset and its file extension (csv, tsv, parquet)",
            },
        ),
    ]
    for flag, options in option_specs:
        cli.add_argument(flag, required=True, **options)

    args = cli.parse_args()
    main(
        args.input_dataset,
        args.columns,
        args.operation,
        args.operand,
        args.output_dataset,
    )