diff table_pandas_transform.py @ 0:0112f08c95ed draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author recetox
date Wed, 29 Jan 2025 15:36:02 +0000 (3 months ago)
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/table_pandas_transform.py	Wed Jan 29 15:36:02 2025 +0000
@@ -0,0 +1,113 @@
+import argparse
+import logging
+from typing import Callable, List, Tuple
+
+
+import numpy as np
+import pandas as pd
+from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction
+
+
+# Supported transformations: maps each transformation name to the NumPy
+# function that is applied to a column. "log" and "ln" are aliases; both
+# resolve to np.log (the natural logarithm).
+TRANSFORMATIONS = dict(
+    log=np.log,
+    log10=np.log10,
+    ln=np.log,
+    sqrt=np.sqrt,
+    exp=np.exp,
+    abs=np.abs,
+    floor=np.floor,
+    ceil=np.ceil,
+)
+
+
+def apply_transformation(
+    df: pd.DataFrame, columns: List[int], transformation: str
+) -> pd.DataFrame:
+    """
+    Apply the specified transformation to the given columns of the dataframe.
+
+    Columns are read and written by position, so only the requested columns
+    change even when several columns share the same name. The dataframe is
+    modified in place and the same object is returned.
+
+    Parameters:
+    df (pd.DataFrame): The input dataframe; its selected columns are replaced.
+    columns (List[int]): The 0-based indices of the columns to transform.
+    transformation (str): The transformation to apply (a key of TRANSFORMATIONS).
+
+    Returns:
+    pd.DataFrame: The dataframe with the transformation applied.
+
+    Raises:
+    KeyError: If the transformation is not a key of TRANSFORMATIONS.
+    IndexError: If any index is negative or not smaller than the number of
+        columns; raised before any column is modified.
+    Exception: Any error raised by the transformation itself is logged and
+        re-raised; columns processed before the failure stay transformed.
+    """
+    # Keep each try body minimal so the log message names the real cause.
+    try:
+        transform_func = TRANSFORMATIONS[transformation]
+    except KeyError as e:
+        logging.error(f"Invalid transformation: {e}")
+        raise
+
+    # Validate every index up front. Negative values are rejected explicitly:
+    # positional indexing would otherwise wrap around and silently transform a
+    # column counted from the end.
+    n_columns = df.shape[1]
+    out_of_range = [index for index in columns if not 0 <= index < n_columns]
+    if out_of_range:
+        e = IndexError(
+            f"column indices {out_of_range} are out of bounds for a dataframe "
+            f"with {n_columns} columns"
+        )
+        logging.error(f"Invalid column index: {e}")
+        raise e
+
+    try:
+        for column_index in columns:
+            # Positional read (iloc) and positional write (isetitem): a
+            # label-based df[name] would select and overwrite every column
+            # carrying that name when names are duplicated.
+            transformed = transform_func(df.iloc[:, column_index])
+            df.isetitem(column_index, transformed)
+    except Exception as e:
+        logging.error(f"Error applying transformation: {e}")
+        raise
+    return df
+
+
+def main(
+    input_dataset: pd.DataFrame,
+    columns: List[int],
+    transformation: str,
+    output_dataset: Tuple[Callable[[pd.DataFrame, str], None], str],
+) -> None:
+    """
+    Transform the selected columns of the dataset and write the result.
+
+    The dataframe is passed to apply_transformation; the dataframe it returns
+    is then handed to the writer callable found in output_dataset. Any
+    exception is logged and re-raised unchanged.
+
+    Parameters:
+    input_dataset (pd.DataFrame): The already loaded input dataset.
+    columns (List[int]): The 0-based indices of the columns to transform.
+    transformation (str): The transformation to apply.
+    output_dataset (Tuple[Callable[[pd.DataFrame, str], None], str]): A pair of
+        (writer, target); the writer is called as writer(dataframe, target).
+    """
+    try:
+        transformed = apply_transformation(input_dataset, columns, transformation)
+        # Unpack only after the transformation, then delegate the actual
+        # writing to the supplied callable.
+        writer, target = output_dataset
+        writer(transformed, target)
+    except Exception as error:
+        logging.error(f"Error in main function: {error}")
+        raise
+
+
+if __name__ == "__main__":
+    # Command-line entry point: set up logging, declare the four required
+    # options, parse them and forward the parsed values to main().
+    logging.basicConfig(level=logging.INFO)
+
+    arg_parser = argparse.ArgumentParser(
+        description="Apply mathematical transformations to dataframe columns."
+    )
+
+    # (flag, add_argument keyword arguments), registered in this order.
+    # NOTE(review): the custom actions come from utils and are not visible
+    # here; presumably they convert the raw values into the objects main()
+    # is annotated to receive (a DataFrame, 0-based indices, a writer/target
+    # pair) - confirm against utils.
+    option_specs = [
+        (
+            "--input_dataset",
+            dict(
+                nargs=2,
+                action=LoadDataAction,
+                required=True,
+                help="Path to the input dataset and its file extension (csv, tsv, parquet)",
+            ),
+        ),
+        (
+            "--columns",
+            dict(
+                action=SplitColumnIndicesAction,
+                required=True,
+                help="Comma-separated list of 1-based indices of the columns to apply the transformation on",
+            ),
+        ),
+        (
+            "--transformation",
+            dict(
+                type=str,
+                choices=TRANSFORMATIONS.keys(),
+                required=True,
+                help="Transformation to apply",
+            ),
+        ),
+        (
+            "--output_dataset",
+            dict(
+                nargs=2,
+                action=StoreOutputAction,
+                required=True,
+                help="Path to the output dataset and its file extension (csv, tsv, parquet)",
+            ),
+        ),
+    ]
+    for flag, settings in option_specs:
+        arg_parser.add_argument(flag, **settings)
+
+    cli_args = arg_parser.parse_args()
+    main(
+        cli_args.input_dataset,
+        cli_args.columns,
+        cli_args.transformation,
+        cli_args.output_dataset,
+    )