view table_pandas_arithmetics.py @ 0:e6d5fee8c7a6 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/tables commit d0ff40eb2b536fec6c973c3a9ea8e7f31cd9a0d6
author recetox
date Wed, 29 Jan 2025 15:35:42 +0000
parents
children
line wrap: on
line source

import argparse
import logging
from typing import List, Tuple


import numpy as np
import pandas as pd
from utils import LoadDataAction, SplitColumnIndicesAction, StoreOutputAction


# Lookup table from the operation name accepted on the command line to the
# NumPy function that implements it. Key order is preserved because it is the
# order in which argparse lists the available choices.
OPERATIONS = dict(
    mul=np.multiply,
    sub=np.subtract,
    div=np.divide,
    add=np.add,
    pow=np.power,
)


def perform_operation(df: pd.DataFrame, column_indices: List[int], operation: str, operand: float) -> pd.DataFrame:
    """
    Perform the specified arithmetic operation on the given columns of the dataframe.

    Columns are read and written strictly by position. When several columns
    share the same label, only the column at the requested index is modified,
    and each selected column is modified exactly once.

    Parameters:
    df (pd.DataFrame): The input dataframe. It is modified in place.
    column_indices (list): The 0-based indices of the columns to perform the operation on.
    operation (str): The arithmetic operation to perform; must be a key of OPERATIONS.
    operand (float): The operand for the arithmetic operation.

    Returns:
    pd.DataFrame: The same dataframe object, with the operation applied.

    Raises:
    KeyError: If operation is not a key of OPERATIONS.
    IndexError: If a column index is out of range for the dataframe.
    """
    # The operation is the same for every column, so resolve it only once.
    apply_operation = OPERATIONS[operation]
    column_positions = range(df.shape[1])

    for column_index in column_indices:
        # Indexing the range normalises negative indices and raises
        # IndexError for positions that do not exist in the dataframe.
        position = column_positions[column_index]
        updated = apply_operation(df.iloc[:, position], operand)
        # Label-based access (df[name]) would select every column sharing the
        # label; isetitem replaces exactly one column by position and lets the
        # column take the dtype of the result (e.g. int -> float).
        df.isetitem(position, updated)
    return df


def main(input_dataset: pd.DataFrame, column_indices: List[int], operation: str, operand: float, output_dataset: Tuple[callable, str]) -> None:
    """
    Apply an arithmetic operation to a dataframe and write the result out.

    Parameters:
    input_dataset (pd.DataFrame): The dataframe to operate on.
    column_indices (list): The 0-based indices of the columns to perform the operation on.
    operation (str): The arithmetic operation to perform.
    operand (float): The operand for the arithmetic operation.
    output_dataset (tuple): A (write function, file path) pair. The write
        function is called with the resulting dataframe and the file path.

    Raises:
    Exception: Any error raised while computing or writing is logged and
        then re-raised unchanged.
    """
    try:
        result = perform_operation(
            df=input_dataset,
            column_indices=column_indices,
            operation=operation,
            operand=operand,
        )
        writer, destination = output_dataset
        writer(result, destination)
    except Exception as e:
        # Record the failure, then let it propagate to the caller.
        logging.error(f"Error in main function: {e}")
        raise


if __name__ == "__main__":
    # Command-line entry point: configure logging, parse the arguments and
    # forward them to main().
    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser(description="Perform arithmetic operations on dataframe columns.")

    # Input: two values (path and file extension) handled by LoadDataAction.
    # NOTE(review): main() annotates this argument as a DataFrame, so the
    # action presumably loads the file -- confirm in utils.
    cli.add_argument(
        "--input_dataset",
        required=True,
        nargs=2,
        action=LoadDataAction,
        help="Path to the input dataset and its file extension (csv, tsv, parquet)",
    )

    # Columns to modify, parsed by SplitColumnIndicesAction.
    # NOTE(review): the help text says 1-based while perform_operation expects
    # 0-based indices, so the action presumably converts -- confirm in utils.
    cli.add_argument(
        "--columns",
        required=True,
        action=SplitColumnIndicesAction,
        help="Comma-separated list of 1-based indices of the columns to perform the operation on",
    )

    # Operation name, restricted to the keys of OPERATIONS.
    cli.add_argument(
        "--operation",
        required=True,
        type=str,
        choices=OPERATIONS.keys(),
        help="Arithmetic operation to perform",
    )

    # Second operand of the arithmetic operation.
    cli.add_argument(
        "--operand",
        required=True,
        type=float,
        help="Operand for the arithmetic operation",
    )

    # Output: two values (path and file extension) handled by StoreOutputAction.
    # NOTE(review): main() unpacks this argument as a (write function, file
    # path) pair, so the action presumably builds that pair -- confirm in utils.
    cli.add_argument(
        "--output_dataset",
        required=True,
        nargs=2,
        action=StoreOutputAction,
        help="Path to the output dataset and its file extension (csv, tsv, parquet)",
    )

    parsed = cli.parse_args()
    main(
        input_dataset=parsed.input_dataset,
        column_indices=parsed.columns,
        operation=parsed.operation,
        operand=parsed.operand,
        output_dataset=parsed.output_dataset,
    )