Mercurial > repos > recetox > rename_annotated_feature
view rename_annotated_feature.py @ 0:268fcec93d9c draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rename_annotated_feature commit 7948bcdd36cec524d201712dc20c438973b4cc28
author | recetox |
---|---|
date | Tue, 21 May 2024 07:44:25 +0000 |
parents | |
children |
line wrap: on
line source
import argparse
from collections import defaultdict
from typing import Tuple

import pandas as pd


def parse_arguments() -> argparse.Namespace:
    """Parses command-line arguments.

    Returns:
        argparse.Namespace: Namespace with argument values as attributes.
    """
    parser = argparse.ArgumentParser(description='Rename annotated feature.')
    parser.add_argument('--annotations_table_path', type=str, required=True,
                        help='Path to the annotations table file.')
    parser.add_argument('--abundance_table_path', type=str, required=True,
                        help='Path to the abundance table file.')
    parser.add_argument('--mode', type=str, choices=['single', 'multiple'], default='single',
                        help='Mode to use for renaming. Can be "single" or "multiple".')
    parser.add_argument('--output_path', type=str, default='output.csv',
                        help='Path to the output CSV file.')
    return parser.parse_args()


def load_tables(annotations_table_path: str, abundance_table_path: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Loads annotation and abundance tables from files.

    Both files are read with ``pd.read_table`` and surrounding whitespace is
    stripped from their column names.

    Args:
        annotations_table_path (str): Path to the annotations table file.
        abundance_table_path (str): Path to the abundance table file.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: Tuple of DataFrames for annotations and abundance tables.
    """
    annotations_table = pd.read_table(annotations_table_path)
    abundance_table = pd.read_table(abundance_table_path)

    annotations_table.columns = annotations_table.columns.str.strip()
    abundance_table.columns = abundance_table.columns.str.strip()

    return annotations_table, abundance_table


def _rename_columns(abundance_table: pd.DataFrame, mapping: dict) -> None:
    """Renames abundance columns in place according to a query -> new name mapping.

    Column labels and mapping keys are compared by their string form, because
    headers read from a file are always text while query values may have been
    parsed as numbers. All renames are applied in a single pass, so a freshly
    assigned name is never treated as a query and renamed a second time.

    Args:
        abundance_table (pd.DataFrame): DataFrame whose columns are renamed in place.
        mapping (dict): Mapping of query identifier to new column name.
    """
    # Missing query identifiers cannot name a column; skip them.
    lookup = {str(old): new for old, new in mapping.items() if not pd.isna(old)}
    abundance_table.rename(columns=lambda col: lookup.get(str(col), col), inplace=True)


def rename_single(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Renames columns in abundance table based on single best match in annotations table.

    The last column of the annotations table is used as the score. For every
    value in the "query" column, the "reference" of the row with the highest
    score becomes the new name of the matching abundance column. Rows without
    a score are ignored. The abundance table is modified in place.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations.
        abundance_table (pd.DataFrame): DataFrame of abundance data.
    """
    scores_col = annotations_table.columns[-1]

    # A group made only of missing scores has no maximum; drop such rows first
    # so idxmax always yields a valid row label.
    scored = annotations_table.dropna(subset=[scores_col])
    best_rows = scored.loc[scored.groupby("query")[scores_col].idxmax()]

    mapping = dict(zip(best_rows["query"], best_rows["reference"]))
    _rename_columns(abundance_table, mapping)


def rename_multiple(annotations_table: pd.DataFrame, abundance_table: pd.DataFrame) -> None:
    """Renames columns in abundance table based on multiple matches in annotations table.

    Every "reference" listed for a "query" is collected, in table order, and
    joined with ", " to form the new name of the matching abundance column.
    The abundance table is modified in place.

    Args:
        annotations_table (pd.DataFrame): DataFrame of annotations.
        abundance_table (pd.DataFrame): DataFrame of abundance data.
    """
    references_by_query = defaultdict(list)
    for query, reference in zip(annotations_table["query"], annotations_table["reference"]):
        # str() keeps the join below working for non-text references.
        references_by_query[query].append(str(reference))

    mapping = {query: ', '.join(names) for query, names in references_by_query.items()}
    _rename_columns(abundance_table, mapping)


def main() -> None:
    """Main function to parse arguments, load tables, rename columns, and save output."""
    args = parse_arguments()

    annotations_table, abundance_table = load_tables(args.annotations_table_path, args.abundance_table_path)

    if args.mode == "single":
        rename_single(annotations_table, abundance_table)
    else:
        rename_multiple(annotations_table, abundance_table)

    # The output is written tab-separated regardless of the file extension.
    abundance_table.to_csv(args.output_path, sep="\t", index=False)


if __name__ == "__main__":
    main()