Mercurial > repos > ebi-gxa > decoupler_pathway_inference
view decoupler_pathway_inference.py @ 1:e9b06a8fb73a draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
author | ebi-gxa |
---|---|
date | Mon, 15 Apr 2024 13:20:27 +0000 |
parents | 77d680b36e23 |
children | c6787c2aee46 |
line wrap: on
line source
# import the necessary packages import argparse import anndata as ad import decoupler as dc import pandas as pd # define arguments for the script parser = argparse.ArgumentParser() # add AnnData input file option parser.add_argument( "-i", "--input_anndata", help="AnnData input file", required=True ) # add network input file option parser.add_argument( "-n", "--input_network", help="Network input file", required=True ) # output file prefix parser.add_argument( "-o", "--output", help="output files prefix", default=None, ) # path to save Activities AnnData file parser.add_argument( "-a", "--activities_path", help="Path to save Activities AnnData file", default=None ) # Column name in net with source nodes parser.add_argument( "-s", "--source", help="Column name in net with source nodes.", default="source" ) # Column name in net with target nodes parser.add_argument( "-t", "--target", help="Column name in net with target nodes.", default="target" ) # Column name in net with weights. parser.add_argument( "-w", "--weight", help="Column name in net with weights.", default="weight" ) # add boolean argument for use_raw parser.add_argument( "--use_raw", action="store_true", default=False, help="Whether to use the raw part of the AnnData object" ) # add argument for min_cells parser.add_argument( "--min_n", help="Minimum of targets per source. If less, sources are removed.", default=5, type=int ) # add activity inference method option parser.add_argument( "-m", "--method", help="Activity inference method", default="mlm", required=True ) args = parser.parse_args() # check that either -o or --output is specified if args.output is None: raise ValueError("Please specify either -o or --output") # read in the AnnData input file adata = ad.read_h5ad(args.input_anndata) # read in the input file network input file network = pd.read_csv(args.input_network, sep='\t') if ( args.source not in network.columns or args.target not in network.columns or args.weight not in network.columns ): raise ValueError( "Source, target, and weight columns are not present in the network" ) print(type(args.min_n)) if args.method == "mlm": dc.run_mlm( mat=adata, net=network, source=args.source, target=args.target, weight=args.weight, verbose=True, min_n=args.min_n, use_raw=args.use_raw ) if args.output is not None: # write adata.obsm[mlm_key] and adata.obsm[mlm_pvals_key] to the output network files combined_df = pd.concat([adata.obsm["mlm_estimate"], adata.obsm["mlm_pvals"]], axis=1) # Save the combined dataframe to a file combined_df.to_csv(args.output + ".tsv", sep="\t") # if args.activities_path is specified, generate the activities AnnData and save the AnnData object to the specified path if args.activities_path is not None: acts = dc.get_acts(adata, obsm_key="mlm_estimate") acts.write_h5ad(args.activities_path) elif args.method == "ulm": dc.run_ulm( mat=adata, net=network, source=args.source, target=args.target, weight=args.weight, verbose=True, min_n=args.min_n, use_raw=args.use_raw ) if args.output is not None: # write adata.obsm[mlm_key] and adata.obsm[mlm_pvals_key] to the output network files combined_df = pd.concat([adata.obsm["ulm_estimate"], adata.obsm["ulm_pvals"]], axis=1) # Save the combined dataframe to a file combined_df.to_csv(args.output + ".tsv", sep="\t") # if args.activities_path is specified, generate the activities AnnData and save the AnnData object to the specified path if args.activities_path is not None: acts = dc.get_acts(adata, obsm_key="ulm_estimate") acts.write_h5ad(args.activities_path)