diff decoupler_pathway_inference.py @ 5:893ff9213a34 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
author ebi-gxa
date Fri, 15 Mar 2024 12:18:11 +0000
parents
children 93f61ea19336
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/decoupler_pathway_inference.py	Fri Mar 15 12:18:11 2024 +0000
@@ -0,0 +1,132 @@
+# import the necessary packages
+import argparse
+
+import anndata as ad
+import decoupler as dc
+import pandas as pd
+
+# define arguments for the script
+parser = argparse.ArgumentParser()
+
+# add AnnData input file option
+parser.add_argument(
+    "-i", "--input_anndata", help="AnnData input file", required=True
+)
+
+# add network input file option
+parser.add_argument(
+    "-n", "--input_network", help="Network input file", required=True
+)
+
+# output file prefix
+parser.add_argument(
+    "-o", "--output",
+    help="output files prefix",
+    default=None,
+)
+
+# path to save Activities AnnData file
+parser.add_argument(
+    "-a", "--activities_path", help="Path to save Activities AnnData file", default=None
+)
+
+# Column name in net with source nodes
+parser.add_argument(
+    "-s", "--source", help="Column name in net with source nodes.", default="source"
+)
+
+# Column name in net with target nodes
+parser.add_argument(
+    "-t", "--target", help="Column name in net with target nodes.", default="target"
+)
+
+# Column name in net with weights.
+parser.add_argument(
+    "-w", "--weight", help="Column name in net with weights.", default="weight"
+)
+
+# add boolean argument for use_raw
+parser.add_argument(
+    "--use_raw", action="store_true", default=False, help="Whether to use the raw part of the AnnData object"
+)
+
+# add argument for min_cells
+parser.add_argument(
+    "--min_n", help="Minimum of targets per source. If less, sources are removed.", default=5, type=int
+)
+
+# add activity inference method option
+parser.add_argument(
+    "-m", "--method", help="Activity inference method", default="mlm", required=True
+)
+args = parser.parse_args()
+
+# check that either -o or --output is specified
+if args.output is None:
+    raise ValueError("Please specify either -o or --output")
+
+# read in the AnnData input file
+adata = ad.read_h5ad(args.input_anndata)
+
+# read in the input file network input file
+network = pd.read_csv(args.input_network, sep='\t')
+
+if (
+    args.source not in network.columns
+    or args.target not in network.columns
+    or args.weight not in network.columns
+):
+    raise ValueError(
+        "Source, target, and weight columns are not present in the network"
+    )
+
+
+print(type(args.min_n))
+
+if args.method == "mlm":
+    dc.run_mlm(
+        mat=adata,
+        net=network,
+        source=args.source,
+        target=args.target,
+        weight=args.weight,
+        verbose=True,
+        min_n=args.min_n,
+        use_raw=args.use_raw 
+    )
+
+    if args.output is not None:
+        # write adata.obsm[mlm_key] and adata.obsm[mlm_pvals_key] to the output network files
+        combined_df = pd.concat([adata.obsm["mlm_estimate"], adata.obsm["mlm_pvals"]], axis=1)
+
+        # Save the combined dataframe to a file
+        combined_df.to_csv(args.output + ".tsv", sep="\t")
+
+    # if args.activities_path is specified, generate the activities AnnData and save the AnnData object to the specified path
+    if args.activities_path is not None:
+        acts = dc.get_acts(adata, obsm_key="mlm_estimate")
+        acts.write_h5ad(args.activities_path)
+
+elif args.method == "ulm":
+    dc.run_ulm(
+        mat=adata,
+        net=network,
+        source=args.source,
+        target=args.target,
+        weight=args.weight,
+        verbose=True,
+        min_n=args.min_n,
+        use_raw=args.use_raw 
+    )
+
+    if args.output is not None:
+        # write adata.obsm[mlm_key] and adata.obsm[mlm_pvals_key] to the output network files
+        combined_df = pd.concat([adata.obsm["ulm_estimate"], adata.obsm["ulm_pvals"]], axis=1)
+
+        # Save the combined dataframe to a file
+        combined_df.to_csv(args.output + ".tsv", sep="\t")
+
+    # if args.activities_path is specified, generate the activities AnnData and save the AnnData object to the specified path
+    if args.activities_path is not None:
+        acts = dc.get_acts(adata, obsm_key="ulm_estimate")
+        acts.write_h5ad(args.activities_path)