comparison decoupler_pathway_inference.py @ 2:c700f0381e84 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
author ebi-gxa
date Fri, 15 Mar 2024 12:18:05 +0000
parents
children c9aaac87c583
comparison
equal deleted inserted replaced
1:e024d8280886 2:c700f0381e84
1 # import the necessary packages
2 import argparse
3
4 import anndata as ad
5 import decoupler as dc
6 import pandas as pd
7
# Activity inference methods this script knows how to run.
SUPPORTED_METHODS = ("mlm", "ulm")


def build_parser():
    """Create the command-line argument parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing the input/output paths, the
        network column names, ``--use_raw``, ``--min_n`` and ``--method``.
    """
    parser = argparse.ArgumentParser()

    # AnnData input file
    parser.add_argument(
        "-i", "--input_anndata", help="AnnData input file", required=True
    )

    # network input file
    parser.add_argument(
        "-n", "--input_network", help="Network input file", required=True
    )

    # output file prefix; its presence is checked in main() so that a missing
    # value keeps raising ValueError as before
    parser.add_argument(
        "-o", "--output",
        help="output files prefix",
        default=None,
    )

    # path to save Activities AnnData file
    parser.add_argument(
        "-a", "--activities_path", help="Path to save Activities AnnData file", default=None
    )

    # Column name in net with source nodes
    parser.add_argument(
        "-s", "--source", help="Column name in net with source nodes.", default="source"
    )

    # Column name in net with target nodes
    parser.add_argument(
        "-t", "--target", help="Column name in net with target nodes.", default="target"
    )

    # Column name in net with weights
    parser.add_argument(
        "-w", "--weight", help="Column name in net with weights.", default="weight"
    )

    # boolean flag for use_raw
    parser.add_argument(
        "--use_raw", action="store_true", default=False, help="Whether to use the raw part of the AnnData object"
    )

    # minimum number of targets per source
    parser.add_argument(
        "--min_n", help="Minimum of targets per source. If less, sources are removed.", default=5, type=int
    )

    # Activity inference method. The option is no longer marked as required so
    # that the declared default actually applies, and `choices` rejects
    # unsupported methods up front instead of silently producing no output.
    parser.add_argument(
        "-m", "--method", help="Activity inference method", default="mlm", choices=SUPPORTED_METHODS
    )
    return parser


def check_network_columns(network, source, target, weight):
    """Ensure the network table has the source, target and weight columns.

    Args:
        network: table read from the network file; only ``network.columns``
            is inspected.
        source: name of the column holding source nodes.
        target: name of the column holding target nodes.
        weight: name of the column holding weights.

    Raises:
        ValueError: if any of the three column names is missing.
    """
    if any(column not in network.columns for column in (source, target, weight)):
        raise ValueError(
            "Source, target, and weight columns are not present in the network"
        )


def run_inference(adata, network, args):
    """Run the selected inference method and write its results.

    The estimates and p-values that the method stores in ``adata.obsm`` under
    ``<method>_estimate`` and ``<method>_pvals`` are concatenated column-wise
    and written to ``<args.output>.tsv``. When ``args.activities_path`` is
    set, the activities AnnData built from the estimates is written there.

    Args:
        adata: AnnData object passed to decoupler as ``mat``.
        network: network table passed to decoupler as ``net``.
        args: parsed command-line arguments (see ``build_parser``).

    Raises:
        ValueError: if ``args.method`` is not one of ``SUPPORTED_METHODS``.
    """
    runners = {"mlm": dc.run_mlm, "ulm": dc.run_ulm}
    method = args.method
    if method not in runners:
        raise ValueError(
            f"Unsupported method '{method}', expected one of {SUPPORTED_METHODS}"
        )

    runners[method](
        mat=adata,
        net=network,
        source=args.source,
        target=args.target,
        weight=args.weight,
        verbose=True,
        min_n=args.min_n,
        use_raw=args.use_raw,
    )

    estimate_key = f"{method}_estimate"
    pvals_key = f"{method}_pvals"

    # write the estimates and their p-values side by side to the output file
    combined_df = pd.concat([adata.obsm[estimate_key], adata.obsm[pvals_key]], axis=1)
    combined_df.to_csv(args.output + ".tsv", sep="\t")

    # if args.activities_path is specified, generate the activities AnnData
    # and save it to the specified path
    if args.activities_path is not None:
        acts = dc.get_acts(adata, obsm_key=estimate_key)
        acts.write_h5ad(args.activities_path)


def main(argv=None):
    """Parse the arguments, load the inputs and run the inference.

    Args:
        argv: optional list of command-line arguments; ``None`` makes argparse
            read ``sys.argv``.

    Raises:
        ValueError: if no output prefix is given, or if the network lacks the
            requested source/target/weight columns.
    """
    args = build_parser().parse_args(argv)

    # the output prefix is mandatory: fail before reading any input
    if args.output is None:
        raise ValueError("Please specify either -o or --output")

    # read in the AnnData input file
    adata = ad.read_h5ad(args.input_anndata)

    # read in the tab-separated network input file
    network = pd.read_csv(args.input_network, sep='\t')

    check_network_columns(network, args.source, args.target, args.weight)
    run_inference(adata, network, args)


if __name__ == "__main__":
    main()