Repository 'decoupler_pseudobulk'
hg clone https://toolshed.g2.bx.psu.edu/repos/ebi-gxa/decoupler_pseudobulk

Changeset 5:893ff9213a34 (2024-03-15)
Previous changeset 4:f321c60167d4 (2023-11-16) Next changeset 6:ed2a77422e00 (2024-04-15)
Commit message:
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
modified:
get_test_data.sh
added:
decoupler_pathway_inference.py
test-data/progeny_test.tsv
test-data/progeny_test_2.tsv
b
diff -r f321c60167d4 -r 893ff9213a34 decoupler_pathway_inference.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/decoupler_pathway_inference.py Fri Mar 15 12:18:11 2024 +0000
[
@@ -0,0 +1,132 @@
+# import the necessary packages
+import argparse
+
+import anndata as ad
+import decoupler as dc
+import pandas as pd
+
+# define arguments for the script
+parser = argparse.ArgumentParser()
+
+# add AnnData input file option
+parser.add_argument(
+    "-i", "--input_anndata", help="AnnData input file", required=True
+)
+
+# add network input file option
+parser.add_argument(
+    "-n", "--input_network", help="Network input file", required=True
+)
+
+# output file prefix
+parser.add_argument(
+    "-o", "--output",
+    help="output files prefix",
+    default=None,
+)
+
+# path to save Activities AnnData file
+parser.add_argument(
+    "-a", "--activities_path", help="Path to save Activities AnnData file", default=None
+)
+
+# Column name in net with source nodes
+parser.add_argument(
+    "-s", "--source", help="Column name in net with source nodes.", default="source"
+)
+
+# Column name in net with target nodes
+parser.add_argument(
+    "-t", "--target", help="Column name in net with target nodes.", default="target"
+)
+
+# Column name in net with weights.
+parser.add_argument(
+    "-w", "--weight", help="Column name in net with weights.", default="weight"
+)
+
+# add boolean argument for use_raw
+parser.add_argument(
+    "--use_raw", action="store_true", default=False, help="Whether to use the raw part of the AnnData object"
+)
+
+# add argument for min_cells
+parser.add_argument(
+    "--min_n", help="Minimum of targets per source. If less, sources are removed.", default=5, type=int
+)
+
+# add activity inference method option
+parser.add_argument(
+    "-m", "--method", help="Activity inference method", default="mlm", required=True
+)
+args = parser.parse_args()
+
+# check that either -o or --output is specified
+if args.output is None:
+    raise ValueError("Please specify either -o or --output")
+
+# read in the AnnData input file
+adata = ad.read_h5ad(args.input_anndata)
+
+# read in the input file network input file
+network = pd.read_csv(args.input_network, sep='\t')
+
+if (
+    args.source not in network.columns
+    or args.target not in network.columns
+    or args.weight not in network.columns
+):
+    raise ValueError(
+        "Source, target, and weight columns are not present in the network"
+    )
+
+
+print(type(args.min_n))
+
+if args.method == "mlm":
+    dc.run_mlm(
+        mat=adata,
+        net=network,
+        source=args.source,
+        target=args.target,
+        weight=args.weight,
+        verbose=True,
+        min_n=args.min_n,
+        use_raw=args.use_raw 
+    )
+
+    if args.output is not None:
+        # write adata.obsm[mlm_key] and adata.obsm[mlm_pvals_key] to the output network files
+        combined_df = pd.concat([adata.obsm["mlm_estimate"], adata.obsm["mlm_pvals"]], axis=1)
+
+        # Save the combined dataframe to a file
+        combined_df.to_csv(args.output + ".tsv", sep="\t")
+
+    # if args.activities_path is specified, generate the activities AnnData and save the AnnData object to the specified path
+    if args.activities_path is not None:
+        acts = dc.get_acts(adata, obsm_key="mlm_estimate")
+        acts.write_h5ad(args.activities_path)
+
+elif args.method == "ulm":
+    dc.run_ulm(
+        mat=adata,
+        net=network,
+        source=args.source,
+        target=args.target,
+        weight=args.weight,
+        verbose=True,
+        min_n=args.min_n,
+        use_raw=args.use_raw 
+    )
+
+    if args.output is not None:
+        # write adata.obsm[mlm_key] and adata.obsm[mlm_pvals_key] to the output network files
+        combined_df = pd.concat([adata.obsm["ulm_estimate"], adata.obsm["ulm_pvals"]], axis=1)
+
+        # Save the combined dataframe to a file
+        combined_df.to_csv(args.output + ".tsv", sep="\t")
+
+    # if args.activities_path is specified, generate the activities AnnData and save the AnnData object to the specified path
+    if args.activities_path is not None:
+        acts = dc.get_acts(adata, obsm_key="ulm_estimate")
+        acts.write_h5ad(args.activities_path)
b
diff -r f321c60167d4 -r 893ff9213a34 get_test_data.sh
--- a/get_test_data.sh Thu Nov 16 20:05:16 2023 +0000
+++ b/get_test_data.sh Fri Mar 15 12:18:11 2024 +0000
b
@@ -19,3 +19,19 @@
 mkdir -p test-data
 pushd test-data
 get_data $MTX_LINK $BASENAME_FILE
+
+
+# Download input anndata for decoupler-pathway_inference
+BASENAME_FILE='pbmc3k_processed.h5ad'
+MTX_LINK='https://zenodo.org/records/3752813/files/pbmc3k_processed.h5ad'
+
+# Quoted so the URL and file name each reach get_data as a single argument.
+get_data "$MTX_LINK" "$BASENAME_FILE"
+
+# Download output anndata for decoupler-pathway_inference
+BASENAME_FILE='test.h5ad'
+MTX_LINK='https://zenodo.org/records/10401958/files/test.h5ad'
+
+# Quoted so the URL and file name each reach get_data as a single argument.
+get_data "$MTX_LINK" "$BASENAME_FILE"
+
b
diff -r f321c60167d4 -r 893ff9213a34 test-data/progeny_test.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/progeny_test.tsv Fri Mar 15 12:18:11 2024 +0000
b
@@ -0,0 +1,71 @@
+ source target weight p_value
+0 Androgen TMPRSS2 11.490631 0.0
+1 Androgen NKX3-1 10.622551 2.2e-44
+2 Androgen MBOAT2 10.472733 4.6e-44
+3 Androgen KLK2 10.176186 1.94441e-40
+4 Androgen SARG 11.386852 2.79021e-40
+5 EGFR LZTFL1 -1.8738769 2.0809955e-18
+6 EGFR PHLDA2 3.5051384 2.0530624e-17
+7 EGFR DUSP6 12.6293125 6.537324e-17
+8 EGFR DUSP5 7.9430394 6.86669e-17
+9 EGFR PHLDA1 6.619626 3.4106933e-16
+10 Estrogen GREB1 17.240173 0.0
+11 Estrogen RET 10.718027 0.0
+12 Estrogen TFF1 14.430255 0.0
+13 Estrogen HEY2 11.482369 3.1e-44
+14 Estrogen RAPGEFL1 10.544896 5.2e-43
+15 Hypoxia FAM162A 8.335551 0.0
+16 Hypoxia NDRG1 22.08712 0.0
+17 Hypoxia ENO2 14.32694 0.0
+18 Hypoxia PDK1 13.120449 0.0
+19 Hypoxia ANKRD37 8.484976 0.0
+20 JAK-STAT OAS1 15.028714 1.058e-41
+21 JAK-STAT HERC6 8.769676 1.3450407e-38
+22 JAK-STAT OAS3 10.618842 1.2143582e-37
+23 JAK-STAT PLSCR1 8.481604 8.955206e-37
+24 JAK-STAT DDX60 12.198234 9.150971e-36
+25 MAPK DUSP6 16.859016 0.0
+26 MAPK SPRED2 3.5018346 0.0
+27 MAPK SPRY2 9.481585 9.19e-43
+28 MAPK ETV5 5.9887094 6.7425e-41
+29 MAPK EPHA2 6.3140125 3.7492e-40
+30 NFkB NFKB1 9.513637 0.0
+31 NFkB CXCL3 22.946114 0.0
+32 NFkB NFKB2 5.5155754 0.0
+33 NFkB NFKBIA 11.444533 0.0
+34 NFkB BCL2A1 14.416924 0.0
+35 PI3K MLANA -9.985743 1.84e-43
+36 PI3K PMEL -6.5903482 6.8747866e-36
+37 PI3K FAXDC2 -12.421274 3.297515e-34
+38 PI3K HSD17B8 -8.601571 9.948224e-34
+39 PI3K CTSF -9.172143 1.0235212e-31
+40 TGFb LINC00312 4.428987 2.0074443e-17
+41 TGFb TSPAN2 5.502326 3.1451768e-16
+42 TGFb SMAD7 7.6311436 7.3087106e-16
+43 TGFb NOX4 5.913813 3.8292238e-15
+44 TGFb COL4A1 6.3374896 9.052501e-15
+45 TNFa CSF2 8.35548 0.0
+46 TNFa CXCL5 10.0813675 0.0
+47 TNFa NFKBIE 10.356205 0.0
+48 TNFa TNFAIP3 35.40072 0.0
+49 TNFa EFNA1 18.63111 0.0
+50 Trail FRMPD1 -2.2346141 9.378505e-07
+51 Trail WT1-AS 2.2251053 2.0316747e-06
+52 Trail WNT8A -1.8469616 3.795469e-05
+53 Trail GPR18 3.240805 6.1090715e-05
+54 Trail TEC 2.0513217 6.32898e-05
+55 VEGF CRACD -4.87119 6.7185365e-25
+56 VEGF VWA8 -3.6068044 1.4495265e-18
+57 VEGF NLGN1 -5.618075 2.6587072e-18
+58 VEGF NRG3 -5.823747 1.0848074e-16
+59 VEGF KCNK10 2.8833063 1.8129868e-16
+60 WNT BMP4 5.936831 2.511717e-10
+61 WNT SIGLEC6 2.0207362 2.347858e-09
+62 WNT NPY2R 1.3872339 8.666917e-09
+63 WNT CSF3R 1.9323153 3.0219417e-07
+64 WNT KRT23 4.1216116 5.463989e-07
+65 p53 GLS2 6.452465 7.444302e-37
+66 p53 MDM2 8.193488 2.1194304e-35
+67 p53 ZNF79 4.020263 4.5987433e-34
+68 p53 FDXR 11.994496 5.589482e-32
+69 p53 LCE1B 11.813737 7.8095406e-30
b
diff -r f321c60167d4 -r 893ff9213a34 test-data/progeny_test_2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/progeny_test_2.tsv Fri Mar 15 12:18:11 2024 +0000
b
@@ -0,0 +1,71 @@
+source target weight p_value
+Androgen TMPRSS2 11.490631 0.0
+Androgen NKX3-1 10.622551 2.2e-44
+Androgen MBOAT2 10.472733 4.6e-44
+Androgen KLK2 10.176186 1.94441e-40
+Androgen SARG 11.386852 2.79021e-40
+EGFR LZTFL1 -1.8738769 2.0809955e-18
+EGFR PHLDA2 3.5051384 2.0530624e-17
+EGFR DUSP6 12.6293125 6.537324e-17
+EGFR DUSP5 7.9430394 6.86669e-17
+EGFR PHLDA1 6.619626 3.4106933e-16
+Estrogen GREB1 17.240173 0.0
+Estrogen RET 10.718027 0.0
+Estrogen TFF1 14.430255 0.0
+Estrogen HEY2 11.482369 3.1e-44
+Estrogen RAPGEFL1 10.544896 5.2e-43
+Hypoxia FAM162A 8.335551 0.0
+Hypoxia NDRG1 22.08712 0.0
+Hypoxia ENO2 14.32694 0.0
+Hypoxia PDK1 13.120449 0.0
+Hypoxia ANKRD37 8.484976 0.0
+JAK-STAT OAS1 15.028714 1.058e-41
+JAK-STAT HERC6 8.769676 1.3450407e-38
+JAK-STAT OAS3 10.618842 1.2143582e-37
+JAK-STAT PLSCR1 8.481604 8.955206e-37
+JAK-STAT DDX60 12.198234 9.150971e-36
+MAPK DUSP6 16.859016 0.0
+MAPK SPRED2 3.5018346 0.0
+MAPK SPRY2 9.481585 9.19e-43
+MAPK ETV5 5.9887094 6.7425e-41
+MAPK EPHA2 6.3140125 3.7492e-40
+NFkB NFKB1 9.513637 0.0
+NFkB CXCL3 22.946114 0.0
+NFkB NFKB2 5.5155754 0.0
+NFkB NFKBIA 11.444533 0.0
+NFkB BCL2A1 14.416924 0.0
+PI3K MLANA -9.985743 1.84e-43
+PI3K PMEL -6.5903482 6.8747866e-36
+PI3K FAXDC2 -12.421274 3.297515e-34
+PI3K HSD17B8 -8.601571 9.948224e-34
+PI3K CTSF -9.172143 1.0235212e-31
+TGFb LINC00312 4.428987 2.0074443e-17
+TGFb TSPAN2 5.502326 3.1451768e-16
+TGFb SMAD7 7.6311436 7.3087106e-16
+TGFb NOX4 5.913813 3.8292238e-15
+TGFb COL4A1 6.3374896 9.052501e-15
+TNFa CSF2 8.35548 0.0
+TNFa CXCL5 10.0813675 0.0
+TNFa NFKBIE 10.356205 0.0
+TNFa TNFAIP3 35.40072 0.0
+TNFa EFNA1 18.63111 0.0
+Trail FRMPD1 -2.2346141 9.378505e-07
+Trail WT1-AS 2.2251053 2.0316747e-06
+Trail WNT8A -1.8469616 3.795469e-05
+Trail GPR18 3.240805 6.1090715e-05
+Trail TEC 2.0513217 6.32898e-05
+VEGF CRACD -4.87119 6.7185365e-25
+VEGF VWA8 -3.6068044 1.4495265e-18
+VEGF NLGN1 -5.618075 2.6587072e-18
+VEGF NRG3 -5.823747 1.0848074e-16
+VEGF KCNK10 2.8833063 1.8129868e-16
+WNT BMP4 5.936831 2.511717e-10
+WNT SIGLEC6 2.0207362 2.347858e-09
+WNT NPY2R 1.3872339 8.666917e-09
+WNT CSF3R 1.9323153 3.0219417e-07
+WNT KRT23 4.1216116 5.463989e-07
+p53 GLS2 6.452465 7.444302e-37
+p53 MDM2 8.193488 2.1194304e-35
+p53 ZNF79 4.020263 4.5987433e-34
+p53 FDXR 11.994496 5.589482e-32
+p53 LCE1B 11.813737 7.8095406e-30