Mercurial > repos > ebi-gxa > decoupler_pseudobulk
changeset 5:893ff9213a34 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
author | ebi-gxa |
---|---|
date | Fri, 15 Mar 2024 12:18:11 +0000 |
parents | f321c60167d4 |
children | ed2a77422e00 |
files | decoupler_pathway_inference.py get_test_data.sh test-data/progeny_test.tsv test-data/progeny_test_2.tsv |
diffstat | 4 files changed, 290 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Infer pathway activities on an AnnData object with decoupler.

Reads an AnnData ``.h5ad`` file and a tab-separated network file, runs the
selected decoupler method (``mlm`` or ``ulm``), writes the estimates and
p-values to ``<output>.tsv`` and, optionally, writes the activities AnnData
object to ``--activities_path``.
"""
import argparse

import anndata as ad
import decoupler as dc
import pandas as pd

# Supported inference methods mapped to the decoupler function that runs them.
# After a run, the results are read back from ``adata.obsm`` under the keys
# "<method>_estimate" and "<method>_pvals".
METHODS = {
    "mlm": dc.run_mlm,
    "ulm": dc.run_ulm,
}


def build_parser():
    """Return the command-line argument parser for this script."""
    parser = argparse.ArgumentParser()

    # AnnData input file
    parser.add_argument(
        "-i", "--input_anndata", help="AnnData input file", required=True
    )

    # network input file (tab-separated)
    parser.add_argument(
        "-n", "--input_network", help="Network input file", required=True
    )

    # output file prefix; validated in main() so that a missing value still
    # raises ValueError as it always has
    parser.add_argument(
        "-o", "--output",
        help="output files prefix",
        default=None,
    )

    # path to save Activities AnnData file
    parser.add_argument(
        "-a", "--activities_path",
        help="Path to save Activities AnnData file",
        default=None,
    )

    # column name in net with source nodes
    parser.add_argument(
        "-s", "--source",
        help="Column name in net with source nodes.",
        default="source",
    )

    # column name in net with target nodes
    parser.add_argument(
        "-t", "--target",
        help="Column name in net with target nodes.",
        default="target",
    )

    # column name in net with weights
    parser.add_argument(
        "-w", "--weight",
        help="Column name in net with weights.",
        default="weight",
    )

    # boolean flag for use_raw
    parser.add_argument(
        "--use_raw",
        action="store_true",
        default=False,
        help="Whether to use the raw part of the AnnData object",
    )

    # minimum number of targets per source
    parser.add_argument(
        "--min_n",
        help="Minimum of targets per source. If less, sources are removed.",
        default=5,
        type=int,
    )

    # activity inference method. `choices` rejects unsupported values up
    # front; previously an unknown method was silently ignored and the script
    # exited successfully without writing any output. The option is no longer
    # marked required, so the "mlm" default actually takes effect.
    parser.add_argument(
        "-m", "--method",
        help="Activity inference method",
        default="mlm",
        choices=sorted(METHODS),
    )
    return parser


def check_network_columns(network, source, target, weight):
    """Raise ValueError unless all three column names exist in ``network``."""
    required_columns = (source, target, weight)
    if any(column not in network.columns for column in required_columns):
        raise ValueError(
            "Source, target, and weight columns are not present in the network"
        )


def main():
    """Parse the command line, run the inference and write the outputs.

    Raises:
        ValueError: if no output prefix was given, or if the network file
            lacks any of the source/target/weight columns.
    """
    args = build_parser().parse_args()

    # an output prefix is mandatory
    if args.output is None:
        raise ValueError("Please specify either -o or --output")

    # read in the AnnData input file
    adata = ad.read_h5ad(args.input_anndata)

    # read in the network input file
    network = pd.read_csv(args.input_network, sep="\t")
    check_network_columns(network, args.source, args.target, args.weight)

    # run the selected method on adata
    METHODS[args.method](
        mat=adata,
        net=network,
        source=args.source,
        target=args.target,
        weight=args.weight,
        verbose=True,
        min_n=args.min_n,
        use_raw=args.use_raw,
    )

    estimate_key = f"{args.method}_estimate"
    pvals_key = f"{args.method}_pvals"

    # write estimates and p-values side by side to "<output>.tsv"
    # NOTE(review): the two frames presumably share the same column names, so
    # the combined table would have duplicated headers - confirm downstream
    # consumers expect this before changing the layout.
    combined_df = pd.concat(
        [adata.obsm[estimate_key], adata.obsm[pvals_key]], axis=1
    )
    combined_df.to_csv(args.output + ".tsv", sep="\t")

    # if requested, build the activities AnnData and save it
    if args.activities_path is not None:
        acts = dc.get_acts(adata, obsm_key=estimate_key)
        acts.write_h5ad(args.activities_path)


if __name__ == "__main__":
    main()
# get_test_data.sh (continues from line 19 of the script).
# Relies on get_data, MTX_LINK and BASENAME_FILE being defined earlier in
# the script. Expansions are quoted so that a URL or file name containing
# whitespace or glob characters is passed to get_data as a single argument.
mkdir -p test-data
pushd test-data
get_data "$MTX_LINK" "$BASENAME_FILE"


# Download input anndata for decoupler-pathway_inference
BASENAME_FILE='pbmc3k_processed.h5ad'

MTX_LINK='https://zenodo.org/records/3752813/files/pbmc3k_processed.h5ad'

get_data "$MTX_LINK" "$BASENAME_FILE"

# Download output anndata for decoupler-pathway_inference
BASENAME_FILE='test.h5ad'

MTX_LINK='https://zenodo.org/records/10401958/files/test.h5ad'

get_data "$MTX_LINK" "$BASENAME_FILE"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/progeny_test.tsv Fri Mar 15 12:18:11 2024 +0000 @@ -0,0 +1,71 @@ + source target weight p_value +0 Androgen TMPRSS2 11.490631 0.0 +1 Androgen NKX3-1 10.622551 2.2e-44 +2 Androgen MBOAT2 10.472733 4.6e-44 +3 Androgen KLK2 10.176186 1.94441e-40 +4 Androgen SARG 11.386852 2.79021e-40 +5 EGFR LZTFL1 -1.8738769 2.0809955e-18 +6 EGFR PHLDA2 3.5051384 2.0530624e-17 +7 EGFR DUSP6 12.6293125 6.537324e-17 +8 EGFR DUSP5 7.9430394 6.86669e-17 +9 EGFR PHLDA1 6.619626 3.4106933e-16 +10 Estrogen GREB1 17.240173 0.0 +11 Estrogen RET 10.718027 0.0 +12 Estrogen TFF1 14.430255 0.0 +13 Estrogen HEY2 11.482369 3.1e-44 +14 Estrogen RAPGEFL1 10.544896 5.2e-43 +15 Hypoxia FAM162A 8.335551 0.0 +16 Hypoxia NDRG1 22.08712 0.0 +17 Hypoxia ENO2 14.32694 0.0 +18 Hypoxia PDK1 13.120449 0.0 +19 Hypoxia ANKRD37 8.484976 0.0 +20 JAK-STAT OAS1 15.028714 1.058e-41 +21 JAK-STAT HERC6 8.769676 1.3450407e-38 +22 JAK-STAT OAS3 10.618842 1.2143582e-37 +23 JAK-STAT PLSCR1 8.481604 8.955206e-37 +24 JAK-STAT DDX60 12.198234 9.150971e-36 +25 MAPK DUSP6 16.859016 0.0 +26 MAPK SPRED2 3.5018346 0.0 +27 MAPK SPRY2 9.481585 9.19e-43 +28 MAPK ETV5 5.9887094 6.7425e-41 +29 MAPK EPHA2 6.3140125 3.7492e-40 +30 NFkB NFKB1 9.513637 0.0 +31 NFkB CXCL3 22.946114 0.0 +32 NFkB NFKB2 5.5155754 0.0 +33 NFkB NFKBIA 11.444533 0.0 +34 NFkB BCL2A1 14.416924 0.0 +35 PI3K MLANA -9.985743 1.84e-43 +36 PI3K PMEL -6.5903482 6.8747866e-36 +37 PI3K FAXDC2 -12.421274 3.297515e-34 +38 PI3K HSD17B8 -8.601571 9.948224e-34 +39 PI3K CTSF -9.172143 1.0235212e-31 +40 TGFb LINC00312 4.428987 2.0074443e-17 +41 TGFb TSPAN2 5.502326 3.1451768e-16 +42 TGFb SMAD7 7.6311436 7.3087106e-16 +43 TGFb NOX4 5.913813 3.8292238e-15 +44 TGFb COL4A1 6.3374896 9.052501e-15 +45 TNFa CSF2 8.35548 0.0 +46 TNFa CXCL5 10.0813675 0.0 +47 TNFa NFKBIE 10.356205 0.0 +48 TNFa TNFAIP3 35.40072 0.0 +49 TNFa EFNA1 18.63111 0.0 +50 Trail FRMPD1 -2.2346141 9.378505e-07 +51 Trail WT1-AS 2.2251053 2.0316747e-06 +52 
Trail WNT8A -1.8469616 3.795469e-05 +53 Trail GPR18 3.240805 6.1090715e-05 +54 Trail TEC 2.0513217 6.32898e-05 +55 VEGF CRACD -4.87119 6.7185365e-25 +56 VEGF VWA8 -3.6068044 1.4495265e-18 +57 VEGF NLGN1 -5.618075 2.6587072e-18 +58 VEGF NRG3 -5.823747 1.0848074e-16 +59 VEGF KCNK10 2.8833063 1.8129868e-16 +60 WNT BMP4 5.936831 2.511717e-10 +61 WNT SIGLEC6 2.0207362 2.347858e-09 +62 WNT NPY2R 1.3872339 8.666917e-09 +63 WNT CSF3R 1.9323153 3.0219417e-07 +64 WNT KRT23 4.1216116 5.463989e-07 +65 p53 GLS2 6.452465 7.444302e-37 +66 p53 MDM2 8.193488 2.1194304e-35 +67 p53 ZNF79 4.020263 4.5987433e-34 +68 p53 FDXR 11.994496 5.589482e-32 +69 p53 LCE1B 11.813737 7.8095406e-30
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/progeny_test_2.tsv Fri Mar 15 12:18:11 2024 +0000 @@ -0,0 +1,71 @@ +source target weight p_value +Androgen TMPRSS2 11.490631 0.0 +Androgen NKX3-1 10.622551 2.2e-44 +Androgen MBOAT2 10.472733 4.6e-44 +Androgen KLK2 10.176186 1.94441e-40 +Androgen SARG 11.386852 2.79021e-40 +EGFR LZTFL1 -1.8738769 2.0809955e-18 +EGFR PHLDA2 3.5051384 2.0530624e-17 +EGFR DUSP6 12.6293125 6.537324e-17 +EGFR DUSP5 7.9430394 6.86669e-17 +EGFR PHLDA1 6.619626 3.4106933e-16 +Estrogen GREB1 17.240173 0.0 +Estrogen RET 10.718027 0.0 +Estrogen TFF1 14.430255 0.0 +Estrogen HEY2 11.482369 3.1e-44 +Estrogen RAPGEFL1 10.544896 5.2e-43 +Hypoxia FAM162A 8.335551 0.0 +Hypoxia NDRG1 22.08712 0.0 +Hypoxia ENO2 14.32694 0.0 +Hypoxia PDK1 13.120449 0.0 +Hypoxia ANKRD37 8.484976 0.0 +JAK-STAT OAS1 15.028714 1.058e-41 +JAK-STAT HERC6 8.769676 1.3450407e-38 +JAK-STAT OAS3 10.618842 1.2143582e-37 +JAK-STAT PLSCR1 8.481604 8.955206e-37 +JAK-STAT DDX60 12.198234 9.150971e-36 +MAPK DUSP6 16.859016 0.0 +MAPK SPRED2 3.5018346 0.0 +MAPK SPRY2 9.481585 9.19e-43 +MAPK ETV5 5.9887094 6.7425e-41 +MAPK EPHA2 6.3140125 3.7492e-40 +NFkB NFKB1 9.513637 0.0 +NFkB CXCL3 22.946114 0.0 +NFkB NFKB2 5.5155754 0.0 +NFkB NFKBIA 11.444533 0.0 +NFkB BCL2A1 14.416924 0.0 +PI3K MLANA -9.985743 1.84e-43 +PI3K PMEL -6.5903482 6.8747866e-36 +PI3K FAXDC2 -12.421274 3.297515e-34 +PI3K HSD17B8 -8.601571 9.948224e-34 +PI3K CTSF -9.172143 1.0235212e-31 +TGFb LINC00312 4.428987 2.0074443e-17 +TGFb TSPAN2 5.502326 3.1451768e-16 +TGFb SMAD7 7.6311436 7.3087106e-16 +TGFb NOX4 5.913813 3.8292238e-15 +TGFb COL4A1 6.3374896 9.052501e-15 +TNFa CSF2 8.35548 0.0 +TNFa CXCL5 10.0813675 0.0 +TNFa NFKBIE 10.356205 0.0 +TNFa TNFAIP3 35.40072 0.0 +TNFa EFNA1 18.63111 0.0 +Trail FRMPD1 -2.2346141 9.378505e-07 +Trail WT1-AS 2.2251053 2.0316747e-06 +Trail WNT8A -1.8469616 3.795469e-05 +Trail GPR18 3.240805 6.1090715e-05 +Trail TEC 2.0513217 6.32898e-05 +VEGF CRACD -4.87119 6.7185365e-25 +VEGF VWA8 
-3.6068044 1.4495265e-18 +VEGF NLGN1 -5.618075 2.6587072e-18 +VEGF NRG3 -5.823747 1.0848074e-16 +VEGF KCNK10 2.8833063 1.8129868e-16 +WNT BMP4 5.936831 2.511717e-10 +WNT SIGLEC6 2.0207362 2.347858e-09 +WNT NPY2R 1.3872339 8.666917e-09 +WNT CSF3R 1.9323153 3.0219417e-07 +WNT KRT23 4.1216116 5.463989e-07 +p53 GLS2 6.452465 7.444302e-37 +p53 MDM2 8.193488 2.1194304e-35 +p53 ZNF79 4.020263 4.5987433e-34 +p53 FDXR 11.994496 5.589482e-32 +p53 LCE1B 11.813737 7.8095406e-30