Mercurial > repos > ebi-gxa > score_genes_aucell
changeset 1:e024d8280886 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
author | ebi-gxa |
---|---|
date | Thu, 16 Nov 2023 20:05:21 +0000 |
parents | 1e8697931d73 |
children | c700f0381e84 |
files | decoupler_aucell_score.py decoupler_aucell_score.xml |
diffstat | 2 files changed, 40 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/decoupler_aucell_score.py Thu Nov 09 11:36:08 2023 +0000 +++ b/decoupler_aucell_score.py Thu Nov 16 20:05:21 2023 +0000 @@ -109,15 +109,19 @@ if __name__ == "__main__": # Create command-line arguments parser parser = argparse.ArgumentParser(description="Score genes using Aucell") - parser.add_argument("--input_file", type=str, help="Path to input AnnData file") - parser.add_argument("--output_file", type=str, help="Path to output file") + parser.add_argument( + "--input_file", type=str, help="Path to input AnnData file", required=True + ) + parser.add_argument( + "--output_file", type=str, help="Path to output file", required=True + ) parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False) # add argument for gene sets to score parser.add_argument( "--gene_sets_to_score", type=str, required=False, - help="Comma separated list of gene sets to score (the need to be in the gmt file)", + help="Optional comma separated list of gene sets to score (the need to be in the gmt file)", ) # add argument for gene list (comma separated) to score parser.add_argument( @@ -137,6 +141,7 @@ "--gene_symbols_field", type=str, help="Name of the gene symbols field in the AnnData object", + required=True, ) parser.add_argument("--use_raw", action="store_true", help="Use raw data") parser.add_argument( @@ -149,15 +154,15 @@ # Load input AnnData object adata = anndata.read_h5ad(args.input_file) - if args.gene_sets_to_score is not None and args.gmt_file is not None: + if args.gmt_file is not None: # Load MSigDB file in GMT format msigdb = read_gmt(args.gmt_file) - gene_sets_to_score = args.gene_sets_to_score.split(",") + gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else [] # Score genes by their ensembl ids using the score_genes_aucell function for _, row in msigdb.iterrows(): gene_set_name = row["gene_set_name"] - if gene_set_name in gene_sets_to_score: + if not gene_sets_to_score or gene_set_name in gene_sets_to_score: genes = row["genes"].split(",") # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set ens_gene_ids = adata.var[
--- a/decoupler_aucell_score.xml Thu Nov 09 11:36:08 2023 +0000 +++ b/decoupler_aucell_score.xml Thu Nov 16 20:05:21 2023 +0000 @@ -1,5 +1,5 @@ <?xml version="1.0"?> -<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy0" profile="20.05"> +<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy1" profile="20.05"> <description> scores cells using the AUCell method for gene sets. </description> @@ -10,11 +10,13 @@ python '$__tool_directory__/decoupler_aucell_score.py' --input_file '$input_file' #if $gene_lists_source.source == "gmt" - --gmt_file '$gmt_file' - --gene_sets_to_score '$gene_sets_to_score' + --gmt_file '$gene_lists_source.gmt_file' + #if $gene_lists_source.gene_sets_to_score + --gene_sets_to_score '$gene_lists_source.gene_sets_to_score' + #end if #else: - --gene_lists_to_score '$gene_lists_to_score' - --score_names '$score_names' + --gene_lists_to_score '$gene_lists_source.gene_lists_to_score' + --score_names '$gene_lists_source.score_names' #end if --gene_symbols_field '$gene_symbols_field' $use_raw @@ -34,7 +36,7 @@ </param> <when value="gmt"> <param name="gmt_file" type="data" format="txt" label="GMT file with gene sets" /> - <param name="gene_sets_to_score" type="text" label="Gene sets to score within the GMT file" /> + <param name="gene_sets_to_score" type="text" optional="true" label="Gene sets to score within the GMT file" /> </when> <when value="enumerated"> <param name="gene_lists_to_score" type="text" label="Genes to score" /> @@ -56,11 +58,11 @@ <tests> <test expect_num_outputs="1"> <param name="input_file" value="mito_counted_anndata.h5ad"/> - <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/> - <param name="gmt_file" value="mouse_hallmark_ss.gmt"/> <param name="gene_symbols_field" value="Symbol"/> <param name="write_anndata" value="true"/> <conditional name="gene_lists_source"> + <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/> + <param name="gmt_file" value="mouse_hallmark_ss.gmt"/> <param name="source" value="gmt"/> </conditional> <output name="output_ad"> @@ -72,12 +74,27 @@ </test> <test expect_num_outputs="1"> <param name="input_file" value="mito_counted_anndata.h5ad"/> - <param name="gene_lists_to_score" value="Cd8b1,Cd8b2,Cd8a,Cd4,Nrp1,Cd80:Il1a,Il1b,Il6,Nos2,Tlr2,Tlr4,Cd80"/> - <param name="score_names" value="TCell,Macro"/> + <param name="gene_symbols_field" value="Symbol"/> + <param name="write_anndata" value="true"/> + <conditional name="gene_lists_source"> + <param name="source" value="gmt"/> + <param name="gmt_file" value="mouse_hallmark_ss.gmt"/> + </conditional> + <output name="output_ad"> + <assert_contents> + <has_h5_keys keys="obs/AUCell_HALLMARK_NOTCH_SIGNALING"/> + <has_h5_keys keys="obs/AUCell_HALLMARK_APICAL_SURFACE"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input_file" value="mito_counted_anndata.h5ad"/> <param name="gene_symbols_field" value="Symbol"/> <param name="write_anndata" value="true"/> <conditional name="gene_lists_source"> <param name="source" value="enumerated"/> + <param name="gene_lists_to_score" value="Cd8b1,Cd8b2,Cd8a,Cd4,Nrp1,Cd80:Il1a,Il1b,Il6,Nos2,Tlr2,Tlr4,Cd80"/> + <param name="score_names" value="TCell,Macro"/> </conditional> <output name="output_ad"> <assert_contents> @@ -88,12 +105,12 @@ </test> <test expect_num_outputs="1"> <param name="input_file" value="mito_counted_anndata.h5ad"/> - <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/> - <param name="gmt_file" value="mouse_hallmark_ss.gmt"/> <param name="gene_symbols_field" value="Symbol"/> <param name="write_anndata" value="False"/> <conditional name="gene_lists_source"> <param name="source" value="gmt"/> + <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/> + <param name="gmt_file" value="mouse_hallmark_ss.gmt"/> </conditional> <output name="output_table"> <assert_contents>