changeset 1:e024d8280886 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
author ebi-gxa
date Thu, 16 Nov 2023 20:05:21 +0000
parents 1e8697931d73
children c700f0381e84
files decoupler_aucell_score.py decoupler_aucell_score.xml
diffstat 2 files changed, 40 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/decoupler_aucell_score.py	Thu Nov 09 11:36:08 2023 +0000
+++ b/decoupler_aucell_score.py	Thu Nov 16 20:05:21 2023 +0000
@@ -109,15 +109,19 @@
 if __name__ == "__main__":
     # Create command-line arguments parser
     parser = argparse.ArgumentParser(description="Score genes using Aucell")
-    parser.add_argument("--input_file", type=str, help="Path to input AnnData file")
-    parser.add_argument("--output_file", type=str, help="Path to output file")
+    parser.add_argument(
+        "--input_file", type=str, help="Path to input AnnData file", required=True
+    )
+    parser.add_argument(
+        "--output_file", type=str, help="Path to output file", required=True
+    )
     parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False)
     # add argument for gene sets to score
     parser.add_argument(
         "--gene_sets_to_score",
         type=str,
         required=False,
-        help="Comma separated list of gene sets to score (the need to be in the gmt file)",
+        help="Optional comma separated list of gene sets to score (they need to be in the gmt file)",
     )
     # add argument for gene list (comma separated) to score
     parser.add_argument(
@@ -137,6 +141,7 @@
         "--gene_symbols_field",
         type=str,
         help="Name of the gene symbols field in the AnnData object",
+        required=True,
     )
     parser.add_argument("--use_raw", action="store_true", help="Use raw data")
     parser.add_argument(
@@ -149,15 +154,15 @@
     # Load input AnnData object
     adata = anndata.read_h5ad(args.input_file)
 
-    if args.gene_sets_to_score is not None and args.gmt_file is not None:
+    if args.gmt_file is not None:
         # Load MSigDB file in GMT format
         msigdb = read_gmt(args.gmt_file)
 
-        gene_sets_to_score = args.gene_sets_to_score.split(",")
+        gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
         # Score genes by their ensembl ids using the score_genes_aucell function
         for _, row in msigdb.iterrows():
             gene_set_name = row["gene_set_name"]
-            if gene_set_name in gene_sets_to_score:
+            if not gene_sets_to_score or gene_set_name in gene_sets_to_score:
                 genes = row["genes"].split(",")
                 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set
                 ens_gene_ids = adata.var[
--- a/decoupler_aucell_score.xml	Thu Nov 09 11:36:08 2023 +0000
+++ b/decoupler_aucell_score.xml	Thu Nov 16 20:05:21 2023 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy0" profile="20.05">
+<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy1" profile="20.05">
     <description>
         scores cells using the AUCell method for gene sets.
     </description>
@@ -10,11 +10,13 @@
         python '$__tool_directory__/decoupler_aucell_score.py'
             --input_file '$input_file'
             #if $gene_lists_source.source == "gmt"
-            --gmt_file '$gmt_file'
-            --gene_sets_to_score '$gene_sets_to_score'
+            --gmt_file '$gene_lists_source.gmt_file'
+            #if $gene_lists_source.gene_sets_to_score
+            --gene_sets_to_score '$gene_lists_source.gene_sets_to_score'
+            #end if
             #else:
-            --gene_lists_to_score '$gene_lists_to_score'
-            --score_names '$score_names'
+            --gene_lists_to_score '$gene_lists_source.gene_lists_to_score'
+            --score_names '$gene_lists_source.score_names'
             #end if
             --gene_symbols_field '$gene_symbols_field'
             $use_raw
@@ -34,7 +36,7 @@
             </param>
             <when value="gmt">
                 <param name="gmt_file" type="data" format="txt" label="GMT file with gene sets" />
-                <param name="gene_sets_to_score" type="text" label="Gene sets to score within the GMT file" />
+                <param name="gene_sets_to_score" type="text" optional="true" label="Gene sets to score within the GMT file" />
             </when>
             <when value="enumerated">
                 <param name="gene_lists_to_score" type="text" label="Genes to score" />
@@ -56,11 +58,11 @@
     <tests>
         <test expect_num_outputs="1">
             <param name="input_file" value="mito_counted_anndata.h5ad"/>
-            <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
-            <param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
             <param name="gene_symbols_field" value="Symbol"/>
             <param name="write_anndata" value="true"/>
             <conditional name="gene_lists_source">
+                <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
+                <param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
                 <param name="source" value="gmt"/>
             </conditional>
             <output name="output_ad">
@@ -72,12 +74,27 @@
         </test>
         <test expect_num_outputs="1">
             <param name="input_file" value="mito_counted_anndata.h5ad"/>
-            <param name="gene_lists_to_score" value="Cd8b1,Cd8b2,Cd8a,Cd4,Nrp1,Cd80:Il1a,Il1b,Il6,Nos2,Tlr2,Tlr4,Cd80"/>
-            <param name="score_names" value="TCell,Macro"/>
+            <param name="gene_symbols_field" value="Symbol"/>
+            <param name="write_anndata" value="true"/>
+            <conditional name="gene_lists_source">
+                <param name="source" value="gmt"/>
+                <param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
+            </conditional>
+            <output name="output_ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/AUCell_HALLMARK_NOTCH_SIGNALING"/>
+                    <has_h5_keys keys="obs/AUCell_HALLMARK_APICAL_SURFACE"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_file" value="mito_counted_anndata.h5ad"/>
             <param name="gene_symbols_field" value="Symbol"/>
             <param name="write_anndata" value="true"/>
             <conditional name="gene_lists_source">
                 <param name="source" value="enumerated"/>
+                <param name="gene_lists_to_score" value="Cd8b1,Cd8b2,Cd8a,Cd4,Nrp1,Cd80:Il1a,Il1b,Il6,Nos2,Tlr2,Tlr4,Cd80"/>
+                <param name="score_names" value="TCell,Macro"/>
             </conditional>
             <output name="output_ad">
                 <assert_contents>
@@ -88,12 +105,12 @@
         </test>
         <test expect_num_outputs="1">
             <param name="input_file" value="mito_counted_anndata.h5ad"/>
-            <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
-            <param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
             <param name="gene_symbols_field" value="Symbol"/>
             <param name="write_anndata" value="False"/>
             <conditional name="gene_lists_source">
                 <param name="source" value="gmt"/>
+                <param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
+                <param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
             </conditional>
             <output name="output_table">
                 <assert_contents>