changeset 3:e887a7e8c5b4 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
author ebi-gxa
date Mon, 15 Apr 2024 13:20:21 +0000
parents c700f0381e84
children 515ac51db6e5
files decoupler_aucell_score.py decoupler_aucell_score.xml
diffstat 2 files changed, 30 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/decoupler_aucell_score.py	Fri Mar 15 12:18:05 2024 +0000
+++ b/decoupler_aucell_score.py	Mon Apr 15 13:20:21 2024 +0000
@@ -53,7 +53,7 @@
 
 
 def score_genes_aucell(
-    adata: anndata.AnnData, gene_list: list, score_name: str, use_raw=False
+    adata: anndata.AnnData, gene_list: list, score_name: str, use_raw=False, min_n_genes=5
 ):
     """Score genes using Aucell.
 
@@ -80,15 +80,19 @@
         }
     )
     # run decoupler's run_aucell
-    dc.run_aucell(
-        adata, net=geneset_df, source="geneset", target="gene_id", use_raw=use_raw
-    )
-    # copy .obsm['aucell_estimate'] matrix columns to adata.obs using the column names
-    adata.obs[score_name] = adata.obsm["aucell_estimate"][score_name]
+    # catch the value error
+    try:
+        dc.run_aucell(
+            adata, net=geneset_df, source="geneset", target="gene_id", use_raw=use_raw
+        )
+        # copy .obsm['aucell_estimate'] matrix columns to adata.obs using the column names
+        adata.obs[score_name] = adata.obsm["aucell_estimate"][score_name]
+    except ValueError as ve:
+        print(f"Gene list {score_name} failed, skipping: {str(ve)}")
 
 
 def run_for_genelists(
-    adata, gene_lists, score_names, use_raw=False, gene_symbols_field="gene_symbols"
+    adata, gene_lists, score_names, use_raw=False, gene_symbols_field="gene_symbols", min_n_genes=5
 ):
     if len(gene_lists) == len(score_names):
         for gene_list, score_names in zip(gene_lists, score_names):
@@ -99,6 +103,7 @@
                 ens_gene_ids,
                 f"AUCell_{score_names}",
                 use_raw,
+                min_n_genes
             )
     else:
         raise ValueError(
@@ -143,6 +148,14 @@
         help="Name of the gene symbols field in the AnnData object",
         required=True,
     )
+    # argument for min_n Minimum of targets per source. If less, sources are removed.
+    parser.add_argument(
+        "--min_n",
+        type=int,
+        required=False,
+        default=5,
+        help="Minimum of targets per source. If less, sources are removed.",
+    )
     parser.add_argument("--use_raw", action="store_true", help="Use raw data")
     parser.add_argument(
         "--write_anndata", action="store_true", help="Write the modified AnnData object"
@@ -158,7 +171,9 @@
         # Load MSigDB file in GMT format
         msigdb = read_gmt(args.gmt_file)
 
-        gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
+        gene_sets_to_score = (
+            args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
+        )
         # Score genes by their ensembl ids using the score_genes_aucell function
         for _, row in msigdb.iterrows():
             gene_set_name = row["gene_set_name"]
@@ -169,13 +184,13 @@
                     adata.var[args.gene_symbols_field].isin(genes)
                 ].index
                 score_genes_aucell(
-                    adata, ens_gene_ids, f"AUCell_{gene_set_name}", args.use_raw
+                    adata, ens_gene_ids, f"AUCell_{gene_set_name}", args.use_raw, args.min_n
                 )
     elif args.gene_lists_to_score is not None and args.score_names is not None:
         gene_lists = args.gene_lists_to_score.split(":")
         score_names = args.score_names.split(",")
         run_for_genelists(
-            adata, gene_lists, score_names, args.use_raw, args.gene_symbols_field
+            adata, gene_lists, score_names, args.use_raw, args.gene_symbols_field, args.min_n
         )
 
     # Save the modified AnnData object or generate a file with cells as rows and the new score_names columns
--- a/decoupler_aucell_score.xml	Fri Mar 15 12:18:05 2024 +0000
+++ b/decoupler_aucell_score.xml	Mon Apr 15 13:20:21 2024 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy1" profile="20.05">
+<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy2" profile="20.05">
     <description>
         scores cells using the AUCell method for gene sets.
     </description>
@@ -20,6 +20,9 @@
             #end if
             --gene_symbols_field '$gene_symbols_field'
             $use_raw
+            #if $min_n_genes:
+            --min_n '$min_n_genes'
+            #end if
             #if $write_anndata:
             --write_anndata
             --output_file anndata_aucell.h5ad
@@ -43,6 +46,7 @@
                 <param name="score_names" type="text" label="Score names" />
             </when>
         </conditional>
+        <param name="min_n_genes" type="integer" label="Minimum number of genes" help="Minimum number of genes to match in each gene set" optional="true" value="5"/>
         <param name="gene_symbols_field" type="text" label="Gene symbols field" help="The field in the AnnData var table where gene symbols are stored."/>
         <param name="use_raw" type="boolean" value="false" truevalue="--use_raw" falsevalue="" label="Use raw data" help="Use RAW data in the AnnData instead of the X matrix."/>
         <param name="write_anndata" type="boolean" value="false" truevalue="--write_anndata" falsevalue="" label="Write the modified AnnData object" help="Whether to write or not the same AnnData file again with the signatures on it. If unselected, a text files of cells in rows and signatures in columns (as in Obs) is produced."/>