Mercurial > repos > ebi-gxa > score_genes_aucell
changeset 3:e887a7e8c5b4 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
author | ebi-gxa |
---|---|
date | Mon, 15 Apr 2024 13:20:21 +0000 |
parents | c700f0381e84 |
children | 515ac51db6e5 |
files | decoupler_aucell_score.py decoupler_aucell_score.xml |
diffstat | 2 files changed, 30 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/decoupler_aucell_score.py Fri Mar 15 12:18:05 2024 +0000
+++ b/decoupler_aucell_score.py Mon Apr 15 13:20:21 2024 +0000
@@ -53,7 +53,7 @@
 
 
 def score_genes_aucell(
-    adata: anndata.AnnData, gene_list: list, score_name: str, use_raw=False
+    adata: anndata.AnnData, gene_list: list, score_name: str, use_raw=False, min_n_genes=5
 ):
     """Score genes using Aucell.
 
@@ -80,15 +80,19 @@
         }
     )
     # run decoupler's run_aucell
-    dc.run_aucell(
-        adata, net=geneset_df, source="geneset", target="gene_id", use_raw=use_raw
-    )
-    # copy .obsm['aucell_estimate'] matrix columns to adata.obs using the column names
-    adata.obs[score_name] = adata.obsm["aucell_estimate"][score_name]
+    # catch the value error
+    try:
+        dc.run_aucell(
+            adata, net=geneset_df, source="geneset", target="gene_id", use_raw=use_raw
+        )
+        # copy .obsm['aucell_estimate'] matrix columns to adata.obs using the column names
+        adata.obs[score_name] = adata.obsm["aucell_estimate"][score_name]
+    except ValueError as ve:
+        print(f"Gene list {score_name} failed, skipping: {str(ve)}")
 
 
 def run_for_genelists(
-    adata, gene_lists, score_names, use_raw=False, gene_symbols_field="gene_symbols"
+    adata, gene_lists, score_names, use_raw=False, gene_symbols_field="gene_symbols", min_n_genes=5
 ):
     if len(gene_lists) == len(score_names):
         for gene_list, score_names in zip(gene_lists, score_names):
@@ -99,6 +103,7 @@
                 ens_gene_ids,
                 f"AUCell_{score_names}",
                 use_raw,
+                min_n_genes
             )
     else:
         raise ValueError(
@@ -143,6 +148,14 @@
         help="Name of the gene symbols field in the AnnData object",
         required=True,
     )
+    # argument for min_n Minimum of targets per source. If less, sources are removed.
+    parser.add_argument(
+        "--min_n",
+        type=int,
+        required=False,
+        default=5,
+        help="Minimum of targets per source. If less, sources are removed.",
+    )
     parser.add_argument("--use_raw", action="store_true", help="Use raw data")
     parser.add_argument(
         "--write_anndata", action="store_true", help="Write the modified AnnData object"
@@ -158,7 +171,9 @@
 
         # Load MSigDB file in GMT format
         msigdb = read_gmt(args.gmt_file)
-        gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
+        gene_sets_to_score = (
+            args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
+        )
         # Score genes by their ensembl ids using the score_genes_aucell function
         for _, row in msigdb.iterrows():
             gene_set_name = row["gene_set_name"]
@@ -169,13 +184,13 @@
                     adata.var[args.gene_symbols_field].isin(genes)
                 ].index
                 score_genes_aucell(
-                    adata, ens_gene_ids, f"AUCell_{gene_set_name}", args.use_raw
+                    adata, ens_gene_ids, f"AUCell_{gene_set_name}", args.use_raw, args.min_n
                 )
     elif args.gene_lists_to_score is not None and args.score_names is not None:
         gene_lists = args.gene_lists_to_score.split(":")
         score_names = args.score_names.split(",")
         run_for_genelists(
-            adata, gene_lists, score_names, args.use_raw, args.gene_symbols_field
+            adata, gene_lists, score_names, args.use_raw, args.gene_symbols_field, args.min_n
         )
 
     # Save the modified AnnData object or generate a file with cells as rows and the new score_names columns
```
```diff
--- a/decoupler_aucell_score.xml Fri Mar 15 12:18:05 2024 +0000
+++ b/decoupler_aucell_score.xml Mon Apr 15 13:20:21 2024 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy1" profile="20.05">
+<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy2" profile="20.05">
     <description>
         scores cells using the AUCell method for gene sets.
     </description>
@@ -20,6 +20,9 @@
         #end if
         --gene_symbols_field '$gene_symbols_field'
         $use_raw
+        #if $min_n_genes:
+        --min_n '$min_n_genes'
+        #end if
         #if $write_anndata:
         --write_anndata
         --output_file anndata_aucell.h5ad
@@ -43,6 +46,7 @@
                 <param name="score_names" type="text" label="Score names" />
             </when>
         </conditional>
+        <param name="min_n_genes" type="integer" label="Minimum number of genes" help="Minimum number of genes to match in each gene set" optional="true" value="5"/>
         <param name="gene_symbols_field" type="text" label="Gene symbols field" help="The field in the AnnData var table where gene symbols are stored."/>
         <param name="use_raw" type="boolean" value="false" truevalue="--use_raw" falsevalue="" label="Use raw data" help="Use RAW data in the AnnData instead of the X matrix."/>
         <param name="write_anndata" type="boolean" value="false" truevalue="--write_anndata" falsevalue="" label="Write the modified AnnData object" help="Whether to write or not the same AnnData file again with the signatures on it. If unselected, a text files of cells in rows and signatures in columns (as in Obs) is produced."/>
```