Mercurial > repos > ebi-gxa > decoupler_pseudobulk
diff decoupler_aucell_score.py @ 4:f321c60167d4 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
author | ebi-gxa |
---|---|
date | Thu, 16 Nov 2023 20:05:16 +0000 |
parents | 4fa5f370599f |
children | ed2a77422e00 |
line wrap: on
line diff
--- a/decoupler_aucell_score.py Thu Nov 09 11:35:57 2023 +0000
+++ b/decoupler_aucell_score.py Thu Nov 16 20:05:16 2023 +0000
@@ -109,15 +109,19 @@
 if __name__ == "__main__":
     # Create command-line arguments parser
     parser = argparse.ArgumentParser(description="Score genes using Aucell")
-    parser.add_argument("--input_file", type=str, help="Path to input AnnData file")
-    parser.add_argument("--output_file", type=str, help="Path to output file")
+    parser.add_argument(
+        "--input_file", type=str, help="Path to input AnnData file", required=True
+    )
+    parser.add_argument(
+        "--output_file", type=str, help="Path to output file", required=True
+    )
     parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False)
     # add argument for gene sets to score
     parser.add_argument(
         "--gene_sets_to_score",
         type=str,
         required=False,
-        help="Comma separated list of gene sets to score (the need to be in the gmt file)",
+        help="Optional comma separated list of gene sets to score (the need to be in the gmt file)",
     )
     # add argument for gene list (comma separated) to score
     parser.add_argument(
@@ -137,6 +141,7 @@
         "--gene_symbols_field",
         type=str,
         help="Name of the gene symbols field in the AnnData object",
+        required=True,
     )
     parser.add_argument("--use_raw", action="store_true", help="Use raw data")
     parser.add_argument(
@@ -149,15 +154,15 @@
     # Load input AnnData object
     adata = anndata.read_h5ad(args.input_file)

-    if args.gene_sets_to_score is not None and args.gmt_file is not None:
+    if args.gmt_file is not None:
         # Load MSigDB file in GMT format
         msigdb = read_gmt(args.gmt_file)

-        gene_sets_to_score = args.gene_sets_to_score.split(",")
+        gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
         # Score genes by their ensembl ids using the score_genes_aucell function
         for _, row in msigdb.iterrows():
             gene_set_name = row["gene_set_name"]
-            if gene_set_name in gene_sets_to_score:
+            if not gene_sets_to_score or gene_set_name in gene_sets_to_score:
                 genes = row["genes"].split(",")
                 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set
                 ens_gene_ids = adata.var[