Mercurial > repos > ebi-gxa > score_genes_aucell
comparison decoupler_aucell_score.py @ 1:e024d8280886 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
author | ebi-gxa |
---|---|
date | Thu, 16 Nov 2023 20:05:21 +0000 |
parents | 1e8697931d73 |
children | e887a7e8c5b4 |
comparison
equal
deleted
inserted
replaced
0:1e8697931d73 | 1:e024d8280886 |
---|---|
107 | 107 |
108 | 108 |
109 if __name__ == "__main__": | 109 if __name__ == "__main__": |
110 # Create command-line arguments parser | 110 # Create command-line arguments parser |
111 parser = argparse.ArgumentParser(description="Score genes using Aucell") | 111 parser = argparse.ArgumentParser(description="Score genes using Aucell") |
112 parser.add_argument("--input_file", type=str, help="Path to input AnnData file") | 112 parser.add_argument( |
113 parser.add_argument("--output_file", type=str, help="Path to output file") | 113 "--input_file", type=str, help="Path to input AnnData file", required=True |
114 ) | |
115 parser.add_argument( | |
116 "--output_file", type=str, help="Path to output file", required=True | |
117 ) | |
114 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False) | 118 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False) |
115 # add argument for gene sets to score | 119 # add argument for gene sets to score |
116 parser.add_argument( | 120 parser.add_argument( |
117 "--gene_sets_to_score", | 121 "--gene_sets_to_score", |
118 type=str, | 122 type=str, |
119 required=False, | 123 required=False, |
120 help="Comma separated list of gene sets to score (the need to be in the gmt file)", | 124 help="Optional comma separated list of gene sets to score (the need to be in the gmt file)", |
121 ) | 125 ) |
122 # add argument for gene list (comma separated) to score | 126 # add argument for gene list (comma separated) to score |
123 parser.add_argument( | 127 parser.add_argument( |
124 "--gene_lists_to_score", | 128 "--gene_lists_to_score", |
125 type=str, | 129 type=str, |
135 ) | 139 ) |
136 parser.add_argument( | 140 parser.add_argument( |
137 "--gene_symbols_field", | 141 "--gene_symbols_field", |
138 type=str, | 142 type=str, |
139 help="Name of the gene symbols field in the AnnData object", | 143 help="Name of the gene symbols field in the AnnData object", |
144 required=True, | |
140 ) | 145 ) |
141 parser.add_argument("--use_raw", action="store_true", help="Use raw data") | 146 parser.add_argument("--use_raw", action="store_true", help="Use raw data") |
142 parser.add_argument( | 147 parser.add_argument( |
143 "--write_anndata", action="store_true", help="Write the modified AnnData object" | 148 "--write_anndata", action="store_true", help="Write the modified AnnData object" |
144 ) | 149 ) |
147 args = parser.parse_args() | 152 args = parser.parse_args() |
148 | 153 |
149 # Load input AnnData object | 154 # Load input AnnData object |
150 adata = anndata.read_h5ad(args.input_file) | 155 adata = anndata.read_h5ad(args.input_file) |
151 | 156 |
152 if args.gene_sets_to_score is not None and args.gmt_file is not None: | 157 if args.gmt_file is not None: |
153 # Load MSigDB file in GMT format | 158 # Load MSigDB file in GMT format |
154 msigdb = read_gmt(args.gmt_file) | 159 msigdb = read_gmt(args.gmt_file) |
155 | 160 |
156 gene_sets_to_score = args.gene_sets_to_score.split(",") | 161 gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else [] |
157 # Score genes by their ensembl ids using the score_genes_aucell function | 162 # Score genes by their ensembl ids using the score_genes_aucell function |
158 for _, row in msigdb.iterrows(): | 163 for _, row in msigdb.iterrows(): |
159 gene_set_name = row["gene_set_name"] | 164 gene_set_name = row["gene_set_name"] |
160 if gene_set_name in gene_sets_to_score: | 165 if not gene_sets_to_score or gene_set_name in gene_sets_to_score: |
161 genes = row["genes"].split(",") | 166 genes = row["genes"].split(",") |
162 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set | 167 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set |
163 ens_gene_ids = adata.var[ | 168 ens_gene_ids = adata.var[ |
164 adata.var[args.gene_symbols_field].isin(genes) | 169 adata.var[args.gene_symbols_field].isin(genes) |
165 ].index | 170 ].index |