comparison decoupler_aucell_score.py @ 1:e024d8280886 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
author ebi-gxa
date Thu, 16 Nov 2023 20:05:21 +0000
parents 1e8697931d73
children e887a7e8c5b4
comparison
equal deleted inserted replaced
0:1e8697931d73 1:e024d8280886
107 107
108 108
109 if __name__ == "__main__": 109 if __name__ == "__main__":
110 # Create command-line arguments parser 110 # Create command-line arguments parser
111 parser = argparse.ArgumentParser(description="Score genes using Aucell") 111 parser = argparse.ArgumentParser(description="Score genes using Aucell")
112 parser.add_argument("--input_file", type=str, help="Path to input AnnData file") 112 parser.add_argument(
113 parser.add_argument("--output_file", type=str, help="Path to output file") 113 "--input_file", type=str, help="Path to input AnnData file", required=True
114 )
115 parser.add_argument(
116 "--output_file", type=str, help="Path to output file", required=True
117 )
114 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False) 118 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False)
115 # add argument for gene sets to score 119 # add argument for gene sets to score
116 parser.add_argument( 120 parser.add_argument(
117 "--gene_sets_to_score", 121 "--gene_sets_to_score",
118 type=str, 122 type=str,
119 required=False, 123 required=False,
120 help="Comma separated list of gene sets to score (the need to be in the gmt file)", 124 help="Optional comma separated list of gene sets to score (the need to be in the gmt file)",
121 ) 125 )
122 # add argument for gene list (comma separated) to score 126 # add argument for gene list (comma separated) to score
123 parser.add_argument( 127 parser.add_argument(
124 "--gene_lists_to_score", 128 "--gene_lists_to_score",
125 type=str, 129 type=str,
135 ) 139 )
136 parser.add_argument( 140 parser.add_argument(
137 "--gene_symbols_field", 141 "--gene_symbols_field",
138 type=str, 142 type=str,
139 help="Name of the gene symbols field in the AnnData object", 143 help="Name of the gene symbols field in the AnnData object",
144 required=True,
140 ) 145 )
141 parser.add_argument("--use_raw", action="store_true", help="Use raw data") 146 parser.add_argument("--use_raw", action="store_true", help="Use raw data")
142 parser.add_argument( 147 parser.add_argument(
143 "--write_anndata", action="store_true", help="Write the modified AnnData object" 148 "--write_anndata", action="store_true", help="Write the modified AnnData object"
144 ) 149 )
147 args = parser.parse_args() 152 args = parser.parse_args()
148 153
149 # Load input AnnData object 154 # Load input AnnData object
150 adata = anndata.read_h5ad(args.input_file) 155 adata = anndata.read_h5ad(args.input_file)
151 156
152 if args.gene_sets_to_score is not None and args.gmt_file is not None: 157 if args.gmt_file is not None:
153 # Load MSigDB file in GMT format 158 # Load MSigDB file in GMT format
154 msigdb = read_gmt(args.gmt_file) 159 msigdb = read_gmt(args.gmt_file)
155 160
156 gene_sets_to_score = args.gene_sets_to_score.split(",") 161 gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
157 # Score genes by their ensembl ids using the score_genes_aucell function 162 # Score genes by their ensembl ids using the score_genes_aucell function
158 for _, row in msigdb.iterrows(): 163 for _, row in msigdb.iterrows():
159 gene_set_name = row["gene_set_name"] 164 gene_set_name = row["gene_set_name"]
160 if gene_set_name in gene_sets_to_score: 165 if not gene_sets_to_score or gene_set_name in gene_sets_to_score:
161 genes = row["genes"].split(",") 166 genes = row["genes"].split(",")
162 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set 167 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set
163 ens_gene_ids = adata.var[ 168 ens_gene_ids = adata.var[
164 adata.var[args.gene_symbols_field].isin(genes) 169 adata.var[args.gene_symbols_field].isin(genes)
165 ].index 170 ].index