Mercurial > repos > ebi-gxa > decoupler_pseudobulk
annotate decoupler_aucell_score.py @ 6:ed2a77422e00 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
author | ebi-gxa |
---|---|
date | Mon, 15 Apr 2024 13:20:32 +0000 |
parents | f321c60167d4 |
children | 68a2b5445558 |
rev | line source |
---|---|
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
1 import argparse |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
2 import os |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
3 import tempfile |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
4 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
5 import anndata |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
6 import decoupler as dc |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
7 import pandas as pd |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
8 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
9 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
def read_gmt(gmt_file):
    """
    Reads a GMT file into a Pandas DataFrame.

    Each non-blank line of the file is split on tabs: the first field is the
    gene set name, the second field (description / URL) is discarded, and all
    remaining fields are the genes of the set.

    Parameters
    ----------
    gmt_file : str
        Path to the GMT file.

    Returns
    -------
    pd.DataFrame
        A DataFrame with one row per gene set and the columns "gene_set_name"
        and "genes", where "genes" is a comma separated string of the genes in
        the set. Blank lines are skipped; an empty file gives an empty
        DataFrame that still has both columns.

    >>> line = "SET_A\\thttp://example.org/SET_A\\tJAG1\\tNOTCH3\\tNOTCH2\\n"
    >>> line2 = "SET_B\\thttp://example.org/SET_B\\tB4GALT1\\tRHCG\\n"
    >>> with tempfile.TemporaryDirectory() as temp_dir:
    ...     temp_gmt = os.path.join(temp_dir, "temp_file.gmt")
    ...     with open(temp_gmt, "w") as f:
    ...         _ = f.write(line + "\\n" + line2)
    ...     df = read_gmt(temp_gmt)
    >>> df.shape[0]
    2
    >>> list(df.columns)
    ['gene_set_name', 'genes']
    >>> df.loc[df["gene_set_name"] == "SET_B"].genes.tolist()[0].startswith("B4GALT1")
    True
    """
    # Build one dictionary per gene set while streaming through the file
    gene_sets = []
    with open(gmt_file, "r") as f:
        for line in f:
            stripped = line.strip()
            # A blank (or whitespace-only) line carries no gene set; without
            # this guard it would become a row with an empty name and genes.
            if not stripped:
                continue
            fields = stripped.split("\t")
            # fields[1] is the description/URL and is intentionally dropped
            gene_sets.append(
                {"gene_set_name": fields[0], "genes": ",".join(fields[2:])}
            )

    # Fix the column set explicitly so an empty file still yields the schema
    return pd.DataFrame(gene_sets, columns=["gene_set_name", "genes"])
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
53 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
54 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
def score_genes_aucell(
    adata: anndata.AnnData, gene_list: list, score_name: str, use_raw=False, min_n_genes=5
):
    """Score genes using Aucell.

    Runs decoupler's ``run_aucell`` on ``adata`` with a single gene set made
    of ``gene_list`` and copies the resulting per-observation estimate into
    ``adata.obs[score_name]``. If decoupler raises a ``ValueError`` the gene
    list is skipped and a message is printed instead of failing.

    Parameters
    ----------
    adata : anndata.AnnData
        Object to score; modified in place (``.obsm["aucell_estimate"]`` is
        written by decoupler and ``.obs[score_name]`` by this function).
    gene_list : list
        Identifiers of the genes that make up the gene set.
    score_name : str
        Name of the gene set, also used as the name of the new ``.obs`` column.
    use_raw : bool, optional
        Passed through to ``dc.run_aucell``.
    min_n_genes : int, optional
        Passed to ``dc.run_aucell`` as ``min_n`` (minimum number of gene set
        members that decoupler requires to score the set).

    >>> import scanpy as sc
    >>> import decoupler as dc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> gene_list = adata.var[adata.var.index.str.startswith("RP")].index.tolist()
    >>> score_genes_aucell(adata, gene_list, "ribosomal_aucell", use_raw=False)
    >>> "ribosomal_aucell" in adata.obs.columns
    True
    """
    # make a data.frame with two columns, geneset and gene_id: geneset is
    # filled with score_name and gene_id with gene_list, one row per gene
    geneset_df = pd.DataFrame(
        {
            "gene_id": gene_list,
            "geneset": score_name,
        }
    )
    # run decoupler's run_aucell, catching the ValueError it raises when the
    # gene set cannot be scored so that other gene lists can still be processed
    try:
        dc.run_aucell(
            adata,
            net=geneset_df,
            source="geneset",
            target="gene_id",
            use_raw=use_raw,
            # previously the caller's threshold was accepted but never used
            min_n=min_n_genes,
        )
        # copy the .obsm['aucell_estimate'] column for this gene set to adata.obs
        adata.obs[score_name] = adata.obsm["aucell_estimate"][score_name]
    except ValueError as ve:
        print(f"Gene list {score_name} failed, skipping: {str(ve)}")
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
92 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
93 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
def run_for_genelists(
    adata, gene_lists, score_names, use_raw=False, gene_symbols_field="gene_symbols", min_n_genes=5
):
    """Score several gene lists with AUCell, one ``.obs`` column per list.

    Parameters
    ----------
    adata
        Object whose ``.var[gene_symbols_field]`` column is used to translate
        gene symbols into the identifiers found in the ``.var`` index.
    gene_lists
        Sequence of strings, each one a comma separated list of gene symbols.
    score_names
        Sequence of names, one per entry of ``gene_lists``; each score is
        requested under the name ``AUCell_<name>``.
    use_raw, min_n_genes
        Forwarded unchanged to ``score_genes_aucell``.
    gene_symbols_field : str
        Name of the ``adata.var`` column holding the gene symbols.

    Raises
    ------
    ValueError
        If ``gene_lists`` and ``score_names`` differ in length.
    """
    # Reject mismatched inputs up front so the scoring loop stays flat
    if len(gene_lists) != len(score_names):
        raise ValueError(
            "The number of gene lists (separated by :) and score names (separated by :) must be the same"
        )

    for label, joined_symbols in zip(score_names, gene_lists):
        symbols = joined_symbols.split(",")
        # Keep only the .var rows whose symbol is part of this gene list
        in_gene_list = adata.var[gene_symbols_field].isin(symbols)
        matched_ids = adata.var[in_gene_list].index
        score_genes_aucell(adata, matched_ids, f"AUCell_{label}", use_raw, min_n_genes)
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
112 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
113 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
114 if __name__ == "__main__": |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
115 # Create command-line arguments parser |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
116 parser = argparse.ArgumentParser(description="Score genes using Aucell") |
4
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
117 parser.add_argument( |
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
118 "--input_file", type=str, help="Path to input AnnData file", required=True |
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
119 ) |
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
120 parser.add_argument( |
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
121 "--output_file", type=str, help="Path to output file", required=True |
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
122 ) |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
123 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
124 # add argument for gene sets to score |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
125 parser.add_argument( |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
126 "--gene_sets_to_score", |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
127 type=str, |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
128 required=False, |
4
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
129 help="Optional comma separated list of gene sets to score (the need to be in the gmt file)", |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
130 ) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
131 # add argument for gene list (comma separated) to score |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
132 parser.add_argument( |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
133 "--gene_lists_to_score", |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
134 type=str, |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
135 required=False, |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
136 help="Comma separated list of genes to score. You can have more than one set of genes, separated by colon :", |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
137 ) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
138 # argument for the score name when using the gene list |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
139 parser.add_argument( |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
140 "--score_names", |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
141 type=str, |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
142 required=False, |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
143 help="Name of the score column when using the gene list. You can have more than one score name, separated by comma (one per gene list). It should be the same length as the number of gene lists.", |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
144 ) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
145 parser.add_argument( |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
146 "--gene_symbols_field", |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
147 type=str, |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
148 help="Name of the gene symbols field in the AnnData object", |
4
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
149 required=True, |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
150 ) |
6
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
151 # argument for min_n: minimum number of targets per source; sources with fewer targets are removed. |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
152 parser.add_argument( |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
153 "--min_n", |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
154 type=int, |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
155 required=False, |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
156 default=5, |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
157 help="Minimum of targets per source. If less, sources are removed.", |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
158 ) |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
159 parser.add_argument("--use_raw", action="store_true", help="Use raw data") |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
160 parser.add_argument( |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
161 "--write_anndata", action="store_true", help="Write the modified AnnData object" |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
162 ) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
163 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
164 # Parse command-line arguments |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
165 args = parser.parse_args() |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
166 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
167 # Load input AnnData object |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
168 adata = anndata.read_h5ad(args.input_file) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
169 |
4
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
170 if args.gmt_file is not None: |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
171 # Load MSigDB file in GMT format |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
172 msigdb = read_gmt(args.gmt_file) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
173 |
6
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
174 gene_sets_to_score = ( |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
175 args.gene_sets_to_score.split(",") if args.gene_sets_to_score else [] |
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
176 ) |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
177 # Score genes by their ensembl ids using the score_genes_aucell function |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
178 for _, row in msigdb.iterrows(): |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
179 gene_set_name = row["gene_set_name"] |
4
f321c60167d4
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
ebi-gxa
parents:
3
diff
changeset
|
180 if not gene_sets_to_score or gene_set_name in gene_sets_to_score: |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
181 genes = row["genes"].split(",") |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
182 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
183 ens_gene_ids = adata.var[ |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
184 adata.var[args.gene_symbols_field].isin(genes) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
185 ].index |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
186 score_genes_aucell( |
6
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
187 adata, ens_gene_ids, f"AUCell_{gene_set_name}", args.use_raw, args.min_n |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
188 ) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
189 elif args.gene_lists_to_score is not None and args.score_names is not None: |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
190 gene_lists = args.gene_lists_to_score.split(":") |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
191 score_names = args.score_names.split(",") |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
192 run_for_genelists( |
6
ed2a77422e00
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents:
4
diff
changeset
|
193 adata, gene_lists, score_names, args.use_raw, args.gene_symbols_field, args.min_n |
3
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
194 ) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
195 |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
196 # Save the modified AnnData object or generate a file with cells as rows and the new score_names columns |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
197 if args.write_anndata: |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
198 adata.write_h5ad(args.output_file) |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
199 else: |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
200 new_columns = [col for col in adata.obs.columns if col.startswith("AUCell_")] |
4fa5f370599f
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff
changeset
|
201 adata.obs[new_columns].to_csv(args.output_file, sep="\t", index=True) |