Mercurial > repos > ebi-gxa > decoupler_pathway_inference
annotate decoupler_pseudobulk.py @ 11:db14ac3f6b43 draft default tip
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
author | ebi-gxa |
---|---|
date | Wed, 19 Feb 2025 16:55:51 +0000 |
parents | 2c5686d627c0 |
children |
rev | line source |
---|---|
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
1 import argparse |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
2 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
3 import anndata |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
4 import decoupler |
11
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
5 import numpy as np |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
6 import pandas as pd |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
7 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
8 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def get_pseudobulk(
    adata,
    sample_col,
    groups_col,
    layer=None,
    mode="sum",
    min_cells=10,
    min_counts=1000,
    use_raw=False,
):
    """
    Build a pseudobulk profile by delegating to ``decoupler.get_pseudobulk``.

    Every argument is forwarded unchanged (``adata`` positionally, the rest
    by keyword) and the value returned by decoupler is handed back as is.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    """
    # Collect the keyword options once so the delegation is a single call.
    pseudobulk_options = {
        "sample_col": sample_col,
        "groups_col": groups_col,
        "layer": layer,
        "mode": mode,
        "use_raw": use_raw,
        "min_cells": min_cells,
        "min_counts": min_counts,
    }
    return decoupler.get_pseudobulk(adata, **pseudobulk_options)
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
36 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
37 |
11
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
def create_pseudo_replicates(adata, sample_key, num_replicates, seed=None):
    """
    Create pseudo replicates for each sample in the sample_key groups.

    The observations of every sample are shuffled and split into
    ``num_replicates`` groups whose sizes differ by at most one.  Each
    observation is labelled ``<sample>_rep<i>`` in a new
    ``adata.obs`` column named ``f"{sample_key}_pseudo"``.  When a sample
    has fewer observations than ``num_replicates``, only the first
    replicates are populated.

    Parameters
    ----------
    adata : anndata.AnnData
        The AnnData object. Its ``obs`` table is modified in place.
    sample_key : str
        The column in adata.obs that defines the samples.
    num_replicates : int
        Number of pseudo replicates to create per sample. Must be >= 1.
    seed : int, optional
        When given, NumPy's global random state is seeded with this value
        before shuffling so that the assignment is reproducible.

    Returns
    -------
    anndata.AnnData
        The same AnnData object, with the pseudo replicate column added.

    Raises
    ------
    ValueError
        If ``num_replicates`` is smaller than 1.

    Examples
    --------
    >>> import anndata
    >>> import pandas as pd
    >>> import numpy as np
    >>> data = {
    ...     'obs': pd.DataFrame({'sample': ['A', 'A', 'B', 'B']}),
    ...     'X': np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
    ... }
    >>> adata = anndata.AnnData(X=data['X'], obs=data['obs'])
    >>> adata = create_pseudo_replicates(adata, 'sample', 2)
    >>> sorted(adata.obs['sample_pseudo'].tolist())
    ['A_rep1', 'A_rep2', 'B_rep1', 'B_rep2']
    """
    if num_replicates < 1:
        raise ValueError(
            f"num_replicates must be at least 1, got {num_replicates}"
        )

    # NOTE: this seeds NumPy's *global* RNG, which is kept on purpose so
    # that results for a given seed stay identical to earlier versions.
    if seed is not None:
        np.random.seed(seed)

    new_sample_key = f"{sample_key}_pseudo"
    sample_labels = adata.obs[sample_key]
    adata.obs[new_sample_key] = sample_labels.astype(str)

    for sample in sample_labels.unique():
        # Work on a copy so the in-place shuffle can never touch the
        # index data owned by the DataFrame.
        sample_indices = adata.obs[
            sample_labels == sample].index.to_numpy(copy=True)
        np.random.shuffle(sample_indices)  # Shuffle the indices to randomize

        # array_split spreads the remainder over the first replicates, so
        # every observation receives a replicate label (an integer-division
        # chunk size would leave the trailing observations unlabelled).
        chunks = np.array_split(sample_indices, num_replicates)
        for i, replicate_indices in enumerate(chunks):
            if len(replicate_indices) == 0:
                continue  # fewer observations than replicates
            adata.obs.loc[replicate_indices, new_sample_key] = (
                adata.obs.loc[replicate_indices, new_sample_key]
                + f"_rep{i + 1}"
            )

    return adata
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
90 |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
91 |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def prepend_c_to_index(index_value):
    """
    Prefix ``index_value`` with "C" when its first character is a digit.

    Falsy values (such as an empty string or None) and values that do not
    start with a digit are returned unchanged.
    """
    # Nothing to inspect for empty / falsy values.
    if not index_value:
        return index_value

    first_char = index_value[0]
    if not first_char.isdigit():
        return index_value
    return "C" + index_value
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
96 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
97 |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
98 def genes_to_ignore_per_contrast_field( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
99 count_matrix_df, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
100 samples_metadata, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
101 sample_metadata_col_contrasts, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
102 min_counts_per_sample=5, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
103 use_cpms=False, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
104 ): |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
105 """ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
106 # This function calculates the genes to ignore per contrast field |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
107 # (e.g., bulk_labels, louvain). |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
108 # It does this by first getting the count matrix for each group, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
109 # then identifying genes with a count below a specified threshold. |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
110 # The genes to ignore are those that are present in more than a specified |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
111 # number of groups. |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
112 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
113 >>> import pandas as pd |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
114 >>> samples_metadata = pd.DataFrame({'sample': |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
115 ... ['S1', 'S2', 'S3', |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
116 ... 'S4', 'S5', 'S6'], |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
117 ... 'contrast_field': |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
118 ... ['A', 'A', 'A', 'B', 'B', 'B']}) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
119 >>> count_matrix_df = pd.DataFrame( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
120 ... {'S1': |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
121 ... [30, 1, 40, 50, 30], |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
122 ... 'S2': |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
123 ... [40, 2, 60, 50, 80], |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
124 ... 'S3': |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
125 ... [80, 1, 60, 50, 50], |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
126 ... 'S4': [1, 50, 50, 50, 2], |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
127 ... 'S5': [3, 40, 40, 40, 2], |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
128 ... 'S6': [0, 50, 50, 50, 1]}) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
129 >>> count_matrix_df.index = ['Gene1', 'Gene2', 'Gene3', 'Gene4', 'Gene5'] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
130 >>> df = genes_to_ignore_per_contrast_field(count_matrix_df, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
131 ... samples_metadata, min_counts_per_sample=5, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
132 ... sample_metadata_col_contrasts='contrast_field') |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
133 >>> df[df['contrast_field'] == 'A'].genes_to_ignore.tolist()[0] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
134 'Gene2' |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
135 >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[0] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
136 'Gene1' |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
137 >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[1] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
138 'Gene5' |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
139 """ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
140 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
141 # Initialize two parallel lists: the contrast field value and the gene to ignore for it |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
142 contrast_fields = [] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
143 genes_to_ignore = [] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
144 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
145 # Iterate over the contrast fields |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
146 for contrast_field in samples_metadata[ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
147 sample_metadata_col_contrasts |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
148 ].unique(): |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
149 # Get the count matrix for the current contrast field |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
150 count_matrix_field = count_matrix_df.loc[ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
151 :, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
152 ( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
153 samples_metadata[sample_metadata_col_contrasts] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
154 == contrast_field |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
155 ).tolist(), |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
156 ] |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
157 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
158 # We derive min_counts from the number of samples with that |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
159 # contrast_field value |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
160 min_counts = count_matrix_field.shape[1] * min_counts_per_sample |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
161 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
162 if use_cpms: |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
163 # Convert counts to counts per million (CPM) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
164 count_matrix_field = ( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
165 count_matrix_field.div(count_matrix_field.sum(axis=1), axis=0) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
166 * 1e6 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
167 ) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
168 min_counts = 1 # use 1 CPM |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
169 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
170 # Sum the counts of each gene over the samples of the current contrast field |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
171 # (this produces a vector of counts per gene) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
172 total_counts_per_gene = count_matrix_field.sum(axis=1) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
173 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
174 # Identify genes with a count below the specified threshold |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
175 genes = total_counts_per_gene[ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
176 total_counts_per_gene < min_counts |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
177 ].index.tolist() |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
178 if len(genes) > 0: |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
179 # genes_to_ignore[contrast_field] = " ".join(genes) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
180 for gene in genes: |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
181 genes_to_ignore.append(gene) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
182 contrast_fields.append(contrast_field) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
183 # transform gene_to_ignore to a DataFrame |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
184 # genes_to_ignore_df = pd.DataFrame(genes_to_ignore.items(), |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
185 # columns=["contrast_field", "genes_to_ignore"]) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
186 genes_to_ignore_df = pd.DataFrame( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
187 {"contrast_field": contrast_fields, "genes_to_ignore": genes_to_ignore} |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
188 ) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
189 return genes_to_ignore_df |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
190 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
191 |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
192 # write results for loading into DESeq2 |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
def write_DESeq2_inputs(
    pdata,
    layer=None,
    output_dir="",
    factor_fields=None,
    min_counts_per_sample_marking=20,
):
    """
    Write the tab-separated tables used as DESeq2 inputs.

    Files written (all prefixed with ``output_dir``):

    * ``col_metadata.tsv``: ``pdata.obs`` with a sanitised index,
      restricted to ``factor_fields`` (in that order) when given.
    * ``gene_metadata.tsv``: ``pdata.var``.
    * ``counts_matrix.tsv``: ``pdata.X`` (or ``pdata.layers[layer]``)
      transposed, so genes are rows and samples are columns.
    * ``genes_to_ignore_per_contrast_field.tsv``: only when
      ``factor_fields`` is given; the output of
      ``genes_to_ignore_per_contrast_field`` for ``factor_fields[0]``.

    :param pdata: object exposing ``obs``, ``var``, ``X`` and ``layers``
        (presumably the pseudobulk AnnData, as in the example below).
    :param layer: name of the layer to write; ``None`` writes ``X``.
    :param output_dir: directory prefix; a trailing ``/`` is appended
        when it is non-empty and lacks one.
    :param factor_fields: obs columns to keep in the column metadata;
        the first one is used as the contrast field.
    :param min_counts_per_sample_marking: forwarded as
        ``min_counts_per_sample`` to
        ``genes_to_ignore_per_contrast_field``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> write_DESeq2_inputs(pseudobulk)
    """
    # A non-empty output_dir must end with "/" because file names are
    # appended to it by plain string formatting.
    if output_dir != "" and not output_dir.endswith("/"):
        output_dir = output_dir + "/"

    sample_table = pdata.obs.copy()
    # Sample names starting with digits get a leading C instead.
    sample_table.rename(index=prepend_c_to_index, inplace=True)
    # Dashes are read as points in R colnames; spaces are replaced too.
    for unwanted in ("-", " "):
        sample_table.index = sample_table.index.str.replace(unwanted, "_")

    # Column (sample) metadata: index plus the requested factor columns,
    # or every obs column when no factor fields were requested.
    if factor_fields:
        metadata_to_write = sample_table[factor_fields]
    else:
        metadata_to_write = sample_table
    metadata_to_write.to_csv(
        f"{output_dir}col_metadata.tsv", sep="\t", index=True
    )

    # Gene metadata.
    pdata.var.to_csv(f"{output_dir}gene_metadata.tsv", sep="\t", index=True)

    # Counts matrix of the chosen layer, transposed to genes x samples.
    matrix = pdata.X if layer is None else pdata.layers[layer]
    counts = pd.DataFrame(
        matrix.T, index=pdata.var.index, columns=sample_table.index
    )
    counts.to_csv(f"{output_dir}counts_matrix.tsv", sep="\t", index_label="")

    if not factor_fields:
        return

    # Mark lowly expressed genes per value of the first factor field.
    ignored = genes_to_ignore_per_contrast_field(
        count_matrix_df=counts,
        samples_metadata=sample_table,
        sample_metadata_col_contrasts=factor_fields[0],
        min_counts_per_sample=min_counts_per_sample_marking,
    )
    ignored.to_csv(
        f"{output_dir}genes_to_ignore_per_contrast_field.tsv", sep="\t"
    )
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
251 |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
252 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def plot_pseudobulk_samples(
    pseudobulk_data,
    groupby,
    figsize=(10, 10),
    save_path=None,
):
    """
    Draw the decoupler pseudobulk samples plot and save or show it.

    :param pseudobulk_data: pseudobulk object passed straight to
        ``decoupler.plot_psbulk_samples``.
    :param groupby: grouping passed as ``groupby`` to decoupler.
    :param figsize: figure size passed as ``figsize`` to decoupler.
    :param save_path: directory in which ``pseudobulk_samples.png`` is
        written; when falsy the figure is shown instead.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_pseudobulk_samples(pseudobulk,
    ...     groupby=["bulk_labels", "louvain"],
    ...     figsize=(10, 10))
    """
    figure = decoupler.plot_psbulk_samples(
        pseudobulk_data,
        groupby=groupby,
        figsize=figsize,
        return_fig=True,
    )
    # Without a destination the figure is only displayed.
    if not save_path:
        figure.show()
        return
    figure.savefig(f"{save_path}/pseudobulk_samples.png")
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
275 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
276 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def plot_filter_by_expr(
    pseudobulk_data,
    group,
    min_count=None,
    min_total_count=None,
    save_path=None,
):
    """
    Draw decoupler's filter-by-expression diagnostic plot for a pseudobulk
    AnnData and either save it or show it.

    Parameters
    ----------
    pseudobulk_data
        Pseudobulk object handed straight to
        ``decoupler.plot_filter_by_expr``.
    group
        Forwarded as the ``group`` argument of the decoupler call.
    min_count, min_total_count
        Optional thresholds. A value of ``None`` is *not* forwarded, so
        decoupler falls back to its own default for that threshold instead
        of receiving ``None`` in place of a number.
    save_path
        If truthy, the figure is written to
        ``<save_path>/filter_by_expr.png``; otherwise ``fig.show()`` is
        called.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_filter_by_expr(pseudobulk, group="bulk_labels",
    ...     min_count=10, min_total_count=200)
    """
    # Only pass thresholds the caller actually set; an explicit None would
    # override decoupler's numeric defaults.
    thresholds = {
        name: value
        for name, value in (
            ("min_count", min_count),
            ("min_total_count", min_total_count),
        )
        if value is not None
    }
    fig = decoupler.plot_filter_by_expr(
        pseudobulk_data,
        group=group,
        return_fig=True,
        **thresholds,
    )
    if save_path:
        fig.savefig(f"{save_path}/filter_by_expr.png")
    else:
        fig.show()
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
303 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
304 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def filter_by_expr(pdata, min_count=None, min_total_count=None):
    """
    Return a copy of ``pdata`` restricted to the genes selected by
    ``decoupler.filter_by_expr``.

    Parameters
    ----------
    pdata
        Pseudobulk object; it is passed to ``decoupler.filter_by_expr`` and
        then subset on its second axis with the returned selection.
    min_count, min_total_count
        Optional thresholds. A value of ``None`` is *not* forwarded, so
        decoupler falls back to its own default for that threshold instead
        of receiving ``None`` in place of a number.

    Returns
    -------
    A copy (``.copy()``) of ``pdata[:, genes]``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> pdata_filt = filter_by_expr(pseudobulk,
    ...     min_count=10, min_total_count=200)
    """
    # Only pass thresholds the caller actually set; an explicit None would
    # override decoupler's numeric defaults.
    thresholds = {
        name: value
        for name, value in (
            ("min_count", min_count),
            ("min_total_count", min_total_count),
        )
        if value is not None
    }
    genes = decoupler.filter_by_expr(pdata, **thresholds)
    return pdata[:, genes].copy()
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
318 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
319 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def check_fields(fields, adata, obs=True, context=None):
    """
    Ensure every name in ``fields`` is a column of ``adata.obs`` (or of
    ``adata.var`` when ``obs`` is false).

    Parameters
    ----------
    fields
        Iterable of column names that must all be present.
    adata
        Object exposing ``obs`` and ``var`` tables that have a ``columns``
        attribute.
    obs
        Check ``adata.obs`` when true, ``adata.var`` otherwise.
    context
        Optional text describing where the fields came from; it is added
        to the error message.

    Raises
    ------
    ValueError
        If at least one requested field is missing. The message names the
        missing fields and lists the available columns in their original
        order.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> check_fields(["bulk_labels", "louvain"], adata, obs=True)
    """
    slot = "obs" if obs else "var"
    available = list(getattr(adata, slot).columns)
    known = set(available)
    missing = [field for field in fields if field not in known]
    if not missing:
        return

    # Callers may pass context with surrounding blanks; strip it so the
    # message has no doubled spaces.
    legend = f" (passed in {str(context).strip()})" if context else ""
    raise ValueError(
        f"The following fields{legend} are not present in adata.{slot}: "
        f"{missing}. Possible fields are: {available}"
    )
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
344 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
345 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
346 def main(args): |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
347 # Load AnnData object from file |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
348 adata = anndata.read_h5ad(args.adata_file) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
349 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
350 # Merge adata.obs fields specified in args.adata_obs_fields_to_merge |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
351 if args.adata_obs_fields_to_merge: |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
352 # first split potential groups by ":" and iterate over them |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
353 for group in args.adata_obs_fields_to_merge.split(":"): |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
354 fields = group.split(",") |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
355 check_fields(fields, adata) |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
356 merge_adata_obs_fields(fields, adata) |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
357 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
358 check_fields([args.groupby, args.sample_key], adata) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
359 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
360 factor_fields = None |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
361 if args.factor_fields: |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
362 factor_fields = args.factor_fields.split(",") |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
363 check_fields(factor_fields, adata) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
364 |
11
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
365 # Create pseudo replicates if specified |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
366 if args.num_pseudo_replicates: |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
367 adata = create_pseudo_replicates( |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
368 adata, args.sample_key, args.num_pseudo_replicates, seed=args.seed |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
369 ) |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
370 args.sample_key = f"{args.sample_key}_pseudo" |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
371 |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
372 print(f"Using mode: {args.mode}") |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
373 # Perform pseudobulk analysis |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
374 pseudobulk_data = get_pseudobulk( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
375 adata, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
376 sample_col=args.sample_key, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
377 groups_col=args.groupby, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
378 layer=args.layer, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
379 mode=args.mode, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
380 use_raw=args.use_raw, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
381 min_cells=args.min_cells, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
382 min_counts=args.min_counts, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
383 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
384 |
4
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
385 print("Created pseudo-bulk AnnData, checking if fields still make sense.") |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
386 print( |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
387 "If this fails this check, it might mean that you asked for factors \ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
388 that are not compatible with you sample identifiers (ie. asked for \ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
389 phase in the factors, but each sample contains more than one phase,\ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
390 try joining fields)." |
4
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
391 ) |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
392 if factor_fields: |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
393 check_fields( |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
394 factor_fields, |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
395 pseudobulk_data, |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
396 context=" after creation of pseudo-bulk AnnData", |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
397 ) |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
398 print("Factors requested are adequate for the pseudo-bulked AnnData!") |
6c30272fb587
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents:
3
diff
changeset
|
399 |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
400 # Plot pseudobulk samples |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
401 plot_pseudobulk_samples( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
402 pseudobulk_data, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
403 args.groupby, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
404 save_path=args.save_path, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
405 figsize=args.plot_samples_figsize, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
406 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
407 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
408 plot_filter_by_expr( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
409 pseudobulk_data, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
410 group=args.groupby, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
411 min_count=args.min_counts, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
412 min_total_count=args.min_total_counts, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
413 save_path=args.save_path, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
414 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
415 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
416 # Filter by expression if enabled |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
417 if args.filter_expr: |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
418 filtered_adata = filter_by_expr( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
419 pseudobulk_data, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
420 min_count=args.min_counts, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
421 min_total_count=args.min_total_counts, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
422 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
423 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
424 pseudobulk_data = filtered_adata |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
425 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
426 # Save the pseudobulk data |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
427 if args.anndata_output_path: |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
428 pseudobulk_data.write_h5ad( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
429 args.anndata_output_path, compression="gzip" |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
430 ) |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
431 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
432 write_DESeq2_inputs( |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
433 pseudobulk_data, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
434 output_dir=args.deseq2_output_path, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
435 factor_fields=factor_fields, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
436 min_counts_per_sample_marking=args.min_counts_per_sample_marking, |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
437 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
438 |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
439 # if contrasts file is provided, produce a file with genes that should be |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
440 # filtered for each contrasts |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
441 if args.contrasts_file: |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
442 contrast_genes_df = identify_genes_to_filter_per_contrast( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
443 contrast_file=args.contrasts_file, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
444 min_perc_cells_expression=args.min_gene_exp_perc_per_cell, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
445 adata=adata, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
446 obs_field=args.groupby |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
447 ) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
448 contrast_genes_df.to_csv( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
449 f"{args.save_path}/genes_to_filter_by_contrast.tsv", |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
450 sep="\t", |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
451 index=False, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
452 ) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
453 |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
454 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def merge_adata_obs_fields(obs_fields_to_merge, adata):
    """
    Merge several adata.obs fields into a single string column, in place.

    The new column is named by joining the field names with "_" and holds,
    for every observation, the string values of those fields joined with
    "_" in the same order.

    Parameters
    ----------
    obs_fields_to_merge : list of str
        Names of the fields in adata.obs to merge, in the order in which
        they should be concatenated.
    adata : anndata.AnnData
        The AnnData object; its ``obs`` table is modified in place.

    Returns
    -------
    None
        The merged column is added to ``adata.obs``.

    Raises
    ------
    ValueError
        If any of the requested fields is missing from adata.obs. In that
        case adata.obs is left untouched.

    docstring tests:
    >>> import scanpy as sc
    >>> ad = sc.datasets.pbmc68k_reduced()
    >>> merge_adata_obs_fields(["bulk_labels","louvain"], ad)
    >>> ad.obs.columns
    Index(['bulk_labels', 'n_genes', 'percent_mito', 'n_counts', 'S_score',
           'G2M_score', 'phase', 'louvain', 'bulk_labels_louvain'],
          dtype='object')
    """
    # Validate every field before touching adata.obs, so that a missing
    # field cannot leave a half-built merged column behind.
    for field in obs_fields_to_merge:
        if field not in adata.obs.columns:
            raise ValueError(
                f"The '{field}' column is not present in adata.obs."
            )

    field_name = "_".join(obs_fields_to_merge)

    # Build the merged values from scratch instead of appending to whatever
    # is already stored under field_name: this keeps repeated calls
    # idempotent and handles a single-field list (where field_name is the
    # field itself) without duplicating its values.
    merged = None
    for field in obs_fields_to_merge:
        values = adata.obs[field].astype(str)
        merged = values if merged is None else merged + "_" + values

    # An empty field list leaves adata.obs unchanged.
    if merged is not None:
        adata.obs[field_name] = merged
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
492 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
493 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
494 def identify_genes_to_filter_per_contrast( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
495 contrast_file, min_perc_cells_expression, adata, obs_field |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
496 ): |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
497 """ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
498 Identify genes to filter per contrast based on expression percentage. |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
499 We need those genes to be under the threshold for all conditions |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
500 in a contrast to be identified for further filtering. If |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
501 one condition has the gene expressed above the threshold, the gene |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
502 becomes of interest (it can be highly up or down regulated). |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
503 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
504 Parameters |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
505 ---------- |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
506 contrast_file : str |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
507 Path to the contrasts file. |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
508 min_perc_cells_expression : float |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
509 Minimum percentage of cells that should express a gene. |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
510 adata: adata |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
511 Original AnnData file |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
512 obs_field: str |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
513 Field in the AnnData observations where the contrasts are defined. |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
514 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
515 Returns |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
516 ------- |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
517 None |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
518 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
519 Examples |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
520 -------- |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
521 >>> import anndata |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
522 >>> import pandas as pd |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
523 >>> import numpy as np |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
524 >>> import os |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
525 >>> from io import StringIO |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
526 >>> contrast_file = StringIO(f"contrast{os.linesep}condition1-\ |
7
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
527 condition2{os.linesep}\ |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
528 2*(condition1)-condition2{os.linesep}") |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
529 >>> min_perc_cells_expression = 30.0 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
530 >>> data = { |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
531 ... 'obs': pd.DataFrame({'condition': ['condition1', 'condition1', |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
532 ... 'condition2', 'condition2']}), |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
533 ... 'X': np.array([[1, 0, 0, 0, 0], [0, 0, 2, 2, 0], |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
534 ... [0, 0, 1, 1, 0], [0, 0, 0, 2, 0]]), |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
535 ... } |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
536 >>> adata = anndata.AnnData(X=data['X'], obs=data['obs']) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
537 >>> df = identify_genes_to_filter_per_contrast( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
538 ... contrast_file, min_perc_cells_expression, adata, 'condition' |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
539 ... ) # doctest:+ELLIPSIS |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
540 Identifying genes to filter using ... |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
541 >>> df.head() # doctest:+ELLIPSIS |
7
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
542 contrast gene |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
543 0 condition1-condition2... |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
544 1 condition1-condition2... |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
545 2 2*(condition1)-condition2... |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
546 3 2*(condition1)-condition2... |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
547 """ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
548 import re |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
549 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
550 # Implement the logic to identify genes to filter per contrast |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
551 # This is a placeholder implementation |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
552 print( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
553 f"Identifying genes to filter using {contrast_file} " |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
554 f"with min expression {min_perc_cells_expression}%" |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
555 ) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
556 sides_regex = re.compile(r"[\+\-\*\/\(\)\^]+") |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
557 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
558 contrasts = pd.read_csv(contrast_file, sep="\t") |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
559 # Iterate over each line in the contrast file |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
560 genes_filter_for_contrast = dict() |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
561 for contrast in contrasts.iloc[:, 0]: |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
562 conditions = set(sides_regex.split(contrast)) |
7
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
563 |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
564 selected_conditions = [] |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
565 failed_conditions = [] |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
566 for condition in conditions: |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
567 # remove any starting or trailing whitespaces from condition |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
568 condition = condition.strip() |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
569 if len(condition) == 0: |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
570 continue |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
571 # check if the condition is simply a number, then skip it |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
572 if condition.isnumeric(): |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
573 continue |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
574 if condition not in adata.obs[obs_field].unique(): |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
575 # add condition to failed_conditions |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
576 failed_conditions.append(condition) |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
577 continue |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
578 # append to selected_conditions |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
579 selected_conditions.append(condition) |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
580 |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
581 if len(failed_conditions) > 0: |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
582 raise ValueError( |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
583 f"Condition(s) '{failed_conditions}' " |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
584 f"from contrast {contrast} " |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
585 f"is/are not present in the " |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
586 f"obs_field '{obs_field}' from the AnnData object." |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
587 f"Possible values are: " |
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
588 f"{', '.join(adata.obs[obs_field].unique())}.") |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
589 # we want to find the genes that are below the threshold |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
590 # of % of cells expressed for ALL the conditions in the |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
591 # contrast. It is enough for one of the conditions |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
592 # of the contrast to have the genes expressed above |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
593 # the threshold of % of cells to be of interest. |
7
2c5686d627c0
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents:
6
diff
changeset
|
594 for condition in selected_conditions: |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
595 # check the percentage of cells that express each gene |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
596 # Filter the AnnData object based on the obs_field value |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
597 adata_filtered = adata[adata.obs[obs_field] == condition] |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
598 # Calculate the percentage of cells expressing each gene |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
599 gene_expression = (adata_filtered.X > 0).mean(axis=0) * 100 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
600 genes_to_filter = set(adata_filtered.var[ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
601 gene_expression.transpose() < min_perc_cells_expression |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
602 ].index.tolist()) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
603 # Update the genes_filter_for_contrast dictionary |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
604 if contrast in genes_filter_for_contrast.keys(): |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
605 genes_filter_for_contrast[contrast].intersection_update( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
606 genes_to_filter |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
607 ) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
608 else: |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
609 genes_filter_for_contrast[contrast] = genes_to_filter |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
610 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
611 # write the genes_filter_for_contrast to pandas dataframe of two columns: |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
612 # contrast and gene |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
613 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
614 # Initialize an empty list to store the expanded pairs |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
615 expanded_pairs = [] |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
616 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
617 # Iterate over the dictionary |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
618 for contrast, genes in genes_filter_for_contrast.items(): |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
619 for gene in genes: |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
620 expanded_pairs.append((contrast, gene)) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
621 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
622 # Create the DataFrame |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
623 contrast_genes_df = pd.DataFrame( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
624 expanded_pairs, columns=["contrast", "gene"] |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
625 ) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
626 |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
627 return contrast_genes_df |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
628 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
629 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
630 if __name__ == "__main__": |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
631 # Create argument parser |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
632 parser = argparse.ArgumentParser( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
633 description="Perform pseudobulk analysis on an AnnData object" |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
634 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
635 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
636 # Add arguments |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
637 parser.add_argument( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
638 "adata_file", type=str, help="Path to the AnnData file" |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
639 ) |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
640 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
641 "-m", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
642 "--adata_obs_fields_to_merge", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
643 type=str, |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
644 help="Fields in adata.obs to merge, comma separated. \ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
645 You can have more than one set of fields, \ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
646 separated by semi-colon ;", |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
647 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
648 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
649 "--groupby", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
650 type=str, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
651 required=True, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
652 help="The column in adata.obs that defines the groups", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
653 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
654 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
655 "--sample_key", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
656 required=True, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
657 type=str, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
658 help="The column in adata.obs that defines the samples", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
659 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
660 # add argument for layer |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
661 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
662 "--layer", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
663 type=str, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
664 default=None, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
665 help="The name of the layer of the AnnData object to use", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
666 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
667 # add argument for mode |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
668 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
669 "--mode", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
670 type=str, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
671 default="sum", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
672 help="The mode for Decoupler pseudobulk analysis", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
673 choices=["sum", "mean", "median"], |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
674 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
675 # add boolean argument for use_raw |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
676 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
677 "--use_raw", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
678 action="store_true", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
679 default=False, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
680 help="Whether to use the raw part of the AnnData object", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
681 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
682 # add argument for min_cells |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
683 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
684 "--min_cells", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
685 type=int, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
686 default=10, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
687 help="Minimum number of cells for pseudobulk analysis", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
688 ) |
5
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
689 # add argument for min percentage of cells that should express a gene |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
690 parser.add_argument( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
691 "--min_gene_exp_perc_per_cell", |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
692 type=float, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
693 default=50, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
694 help="If all the conditions of one side of a contrast express a \ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
695 gene in less than this percentage of cells, then the genes \ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
696 will be added to a list of genes to ignore for that contrast.\ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
697 Requires the contrast file to be provided.", |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
698 ) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
699 parser.add_argument( |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
700 "--contrasts_file", |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
701 type=str, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
702 required=False, |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
703 help="Contrasts file, a one column tsv with a header, each line \ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
704 represents a contrast as a combination of conditions at each \ |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
705 side of a subtraction.", |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
706 ) |
87f1eaa410cc
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents:
4
diff
changeset
|
707 |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
708 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
709 "--save_path", type=str, help="Path to save the plot (optional)" |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
710 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
711 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
712 "--min_counts", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
713 type=int, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
714 help="Minimum count threshold for filtering by expression", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
715 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
716 parser.add_argument( |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
717 "--min_counts_per_sample_marking", |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
718 type=int, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
719 default=20, |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
720 help="Minimum count threshold per sample for \ |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
721 marking genes to be ignored after DE", |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
722 ) |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
723 parser.add_argument( |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
724 "--min_total_counts", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
725 type=int, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
726 help="Minimum total count threshold for filtering by expression", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
727 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
728 parser.add_argument( |
11
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
729 "--num_pseudo_replicates", |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
730 type=int, |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
731 choices=range(3, 1000), |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
732 help="Number of pseudo replicates to create per sample (at least 3)", |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
733 required=False |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
734 ) |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
735 parser.add_argument( |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
736 "--seed", |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
737 type=int, |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
738 default=None, |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
739 help="Random seed for pseudo replicate sampling", |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
740 ) |
db14ac3f6b43
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents:
7
diff
changeset
|
741 parser.add_argument( |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
742 "--anndata_output_path", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
743 type=str, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
744 help="Path to save the filtered AnnData object or pseudobulk data", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
745 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
746 parser.add_argument( |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
747 "--filter_expr", |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
748 action="store_true", |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
749 help="Enable filtering by expression", |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
750 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
751 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
752 "--factor_fields", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
753 type=str, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
754 help="Comma separated list of fields for the factors", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
755 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
756 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
757 "--deseq2_output_path", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
758 type=str, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
759 help="Path to save the DESeq2 inputs", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
760 required=True, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
761 ) |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
762 parser.add_argument( |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
763 "--plot_samples_figsize", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
764 type=int, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
765 default=[10, 10], |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
766 nargs=2, |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
767 help="Size of the samples plot as a tuple (two arguments)", |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
768 ) |
3
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
769 parser.add_argument( |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
770 "--plot_filtering_figsize", type=int, default=[10, 10], nargs=2 |
c6787c2aee46
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents:
0
diff
changeset
|
771 ) |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
772 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
773 # Parse the command line arguments |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
774 args = parser.parse_args() |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
775 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
776 # Call the main function |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
777 main(args) |