annotate decoupler_pseudobulk.py @ 7:2c5686d627c0 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
author ebi-gxa
date Sun, 27 Oct 2024 20:39:33 +0000
parents 1d140c4a5875
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
1 import argparse
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
2
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
3 import anndata
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
4 import decoupler
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
5 import pandas as pd
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
6
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
7
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
8 def get_pseudobulk(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
9 adata,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
10 sample_col,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
11 groups_col,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
12 layer=None,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
13 mode="sum",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
14 min_cells=10,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
15 min_counts=1000,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
16 use_raw=False,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
17 ):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
18 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
19 >>> import scanpy as sc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
20 >>> adata = sc.datasets.pbmc68k_reduced()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
21 >>> adata.X = abs(adata.X).astype(int)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
22 >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
23 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
24
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
25 return decoupler.get_pseudobulk(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
26 adata,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
27 sample_col=sample_col,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
28 groups_col=groups_col,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
29 layer=layer,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
30 mode=mode,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
31 use_raw=use_raw,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
32 min_cells=min_cells,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
33 min_counts=min_counts,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
34 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
35
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
36
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
37 def prepend_c_to_index(index_value):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
38 if index_value and index_value[0].isdigit():
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
39 return "C" + index_value
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
40 return index_value
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
41
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
42
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
43 def genes_to_ignore_per_contrast_field(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
44 count_matrix_df,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
45 samples_metadata,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
46 sample_metadata_col_contrasts,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
47 min_counts_per_sample=5,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
48 use_cpms=False,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
49 ):
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
50 """
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
51 # This function calculates the genes to ignore per contrast field
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
52 # (e.g., bulk_labels, louvain).
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
53 # It does this by first getting the count matrix for each group,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
54 # then identifying genes with a count below a specified threshold.
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
55 # The genes to ignore are those that are present in more than a specified
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
56 # number of groups.
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
57
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
58 >>> import pandas as pd
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
59 >>> samples_metadata = pd.DataFrame({'sample':
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
60 ... ['S1', 'S2', 'S3',
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
61 ... 'S4', 'S5', 'S6'],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
62 ... 'contrast_field':
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
63 ... ['A', 'A', 'A', 'B', 'B', 'B']})
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
64 >>> count_matrix_df = pd.DataFrame(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
65 ... {'S1':
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
66 ... [30, 1, 40, 50, 30],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
67 ... 'S2':
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
68 ... [40, 2, 60, 50, 80],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
69 ... 'S3':
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
70 ... [80, 1, 60, 50, 50],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
71 ... 'S4': [1, 50, 50, 50, 2],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
72 ... 'S5': [3, 40, 40, 40, 2],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
73 ... 'S6': [0, 50, 50, 50, 1]})
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
74 >>> count_matrix_df.index = ['Gene1', 'Gene2', 'Gene3', 'Gene4', 'Gene5']
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
75 >>> df = genes_to_ignore_per_contrast_field(count_matrix_df,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
76 ... samples_metadata, min_counts_per_sample=5,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
77 ... sample_metadata_col_contrasts='contrast_field')
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
78 >>> df[df['contrast_field'] == 'A'].genes_to_ignore.tolist()[0]
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
79 'Gene2'
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
80 >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[0]
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
81 'Gene1'
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
82 >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[1]
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
83 'Gene5'
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
84 """
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
85
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
86 # Initialize a dictionary to store the genes to ignore per contrast field
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
87 contrast_fields = []
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
88 genes_to_ignore = []
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
89
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
90 # Iterate over the contrast fields
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
91 for contrast_field in samples_metadata[
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
92 sample_metadata_col_contrasts
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
93 ].unique():
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
94 # Get the count matrix for the current contrast field
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
95 count_matrix_field = count_matrix_df.loc[
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
96 :,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
97 (
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
98 samples_metadata[sample_metadata_col_contrasts]
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
99 == contrast_field
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
100 ).tolist(),
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
101 ]
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
102
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
103 # We derive min_counts from the number of samples with that
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
104 # contrast_field value
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
105 min_counts = count_matrix_field.shape[1] * min_counts_per_sample
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
106
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
107 if use_cpms:
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
108 # Convert counts to counts per million (CPM)
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
109 count_matrix_field = (
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
110 count_matrix_field.div(count_matrix_field.sum(axis=1), axis=0)
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
111 * 1e6
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
112 )
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
113 min_counts = 1 # use 1 CPM
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
114
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
115 # Calculate the total number of cells in the current contrast field
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
116 # (this produces a vector of counts per gene)
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
117 total_counts_per_gene = count_matrix_field.sum(axis=1)
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
118
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
119 # Identify genes with a count below the specified threshold
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
120 genes = total_counts_per_gene[
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
121 total_counts_per_gene < min_counts
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
122 ].index.tolist()
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
123 if len(genes) > 0:
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
124 # genes_to_ignore[contrast_field] = " ".join(genes)
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
125 for gene in genes:
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
126 genes_to_ignore.append(gene)
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
127 contrast_fields.append(contrast_field)
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
128 # transform gene_to_ignore to a DataFrame
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
129 # genes_to_ignore_df = pd.DataFrame(genes_to_ignore.items(),
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
130 # columns=["contrast_field", "genes_to_ignore"])
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
131 genes_to_ignore_df = pd.DataFrame(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
132 {"contrast_field": contrast_fields, "genes_to_ignore": genes_to_ignore}
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
133 )
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
134 return genes_to_ignore_df
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
135
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
136
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
137 # write results for loading into DESeq2
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
138 def write_DESeq2_inputs(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
139 pdata,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
140 layer=None,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
141 output_dir="",
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
142 factor_fields=None,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
143 min_counts_per_sample_marking=20,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
144 ):
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
145 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
146 >>> import scanpy as sc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
147 >>> adata = sc.datasets.pbmc68k_reduced()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
148 >>> adata.X = abs(adata.X).astype(int)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
149 >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
150 >>> write_DESeq2_inputs(pseudobulk)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
151 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
152 # add / to output_dir if is not empty or if it doesn't end with /
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
153 if output_dir != "" and not output_dir.endswith("/"):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
154 output_dir = output_dir + "/"
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
155 obs_for_deseq = pdata.obs.copy()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
156 # replace any index starting with digits to start with C instead.
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
157 obs_for_deseq.rename(index=prepend_c_to_index, inplace=True)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
158 # avoid dash that is read as point on R colnames.
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
159 obs_for_deseq.index = obs_for_deseq.index.str.replace("-", "_")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
160 obs_for_deseq.index = obs_for_deseq.index.str.replace(" ", "_")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
161 col_metadata_file = f"{output_dir}col_metadata.tsv"
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
162 # write obs to a col_metadata file
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
163 if factor_fields:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
164 # only output the index plus the columns in factor_fields in that order
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
165 obs_for_deseq[factor_fields].to_csv(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
166 col_metadata_file, sep="\t", index=True
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
167 )
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
168 else:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
169 obs_for_deseq.to_csv(col_metadata_file, sep="\t", index=True)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
170 # write var to a gene_metadata file
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
171 pdata.var.to_csv(f"{output_dir}gene_metadata.tsv", sep="\t", index=True)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
172 # write the counts matrix of a specified layer to file
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
173 if layer is None:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
174 # write the X numpy matrix transposed to file
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
175 df = pd.DataFrame(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
176 pdata.X.T, index=pdata.var.index, columns=obs_for_deseq.index
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
177 )
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
178 else:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
179 df = pd.DataFrame(
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
180 pdata.layers[layer].T,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
181 index=pdata.var.index,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
182 columns=obs_for_deseq.index,
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
183 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
184 df.to_csv(f"{output_dir}counts_matrix.tsv", sep="\t", index_label="")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
185
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
186 if factor_fields:
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
187 df_genes_ignore = genes_to_ignore_per_contrast_field(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
188 count_matrix_df=df,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
189 samples_metadata=obs_for_deseq,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
190 sample_metadata_col_contrasts=factor_fields[0],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
191 min_counts_per_sample=min_counts_per_sample_marking,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
192 )
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
193 df_genes_ignore.to_csv(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
194 f"{output_dir}genes_to_ignore_per_contrast_field.tsv", sep="\t"
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
195 )
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
196
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
197
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
198 def plot_pseudobulk_samples(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
199 pseudobulk_data,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
200 groupby,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
201 figsize=(10, 10),
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
202 save_path=None,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
203 ):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
204 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
205 >>> import scanpy as sc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
206 >>> adata = sc.datasets.pbmc68k_reduced()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
207 >>> adata.X = abs(adata.X).astype(int)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
208 >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
209 >>> plot_pseudobulk_samples(pseudobulk,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
210 ... groupby=["bulk_labels", "louvain"],
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
211 ... figsize=(10, 10))
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
212 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
213 fig = decoupler.plot_psbulk_samples(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
214 pseudobulk_data, groupby=groupby, figsize=figsize, return_fig=True
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
215 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
216 if save_path:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
217 fig.savefig(f"{save_path}/pseudobulk_samples.png")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
218 else:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
219 fig.show()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
220
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
221
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
222 def plot_filter_by_expr(
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
223 pseudobulk_data,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
224 group,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
225 min_count=None,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
226 min_total_count=None,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
227 save_path=None,
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
228 ):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
229 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
230 >>> import scanpy as sc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
231 >>> adata = sc.datasets.pbmc68k_reduced()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
232 >>> adata.X = abs(adata.X).astype(int)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
233 >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
234 >>> plot_filter_by_expr(pseudobulk, group="bulk_labels",
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
235 ... min_count=10, min_total_count=200)
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
236 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
237 fig = decoupler.plot_filter_by_expr(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
238 pseudobulk_data,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
239 group=group,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
240 min_count=min_count,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
241 min_total_count=min_total_count,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
242 return_fig=True,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
243 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
244 if save_path:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
245 fig.savefig(f"{save_path}/filter_by_expr.png")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
246 else:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
247 fig.show()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
248
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
249
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
250 def filter_by_expr(pdata, min_count=None, min_total_count=None):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
251 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
252 >>> import scanpy as sc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
253 >>> adata = sc.datasets.pbmc68k_reduced()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
254 >>> adata.X = abs(adata.X).astype(int)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
255 >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
256 >>> pdata_filt = filter_by_expr(pseudobulk,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
257 ... min_count=10, min_total_count=200)
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
258 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
259 genes = decoupler.filter_by_expr(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
260 pdata, min_count=min_count, min_total_count=min_total_count
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
261 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
262 return pdata[:, genes].copy()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
263
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
264
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
265 def check_fields(fields, adata, obs=True, context=None):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
266 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
267 >>> import scanpy as sc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
268 >>> adata = sc.datasets.pbmc68k_reduced()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
269 >>> check_fields(["bulk_labels", "louvain"], adata, obs=True)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
270 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
271
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
272 legend = ""
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
273 if context:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
274 legend = f", passed in {context},"
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
275 if obs:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
276 if not set(fields).issubset(set(adata.obs.columns)):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
277 raise ValueError(
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
278 f"Some of the following fields {legend} are not present \
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
279 in adata.obs: {fields}. \
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
280 Possible fields are: {list(set(adata.obs.columns))}"
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
281 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
282 else:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
283 if not set(fields).issubset(set(adata.var.columns)):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
284 raise ValueError(
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
285 f"Some of the following fields {legend} are not present \
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
286 in adata.var: {fields}. \
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
287 Possible fields are: {list(set(adata.var.columns))}"
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
288 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
289
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
290
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
291 def main(args):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
292 # Load AnnData object from file
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
293 adata = anndata.read_h5ad(args.adata_file)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
294
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
295 # Merge adata.obs fields specified in args.adata_obs_fields_to_merge
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
296 if args.adata_obs_fields_to_merge:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
297 # first split potential groups by ":" and iterate over them
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
298 for group in args.adata_obs_fields_to_merge.split(":"):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
299 fields = group.split(",")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
300 check_fields(fields, adata)
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
301 merge_adata_obs_fields(fields, adata)
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
302
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
303 check_fields([args.groupby, args.sample_key], adata)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
304
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
305 factor_fields = None
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
306 if args.factor_fields:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
307 factor_fields = args.factor_fields.split(",")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
308 check_fields(factor_fields, adata)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
309
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
310 print(f"Using mode: {args.mode}")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
311 # Perform pseudobulk analysis
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
312 pseudobulk_data = get_pseudobulk(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
313 adata,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
314 sample_col=args.sample_key,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
315 groups_col=args.groupby,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
316 layer=args.layer,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
317 mode=args.mode,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
318 use_raw=args.use_raw,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
319 min_cells=args.min_cells,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
320 min_counts=args.min_counts,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
321 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
322
4
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
323 print("Created pseudo-bulk AnnData, checking if fields still make sense.")
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
324 print(
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
325 "If this fails this check, it might mean that you asked for factors \
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
326 that are not compatible with you sample identifiers (ie. asked for \
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
327 phase in the factors, but each sample contains more than one phase,\
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
328 try joining fields)."
4
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
329 )
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
330 if factor_fields:
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
331 check_fields(
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
332 factor_fields,
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
333 pseudobulk_data,
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
334 context=" after creation of pseudo-bulk AnnData",
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
335 )
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
336 print("Factors requested are adequate for the pseudo-bulked AnnData!")
6c30272fb587 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 3
diff changeset
337
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
338 # Plot pseudobulk samples
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
339 plot_pseudobulk_samples(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
340 pseudobulk_data,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
341 args.groupby,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
342 save_path=args.save_path,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
343 figsize=args.plot_samples_figsize,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
344 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
345
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
346 plot_filter_by_expr(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
347 pseudobulk_data,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
348 group=args.groupby,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
349 min_count=args.min_counts,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
350 min_total_count=args.min_total_counts,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
351 save_path=args.save_path,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
352 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
353
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
354 # Filter by expression if enabled
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
355 if args.filter_expr:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
356 filtered_adata = filter_by_expr(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
357 pseudobulk_data,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
358 min_count=args.min_counts,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
359 min_total_count=args.min_total_counts,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
360 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
361
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
362 pseudobulk_data = filtered_adata
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
363
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
364 # Save the pseudobulk data
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
365 if args.anndata_output_path:
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
366 pseudobulk_data.write_h5ad(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
367 args.anndata_output_path, compression="gzip"
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
368 )
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
369
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
370 write_DESeq2_inputs(
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
371 pseudobulk_data,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
372 output_dir=args.deseq2_output_path,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
373 factor_fields=factor_fields,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
374 min_counts_per_sample_marking=args.min_counts_per_sample_marking,
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
375 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
376
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
377 # if contrasts file is provided, produce a file with genes that should be
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
378 # filtered for each contrasts
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
379 if args.contrasts_file:
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
380 contrast_genes_df = identify_genes_to_filter_per_contrast(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
381 contrast_file=args.contrasts_file,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
382 min_perc_cells_expression=args.min_gene_exp_perc_per_cell,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
383 adata=adata,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
384 obs_field=args.groupby
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
385 )
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
386 contrast_genes_df.to_csv(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
387 f"{args.save_path}/genes_to_filter_by_contrast.tsv",
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
388 sep="\t",
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
389 index=False,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
390 )
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
391
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
392
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
393 def merge_adata_obs_fields(obs_fields_to_merge, adata):
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
394 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
395 Merge adata.obs fields specified in args.adata_obs_fields_to_merge
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
396
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
397 Parameters
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
398 ----------
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
399 obs_fields_to_merge : str
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
400 Fields in adata.obs to merge, comma separated
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
401 adata : anndata.AnnData
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
402 The AnnData object
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
403
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
404 Returns
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
405 -------
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
406 anndata.AnnData
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
407 The merged AnnData object
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
408
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
409 docstring tests:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
410 >>> import scanpy as sc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
411 >>> ad = sc.datasets.pbmc68k_reduced()
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
412 >>> merge_adata_obs_fields(["bulk_labels","louvain"], ad)
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
413 >>> ad.obs.columns
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
414 Index(['bulk_labels', 'n_genes', 'percent_mito', 'n_counts', 'S_score',
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
415 'G2M_score', 'phase', 'louvain', 'bulk_labels_louvain'],
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
416 dtype='object')
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
417 """
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
418 field_name = "_".join(obs_fields_to_merge)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
419 for field in obs_fields_to_merge:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
420 if field not in adata.obs.columns:
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
421 raise ValueError(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
422 f"The '{field}' column is not present in adata.obs."
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
423 )
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
424 if field_name not in adata.obs.columns:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
425 adata.obs[field_name] = adata.obs[field].astype(str)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
426 else:
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
427 adata.obs[field_name] = (
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
428 adata.obs[field_name] + "_" + adata.obs[field].astype(str)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
429 )
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
430
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
431
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
432 def identify_genes_to_filter_per_contrast(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
433 contrast_file, min_perc_cells_expression, adata, obs_field
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
434 ):
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
435 """
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
436 Identify genes to filter per contrast based on expression percentage.
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
437 We need those genes to be under the threshold for all conditions
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
438 in a contrast to be identified for further filtering. If
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
439 one condition has the gene expressed above the threshold, the gene
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
440 becomes of interest (it can be highly up or down regulated).
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
441
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
442 Parameters
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
443 ----------
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
444 contrast_file : str
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
445 Path to the contrasts file.
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
446 min_perc_cells_expression : float
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
447 Minimum percentage of cells that should express a gene.
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
448 adata: adata
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
449 Original AnnData file
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
450 obs_field: str
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
451 Field in the AnnData observations where the contrasts are defined.
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
452
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
453 Returns
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
454 -------
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
455 None
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
456
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
457 Examples
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
458 --------
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
459 >>> import anndata
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
460 >>> import pandas as pd
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
461 >>> import numpy as np
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
462 >>> import os
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
463 >>> from io import StringIO
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
464 >>> contrast_file = StringIO(f"contrast{os.linesep}condition1-\
7
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
465 condition2{os.linesep}\
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
466 2*(condition1)-condition2{os.linesep}")
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
467 >>> min_perc_cells_expression = 30.0
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
468 >>> data = {
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
469 ... 'obs': pd.DataFrame({'condition': ['condition1', 'condition1',
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
470 ... 'condition2', 'condition2']}),
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
471 ... 'X': np.array([[1, 0, 0, 0, 0], [0, 0, 2, 2, 0],
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
472 ... [0, 0, 1, 1, 0], [0, 0, 0, 2, 0]]),
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
473 ... }
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
474 >>> adata = anndata.AnnData(X=data['X'], obs=data['obs'])
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
475 >>> df = identify_genes_to_filter_per_contrast(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
476 ... contrast_file, min_perc_cells_expression, adata, 'condition'
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
477 ... ) # doctest:+ELLIPSIS
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
478 Identifying genes to filter using ...
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
479 >>> df.head() # doctest:+ELLIPSIS
7
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
480 contrast gene
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
481 0 condition1-condition2...
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
482 1 condition1-condition2...
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
483 2 2*(condition1)-condition2...
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
484 3 2*(condition1)-condition2...
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
485 """
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
486 import re
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
487
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
488 # Implement the logic to identify genes to filter per contrast
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
489 # This is a placeholder implementation
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
490 print(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
491 f"Identifying genes to filter using {contrast_file} "
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
492 f"with min expression {min_perc_cells_expression}%"
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
493 )
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
494 sides_regex = re.compile(r"[\+\-\*\/\(\)\^]+")
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
495
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
496 contrasts = pd.read_csv(contrast_file, sep="\t")
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
497 # Iterate over each line in the contrast file
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
498 genes_filter_for_contrast = dict()
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
499 for contrast in contrasts.iloc[:, 0]:
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
500 conditions = set(sides_regex.split(contrast))
7
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
501
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
502 selected_conditions = []
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
503 failed_conditions = []
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
504 for condition in conditions:
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
505 # remove any starting or trailing whitespaces from condition
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
506 condition = condition.strip()
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
507 if len(condition) == 0:
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
508 continue
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
509 # check if the condition is simply a number, then skip it
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
510 if condition.isnumeric():
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
511 continue
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
512 if condition not in adata.obs[obs_field].unique():
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
513 # add condition to failed_conditions
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
514 failed_conditions.append(condition)
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
515 continue
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
516 # append to selected_conditions
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
517 selected_conditions.append(condition)
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
518
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
519 if len(failed_conditions) > 0:
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
520 raise ValueError(
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
521 f"Condition(s) '{failed_conditions}' "
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
522 f"from contrast {contrast} "
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
523 f"is/are not present in the "
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
524 f"obs_field '{obs_field}' from the AnnData object."
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
525 f"Possible values are: "
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
526 f"{', '.join(adata.obs[obs_field].unique())}.")
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
527 # we want to find the genes that are below the threshold
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
528 # of % of cells expressed for ALL the conditions in the
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
529 # contrast. It is enough for one of the conditions
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
530 # of the contrast to have the genes expressed above
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
531 # the threshold of % of cells to be of interest.
7
2c5686d627c0 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 6
diff changeset
532 for condition in selected_conditions:
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
533 # check the percentage of cells that express each gene
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
534 # Filter the AnnData object based on the obs_field value
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
535 adata_filtered = adata[adata.obs[obs_field] == condition]
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
536 # Calculate the percentage of cells expressing each gene
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
537 gene_expression = (adata_filtered.X > 0).mean(axis=0) * 100
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
538 genes_to_filter = set(adata_filtered.var[
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
539 gene_expression.transpose() < min_perc_cells_expression
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
540 ].index.tolist())
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
541 # Update the genes_filter_for_contrast dictionary
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
542 if contrast in genes_filter_for_contrast.keys():
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
543 genes_filter_for_contrast[contrast].intersection_update(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
544 genes_to_filter
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
545 )
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
546 else:
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
547 genes_filter_for_contrast[contrast] = genes_to_filter
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
548
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
549 # write the genes_filter_for_contrast to pandas dataframe of two columns:
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
550 # contrast and gene
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
551
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
552 # Initialize an empty list to store the expanded pairs
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
553 expanded_pairs = []
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
554
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
555 # Iterate over the dictionary
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
556 for contrast, genes in genes_filter_for_contrast.items():
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
557 for gene in genes:
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
558 expanded_pairs.append((contrast, gene))
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
559
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
560 # Create the DataFrame
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
561 contrast_genes_df = pd.DataFrame(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
562 expanded_pairs, columns=["contrast", "gene"]
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
563 )
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
564
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
565 return contrast_genes_df
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
566
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
567
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
568 if __name__ == "__main__":
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
569 # Create argument parser
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
570 parser = argparse.ArgumentParser(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
571 description="Perform pseudobulk analysis on an AnnData object"
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
572 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
573
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
574 # Add arguments
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
575 parser.add_argument(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
576 "adata_file", type=str, help="Path to the AnnData file"
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
577 )
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
578 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
579 "-m",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
580 "--adata_obs_fields_to_merge",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
581 type=str,
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
582 help="Fields in adata.obs to merge, comma separated. \
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
583 You can have more than one set of fields, \
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
584 separated by semi-colon ;",
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
585 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
586 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
587 "--groupby",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
588 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
589 required=True,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
590 help="The column in adata.obs that defines the groups",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
591 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
592 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
593 "--sample_key",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
594 required=True,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
595 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
596 help="The column in adata.obs that defines the samples",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
597 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
598 # add argument for layer
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
599 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
600 "--layer",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
601 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
602 default=None,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
603 help="The name of the layer of the AnnData object to use",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
604 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
605 # add argument for mode
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
606 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
607 "--mode",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
608 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
609 default="sum",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
610 help="The mode for Decoupler pseudobulk analysis",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
611 choices=["sum", "mean", "median"],
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
612 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
613 # add boolean argument for use_raw
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
614 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
615 "--use_raw",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
616 action="store_true",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
617 default=False,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
618 help="Whether to use the raw part of the AnnData object",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
619 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
620 # add argument for min_cells
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
621 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
622 "--min_cells",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
623 type=int,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
624 default=10,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
625 help="Minimum number of cells for pseudobulk analysis",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
626 )
5
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
627 # add argument for min percentage of cells that should express a gene
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
628 parser.add_argument(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
629 "--min_gene_exp_perc_per_cell",
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
630 type=float,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
631 default=50,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
632 help="If all the conditions of one side of a contrast express a \
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
633 gene in less than this percentage of cells, then the genes \
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
634 will be added to a list of genes to ignore for that contrast.\
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
635 Requires the contrast file to be provided.",
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
636 )
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
637 parser.add_argument(
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
638 "--contrasts_file",
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
639 type=str,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
640 required=False,
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
641 help="Contrasts file, a one column tsv with a header, each line \
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
642 represents a contrast as a combination of conditions at each \
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
643 side of a substraction.",
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
644 )
87f1eaa410cc planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 4
diff changeset
645
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
646 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
647 "--save_path", type=str, help="Path to save the plot (optional)"
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
648 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
649 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
650 "--min_counts",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
651 type=int,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
652 help="Minimum count threshold for filtering by expression",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
653 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
654 parser.add_argument(
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
655 "--min_counts_per_sample_marking",
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
656 type=int,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
657 default=20,
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
658 help="Minimum count threshold per sample for \
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
659 marking genes to be ignored after DE",
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
660 )
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
661 parser.add_argument(
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
662 "--min_total_counts",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
663 type=int,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
664 help="Minimum total count threshold for filtering by expression",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
665 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
666 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
667 "--anndata_output_path",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
668 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
669 help="Path to save the filtered AnnData object or pseudobulk data",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
670 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
671 parser.add_argument(
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
672 "--filter_expr",
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
673 action="store_true",
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
674 help="Enable filtering by expression",
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
675 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
676 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
677 "--factor_fields",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
678 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
679 help="Comma separated list of fields for the factors",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
680 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
681 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
682 "--deseq2_output_path",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
683 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
684 help="Path to save the DESeq2 inputs",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
685 required=True,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
686 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
687 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
688 "--plot_samples_figsize",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
689 type=int,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
690 default=[10, 10],
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
691 nargs=2,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
692 help="Size of the samples plot as a tuple (two arguments)",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
693 )
3
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
694 parser.add_argument(
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
695 "--plot_filtering_figsize", type=int, default=[10, 10], nargs=2
c6787c2aee46 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
696 )
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
697
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
698 # Parse the command line arguments
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
699 args = parser.parse_args()
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
700
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
701 # Call the main function
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
702 main(args)