annotate decoupler_pseudobulk.py @ 7:617e50767215 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
author ebi-gxa
date Wed, 02 Oct 2024 08:27:01 +0000
parents a33eb7d3b053
children 3c18dda2ea3f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
1 import argparse
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
2
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
3 import anndata
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
4 import decoupler
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
5 import pandas as pd
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
6
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
7
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
def get_pseudobulk(
    adata,
    sample_col,
    groups_col,
    layer=None,
    mode="sum",
    min_cells=10,
    min_counts=1000,
    use_raw=False,
):
    """
    Build a pseudobulk object by delegating to ``decoupler.get_pseudobulk``.

    Every argument is forwarded unchanged; this wrapper only supplies the
    defaults listed in the signature and returns whatever decoupler returns.

    :param adata: annotated data object handed to decoupler as-is.
    :param sample_col: forwarded as ``sample_col``.
    :param groups_col: forwarded as ``groups_col``.
    :param layer: forwarded as ``layer`` (default ``None``).
    :param mode: forwarded as ``mode`` (default ``"sum"``).
    :param min_cells: forwarded as ``min_cells`` (default 10).
    :param min_counts: forwarded as ``min_counts`` (default 1000).
    :param use_raw: forwarded as ``use_raw`` (default ``False``).
    :return: the result of ``decoupler.get_pseudobulk``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    """
    # Collect the keyword options once so the delegation is a single call.
    pseudobulk_options = {
        "sample_col": sample_col,
        "groups_col": groups_col,
        "layer": layer,
        "mode": mode,
        "use_raw": use_raw,
        "min_cells": min_cells,
        "min_counts": min_counts,
    }
    return decoupler.get_pseudobulk(adata, **pseudobulk_options)
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
35
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
36
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
def prepend_c_to_index(index_value):
    """
    Return ``index_value`` prefixed with "C" when it starts with a digit.

    Empty (falsy) values and values whose first character is not a digit
    are returned unchanged.

    NOTE(review): presumably this keeps identifiers valid for the downstream
    DESeq2/R step -- confirm against the caller.

    >>> prepend_c_to_index("1_sample")
    'C1_sample'
    >>> prepend_c_to_index("sample_1")
    'sample_1'
    """
    # Short-circuit on falsy values so that "" is never indexed.
    starts_with_digit = bool(index_value) and index_value[0].isdigit()
    return "C" + index_value if starts_with_digit else index_value
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
41
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
42
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
def genes_to_ignore_per_contrast_field(
    count_matrix_df,
    samples_metadata,
    sample_metadata_col_contrasts,
    min_counts_per_sample=5,
    use_cpms=False,
):
    """
    List, for each value of a contrast field, the genes that are too lowly
    expressed among the samples carrying that value.

    For every unique value found in
    ``samples_metadata[sample_metadata_col_contrasts]`` the columns of
    ``count_matrix_df`` belonging to that value are selected and the
    expression of each gene is summed across them. A gene is reported when
    that sum is strictly below the threshold:

    * raw counts (default): ``min_counts_per_sample`` multiplied by the
      number of samples in the group;
    * ``use_cpms=True``: each sample is first scaled to counts per million
      (count / sample total * 1e6) and the threshold is a summed CPM of 1.

    :param count_matrix_df: DataFrame with genes as rows and samples as
        columns.
    :param samples_metadata: DataFrame with one row per sample. Samples are
        matched to the columns of ``count_matrix_df`` by position, so the
        rows must be in the same order as those columns.
    :param sample_metadata_col_contrasts: name of the column in
        ``samples_metadata`` holding the contrast field values.
    :param min_counts_per_sample: per-sample count used to derive the raw
        count threshold; unused when ``use_cpms`` is true.
    :param use_cpms: compare summed CPMs against 1 instead of raw counts.
    :return: DataFrame with columns ``contrast_field`` and
        ``genes_to_ignore``, one row per (contrast value, gene) pair. It has
        no rows when nothing is flagged.

    >>> import pandas as pd
    >>> samples_metadata = pd.DataFrame({'sample':
    ...                                    ['S1', 'S2', 'S3',
    ...                                     'S4', 'S5', 'S6'],
    ...                                  'contrast_field':
    ...                                    ['A', 'A', 'A', 'B', 'B', 'B']})
    >>> count_matrix_df = pd.DataFrame(
    ...                     {'S1':
    ...                        [30, 1, 40, 50, 30],
    ...                      'S2':
    ...                        [40, 2, 60, 50, 80],
    ...                      'S3':
    ...                        [80, 1, 60, 50, 50],
    ...                      'S4': [1, 50, 50, 50, 2],
    ...                      'S5': [3, 40, 40, 40, 2],
    ...                      'S6': [0, 50, 50, 50, 1]})
    >>> count_matrix_df.index = ['Gene1', 'Gene2', 'Gene3', 'Gene4', 'Gene5']
    >>> df = genes_to_ignore_per_contrast_field(count_matrix_df,
    ...             samples_metadata, min_counts_per_sample=5,
    ...             sample_metadata_col_contrasts='contrast_field')
    >>> df[df['contrast_field'] == 'A'].genes_to_ignore.tolist()[0]
    'Gene2'
    >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[0]
    'Gene1'
    >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[1]
    'Gene5'
    """

    # Parallel lists: one entry per (contrast value, flagged gene) pair.
    contrast_fields = []
    genes_to_ignore = []

    contrast_values = samples_metadata[sample_metadata_col_contrasts]

    for contrast_field in contrast_values.unique():
        # Plain boolean list => positional column selection; this relies on
        # samples_metadata rows being ordered like count_matrix_df columns.
        in_group = (contrast_values == contrast_field).tolist()
        count_matrix_field = count_matrix_df.loc[:, in_group]

        if use_cpms:
            # CPM normalises every sample (column) by its own library size.
            # Dividing rows by per-gene totals instead would make every
            # expressed gene sum to 1e6 and never fall below the threshold.
            library_sizes = count_matrix_field.sum(axis=0)
            count_matrix_field = (
                count_matrix_field.div(library_sizes, axis=1) * 1e6
            )
            min_counts = 1  # use 1 CPM
        else:
            # The raw threshold scales with the number of samples in the
            # group.
            min_counts = count_matrix_field.shape[1] * min_counts_per_sample

        # Total expression of each gene across the samples of this group.
        total_counts_per_gene = count_matrix_field.sum(axis=1)

        # Genes whose total is strictly below the threshold.
        genes = total_counts_per_gene[
            total_counts_per_gene < min_counts
        ].index.tolist()

        genes_to_ignore.extend(genes)
        contrast_fields.extend([contrast_field] * len(genes))

    return pd.DataFrame(
        {"contrast_field": contrast_fields, "genes_to_ignore": genes_to_ignore}
    )
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
135
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
136
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
137 # write results for loading into DESeq2
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
def write_DESeq2_inputs(
    pdata,
    layer=None,
    output_dir="",
    factor_fields=None,
    min_counts_per_sample_marking=20,
):
    """
    Write the tab-separated input files for DESeq2 from a pseudobulk object.

    Files written (each path is ``output_dir`` followed by the file name):

    - ``col_metadata.tsv``: copy of ``pdata.obs`` with sanitised sample
      names; restricted to ``factor_fields`` (in that order) when given.
    - ``gene_metadata.tsv``: ``pdata.var`` as is.
    - ``counts_matrix.tsv``: transposed matrix (one row per entry of
      ``pdata.var.index``, one column per sanitised sample name).
    - ``genes_to_ignore_per_contrast_field.tsv``: only when
      ``factor_fields`` is given.

    :param pdata: pseudobulk object exposing ``obs``, ``var``, ``X`` and
        ``layers`` (presumably an AnnData from ``get_pseudobulk``).
    :param layer: key in ``pdata.layers`` to export; ``None`` exports
        ``pdata.X``.
    :param output_dir: directory prefix for the files; ``""`` writes to
        the current working directory.
    :param factor_fields: columns of ``pdata.obs`` to export as sample
        metadata. The first one is passed on as the contrast column to
        ``genes_to_ignore_per_contrast_field``.
    :param min_counts_per_sample_marking: forwarded as
        ``min_counts_per_sample`` to
        ``genes_to_ignore_per_contrast_field``.
    :return: None, the results are the files written.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> write_DESeq2_inputs(pseudobulk)
    """
    # add / to output_dir if it is not empty and doesn't end with /
    if output_dir != "" and not output_dir.endswith("/"):
        output_dir = output_dir + "/"

    obs_for_deseq = pdata.obs.copy()
    # replace any index starting with digits to start with C instead.
    obs_for_deseq.rename(index=prepend_c_to_index, inplace=True)
    # avoid dash that is read as point on R colnames, and spaces as well.
    # A single explicit regex pass does not depend on the pandas default
    # for ``regex``, which differs between pandas versions.
    obs_for_deseq.index = obs_for_deseq.index.str.replace(
        r"[- ]", "_", regex=True
    )

    # write obs to a col_metadata file; with factor_fields only the index
    # plus those columns are written, in that order.
    if factor_fields:
        sample_metadata = obs_for_deseq[factor_fields]
    else:
        sample_metadata = obs_for_deseq
    sample_metadata.to_csv(
        f"{output_dir}col_metadata.tsv", sep="\t", index=True
    )

    # write var to a gene_metadata file
    pdata.var.to_csv(f"{output_dir}gene_metadata.tsv", sep="\t", index=True)

    # write the counts matrix (X or the requested layer) transposed to file
    matrix = pdata.X if layer is None else pdata.layers[layer]
    if hasattr(matrix, "toarray"):
        # scipy sparse matrices must be densified before pandas can label
        # their rows and columns.
        matrix = matrix.toarray()
    df = pd.DataFrame(
        matrix.T, index=pdata.var.index, columns=obs_for_deseq.index
    )
    df.to_csv(f"{output_dir}counts_matrix.tsv", sep="\t", index_label="")

    if factor_fields:
        df_genes_ignore = genes_to_ignore_per_contrast_field(
            count_matrix_df=df,
            samples_metadata=obs_for_deseq,
            sample_metadata_col_contrasts=factor_fields[0],
            min_counts_per_sample=min_counts_per_sample_marking,
        )
        df_genes_ignore.to_csv(
            f"{output_dir}genes_to_ignore_per_contrast_field.tsv", sep="\t"
        )
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
196
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
197
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
def plot_pseudobulk_samples(
    pseudobulk_data,
    groupby,
    figsize=(10, 10),
    save_path=None,
):
    """
    Draw decoupler's pseudobulk samples plot and save or show it.

    :param pseudobulk_data: pseudobulk object handed to
        ``decoupler.plot_psbulk_samples``.
    :param groupby: grouping passed through as ``groupby``.
    :param figsize: figure size passed through as ``figsize``.
    :param save_path: when truthy, the figure is saved as
        ``pseudobulk_samples.png`` under this path; otherwise it is shown.
    :return: None.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_pseudobulk_samples(pseudobulk,
    ...     groupby=["bulk_labels", "louvain"],
    ...     figsize=(10, 10))
    """
    figure = decoupler.plot_psbulk_samples(
        pseudobulk_data,
        groupby=groupby,
        figsize=figsize,
        return_fig=True,
    )
    # No destination given: just display the figure.
    if not save_path:
        figure.show()
        return
    figure.savefig(f"{save_path}/pseudobulk_samples.png")
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
220
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
221
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
def plot_filter_by_expr(
    pseudobulk_data,
    group,
    min_count=None,
    min_total_count=None,
    save_path=None,
):
    """
    Draw decoupler's filter-by-expression plot and save or show it.

    :param pseudobulk_data: pseudobulk object handed to
        ``decoupler.plot_filter_by_expr``.
    :param group: passed through as ``group``.
    :param min_count: passed through as ``min_count`` when not ``None``.
    :param min_total_count: passed through as ``min_total_count`` when not
        ``None``.
    :param save_path: when truthy, the figure is saved as
        ``filter_by_expr.png`` under this path; otherwise it is shown.
    :return: None.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_filter_by_expr(pseudobulk, group="bulk_labels",
    ...     min_count=10, min_total_count=200)
    """
    # Forward only the thresholds that were actually set. Passing an
    # explicit None would replace decoupler's own defaults with a value it
    # presumably cannot use as a numeric cutoff.
    thresholds = {
        name: value
        for name, value in (
            ("min_count", min_count),
            ("min_total_count", min_total_count),
        )
        if value is not None
    }
    fig = decoupler.plot_filter_by_expr(
        pseudobulk_data,
        group=group,
        return_fig=True,
        **thresholds,
    )
    if save_path:
        fig.savefig(f"{save_path}/filter_by_expr.png")
    else:
        fig.show()
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
248
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
249
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
def filter_by_expr(pdata, min_count=None, min_total_count=None):
    """
    Return a copy of ``pdata`` restricted to the genes selected by
    ``decoupler.filter_by_expr``.

    :param pdata: pseudobulk object; it is column-indexed with the result
        of ``decoupler.filter_by_expr``.
    :param min_count: passed through as ``min_count`` when not ``None``.
    :param min_total_count: passed through as ``min_total_count`` when not
        ``None``.
    :return: copy of ``pdata[:, genes]``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> pdata_filt = filter_by_expr(pseudobulk,
    ...     min_count=10, min_total_count=200)
    """
    # Forward only the thresholds that were actually set. Passing an
    # explicit None would replace decoupler's own defaults with a value it
    # presumably cannot use as a numeric cutoff.
    thresholds = {
        name: value
        for name, value in (
            ("min_count", min_count),
            ("min_total_count", min_total_count),
        )
        if value is not None
    }
    genes = decoupler.filter_by_expr(pdata, **thresholds)
    return pdata[:, genes].copy()
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
263
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
264
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
265 def check_fields(fields, adata, obs=True, context=None):
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
266 """
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
267 >>> import scanpy as sc
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
268 >>> adata = sc.datasets.pbmc68k_reduced()
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
269 >>> check_fields(["bulk_labels", "louvain"], adata, obs=True)
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
270 """
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
271
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
272 legend = ""
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
273 if context:
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
274 legend = f", passed in {context},"
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
275 if obs:
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
276 if not set(fields).issubset(set(adata.obs.columns)):
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
277 raise ValueError(
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
278 f"Some of the following fields {legend} are not present \
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
279 in adata.obs: {fields}. \
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
280 Possible fields are: {list(set(adata.obs.columns))}"
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
281 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
282 else:
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
283 if not set(fields).issubset(set(adata.var.columns)):
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
284 raise ValueError(
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
285 f"Some of the following fields {legend} are not present \
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
286 in adata.var: {fields}. \
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
287 Possible fields are: {list(set(adata.var.columns))}"
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
288 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
289
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
290
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
def main(args):
    """
    Run the pseudo-bulk workflow end to end.

    Reads an AnnData file, optionally merges groups of ``adata.obs``
    columns, validates the requested fields, builds the pseudo-bulk
    AnnData, produces the diagnostic plots, optionally filters by
    expression, and writes the outputs (h5ad, DESeq2 inputs and, when a
    contrasts file is given, the per-contrast gene filter table).

    Parameters
    ----------
    args
        Object exposing the options read below as attributes (presumably
        the ``argparse`` namespace built by this script -- confirm against
        the caller): ``adata_file``, ``adata_obs_fields_to_merge``,
        ``groupby``, ``sample_key``, ``factor_fields``, ``mode``, ``layer``,
        ``use_raw``, ``min_cells``, ``min_counts``, ``min_total_counts``,
        ``save_path``, ``plot_samples_figsize``, ``filter_expr``,
        ``anndata_output_path``, ``deseq2_output_path``,
        ``min_counts_per_sample_marking``, ``contrasts_file`` and
        ``min_gene_exp_perc_per_cell``.

    Raises
    ------
    ValueError
        Propagated from ``check_fields`` when a requested field is missing.
    """
    # Load AnnData object from file
    adata = anndata.read_h5ad(args.adata_file)

    # Merge adata.obs fields specified in args.adata_obs_fields_to_merge
    if args.adata_obs_fields_to_merge:
        # Groups are separated by ":"; fields within a group by ",".
        for group in args.adata_obs_fields_to_merge.split(":"):
            fields = group.split(",")
            check_fields(fields, adata)
            merge_adata_obs_fields(fields, adata)

    # Validated after merging, so a merged column may be used as the
    # group-by or sample key.
    check_fields([args.groupby, args.sample_key], adata)

    factor_fields = None
    if args.factor_fields:
        factor_fields = args.factor_fields.split(",")
        check_fields(factor_fields, adata)

    print(f"Using mode: {args.mode}")
    # Perform pseudobulk analysis
    pseudobulk_data = get_pseudobulk(
        adata,
        sample_col=args.sample_key,
        groups_col=args.groupby,
        layer=args.layer,
        mode=args.mode,
        use_raw=args.use_raw,
        min_cells=args.min_cells,
        min_counts=args.min_counts,
    )

    print("Created pseudo-bulk AnnData, checking if fields still make sense.")
    # Adjacent string literals are used instead of backslash continuations
    # inside one literal, so that source indentation never leaks into the
    # printed message.
    print(
        "If this fails this check, it might mean that you asked for factors "
        "that are not compatible with your sample identifiers (ie. asked for "
        "phase in the factors, but each sample contains more than one phase, "
        "try joining fields)."
    )
    if factor_fields:
        # The factors must still exist as columns of the pseudo-bulk object.
        check_fields(
            factor_fields,
            pseudobulk_data,
            context=" after creation of pseudo-bulk AnnData",
        )
    # NOTE(review): printed unconditionally here; confirm against the
    # original file whether this line was nested under the check above.
    print("Factors requested are adequate for the pseudo-bulked AnnData!")

    # Plot pseudobulk samples
    plot_pseudobulk_samples(
        pseudobulk_data,
        args.groupby,
        save_path=args.save_path,
        figsize=args.plot_samples_figsize,
    )

    plot_filter_by_expr(
        pseudobulk_data,
        group=args.groupby,
        min_count=args.min_counts,
        min_total_count=args.min_total_counts,
        save_path=args.save_path,
    )

    # Filter by expression if enabled
    if args.filter_expr:
        pseudobulk_data = filter_by_expr(
            pseudobulk_data,
            min_count=args.min_counts,
            min_total_count=args.min_total_counts,
        )

    # Save the pseudobulk data
    if args.anndata_output_path:
        pseudobulk_data.write_h5ad(
            args.anndata_output_path, compression="gzip"
        )

    write_DESeq2_inputs(
        pseudobulk_data,
        output_dir=args.deseq2_output_path,
        factor_fields=factor_fields,
        min_counts_per_sample_marking=args.min_counts_per_sample_marking,
    )

    # If a contrasts file is provided, produce a file with the genes that
    # should be filtered for each contrast. This is computed from the
    # original (non pseudo-bulked) AnnData.
    if args.contrasts_file:
        contrast_genes_df = identify_genes_to_filter_per_contrast(
            contrast_file=args.contrasts_file,
            min_perc_cells_expression=args.min_gene_exp_perc_per_cell,
            adata=adata,
            obs_field=args.groupby,
        )
        contrast_genes_df.to_csv(
            f"{args.save_path}/genes_to_filter_by_contrast.tsv",
            sep="\t",
            index=False,
        )
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
391
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
392
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
393 def merge_adata_obs_fields(obs_fields_to_merge, adata):
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
394 """
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
395 Merge adata.obs fields specified in args.adata_obs_fields_to_merge
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
396
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
397 Parameters
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
398 ----------
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
399 obs_fields_to_merge : str
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
400 Fields in adata.obs to merge, comma separated
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
401 adata : anndata.AnnData
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
402 The AnnData object
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
403
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
404 Returns
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
405 -------
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
406 anndata.AnnData
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
407 The merged AnnData object
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
408
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
409 docstring tests:
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
410 >>> import scanpy as sc
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
411 >>> ad = sc.datasets.pbmc68k_reduced()
7
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
412 >>> merge_adata_obs_fields(["bulk_labels","louvain"], ad)
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
413 >>> ad.obs.columns
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
414 Index(['bulk_labels', 'n_genes', 'percent_mito', 'n_counts', 'S_score',
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
415 'G2M_score', 'phase', 'louvain', 'bulk_labels_louvain'],
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
416 dtype='object')
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
417 """
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
418 field_name = "_".join(obs_fields_to_merge)
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
419 for field in obs_fields_to_merge:
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
420 if field not in adata.obs.columns:
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
421 raise ValueError(
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
422 f"The '{field}' column is not present in adata.obs."
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
423 )
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
424 if field_name not in adata.obs.columns:
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
425 adata.obs[field_name] = adata.obs[field].astype(str)
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
426 else:
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
427 adata.obs[field_name] = (
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
428 adata.obs[field_name] + "_" + adata.obs[field].astype(str)
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
429 )
7
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
430
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
431
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
def identify_genes_to_filter_per_contrast(
    contrast_file, min_perc_cells_expression, adata, obs_field
):
    """
    Identify genes to filter per contrast based on expression percentage.

    A gene is reported for a contrast only when it is expressed (value > 0)
    in fewer than ``min_perc_cells_expression`` percent of the cells of
    every condition named in that contrast. If at least one condition has
    the gene expressed above the threshold, the gene is of interest (it can
    be highly up or down regulated) and is not reported.

    Condition names are obtained by splitting each contrast formula on the
    operators ``+ - * / ( ) ^``. Tokens that match no cell in
    ``adata.obs[obs_field]`` (empty fragments produced by a leading bracket
    or sign, numeric coefficients such as the ``2`` in ``(a+b)/2``) are
    ignored, so they cannot empty the gene set of the contrast.

    Parameters
    ----------
    contrast_file : str or file-like
        Path to (or open handle of) the tab-separated contrasts file. The
        first line is read as a header and the first column holds the
        contrast formulas.
    min_perc_cells_expression : float
        Minimum percentage of cells that should express a gene.
    adata : anndata.AnnData
        Original AnnData object.
    obs_field : str
        Field in the AnnData observations where the contrasts are defined.

    Returns
    -------
    pandas.DataFrame
        One row per (contrast, gene) pair to filter, with columns
        ``contrast`` and ``gene``. Genes are sorted within each contrast.

    Examples
    --------
    >>> import anndata
    >>> import pandas as pd
    >>> import numpy as np
    >>> import os
    >>> from io import StringIO
    >>> contrast_file = StringIO(
    ...     os.linesep.join(["contrast", "condition1-condition2", ""])
    ... )
    >>> min_perc_cells_expression = 30.0
    >>> data = {
    ...     'obs': pd.DataFrame({'condition': ['condition1', 'condition1',
    ...                                        'condition2', 'condition2']}),
    ...     'X': np.array([[1, 0, 0, 0, 0], [0, 0, 2, 2, 0],
    ...                    [0, 0, 1, 1, 0], [0, 0, 0, 2, 0]]),
    ... }
    >>> adata = anndata.AnnData(X=data['X'], obs=data['obs'])
    >>> df = identify_genes_to_filter_per_contrast(
    ...     contrast_file, min_perc_cells_expression, adata, 'condition'
    ... )  # doctest:+ELLIPSIS
    Identifying genes to filter using ...
    >>> df["contrast"].unique().tolist()
    ['condition1-condition2']
    >>> df["gene"].tolist()
    ['1', '4']
    """
    import re

    print(
        f"Identifying genes to filter using {contrast_file} "
        f"with min expression {min_perc_cells_expression}%"
    )
    # Operators and brackets that separate condition names in a formula.
    sides_regex = re.compile(r"[\+\-\*\/\(\)\^]+")

    contrasts = pd.read_csv(contrast_file, sep="\t")
    genes_filter_for_contrast = dict()
    # Blank cells in the contrast column cannot be parsed; skip them.
    for contrast in contrasts.iloc[:, 0].dropna():
        # Strip surrounding whitespace and drop the empty fragments that
        # re.split emits when the formula starts/ends with an operator.
        conditions = {
            token.strip() for token in sides_regex.split(contrast)
        }
        conditions.discard("")
        # We want the genes that are below the threshold of % of cells
        # expressed for ALL the conditions in the contrast. It is enough
        # for one condition to express the gene above the threshold for
        # the gene to be of interest.
        for condition in conditions:
            cells_in_condition = adata.obs[obs_field] == condition
            if not cells_in_condition.any():
                # Not a condition present in the data (e.g. a numeric
                # coefficient): an empty subset would yield NaN
                # percentages and wrongly clear the whole gene set.
                continue
            adata_filtered = adata[cells_in_condition]
            # Percentage of cells expressing each gene in this condition.
            gene_expression = (adata_filtered.X > 0).mean(axis=0) * 100
            # transpose() turns the (1, n_genes) result of a sparse matrix
            # into a per-gene column usable as a row mask on .var.
            genes_to_filter = set(adata_filtered.var[
                gene_expression.transpose() < min_perc_cells_expression
            ].index.tolist())
            if contrast in genes_filter_for_contrast:
                genes_filter_for_contrast[contrast].intersection_update(
                    genes_to_filter
                )
            else:
                genes_filter_for_contrast[contrast] = genes_to_filter

    # Expand the {contrast: genes} mapping into (contrast, gene) rows;
    # sorting makes the row order reproducible across runs.
    expanded_pairs = [
        (contrast, gene)
        for contrast, genes in genes_filter_for_contrast.items()
        for gene in sorted(genes)
    ]

    contrast_genes_df = pd.DataFrame(
        expanded_pairs, columns=["contrast", "gene"]
    )

    return contrast_genes_df
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
539
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
540
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
541 if __name__ == "__main__":
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
542 # Create argument parser
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
543 parser = argparse.ArgumentParser(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
544 description="Perform pseudobulk analysis on an AnnData object"
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
545 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
546
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
547 # Add arguments
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
548 parser.add_argument(
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
549 "adata_file", type=str, help="Path to the AnnData file"
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
550 )
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
551 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
552 "-m",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
553 "--adata_obs_fields_to_merge",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
554 type=str,
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
555 help="Fields in adata.obs to merge, comma separated. \
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
556 You can have more than one set of fields, \
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
557 separated by semi-colon ;",
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
558 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
559 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
560 "--groupby",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
561 type=str,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
562 required=True,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
563 help="The column in adata.obs that defines the groups",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
564 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
565 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
566 "--sample_key",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
567 required=True,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
568 type=str,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
569 help="The column in adata.obs that defines the samples",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
570 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
571 # add argument for layer
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
572 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
573 "--layer",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
574 type=str,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
575 default=None,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
576 help="The name of the layer of the AnnData object to use",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
577 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
578 # add argument for mode
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
579 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
580 "--mode",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
581 type=str,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
582 default="sum",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
583 help="The mode for Decoupler pseudobulk analysis",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
584 choices=["sum", "mean", "median"],
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
585 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
586 # add boolean argument for use_raw
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
587 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
588 "--use_raw",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
589 action="store_true",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
590 default=False,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
591 help="Whether to use the raw part of the AnnData object",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
592 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
593 # add argument for min_cells
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
594 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
595 "--min_cells",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
596 type=int,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
597 default=10,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
598 help="Minimum number of cells for pseudobulk analysis",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
599 )
7
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
600 # add argument for min percentage of cells that should express a gene
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
601 parser.add_argument(
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
602 "--min_gene_exp_perc_per_cell",
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
603 type=float,
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
604 default=50,
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
605 help="If all the conditions of one side of a contrast express a \
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
606 gene in less than this percentage of cells, then the genes \
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
607 will be added to a list of genes to ignore for that contrast.\
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
608 Requires the contrast file to be provided.",
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
609 )
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
610 parser.add_argument(
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
611 "--contrasts_file",
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
612 type=str,
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
613 required=False,
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
614 help="Contrasts file, a one column tsv with a header, each line \
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
615 represents a contrast as a combination of conditions at each \
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
616 side of a subtraction.",
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
617 )
617e50767215 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 6
diff changeset
618
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
619 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
620 "--save_path", type=str, help="Path to save the plot (optional)"
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
621 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
622 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
623 "--min_counts",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
624 type=int,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
625 help="Minimum count threshold for filtering by expression",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
626 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
627 parser.add_argument(
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
628 "--min_counts_per_sample_marking",
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
629 type=int,
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
630 default=20,
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
631 help="Minimum count threshold per sample for \
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
632 marking genes to be ignored after DE",
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
633 )
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
634 parser.add_argument(
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
635 "--min_total_counts",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
636 type=int,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
637 help="Minimum total count threshold for filtering by expression",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
638 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
639 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
640 "--anndata_output_path",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
641 type=str,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
642 help="Path to save the filtered AnnData object or pseudobulk data",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
643 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
644 parser.add_argument(
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
645 "--filter_expr",
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
646 action="store_true",
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
647 help="Enable filtering by expression",
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
648 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
649 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
650 "--factor_fields",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
651 type=str,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
652 help="Comma separated list of fields for the factors",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
653 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
654 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
655 "--deseq2_output_path",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
656 type=str,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
657 help="Path to save the DESeq2 inputs",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
658 required=True,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
659 )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
660 parser.add_argument(
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
661 "--plot_samples_figsize",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
662 type=int,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
663 default=[10, 10],
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
664 nargs=2,
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
665 help="Size of the samples plot as a tuple (two arguments)",
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
666 )
5
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
667 parser.add_argument(
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
668 "--plot_filtering_figsize", type=int, default=[10, 10], nargs=2
c9aaac87c583 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 0
diff changeset
669 )
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
670
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
671 # Parse the command line arguments
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
672 args = parser.parse_args()
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
673
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
674 # Call the main function
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
675 main(args)