annotate decoupler_pseudobulk.py @ 8:93f61ea19336 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
author ebi-gxa
date Mon, 15 Jul 2024 10:56:42 +0000
parents 130e25d3ce92
children bd4b54b75888
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
1 import argparse
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
2
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
3 import anndata
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
4 import decoupler
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
5 import pandas as pd
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
6
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
7
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def get_pseudobulk(
    adata,
    sample_col,
    groups_col,
    layer=None,
    mode="sum",
    min_cells=10,
    min_counts=1000,
    use_raw=False,
):
    """
    Build a pseudobulk object by delegating to ``decoupler.get_pseudobulk``.

    Every argument is handed over unchanged: ``adata`` positionally and all
    remaining parameters as keyword arguments of the same name. Whatever
    decoupler returns is returned as-is.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    """
    # Gather the keyword options first so the delegation is a single call.
    forwarded = dict(
        sample_col=sample_col,
        groups_col=groups_col,
        layer=layer,
        mode=mode,
        use_raw=use_raw,
        min_cells=min_cells,
        min_counts=min_counts,
    )
    return decoupler.get_pseudobulk(adata, **forwarded)
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
35
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
36
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def prepend_c_to_index(index_value):
    """
    Return ``index_value`` with a leading "C" when it starts with a digit.

    Empty (falsy) values and values whose first character is not a digit
    (as judged by ``str.isdigit``) are returned unchanged.
    """
    # Guard clause: nothing to do for empty or non-digit-leading values.
    if not index_value or not index_value[0].isdigit():
        return index_value
    return "C" + index_value
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
41
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
42
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
def genes_to_ignore_per_contrast_field(
    count_matrix_df,
    samples_metadata,
    sample_metadata_col_contrasts,
    min_counts_per_sample=5,
    use_cpms=False,
):
    """
    List, for every value of a contrast field, the genes whose expression
    is too low in the samples carrying that value.

    ``count_matrix_df`` holds genes as rows and samples as columns.
    Samples are matched to metadata by position: the i-th row of
    ``samples_metadata`` must describe the i-th column of
    ``count_matrix_df``.

    For each distinct value found in
    ``samples_metadata[sample_metadata_col_contrasts]`` the matching sample
    columns are selected and every gene is summed across them. A gene is
    reported for that value when:

    * ``use_cpms`` is False: its summed counts are below
      ``number_of_samples_in_group * min_counts_per_sample``;
    * ``use_cpms`` is True: each sample is first scaled to counts per
      million (count / total counts of that sample * 1e6) and the gene's
      summed CPM is below 1. ``min_counts_per_sample`` is not used in
      this mode.

    Returns a DataFrame with one row per (contrast value, gene) pair and
    the columns ``contrast_field`` and ``genes_to_ignore``. It is empty
    when no gene falls below the threshold.

    >>> import pandas as pd
    >>> samples_metadata = pd.DataFrame({'sample':
    ...                                     ['S1', 'S2', 'S3',
    ...                                      'S4', 'S5', 'S6'],
    ...                                  'contrast_field':
    ...                                     ['A', 'A', 'A', 'B', 'B', 'B']})
    >>> count_matrix_df = pd.DataFrame(
    ...                                 {'S1':
    ...                                     [30, 1, 40, 50, 30],
    ...                                  'S2':
    ...                                     [40, 2, 60, 50, 80],
    ...                                  'S3':
    ...                                     [80, 1, 60, 50, 50],
    ...                                  'S4': [1, 50, 50, 50, 2],
    ...                                  'S5': [3, 40, 40, 40, 2],
    ...                                  'S6': [0, 50, 50, 50, 1]})
    >>> count_matrix_df.index = ['Gene1', 'Gene2', 'Gene3', 'Gene4', 'Gene5']
    >>> df = genes_to_ignore_per_contrast_field(count_matrix_df,
    ...             samples_metadata, min_counts_per_sample=5,
    ...             sample_metadata_col_contrasts='contrast_field')
    >>> df[df['contrast_field'] == 'A'].genes_to_ignore.tolist()[0]
    'Gene2'
    >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[0]
    'Gene1'
    >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[1]
    'Gene5'
    """

    contrast_values = samples_metadata[sample_metadata_col_contrasts]

    # Parallel lists: entry i of each list forms one output row.
    contrast_fields = []
    genes_to_ignore = []

    for contrast_field in contrast_values.unique():
        # Positional boolean mask selecting the samples (columns) that
        # belong to the current contrast value.
        in_group = (contrast_values == contrast_field).tolist()
        count_matrix_field = count_matrix_df.loc[:, in_group]

        if use_cpms:
            # CPM scales every sample by its own library size, i.e. the
            # column total; dividing by the per-gene row total instead
            # would make every expressed gene sum to exactly 1e6.
            library_sizes = count_matrix_field.sum(axis=0)
            count_matrix_field = (
                count_matrix_field.div(library_sizes, axis=1) * 1e6
            )
            min_counts = 1  # use 1 CPM
        else:
            # The threshold grows with the number of samples in the group.
            min_counts = count_matrix_field.shape[1] * min_counts_per_sample

        # Total expression of each gene across the group's samples.
        total_counts_per_gene = count_matrix_field.sum(axis=1)

        # Genes whose total falls below the threshold for this group.
        genes = total_counts_per_gene[
            total_counts_per_gene < min_counts
        ].index.tolist()
        genes_to_ignore.extend(genes)
        contrast_fields.extend([contrast_field] * len(genes))

    return pd.DataFrame(
        {"contrast_field": contrast_fields, "genes_to_ignore": genes_to_ignore}
    )
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
135
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
136
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
137 # write results for loading into DESeq2
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
def write_DESeq2_inputs(
    pdata,
    layer=None,
    output_dir="",
    factor_fields=None,
    min_counts_per_sample_marking=20,
):
    """
    Write the pseudobulk data as tab-separated files for loading into DESeq2.

    Files written (all prefixed with ``output_dir``):

    - ``col_metadata.tsv``: a copy of ``pdata.obs`` with sanitised sample
      names as index; restricted to ``factor_fields`` (in that order) when
      they are given.
    - ``gene_metadata.tsv``: ``pdata.var``.
    - ``counts_matrix.tsv``: the transposed matrix (``pdata.var`` index as
      rows, sanitised sample names as columns) taken from ``pdata.X`` or,
      when ``layer`` is given, from ``pdata.layers[layer]``.
    - ``genes_to_ignore_per_contrast_field.tsv``: only when
      ``factor_fields`` is given; the table returned by
      ``genes_to_ignore_per_contrast_field`` for the first factor field.

    Sample names are passed through ``prepend_c_to_index`` and then have
    dashes and spaces replaced by underscores.

    Parameters:
        pdata: AnnData-like object exposing ``obs``, ``var``, ``X`` and
            ``layers``.
        layer: name of the layer to write as counts; ``None`` uses ``X``.
        output_dir: directory prefix for the output files; an empty string
            writes relative to the current working directory.
        factor_fields: list of ``obs`` columns to keep in the column
            metadata; the first one is used as the contrast field.
        min_counts_per_sample_marking: forwarded as ``min_counts_per_sample``
            to ``genes_to_ignore_per_contrast_field``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> write_DESeq2_inputs(pseudobulk)
    """
    # add / to output_dir only if it is not empty and doesn't end with /
    if output_dir != "" and not output_dir.endswith("/"):
        output_dir = output_dir + "/"
    obs_for_deseq = pdata.obs.copy()
    # replace any index starting with digits to start with C instead.
    obs_for_deseq.rename(index=prepend_c_to_index, inplace=True)
    # avoid dash that is read as point on R colnames. regex=False makes the
    # literal replacement explicit instead of relying on the pandas default.
    sample_names = obs_for_deseq.index.str.replace("-", "_", regex=False)
    obs_for_deseq.index = sample_names.str.replace(" ", "_", regex=False)
    col_metadata_file = f"{output_dir}col_metadata.tsv"
    # write obs to a col_metadata file
    if factor_fields:
        # only output the index plus the columns in factor_fields, in order
        col_metadata = obs_for_deseq[factor_fields]
    else:
        col_metadata = obs_for_deseq
    col_metadata.to_csv(col_metadata_file, sep="\t", index=True)
    # write var to a gene_metadata file
    pdata.var.to_csv(f"{output_dir}gene_metadata.tsv", sep="\t", index=True)
    # write the counts matrix of the specified layer (X by default) to file
    matrix = pdata.X if layer is None else pdata.layers[layer]
    if hasattr(matrix, "toarray"):
        # scipy sparse matrices (anything exposing toarray) are densified
        # first so that the DataFrame is built from a plain 2D array
        matrix = matrix.toarray()
    df = pd.DataFrame(
        matrix.T, index=pdata.var.index, columns=obs_for_deseq.index
    )
    df.to_csv(f"{output_dir}counts_matrix.tsv", sep="\t", index_label="")

    if factor_fields:
        df_genes_ignore = genes_to_ignore_per_contrast_field(
            count_matrix_df=df,
            samples_metadata=obs_for_deseq,
            sample_metadata_col_contrasts=factor_fields[0],
            min_counts_per_sample=min_counts_per_sample_marking,
        )
        df_genes_ignore.to_csv(
            f"{output_dir}genes_to_ignore_per_contrast_field.tsv", sep="\t"
        )
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
196
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
197
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def plot_pseudobulk_samples(
    pseudobulk_data,
    groupby,
    figsize=(10, 10),
    save_path=None,
):
    """
    Draw decoupler's pseudobulk samples plot and save or show it.

    The figure is obtained from ``decoupler.plot_psbulk_samples``. When
    ``save_path`` is truthy the figure is saved as
    ``<save_path>/pseudobulk_samples.png``; otherwise ``show()`` is called
    on it.

    Parameters:
        pseudobulk_data: pseudobulk object handed to decoupler.
        groupby: forwarded to decoupler as ``groupby``.
        figsize: forwarded to decoupler as ``figsize``.
        save_path: directory in which to write the PNG, or ``None``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_pseudobulk_samples(pseudobulk,
    ...     groupby=["bulk_labels", "louvain"],
    ...     figsize=(10, 10))
    """
    figure = decoupler.plot_psbulk_samples(
        pseudobulk_data,
        groupby=groupby,
        figsize=figsize,
        return_fig=True,
    )
    # no destination given: just display the figure
    if not save_path:
        figure.show()
        return
    figure.savefig(f"{save_path}/pseudobulk_samples.png")
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
220
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
221
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def plot_filter_by_expr(
    pseudobulk_data,
    group,
    min_count=None,
    min_total_count=None,
    save_path=None,
):
    """
    Draw decoupler's filter-by-expression plot and save or show it.

    The figure is obtained from ``decoupler.plot_filter_by_expr``. When
    ``save_path`` is truthy the figure is saved as
    ``<save_path>/filter_by_expr.png``; otherwise ``show()`` is called on
    it.

    Parameters:
        pseudobulk_data: pseudobulk object handed to decoupler.
        group: forwarded to decoupler as ``group``.
        min_count: forwarded to decoupler as ``min_count``; when ``None``
            the argument is omitted so decoupler's own default applies.
        min_total_count: forwarded to decoupler as ``min_total_count``;
            when ``None`` the argument is omitted so decoupler's own
            default applies.
        save_path: directory in which to write the PNG, or ``None``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_filter_by_expr(pseudobulk, group="bulk_labels",
    ...     min_count=10, min_total_count=200)
    """
    # Only forward thresholds that were actually provided: passing an
    # explicit None would override decoupler's numeric defaults.
    thresholds = {
        name: value
        for name, value in (
            ("min_count", min_count),
            ("min_total_count", min_total_count),
        )
        if value is not None
    }
    fig = decoupler.plot_filter_by_expr(
        pseudobulk_data,
        group=group,
        return_fig=True,
        **thresholds,
    )
    if save_path:
        fig.savefig(f"{save_path}/filter_by_expr.png")
    else:
        fig.show()
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
248
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
249
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def filter_by_expr(pdata, min_count=None, min_total_count=None):
    """
    Return a copy of ``pdata`` restricted to the genes selected by
    ``decoupler.filter_by_expr``.

    Parameters:
        pdata: pseudobulk object handed to decoupler and then subset on
            its second axis with the returned gene selection.
        min_count: forwarded to decoupler as ``min_count``; when ``None``
            the argument is omitted so decoupler's own default applies.
        min_total_count: forwarded to decoupler as ``min_total_count``;
            when ``None`` the argument is omitted so decoupler's own
            default applies.

    Returns:
        ``pdata[:, genes].copy()`` for the genes decoupler selected.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> pdata_filt = filter_by_expr(pseudobulk,
    ...     min_count=10, min_total_count=200)
    """
    # Only forward thresholds that were actually provided: passing an
    # explicit None would override decoupler's numeric defaults.
    thresholds = {
        name: value
        for name, value in (
            ("min_count", min_count),
            ("min_total_count", min_total_count),
        )
        if value is not None
    }
    genes = decoupler.filter_by_expr(pdata, **thresholds)
    return pdata[:, genes].copy()
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
263
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
264
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
265 def check_fields(fields, adata, obs=True, context=None):
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
266 """
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
267 >>> import scanpy as sc
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
268 >>> adata = sc.datasets.pbmc68k_reduced()
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
269 >>> check_fields(["bulk_labels", "louvain"], adata, obs=True)
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
270 """
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
271
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
272 legend = ""
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
273 if context:
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
274 legend = f", passed in {context},"
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
275 if obs:
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
276 if not set(fields).issubset(set(adata.obs.columns)):
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
277 raise ValueError(
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
278 f"Some of the following fields {legend} are not present \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
279 in adata.obs: {fields}. \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
280 Possible fields are: {list(set(adata.obs.columns))}"
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
281 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
282 else:
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
283 if not set(fields).issubset(set(adata.var.columns)):
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
284 raise ValueError(
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
285 f"Some of the following fields {legend} are not present \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
286 in adata.var: {fields}. \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
287 Possible fields are: {list(set(adata.var.columns))}"
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
288 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
289
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
290
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def main(args):
    """
    Drive the pseudobulk workflow from the parsed command-line arguments.

    Steps, in order: read the AnnData file, optionally merge groups of
    adata.obs fields, validate the requested obs fields, compute the
    pseudobulk, produce the two diagnostic plots, optionally filter by
    expression, optionally write the AnnData result and finally write the
    DESeq2 inputs.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed arguments. The attributes read here are adata_file,
        adata_obs_fields_to_merge, groupby, sample_key, factor_fields,
        mode, layer, use_raw, min_cells, min_counts, min_total_counts,
        save_path, plot_samples_figsize, filter_expr,
        anndata_output_path, deseq2_output_path and
        min_counts_per_sample_marking.
    """
    adata = anndata.read_h5ad(args.adata_file)

    # Groups of obs fields to merge are separated by ":" and the fields
    # inside one group by ",".
    # NOTE(review): the --adata_obs_fields_to_merge help text mentions a
    # semi-colon as the group separator, the code uses ":" - confirm
    # which one is intended.
    merge_spec = args.adata_obs_fields_to_merge
    if merge_spec:
        for group in merge_spec.split(":"):
            fields = group.split(",")
            check_fields(fields, adata)
            adata = merge_adata_obs_fields(fields, adata)

    # Checked only after merging, so a freshly merged column can be used
    # as groupby or sample key.
    check_fields([args.groupby, args.sample_key], adata)

    factor_fields = (
        args.factor_fields.split(",") if args.factor_fields else None
    )
    if factor_fields is not None:
        check_fields(factor_fields, adata)

    print(f"Using mode: {args.mode}")
    pseudobulk_options = {
        "sample_col": args.sample_key,
        "groups_col": args.groupby,
        "layer": args.layer,
        "mode": args.mode,
        "use_raw": args.use_raw,
        "min_cells": args.min_cells,
        "min_counts": args.min_counts,
    }
    pseudobulk_data = get_pseudobulk(adata, **pseudobulk_options)

    # Diagnostic plot of the pseudobulk samples
    plot_pseudobulk_samples(
        pseudobulk_data,
        args.groupby,
        save_path=args.save_path,
        figsize=args.plot_samples_figsize,
    )

    # The same thresholds feed both the filter plot and the filter itself
    expr_thresholds = {
        "min_count": args.min_counts,
        "min_total_count": args.min_total_counts,
    }
    plot_filter_by_expr(
        pseudobulk_data,
        group=args.groupby,
        **expr_thresholds,
        save_path=args.save_path,
    )

    # Expression filtering is opt-in; when enabled the filtered object
    # replaces the pseudobulk data for everything written below.
    if args.filter_expr:
        pseudobulk_data = filter_by_expr(pseudobulk_data, **expr_thresholds)

    # The AnnData output is only written when a path was supplied
    if args.anndata_output_path:
        pseudobulk_data.write_h5ad(
            args.anndata_output_path, compression="gzip"
        )

    write_DESeq2_inputs(
        pseudobulk_data,
        output_dir=args.deseq2_output_path,
        factor_fields=factor_fields,
        min_counts_per_sample_marking=args.min_counts_per_sample_marking,
    )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
361
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
362
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def merge_adata_obs_fields(obs_fields_to_merge, adata):
    """
    Merge several adata.obs fields into a single string-valued column.

    The new column is named after the merged fields joined by "_" and
    holds, for every observation, the string values of those fields
    joined by "_" in the given order.

    All requested fields are validated before adata.obs is touched, so a
    missing field never leaves a half-built column behind. The merged
    column is always rebuilt from the source fields: if a column with the
    target name already exists (for example because the same merge was
    requested twice) it is overwritten rather than appended to.

    Parameters
    ----------
    obs_fields_to_merge : list of str
        Names of the adata.obs columns to merge, in order. An empty list
        leaves adata unchanged.
    adata : anndata.AnnData
        The AnnData object; its obs table is modified in place.

    Returns
    -------
    anndata.AnnData
        The same AnnData object, with the merged column added to obs.

    Raises
    ------
    ValueError
        If any of the requested fields is not a column of adata.obs.

    docstring tests:
    >>> import scanpy as sc
    >>> ad = sc.datasets.pbmc68k_reduced()
    >>> ad = merge_adata_obs_fields(["bulk_labels","louvain"], ad)
    >>> ad.obs.columns
    Index(['bulk_labels', 'n_genes', 'percent_mito', 'n_counts', 'S_score',
           'G2M_score', 'phase', 'louvain', 'bulk_labels_louvain'],
          dtype='object')
    """
    field_name = "_".join(obs_fields_to_merge)

    # Validate everything up front so a failure cannot leave a partially
    # merged column in adata.obs.
    for field in obs_fields_to_merge:
        if field not in adata.obs.columns:
            raise ValueError(
                f"The '{field}' column is not present in adata.obs."
            )

    # Build the merged values from scratch instead of extending whatever
    # a pre-existing column of the same name may contain.
    merged = None
    for field in obs_fields_to_merge:
        values = adata.obs[field].astype(str)
        merged = values if merged is None else merged + "_" + values

    if merged is not None:
        adata.obs[field_name] = merged
    return adata
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
401
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
402
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
403 if __name__ == "__main__":
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
404 # Create argument parser
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
405 parser = argparse.ArgumentParser(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
406 description="Perform pseudobulk analysis on an AnnData object"
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
407 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
408
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
409 # Add arguments
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
410 parser.add_argument(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
411 "adata_file", type=str, help="Path to the AnnData file"
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
412 )
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
413 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
414 "-m",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
415 "--adata_obs_fields_to_merge",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
416 type=str,
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
417 help="Fields in adata.obs to merge, comma separated. \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
418 You can have more than one set of fields, \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
419 separated by semi-colon ;",
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
420 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
421 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
422 "--groupby",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
423 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
424 required=True,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
425 help="The column in adata.obs that defines the groups",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
426 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
427 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
428 "--sample_key",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
429 required=True,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
430 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
431 help="The column in adata.obs that defines the samples",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
432 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
433 # add argument for layer
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
434 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
435 "--layer",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
436 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
437 default=None,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
438 help="The name of the layer of the AnnData object to use",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
439 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
440 # add argument for mode
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
441 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
442 "--mode",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
443 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
444 default="sum",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
445 help="The mode for Decoupler pseudobulk analysis",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
446 choices=["sum", "mean", "median"],
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
447 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
448 # add boolean argument for use_raw
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
449 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
450 "--use_raw",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
451 action="store_true",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
452 default=False,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
453 help="Whether to use the raw part of the AnnData object",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
454 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
455 # add argument for min_cells
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
456 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
457 "--min_cells",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
458 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
459 default=10,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
460 help="Minimum number of cells for pseudobulk analysis",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
461 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
462 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
463 "--save_path", type=str, help="Path to save the plot (optional)"
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
464 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
465 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
466 "--min_counts",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
467 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
468 help="Minimum count threshold for filtering by expression",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
469 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
470 parser.add_argument(
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
471 "--min_counts_per_sample_marking",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
472 type=int,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
473 default=20,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
474 help="Minimum count threshold per sample for \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
475 marking genes to be ignored after DE",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
476 )
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
477 parser.add_argument(
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
478 "--min_total_counts",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
479 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
480 help="Minimum total count threshold for filtering by expression",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
481 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
482 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
483 "--anndata_output_path",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
484 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
485 help="Path to save the filtered AnnData object or pseudobulk data",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
486 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
487 parser.add_argument(
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
488 "--filter_expr",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
489 action="store_true",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
490 help="Enable filtering by expression",
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
491 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
492 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
493 "--factor_fields",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
494 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
495 help="Comma separated list of fields for the factors",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
496 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
497 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
498 "--deseq2_output_path",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
499 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
500 help="Path to save the DESeq2 inputs",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
501 required=True,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
502 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
503 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
504 "--plot_samples_figsize",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
505 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
506 default=[10, 10],
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
507 nargs=2,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
508 help="Size of the samples plot as a tuple (two arguments)",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
509 )
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
510 parser.add_argument(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
511 "--plot_filtering_figsize", type=int, default=[10, 10], nargs=2
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
512 )
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
513
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
514 # Parse the command line arguments
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
515 args = parser.parse_args()
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
516
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
517 # Call the main function
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
518 main(args)