Mercurial > repos > ebi-gxa > decoupler_pathway_inference
comparison decoupler_pathway_inference.xml @ 10:97c2c52a7ab4 draft default tip
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b581a5b4ba88c5bf06f6223ba9aec51a8564796c
author | ebi-gxa |
---|---|
date | Fri, 29 Nov 2024 11:34:09 +0000 |
parents | 81ccee273bc6 |
children |
comparison
equal
deleted
inserted
replaced
9:81ccee273bc6 | 10:97c2c52a7ab4 |
---|---|
1 <tool id="decoupler_pathway_inference" name="Decoupler Pathway Inference" version="1.4.0+galaxy2" profile="20.05" license="MIT"> | 1 <tool id="decoupler_pathway_inference" name="Decoupler Pathway Inference" version="1.4.0+galaxy3" profile="20.05" license="MIT"> |
2 <description> | 2 <description> |
3 of functional genesets/pathways for scRNA-seq data. | 3 of functional genesets/pathways for scRNA-seq data. |
4 </description> | 4 </description> |
5 <requirements> | 5 <requirements> |
6 <requirement type="package" version="1.4.0">decoupler</requirement> | 6 <requirement type="package" version="1.4.0">decoupler</requirement> |
7 </requirements> | 7 </requirements> |
8 <command> | 8 <command> |
9 #if $inp.format == 'h5ad': | |
10 #set $input_fname = "input.h5ad" | |
11 #else: | |
12 #set $input_fname = "input.tsv" | |
13 #end if | |
14 ln -s '$input' '$input_fname'; | |
15 | |
9 python '$__tool_directory__/decoupler_pathway_inference.py' | 16 python '$__tool_directory__/decoupler_pathway_inference.py' |
10 -i '$input_anndata' | 17 -i '$input_fname' |
11 -n '$input_network_file' | 18 -n '$input_network_file' |
12 --min_n "$min_n" | 19 --min_n "$min_n" |
13 --method '$method' | 20 --method '$method' |
14 $use_raw | 21 |
15 --source '$source' | 22 --source '$source' |
16 --target '$target' | 23 --target '$target' |
17 --weight '$weight' | 24 --weight '$weight' |
18 #if $gene_symbols_field: | 25 #if str($inp.format) == "tabular": |
19 --var_gene_symbols_field '$gene_symbols_field' | 26 #if $inp.stat_field: |
27 --stat "${inp.stat_field}" | |
28 #end if | |
29 #if $inp.p_value_column: | |
30 --p_value_column "${inp.p_value_column}" | |
31 --p_value_threshold "${inp.p_value_threshold}" | |
32 #end if | |
33 #else: | |
34 #if $inp.gene_symbols_field: | |
35 --var_gene_symbols_field "${inp.gene_symbols_field}" | |
36 #end if | |
37 #if $inp.use_raw: | |
38 ${inp.use_raw} | |
39 #end if | |
40 #if $inp.write_activities_path: | |
41 ${inp.write_activities_path} | |
42 #end if | |
20 #end if | 43 #end if |
21 --output "inference" | 44 --output "inference" |
22 $write_activities_path | 45 |
23 </command> | 46 </command> |
24 <inputs> | 47 <inputs> |
25 <param name="input_anndata" type="data" format="h5ad" label="Input AnnData file" /> | 48 <param name="input" type="data" format="h5ad,tabular" label="Input AnnData/Expression file"/> |
26 <param name="input_network_file" type="data" format="tabular" label="Input Network file" help="Tabular file with columns Source, Target and Weight. A source gene/pathway regulates/contains a target gene, weights can be either positive or negative. The source element needs to be part of the network, the target is a gene in the network and in the dataset" /> | 49 <param name="input_network_file" type="data" format="tabular" label="Input Network file" help="Tabular file with columns Source, Target and Weight. A source gene/pathway regulates/contains a target gene, weights can be either positive or negative. The source element needs to be part of the network, the target is a gene in the network and in the dataset"/> |
27 <param name="min_n" type="integer" min="0" value="5" label="Minimum targets per source." help="If targets are less than minimum, sources are removed" /> | 50 <param name="min_n" type="integer" min="0" value="5" label="Minimum targets per source." help="If targets are less than minimum, sources are removed"/> |
51 <conditional name="inp"> | |
52 <param name="format" type="select" label="Input Format" help="Whether the provided file is AnnData or a Table of differential expression results (usually from bulk)."> | |
53 <option value="h5ad">AnnData</option> | |
54 <option value="tabular">Differential Expression Table</option> | |
55 </param> | |
56 <when value="h5ad"> | |
57 <param name="use_raw" type="boolean" truevalue="--use_raw" falsevalue="" checked="false" label="Use the raw part of the AnnData object"/> | |
58 <param name="write_activities_path" type="boolean" truevalue="--activities_path anndata_activities_path.h5ad" falsevalue="" checked="true" label="Write the activities AnnData object." help="Contains the MLM/ULM/Consensus activity results for each pathway and each cell in the main matrix, it is not a replacement of the original AnnData provided as input."/> | |
59 <param name="gene_symbols_field" type="text" optional="true" label="Gene symbols field" help="The field in the AnnData var table where gene symbols are stored."/> | |
60 </when> | |
61 <when value="tabular"> | |
62 <param name="stat_field" type="text" label="Statistic column name" optional="false" help="Defines which column will be passed to the decoupler method, usually you want something like the log2FC or the t-stat (this must be a column in your table)"/> | |
63 <param argument="--p_value_column" type="text" label="P-value/FDR column name" help="Defines which column will be passed to the decoupler method as p-value, usually you want something like the p-value or the adjusted p-value/FDR (this must be a column in your table)"/> | |
64 <param argument="--p_value_threshold" value="0.05" type="float" label="P-value/FDR thresholds" help="Will filter out any rows in the file that are above the value (in the set P-value/FDR column)"/> | |
65 </when> | |
66 </conditional> | |
28 <param name="method" type="select" label="Activity inference method"> | 67 <param name="method" type="select" label="Activity inference method"> |
29 <option value="mlm" selected="true">Multivariate linear model (MLM)</option> | 68 <option value="mlm" selected="true">Multivariate linear model (MLM)</option> |
30 <option value="ulm">Univariate linear model (ULM)</option> | 69 <option value="ulm">Univariate linear model (ULM)</option> |
70 <option value="consensus">Consensus (use for TFs with CollecTri)</option> | |
31 </param> | 71 </param> |
32 <param name="use_raw" type="boolean" truevalue="--use_raw" falsevalue="" checked="false" label="Use the raw part of the AnnData object" /> | 72 <param name="source" type="text" value="source" label="Column name in network with source nodes." help="Usually the regulators. If empty then default is 'source' is used."/> |
33 <param name="write_activities_path" type="boolean" truevalue="--activities_path anndata_activities_path.h5ad" falsevalue="" checked="true" label="Write the activities AnnData object." help="Contains the MLM/ULM activity results for each pathway and each cell in the main matrix, it is not a replacement of the original AnnData provided as input."/> | 73 <param name="target" type="text" value="target" label="Column name in network with target nodes." help="Usually the regulated genes. If empty then default is 'target' is used."/> |
34 <param name="source" type="text" value='source' label="Column name in network with source nodes." help="Usually the regulators. If empty then default is 'source' is used." /> | 74 <param name="weight" type="text" value="weight" label="Column name in network with weight." help="If empty then default is 'weight' is used."/> |
35 <param name="target" type="text" value='target' label="Column name in network with target nodes." help="Usually the regulated genes. If empty then default is 'target' is used." /> | |
36 <param name="weight" type="text" value='weight' label="Column name in network with weight." help="If empty then default is 'weight' is used." /> | |
37 <param name="gene_symbols_field" type="text" optional="true" label="Gene symbols field" help="The field in the AnnData var table where gene symbols are stored."/> | |
38 </inputs> | 75 </inputs> |
39 <outputs> | 76 <outputs> |
40 <data name="output_ad" format="h5ad" from_work_dir="anndata_activities_path.h5ad" label="${tool.name} on ${on_string}: Regulators/Pathways activity AnnData file"> | 77 <data name="output_ad" format="h5ad" from_work_dir="anndata_activities_path.h5ad" label="${tool.name} on ${on_string}: Regulators/Pathways activity AnnData file"> |
41 <filter>write_activities_path</filter> | 78 <filter>inp['format'] == "h5ad" and inp['write_activities_path'] is True</filter> |
42 </data> | 79 </data> |
43 <data name="output_table" format="tabular" from_work_dir="inference.tsv" label="${tool.name} on ${on_string}: Output estimate table" /> | 80 <data name="output_table" format="tabular" from_work_dir="inference.tsv" label="${tool.name} on ${on_string}: Output estimate table"/> |
44 </outputs> | 81 </outputs> |
45 <tests> | 82 <tests> |
46 <!-- Hint: You can use [ctrl+alt+t] after defining the inputs/outputs to auto-scaffold some basic test cases. --> | 83 <!-- Hint: You can use [ctrl+alt+t] after defining the inputs/outputs to auto-scaffold some basic test cases. --> |
47 | 84 <test expect_num_outputs="2"> |
48 <test expect_num_outputs="2"> | 85 <param name="input" value="pbmc3k_processed.h5ad"/> |
49 <param name="input_anndata" value="pbmc3k_processed.h5ad"/> | 86 <param name="inp|format" value="h5ad"/> |
50 <param name="input_network_file" value="progeny_test.tsv"/> | 87 <param name="input_network_file" value="progeny_test.tsv"/> |
51 <param name="min_n" value="0"/> | 88 <param name="min_n" value="0"/> |
52 <param name="method" value="mlm"/> | 89 <param name="method" value="mlm"/> |
53 <param name="use_raw" value="false"/> | 90 <param name="inp|use_raw" value="false"/> |
54 <param name="write_activities_path" value="true"/> | 91 <param name="inp|write_activities_path" value="true"/> |
55 <param name="source" value="source"/> | 92 <param name="source" value="source"/> |
56 <param name="target" value="target"/> | 93 <param name="target" value="target"/> |
57 <param name="weight" value="weight"/> | 94 <param name="weight" value="weight"/> |
58 <output name="output_ad"> | 95 <output name="output_ad"> |
59 <assert_contents> | 96 <assert_contents> |
60 <has_h5_keys keys="obsm/mlm_estimate"/> | 97 <has_h5_keys keys="obsm/mlm_estimate"/> |
61 </assert_contents> | 98 </assert_contents> |
62 </output> | 99 </output> |
63 <output name="output_table"> | 100 <output name="output_table"> |
64 <assert_contents> | 101 <assert_contents> |
65 <has_n_columns n="5"/> | 102 <has_n_columns n="5"/> |
66 </assert_contents> | 103 </assert_contents> |
67 </output> | 104 </output> |
68 </test> | 105 </test> |
69 <test> | 106 <test expect_num_outputs="2"> |
70 <param name="input_anndata" value="pbmc3k_processed.h5ad"/> | 107 <param name="input" value="pbmc3k_processed.h5ad"/> |
71 <param name="input_network_file" value="progeny_test_2.tsv"/> | 108 <param name="inp|format" value="h5ad"/> |
72 <param name="min_n" value="0"/> | 109 <param name="input_network_file" value="progeny_test_2.tsv"/> |
73 <param name="method" value="ulm"/> | 110 <param name="min_n" value="0"/> |
74 <param name="use_raw" value="false"/> | 111 <param name="method" value="ulm"/> |
75 <param name="write_activities_path" value="true"/> | 112 <param name="inp|use_raw" value="false"/> |
76 <param name="source" value="source"/> | 113 <param name="inp|write_activities_path" value="true"/> |
77 <param name="target" value="target"/> | 114 <param name="source" value="source"/> |
78 <param name="weight" value="weight"/> | 115 <param name="target" value="target"/> |
79 <output name="output_ad"> | 116 <param name="weight" value="weight"/> |
80 <assert_contents> | 117 <output name="output_ad"> |
81 <has_h5_keys keys="obsm/ulm_estimate"/> | 118 <assert_contents> |
82 </assert_contents> | 119 <has_h5_keys keys="obsm/ulm_estimate"/> |
83 </output> | 120 </assert_contents> |
84 <output name="output_table"> | 121 </output> |
85 <assert_contents> | 122 <output name="output_table"> |
86 <has_n_columns n="5"/> | 123 <assert_contents> |
87 </assert_contents> | 124 <has_n_columns n="5"/> |
88 </output> | 125 </assert_contents> |
89 </test> | 126 </output> |
90 <test> | 127 </test> |
91 <param name="input_anndata" value="mito_counted_anndata.h5ad"/> | 128 <test expect_num_outputs="2"> |
92 <param name="input_network_file" value="mouse_progeny.tsv"/> | 129 <param name="input" value="mito_counted_anndata.h5ad"/> |
93 <param name="min_n" value="0"/> | 130 <param name="inp|format" value="h5ad"/> |
94 <param name="method" value="ulm"/> | 131 <param name="input_network_file" value="mouse_progeny.tsv"/> |
95 <param name="use_raw" value="false"/> | 132 <param name="min_n" value="0"/> |
96 <param name="write_activities_path" value="true"/> | 133 <param name="method" value="ulm"/> |
97 <param name="source" value="source"/> | 134 <param name="inp|use_raw" value="false"/> |
98 <param name="target" value="target"/> | 135 <param name="inp|write_activities_path" value="true"/> |
99 <param name="weight" value="weight"/> | 136 <param name="source" value="source"/> |
100 <param name="gene_symbols_field" value="Symbol"/> | 137 <param name="target" value="target"/> |
101 <output name="output_ad"> | 138 <param name="weight" value="weight"/> |
102 <assert_contents> | 139 <param name="inp|gene_symbols_field" value="Symbol"/> |
103 <has_h5_keys keys="obsm/ulm_estimate"/> | 140 <output name="output_ad"> |
104 </assert_contents> | 141 <assert_contents> |
105 </output> | 142 <has_h5_keys keys="obsm/ulm_estimate"/> |
106 <output name="output_table"> | 143 </assert_contents> |
107 <assert_contents> | 144 </output> |
108 <has_n_columns n="29"/> | 145 <output name="output_table"> |
109 </assert_contents> | 146 <assert_contents> |
110 </output> | 147 <has_n_columns n="29"/> |
111 </test> | 148 </assert_contents> |
149 </output> | |
150 </test> | |
151 <test expect_num_outputs="1"> | |
152 <param name="input" value="diff_exp_result.tab"/> | |
153 <param name="inp|format" value="tabular"/> | |
154 <param name="input_network_file" value="progeny_test.tsv"/> | |
155 <param name="min_n" value="0"/> | |
156 <param name="method" value="mlm"/> | |
157 <param name="inp|stat_field" value="log2FoldChange"/> | |
158 <param name="inp|write_activities_path" value="false"/> | |
159 <param name="source" value="source"/> | |
160 <param name="target" value="target"/> | |
161 <param name="weight" value="weight"/> | |
162 <output name="output_table"> | |
163 <assert_contents> | |
164 <has_n_columns n="3"/> | |
165 </assert_contents> | |
166 </output> | |
167 </test> | |
112 </tests> | 168 </tests> |
113 <help> | 169 <help> |
114 **What it does** | 170 **What it does** |
115 | 171 |
116 Usage | 172 Usage |
117 ..... | 173 ..... |
118 | 174 |
119 | 175 |
120 **Description** | 176 **Description** |
121 | 177 |
122 This tool extracts pathway activity inference using decoupler. | 178 This tool extracts pathway activity inference using decoupler. For more information on the underlying algorithms, |
123 | 179 see the `decoupler documentation`_, in particular the Pathway Activity |
124 **Input** | 180 and Transcription factor activity inference sections. |
125 | 181 |
126 The input file should be an AnnData object in H5AD format. The tool accepts an H5AD file containing raw or normalized data. | 182 .. _`decoupler documentation`: https://decoupler-py.readthedocs.io/en/latest/ |
127 | 183 |
128 The tool also takes network file containing a collection of pathways and their target genes, with weights for each interaction. | 184 **Input** |
129 Example: | 185 |
130 ``` | 186 The tool accepts two types of input files: |
131 source target weight | 187 |
132 0 T1 G01 1.0 | 188 1. An AnnData object in H5AD format: |
133 1 T1 G02 1.0 | 189 |
134 2 T1 G03 0.7 | 190 - The H5AD file can contain raw or normalized data. |
135 3 T2 G04 1.0 | 191 - You can specify whether to use the raw data in the AnnData object instead of the X matrix using the "use_raw" parameter. |
136 4 T2 G06 -0.5 | 192 - Minimum of targets per source can be specified using "min_n". |
137 ``` | 193 |
138 | 194 2. A tabular file with differential expression data: |
139 You can also specify whether to use the raw data in the AnnData object instead of the X matrix using the "use_raw" parameter and Minimum of targets per source using "min_n". | 195 |
196 - The file should have genes in rows, with the gene symbols in the first column. | |
197 - The file needs a header, that is, column names for every column. | |
198 - Columns must include at least fields similar to log2FC and a p-value or FDR field. | |
199 - If this file is provided, the tool will score each source in the network file according to the differential expression of the provided genes. | |
200 | |
201 The tool also requires a network file containing a collection of pathways and their target genes, with weights for each interaction. | |
202 | |
203 Example of a network file: | |
204 | |
205 +---------+--------+--------+ | |
206 | source | target | weight | | |
207 +=========+========+========+ | |
208 | T1 | G01 | 1.0 | | |
209 +---------+--------+--------+ | |
210 | T1 | G02 | 1.0 | | |
211 +---------+--------+--------+ | |
212 | T1 | G03 | 0.7 | | |
213 +---------+--------+--------+ | |
214 | T2 | G04 | 1.0 | | |
215 +---------+--------+--------+ | |
216 | T2 | G06 | -0.5 | | |
217 +---------+--------+--------+ | |
140 | 218 |
141 | 219 |
142 **Output** | 220 **Output** |
143 | 221 |
144 The tool outputs an AnnData object containing the scores in the "obs" field, and tab-separated text files containing the scores for each cell. | 222 Depending on the input file type, the tool outputs: |
145 | 223 |
146 If the "write_activities_path" parameter is set to "true", the tool will write the modified AnnData object to an H5AD file. | 224 - If an AnnData file is used: |
147 If the "write_inference" parameter is set to "true", the tool will output a tab-separated text file containing the scores for each cell. | 225 |
148 | 226 - An AnnData object containing the scores in the "obsm" field. |
149 | 227 - Tab-separated text files containing the scores for each cell. |
228 - If the "write_activities_path" parameter is set to "true", the tool will write the modified AnnData object to an H5AD file. | |
229 - The tab-separated text file containing the scores for each cell is always written. | |
230 | |
231 - If a tabular differential expression file is used: | |
232 | |
233 - A tab-separated text file where each source in the network file is scored according to the differential expression of the provided genes. | |
150 | 234 |
151 </help> | 235 </help> |
152 <citations> | 236 <citations> |
153 <citation type="doi">10.1093/bioadv/vbac016 </citation> | 237 <citation type="doi">10.1093/bioadv/vbac016 </citation> |
154 </citations> | 238 </citations> |