comparison decoupler_pathway_inference.xml @ 10:97c2c52a7ab4 draft default tip

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b581a5b4ba88c5bf06f6223ba9aec51a8564796c
author ebi-gxa
date Fri, 29 Nov 2024 11:34:09 +0000
parents 81ccee273bc6
children
comparison
equal deleted inserted replaced
9:81ccee273bc6 10:97c2c52a7ab4
1 <tool id="decoupler_pathway_inference" name="Decoupler Pathway Inference" version="1.4.0+galaxy2" profile="20.05" license="MIT"> 1 <tool id="decoupler_pathway_inference" name="Decoupler Pathway Inference" version="1.4.0+galaxy3" profile="20.05" license="MIT">
2 <description> 2 <description>
3 of functional genesets/pathways for scRNA-seq data. 3 of functional genesets/pathways for scRNA-seq data.
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.4.0">decoupler</requirement> 6 <requirement type="package" version="1.4.0">decoupler</requirement>
7 </requirements> 7 </requirements>
8 <command> 8 <command>
9 #if $inp.format == 'h5ad':
10 #set $input_fname = "input.h5ad"
11 #else:
12 #set $input_fname = "input.tsv"
13 #end if
14 ln -s '$input' '$input_fname';
15
9 python '$__tool_directory__/decoupler_pathway_inference.py' 16 python '$__tool_directory__/decoupler_pathway_inference.py'
10 -i '$input_anndata' 17 -i '$input_fname'
11 -n '$input_network_file' 18 -n '$input_network_file'
12 --min_n "$min_n" 19 --min_n "$min_n"
13 --method '$method' 20 --method '$method'
14 $use_raw 21
15 --source '$source' 22 --source '$source'
16 --target '$target' 23 --target '$target'
17 --weight '$weight' 24 --weight '$weight'
18 #if $gene_symbols_field: 25 #if str($inp.format) == "tabular":
19 --var_gene_symbols_field '$gene_symbols_field' 26 #if $inp.stat_field:
27 --stat "${inp.stat_field}"
28 #end if
29 #if $inp.p_value_column:
30 --p_value_column "${inp.p_value_column}"
31 --p_value_threshold "${inp.p_value_threshold}"
32 #end if
33 #else:
34 #if $inp.gene_symbols_field:
35 --var_gene_symbols_field "${inp.gene_symbols_field}"
36 #end if
37 #if $inp.use_raw:
38 ${inp.use_raw}
39 #end if
40 #if $inp.write_activities_path:
41 ${inp.write_activities_path}
42 #end if
20 #end if 43 #end if
21 --output "inference" 44 --output "inference"
22 $write_activities_path 45
23 </command> 46 </command>
24 <inputs> 47 <inputs>
25 <param name="input_anndata" type="data" format="h5ad" label="Input AnnData file" /> 48 <param name="input" type="data" format="h5ad,tabular" label="Input AnnData/Expression file"/>
26 <param name="input_network_file" type="data" format="tabular" label="Input Network file" help="Tabular file with columns Source, Target and Weight. A source gene/pathway regulates/contains a target gene, weights can be either positive or negative. The source element needs to be part of the network, the target is a gene in the network and in the dataset" /> 49 <param name="input_network_file" type="data" format="tabular" label="Input Network file" help="Tabular file with columns Source, Target and Weight. A source gene/pathway regulates/contains a target gene, weights can be either positive or negative. The source element needs to be part of the network, the target is a gene in the network and in the dataset"/>
27 <param name="min_n" type="integer" min="0" value="5" label="Minimum targets per source." help="If targets are less than minimum, sources are removed" /> 50 <param name="min_n" type="integer" min="0" value="5" label="Minimum targets per source." help="If targets are less than minimum, sources are removed"/>
51 <conditional name="inp">
52 <param name="format" type="select" label="Input Format" help="Whether the provided file is AnnData or a Table of differential expression results (usually from bulk).">
53 <option value="h5ad">AnnData</option>
54 <option value="tabular">Differential Expression Table</option>
55 </param>
56 <when value="h5ad">
57 <param name="use_raw" type="boolean" truevalue="--use_raw" falsevalue="" checked="false" label="Use the raw part of the AnnData object"/>
58 <param name="write_activities_path" type="boolean" truevalue="--activities_path anndata_activities_path.h5ad" falsevalue="" checked="true" label="Write the activities AnnData object." help="Contains the MLM/ULM/Consensus activity results for each pathway and each cell in the main matrix, it is not a replacement of the original AnnData provided as input."/>
59 <param name="gene_symbols_field" type="text" optional="true" label="Gene symbols field" help="The field in the AnnData var table where gene symbols are stored."/>
60 </when>
61 <when value="tabular">
62 <param name="stat_field" type="text" label="Statistic column name" optional="false" help="Defines which column will be passed to the decoupler method, usually you want something like the log2FC or the t-stat (this must be a column in your table)"/>
63 <param argument="--p_value_column" type="text" label="P-value/FDR column name" help="Defines which column will be passed to the decoupler method as p-value, usually you want something like the p-value or the adjusted p-value/FDR (this must be a column in your table)"/>
64 <param argument="--p_value_threshold" value="0.05" type="float" label="P-value/FDR thresholds" help="Will filter out any rows in the file that are above the value (in the set P-value/FDR column)"/>
65 </when>
66 </conditional>
28 <param name="method" type="select" label="Activity inference method"> 67 <param name="method" type="select" label="Activity inference method">
29 <option value="mlm" selected="true">Multivariate linear model (MLM)</option> 68 <option value="mlm" selected="true">Multivariate linear model (MLM)</option>
30 <option value="ulm">Univariate linear model (ULM)</option> 69 <option value="ulm">Univariate linear model (ULM)</option>
70 <option value="consensus">Consensus (use for TFs with CollecTri)</option>
31 </param> 71 </param>
32 <param name="use_raw" type="boolean" truevalue="--use_raw" falsevalue="" checked="false" label="Use the raw part of the AnnData object" /> 72 <param name="source" type="text" value="source" label="Column name in network with source nodes." help="Usually the regulators. If empty then default is 'source' is used."/>
33 <param name="write_activities_path" type="boolean" truevalue="--activities_path anndata_activities_path.h5ad" falsevalue="" checked="true" label="Write the activities AnnData object." help="Contains the MLM/ULM activity results for each pathway and each cell in the main matrix, it is not a replacement of the original AnnData provided as input."/> 73 <param name="target" type="text" value="target" label="Column name in network with target nodes." help="Usually the regulated genes. If empty then default is 'target' is used."/>
34 <param name="source" type="text" value='source' label="Column name in network with source nodes." help="Usually the regulators. If empty then default is 'source' is used." /> 74 <param name="weight" type="text" value="weight" label="Column name in network with weight." help="If empty then default is 'weight' is used."/>
35 <param name="target" type="text" value='target' label="Column name in network with target nodes." help="Usually the regulated genes. If empty then default is 'target' is used." />
36 <param name="weight" type="text" value='weight' label="Column name in network with weight." help="If empty then default is 'weight' is used." />
37 <param name="gene_symbols_field" type="text" optional="true" label="Gene symbols field" help="The field in the AnnData var table where gene symbols are stored."/>
38 </inputs> 75 </inputs>
39 <outputs> 76 <outputs>
40 <data name="output_ad" format="h5ad" from_work_dir="anndata_activities_path.h5ad" label="${tool.name} on ${on_string}: Regulators/Pathways activity AnnData file"> 77 <data name="output_ad" format="h5ad" from_work_dir="anndata_activities_path.h5ad" label="${tool.name} on ${on_string}: Regulators/Pathways activity AnnData file">
41 <filter>write_activities_path</filter> 78 <filter>inp['format'] == "h5ad" and inp['write_activities_path'] is True</filter>
42 </data> 79 </data>
43 <data name="output_table" format="tabular" from_work_dir="inference.tsv" label="${tool.name} on ${on_string}: Output estimate table" /> 80 <data name="output_table" format="tabular" from_work_dir="inference.tsv" label="${tool.name} on ${on_string}: Output estimate table"/>
44 </outputs> 81 </outputs>
45 <tests> 82 <tests>
46 <!-- Hint: You can use [ctrl+alt+t] after defining the inputs/outputs to auto-scaffold some basic test cases. --> 83 <!-- Hint: You can use [ctrl+alt+t] after defining the inputs/outputs to auto-scaffold some basic test cases. -->
47 84 <test expect_num_outputs="2">
48 <test expect_num_outputs="2"> 85 <param name="input" value="pbmc3k_processed.h5ad"/>
49 <param name="input_anndata" value="pbmc3k_processed.h5ad"/> 86 <param name="inp|format" value="h5ad"/>
50 <param name="input_network_file" value="progeny_test.tsv"/> 87 <param name="input_network_file" value="progeny_test.tsv"/>
51 <param name="min_n" value="0"/> 88 <param name="min_n" value="0"/>
52 <param name="method" value="mlm"/> 89 <param name="method" value="mlm"/>
53 <param name="use_raw" value="false"/> 90 <param name="inp|use_raw" value="false"/>
54 <param name="write_activities_path" value="true"/> 91 <param name="inp|write_activities_path" value="true"/>
55 <param name="source" value="source"/> 92 <param name="source" value="source"/>
56 <param name="target" value="target"/> 93 <param name="target" value="target"/>
57 <param name="weight" value="weight"/> 94 <param name="weight" value="weight"/>
58 <output name="output_ad"> 95 <output name="output_ad">
59 <assert_contents> 96 <assert_contents>
60 <has_h5_keys keys="obsm/mlm_estimate"/> 97 <has_h5_keys keys="obsm/mlm_estimate"/>
61 </assert_contents> 98 </assert_contents>
62 </output> 99 </output>
63 <output name="output_table"> 100 <output name="output_table">
64 <assert_contents> 101 <assert_contents>
65 <has_n_columns n="5"/> 102 <has_n_columns n="5"/>
66 </assert_contents> 103 </assert_contents>
67 </output> 104 </output>
68 </test> 105 </test>
69 <test> 106 <test expect_num_outputs="2">
70 <param name="input_anndata" value="pbmc3k_processed.h5ad"/> 107 <param name="input" value="pbmc3k_processed.h5ad"/>
71 <param name="input_network_file" value="progeny_test_2.tsv"/> 108 <param name="inp|format" value="h5ad"/>
72 <param name="min_n" value="0"/> 109 <param name="input_network_file" value="progeny_test_2.tsv"/>
73 <param name="method" value="ulm"/> 110 <param name="min_n" value="0"/>
74 <param name="use_raw" value="false"/> 111 <param name="method" value="ulm"/>
75 <param name="write_activities_path" value="true"/> 112 <param name="inp|use_raw" value="false"/>
76 <param name="source" value="source"/> 113 <param name="inp|write_activities_path" value="true"/>
77 <param name="target" value="target"/> 114 <param name="source" value="source"/>
78 <param name="weight" value="weight"/> 115 <param name="target" value="target"/>
79 <output name="output_ad"> 116 <param name="weight" value="weight"/>
80 <assert_contents> 117 <output name="output_ad">
81 <has_h5_keys keys="obsm/ulm_estimate"/> 118 <assert_contents>
82 </assert_contents> 119 <has_h5_keys keys="obsm/ulm_estimate"/>
83 </output> 120 </assert_contents>
84 <output name="output_table"> 121 </output>
85 <assert_contents> 122 <output name="output_table">
86 <has_n_columns n="5"/> 123 <assert_contents>
87 </assert_contents> 124 <has_n_columns n="5"/>
88 </output> 125 </assert_contents>
89 </test> 126 </output>
90 <test> 127 </test>
91 <param name="input_anndata" value="mito_counted_anndata.h5ad"/> 128 <test expect_num_outputs="2">
92 <param name="input_network_file" value="mouse_progeny.tsv"/> 129 <param name="input" value="mito_counted_anndata.h5ad"/>
93 <param name="min_n" value="0"/> 130 <param name="inp|format" value="h5ad"/>
94 <param name="method" value="ulm"/> 131 <param name="input_network_file" value="mouse_progeny.tsv"/>
95 <param name="use_raw" value="false"/> 132 <param name="min_n" value="0"/>
96 <param name="write_activities_path" value="true"/> 133 <param name="method" value="ulm"/>
97 <param name="source" value="source"/> 134 <param name="inp|use_raw" value="false"/>
98 <param name="target" value="target"/> 135 <param name="inp|write_activities_path" value="true"/>
99 <param name="weight" value="weight"/> 136 <param name="source" value="source"/>
100 <param name="gene_symbols_field" value="Symbol"/> 137 <param name="target" value="target"/>
101 <output name="output_ad"> 138 <param name="weight" value="weight"/>
102 <assert_contents> 139 <param name="inp|gene_symbols_field" value="Symbol"/>
103 <has_h5_keys keys="obsm/ulm_estimate"/> 140 <output name="output_ad">
104 </assert_contents> 141 <assert_contents>
105 </output> 142 <has_h5_keys keys="obsm/ulm_estimate"/>
106 <output name="output_table"> 143 </assert_contents>
107 <assert_contents> 144 </output>
108 <has_n_columns n="29"/> 145 <output name="output_table">
109 </assert_contents> 146 <assert_contents>
110 </output> 147 <has_n_columns n="29"/>
111 </test> 148 </assert_contents>
149 </output>
150 </test>
151 <test expect_num_outputs="1">
152 <param name="input" value="diff_exp_result.tab"/>
153 <param name="inp|format" value="tabular"/>
154 <param name="input_network_file" value="progeny_test.tsv"/>
155 <param name="min_n" value="0"/>
156 <param name="method" value="mlm"/>
157 <param name="inp|stat_field" value="log2FoldChange"/>
158 <param name="inp|write_activities_path" value="false"/>
159 <param name="source" value="source"/>
160 <param name="target" value="target"/>
161 <param name="weight" value="weight"/>
162 <output name="output_table">
163 <assert_contents>
164 <has_n_columns n="3"/>
165 </assert_contents>
166 </output>
167 </test>
112 </tests> 168 </tests>
113 <help> 169 <help>
114 **What it does** 170 **What it does**
115 171
116 Usage 172 Usage
117 ..... 173 .....
118 174
119 175
120 **Description** 176 **Description**
121 177
122 This tool extracts pathway activity inference using decoupler. 178 This tool extracts pathway activity inference using decoupler. For more information on the underlying algorithms,
123 179 see the `decoupler documentation`_, in particular the Pathway Activity
124 **Input** 180 and Transcription factor activity inference sections.
125 181
126 The input file should be an AnnData object in H5AD format. The tool accepts an H5AD file containing raw or normalized data. 182 .. _`decoupler documentation`: https://decoupler-py.readthedocs.io/en/latest/
127 183
128 The tool also takes network file containing a collection of pathways and their target genes, with weights for each interaction. 184 **Input**
129 Example: 185
130 ``` 186 The tool accepts two types of input files:
131 source target weight 187
132 0 T1 G01 1.0 188 1. An AnnData object in H5AD format:
133 1 T1 G02 1.0 189
134 2 T1 G03 0.7 190 - The H5AD file can contain raw or normalized data.
135 3 T2 G04 1.0 191 - You can specify whether to use the raw data in the AnnData object instead of the X matrix using the "use_raw" parameter.
136 4 T2 G06 -0.5 192 - The minimum number of targets per source can be specified using "min_n".
137 ``` 193
138 194 2. A tabular file with differential expression data:
139 You can also specify whether to use the raw data in the AnnData object instead of the X matrix using the "use_raw" parameter and Minimum of targets per source using "min_n". 195
196 - The file should have genes in rows, with the gene symbols in the first column.
197 - The file needs a header, that is, column names for every column.
198 - Columns must include at least fields similar to log2FC and a p-value or FDR field.
199 - If this file is provided, the tool will score each source in the network file according to the differential expression of the provided genes.
200
201 The tool also requires a network file containing a collection of pathways and their target genes, with weights for each interaction.
202
203 Example of a network file:
204
205 +---------+--------+--------+
206 | source | target | weight |
207 +=========+========+========+
208 | T1 | G01 | 1.0 |
209 +---------+--------+--------+
210 | T1 | G02 | 1.0 |
211 +---------+--------+--------+
212 | T1 | G03 | 0.7 |
213 +---------+--------+--------+
214 | T2 | G04 | 1.0 |
215 +---------+--------+--------+
216 | T2 | G06 | -0.5 |
217 +---------+--------+--------+
140 218
141 219
142 **Output** 220 **Output**
143 221
144 The tool outputs an AnnData object containing the scores in the "obs" field, and tab-separated text files containing the scores for each cell. 222 Depending on the input file type, the tool outputs:
145 223
146 If the "write_activities_path" parameter is set to "true", the tool will write the modified AnnData object to an H5AD file. 224 - If an AnnData file is used:
147 If the "write_inference" parameter is set to "true", the tool will output a tab-separated text file containing the scores for each cell. 225
148 226 - An AnnData object containing the scores in the "obsm" field.
149 227 - Tab-separated text files containing the scores for each cell.
228 - If the "write_activities_path" parameter is set to "true", the tool will write the modified AnnData object to an H5AD file.
229 - The tab-separated text file containing the scores for each cell is always written (the tool has no "write_inference" parameter).
230
231 - If a tabular differential expression file is used:
232
233 - A tab-separated text file where each source in the network file is scored according to the differential expression of the provided genes.
150 234
151 </help> 235 </help>
152 <citations> 236 <citations>
153 <citation type="doi">10.1093/bioadv/vbac016 </citation> 237 <citation type="doi">10.1093/bioadv/vbac016 </citation>
154 </citations> 238 </citations>