Mercurial > repos > iuc > masigpro
comparison masigpro.xml @ 0:c8c290f3ea7d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/masigpro commit 5798bd978553dee97521c39920d263dd750e0755
author | iuc |
---|---|
date | Mon, 15 May 2017 07:29:03 -0400 |
parents | |
children | cc96abdef027 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c8c290f3ea7d |
---|---|
1 <tool id="masigpro" name="maSigPro" version="1.49.0.0"> | |
2 <description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.49.0">bioconductor-masigpro</requirement> | |
5 <requirement type="package" version="1.3.2">r-optparse</requirement> | |
6 <requirement type="package" version="4.4">sed</requirement> | |
7 </requirements> | |
8 <stdio> | |
9 <regex match="Execution halted" | |
10 source="both" | |
11 level="fatal" | |
12 description="Execution halted." /> | |
13 <regex match="Error in" | |
14 source="both" | |
15 level="fatal" | |
16 description="An undefined error occurred, please check your input carefully and contact your administrator." /> | |
17 <regex match="Fatal error" | |
18 source="both" | |
19 level="fatal" | |
20 description="An undefined error occurred, please check your input carefully and contact your administrator." /> | |
21 </stdio> | |
22 <version_command> | |
23 <![CDATA[ | |
24 echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ") | |
25 ]]> | |
26 </version_command> | |
27 <command> | |
28 <![CDATA[ | |
29 #if str($source.source_selector) == "advanced": | |
30 paste | |
31 #set $start = True | |
32 #set $header = '' | |
33 #for $time in $source.rep_time: | |
34 #for $file in $time.files: | |
35 #if $start: | |
36 <(cut -f1 $file) | |
37 #set $start = False | |
38 #end if | |
39 #set $header += ' "' + $file.name + '"' | |
40 <(cut -f2 $file) | |
41 #end for | |
42 #end for | |
43 > data && sed -i '1i$header' data && | |
44 #if $source.enable_output: | |
45 ln -f data $data_out && ln -f $design_matrix $edesign_out && | |
46 #end if | |
47 #set $data = 'data' | |
48 #set $edesign = $design_matrix | |
49 #else: | |
50 #set $data = $source.data | |
51 #set $edesign = $source.edesign | |
52 #end if | |
53 Rscript '${__tool_directory__}/masigpro.R' | |
54 -e '$edesign' | |
55 -d '$data' | |
56 -o '$masigpro_out' | |
57 #if str($source.source_selector) == "defaults": | |
58 --time_col $source.time_col | |
59 --repl_col $source.repl_col | |
60 #end if | |
61 --degree $makeDesignMatrix.degree | |
62 --qvalue $p_vector.qvalue | |
63 --min_obs $p_vector.min_obs | |
64 --step_method '$Tfit.step_method' | |
65 --nvar_correction $Tfit.nvar_correction | |
66 --alfa $Tfit.alfa | |
67 --rsq $getSiggenes.rsq | |
68 --vars '$getSiggenes.vars' | |
69 --significant_intercept '$getSiggenes.significant_intercept' | |
70 #if $pdf.pdf_selector: | |
71 --cluster_data $pdf.seeGenes.clusterData | |
72 -k $pdf.seeGenes.k | |
73 --cluster_method $pdf.seeGenes.clustering.clusterMethod | |
74 #if str($pdf.seeGenes.clustering.clusterMethod) == "hclust": | |
75 --distance $pdf.seeGenes.clustering.distance | |
76 --agglo_method $pdf.seeGenes.clustering.aggloMethod | |
77 #end if | |
78 #if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans": | |
79 --iter_max $pdf.seeGenes.clustering.iterMax | |
80 #end if | |
81 --color_mode $pdf.seeGenes.colorMode | |
82 --show_fit $pdf.seeGenes.showFit | |
83 --show_lines $pdf.seeGenes.showLines | |
84 --cexlab $pdf.seeGenes.cexlab | |
85 --legend $pdf.seeGenes.legend | |
86 #end if | |
87 ]]> | |
88 </command> | |
89 <configfiles> | |
90 <configfile name="design_matrix">#if str($source.source_selector) == "advanced": | |
91 #set $header = "Name Time Replicate" | |
92 #for $group in $source.rep_groups: | |
93 #set $header = $header + ' ' + str($group.name) | |
94 #end for | |
95 $header | |
96 #set $c = len($source.rep_repl) + 1 | |
97 #for $time in $source.rep_time: | |
98 #for $file in $time.files: | |
99 #set $is_repl = False | |
100 #for $i, $repl in enumerate($source.rep_repl): | |
101 #if str($file) in str($repl.files): | |
102 #set $r = $i + 1 | |
103 #set $is_repl = True | |
104 #end if | |
105 #end for | |
106 #if $is_repl == False: | |
107 #set $r = $c | |
108 #set $c += 1 | |
109 #end if | |
110 #set $line = '"' + str($file.name) + '" ' + str($time.time) + ' ' + str($r) | |
111 #for $group in $source.rep_groups: | |
112 #if str($file) in str($group.files): | |
113 #set $line += " 1" | |
114 #else | |
115 #set $line += " 0" | |
116 #end if | |
117 #end for | |
118 $line | |
119 #end for | |
120 #end for | |
121 #end if | |
122 </configfile> | |
123 </configfiles> | |
124 <inputs> | |
125 <conditional name="source"> | |
126 <param label="Choose data source" name="source_selector" | |
127 help="Choose if you want to provide separate count files (e.g. from HTSeq-count or feature-seq) | |
128 and define your experiment design matrix here, or if you have maSigPro edesign and data input files already." | |
129 type="select"> | |
130 <option value="defaults">Use maSigPro edesign and data files</option> | |
131 <option value="advanced">Separate count data (e.g. from HTSeq-count or feature-count)</option> | |
132 </param> | |
133 <when value="defaults"> | |
134 <param name="edesign" format="tabular,txt" type="data" label="Experiment matrix" | |
135 help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" /> | |
136 <param name="data" format="tabular,txt" type="data" label="Gene expression matrix" | |
137 help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" /> | |
138 <param name="time_col" label="Column number containing time values" type="integer" value="1" | |
139 help="Column number in edesign containing time values. Default is first column" /> | |
140 <param name="repl_col" label="Column number containing replicate coding" type="integer" value="2" | |
141 help="Column number in edesign containing coding for replicate arrays. Default is second column" /> | |
142 </when> | |
143 <when value="advanced"> | |
144 <param name="enable_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output generated maSigPro input files?" | |
145 help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." /> | |
146 <repeat name="rep_time" title="Time values" min="1" default="1"> | |
147 <param name="time" type="integer" value="0" label="Specify a numerical time value" help="Only numbers will be allowed"> | |
148 <sanitizer> | |
149 <valid initial="string.digits"></valid> | |
150 </sanitizer> | |
151 </param> | |
152 <param name="files" type="data" format="tabular" multiple="true" label="Counts file(s) at this time value" /> | |
153 </repeat> | |
154 <repeat name="rep_groups" title="Experimental groups" min="1" default="1"> | |
155 <param name="name" type="text" value="Group title" label="Specify the name of this experimental group" | |
156 help="Use a single name without spaces or special characters"> | |
157 </param> | |
158 <param name="files" type="data" format="tabular" multiple="true" | |
159 label="Counts file(s) belonging to this experimental group" /> | |
160 </repeat> | |
161 <repeat name="rep_repl" title="Replicates" min="0" default="0"> | |
162 <param name="files" type="data" format="tabular" multiple="true" label="Counts files that are replicates" /> | |
163 </repeat> | |
164 </when> | |
165 </conditional> | |
166 <section name="makeDesignMatrix" | |
167 title="Step 1: make.Design.Matrix - Defining the regression model" | |
168 help="‘make.design.matrix’ creates the design matrix of dummies for | |
169 fitting time series microarray gene expression experiments."> | |
170 <param name="degree" type="integer" value="1" | |
171 label="Degree of regression fit polynomial" | |
172 help="The degree of the regression fit polynomial. ‘degree’ = 1 returns | |
173 linear regression, ‘degree’ = 2 returns quadratic regression, etc" /> | |
174 </section> | |
175 <section name="p_vector" | |
176 title="Step 2: p.vector - Finding significant genes" | |
177 help="‘p.vector’ performs a regression fit for each gene taking all | |
178 variables present in the model given by a regression matrix and | |
179 returns a list of FDR corrected significant genes"> | |
180 <param name="qvalue" type="float" value="0.05" label="Q" help="Significance level" /> | |
181 <param name="min_obs" label="Minimum values" type="integer" value="6" | |
182 help="Genes with less than this number of true numerical values | |
183 will be excluded from the analysis. Minimum value to estimate | |
184 the model is (degree+1)xGroups+1. Default is 6." /> | |
185 </section> | |
186 <section name="Tfit" title="Step 3: T.fit - Finding significant differences" | |
187 help="‘T.fit’ selects the best regression model for each gene using | |
188 stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent | |
189 steps, meaning that significant genes are first selected on the | |
190 basis of a general model and then the significant variables for | |
191 each gene are found by step-wise regression."> | |
192 <param name="step_method" type="select" label="Step regression" | |
193 help="The step regression can be ‘backward’ or ‘forward’ indicating | |
194 whether the step procedure starts from the model with all or none | |
195 variables. With the ‘two.ways.backward’ or ‘two.ways.forward’ | |
196 options the variables are both allowed to get in and out. At each | |
197 step the p-value of each variable is computed and variables get | |
198 in/out the model when this p-value is lower or higher than given | |
199 threshold alfa."> | |
200 <option selected="True" value="backward">backward</option> | |
201 <option value="forward">forward</option> | |
202 <option value="two.ways.backward">two.ways.backward</option> | |
203 <option value="two.ways.forward">two.ways.forward</option> | |
204 </param> | |
205 <param type="boolean" name="nvar_correction" label="nvar correction" truevalue="TRUE" falsevalue="FALSE" checked="false" | |
206 help="When nvar.correction is TRUE the given significance | |
207 level is corrected by the number of variables in the model."> | |
208 <!-- Boolean parameter: truevalue/falsevalue above define the TRUE/FALSE text passed to the R script. --> | |
209 <!-- No option children here: those are only valid on select parameters. --> | |
210 </param> | |
211 <param name="alfa" type="float" value="0.05" label="alfa" help="Significance level used for variable selection in the stepwise regression" /> | |
212 </section> | |
213 <section name="getSiggenes" | |
214 title="Step 4: get.siggenes - Obtaining lists of significant genes" | |
215 help="This function creates lists of significant genes for a set of | |
216 variables whose significance value has been computed with the | |
217 ‘T.fit’ function."> | |
218 <param name="rsq" type="float" value="0.7" label="rsq" | |
219 help="cut-off level at the R-squared value for the stepwise | |
220 regression fit. Only genes with R-squared more than rsq are | |
221 selected" /> | |
222 <param name="vars" type="select" label="Variables" | |
223 help="Variables for which to extract significant genes. | |
224 ‘all’: generates one single matrix or gene list with all | |
225 significant genes. | |
226 | |
227 ‘each’: generates as many significant genes extractions as | |
228 variables in the general regression model. Each extraction | |
229 contains the significant genes for that variable. | |
230 | |
231 ‘groups’: generates a significant genes extraction for each | |
232 experimental group. | |
233 | |
234 The difference between ‘each’ and ‘groups’ is that in the | |
235 first case the variables of the same group (e.g. ‘TreatmentA’ | |
236 and ‘time*TreatmentA’) will be extracted separately and in | |
237 the | |
238 second case jointly."> | |
239 <option selected="True" value="groups">Groups</option> | |
240 <option value="each">Each</option> | |
241 <option value="all">All</option> | |
242 </param> | |
243 <param name="significant_intercept" type="select" label="Significant intercept" | |
244 help="The argument ‘significant.intercept’ modulates the treatment for | |
245 intercept coefficients to apply for selecting significant genes | |
246 when ‘vars’ equals ‘groups’. There are three possible values: | |
247 ‘none’, no significant intercept (differences) are considered | |
248 for significant gene selection, ‘dummy’, includes genes with | |
249 significant intercept differences between control and experimental | |
250 groups, and ‘all’ when both significant intercept coefficient | |
251 for the control group and significant intercept differences are | |
252 considered for selecting significant genes."> | |
253 <option selected="True" value="dummy">Dummy</option> | |
254 <option value="none">None</option> | |
255 <option value="all">All</option> | |
256 </param> | |
257 </section> | |
258 <conditional name="pdf"> | |
259 <param label="Generate visualization PDF" name="pdf_selector" type="boolean" | |
260 truevalue="1" falsevalue="0" checked="true" | |
261 help="Choose if you want to generate a PDF file containing the visualizations" /> | |
262 <when value="1"> | |
263 <section name="seeGenes" title="Step 5: see.genes - Visualization" | |
264 help="This function provides visualisation tools for gene expression | |
265 values in a time course experiment. The function first calls the | |
266 heatmap function for a general overview of experiment results. | |
267 Next a partitioning of the data is generated using a clustering | |
268 method. The results of the clustering are visualized both as gene | |
269 expression profiles extended along all arrays in the experiment, | |
270 as provided by the plot.profiles function, and as summary | |
271 expression profiles for comparison among experimental groups."> | |
272 <param name="clusterData" label="Cluster Data" type="integer" value="1" | |
273 help="Data clustering can be done on the basis of either the original | |
274 expression values, the regression coefficients, or the t.scores. | |
275 In case ‘data’ is a ‘get.siggenes’ object, this is given by | |
276 providing the element names of the list | |
277 ‘c(sig.profiles,coefficients,t.score)’ of their list | |
278 position (1,2 or 3)." /> | |
279 <param name="k" type="integer" label="Number of clusters for data partitioning" value="9" /> | |
280 <conditional name="clustering"> | |
281 <param name="clusterMethod" label="Cluster Method" type="select" | |
282 help="clustering method for data partitioning. Currently | |
283 ‘hclust’, ‘kmeans’ and ‘Mclust’ are supported"> | |
284 <option selected="True" value="hclust">hclust</option> | |
285 <option value="kmeans">kmeans</option> | |
286 <option value="Mclust">Mclust</option> | |
287 </param> | |
288 <when value="hclust"> | |
289 <param name="distance" type="select" label="Distance measure" | |
290 help="Distance measurement function when ‘cluster.method’ is | |
291 ‘hclust’. Default uses correlation."> | |
292 <option selected="True" value="cor">Correlation</option> | |
293 <option value="euclidean">Euclidean</option> | |
294 <option value="maximum">Maximum</option> | |
295 <option value="manhattan">Manhattan</option> | |
296 <option value="Canberra">Canberra</option> | |
297 <option value="binary">Binary</option> | |
298 <option value="minkowski">Minkowski</option> | |
299 </param> | |
300 <param name="aggloMethod" type="select" label="Agglomeration method" | |
301 help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’."> | |
302 <option selected="True" value="ward.D">ward.D</option> | |
303 <option value="ward.D2">ward.D2</option> | |
304 <option value="single">single</option> | |
305 <option value="complete">complete</option> | |
306 <option value="average">average (= UPGMA)</option> | |
307 <option value="mcquitty">mcquitty (= WPGMA)</option> | |
308 <option value="median">median (= WPGMC)</option> | |
309 <option value="centroid">centroid (= UPGMC)</option> | |
310 </param> | |
311 </when> | |
312 <when value="kmeans"> | |
313 <param name="iterMax" type="integer" label="Maximum number of iterations" value="500" | |
314 help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" /> | |
315 </when> | |
316 </conditional> | |
317 <param name="colorMode" label="Color Mode" type="select" help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’"> | |
318 <option selected="True" value="rainbow">Rainbow</option> | |
319 <option value="gray">Gray</option> | |
320 </param> | |
321 <param name="showFit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Show regression fit curves?" | |
322 help="Indicating whether regression fit curves must be plotted" /> | |
323 <param name="showLines" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw lines?" | |
324 help="Indicating whether a line must be drawn joining plotted data points for each group" /> | |
325 <param name="cexlab" type="float" value="0.8" label="Magnification for x labels" | |
326 help="Graphical parameter magnification to be used for x labels in plotting functions" /> | |
327 <param name="legend" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Add legend to plotting profiles?" | |
328 help="Indicating whether legend must be added when plotting profiles" /> | |
329 </section> | |
330 </when> | |
331 </conditional> | |
332 </inputs> | |
333 <outputs> | |
334 <data format="tabular" name="masigpro_out" label="maSigPro result file on ${on_string}"> | |
335 </data> | |
336 <data format="txt" name="edesign_out" label="maSigPro edesign file on ${on_string}"> | |
337 <filter> | |
338 (( | |
339 source['source_selector'] == 'advanced' and | |
340 source['enable_output'] == True | |
341 )) | |
342 </filter> | |
343 </data> | |
344 <data format="txt" name="data_out" label="maSigPro data file on ${on_string}"> | |
345 <filter> | |
346 (( | |
347 source['source_selector'] == 'advanced' and | |
348 source['enable_output'] == True | |
349 )) | |
350 </filter> | |
351 </data> | |
352 <data format="pdf" name="pdf_out" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}"> | |
353 <filter> | |
354 (( | |
355 pdf['pdf_selector'] == True | |
356 )) | |
357 </filter> | |
358 </data> | |
359 </outputs> | |
360 <tests> | |
361 <test> | |
362 <param name="source_selector" value="advanced" /> | |
363 <param name="enable_output" value="1" /> | |
364 <repeat name="rep_time"> | |
365 <param name="time" value="1" /> | |
366 <param name="files" value="control_1H.counts,treat_1H.counts" /> | |
367 </repeat> | |
368 <repeat name="rep_time"> | |
369 <param name="time" value="2" /> | |
370 <param name="files" value="control_2H.counts,treat_2H.counts" /> | |
371 </repeat> | |
372 <repeat name="rep_time"> | |
373 <param name="time" value="3" /> | |
374 <param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" /> | |
375 </repeat> | |
376 <!-- Replicates are supplied through the rep_repl repeat below; the inputs define no separate selector parameter. --> | |
377 <repeat name="rep_repl"> | |
378 <param name="files" value="treat_3H_1.counts,treat_3H_2.counts" /> | |
379 </repeat> | |
380 <repeat name="rep_groups"> | |
381 <param name="name" value="Control" /> | |
382 <param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" /> | |
383 </repeat> | |
384 <repeat name="rep_groups"> | |
385 <param name="name" value="Treatment" /> | |
386 <param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" /> | |
387 </repeat> | |
388 <output name="masigpro_out" file="masigpro_out.tab" /> | |
389 <output name="data_out" file="data_out.txt" /> | |
390 <output name="edesign_out" file="edesign_out.txt" /> | |
391 <output name="pdf_out" file="Results.pdf" /> | |
392 </test> | |
393 <test> | |
394 <param name="source_selector" value="defaults" /> | |
395 <param name="edesign" value="edesign_out.txt" /> | |
396 <param name="data" value="data_out.txt" /> | |
397 <output name="masigpro_out" file="masigpro_out.tab" /> | |
398 <output name="pdf_out" file="Results.pdf" /> | |
399 </test> | |
400 </tests> | |
401 <help> | |
402 <![CDATA[ | |
403 .. class:: infomark | |
404 | |
405 **What it does** | |
406 | |
407 maSigPro_ is a regression based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments. | |
408 | |
409 **Inputs** | |
410 | |
411 The maSigPro wrapper has two options for input data: | |
412 | |
413 - directly through two separate text files containing the experiment design (edesign) and the data or | |
414 - count tables generated from HTSeq-count. Count tables must be generated for each sample individually. | |
415 | |
416 To set up an experimental design from separate count files you first have to select which files belong to a certain time point. | |
417 Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files. | |
418 For a more comfortable setup in future analysis you have the option to output the generated edesign and data files. | |
419 | |
420 **Output** | |
421 | |
422 maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis. | |
423 | |
424 .. _maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html | |
425 ]]> | |
426 </help> | |
427 <citations> | |
428 <citation type="doi">10.1093/bioinformatics/btl056</citation> | |
429 </citations> | |
430 </tool> |