comparison create_model.xml @ 0:9d5f4f5f764b

Initial commit to toolshed
author pieter.lukasse@wur.nl
date Thu, 16 Jan 2014 13:10:00 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9d5f4f5f764b
<!--
  Galaxy tool wrapper for Rscripts/ridb-regression.R.
  Collects the options below, passes them to the R script as positional
  arguments, and exposes the resulting model file, log file and (optionally)
  a ZIP archive of plots as history datasets.
-->
<tool id="create_poly_model" name="RIQC-Create Regression Model" version="1.0.2">
  <description>Generate coefficients to enable the regression from one GC-column to another GC-column</description>

  <!--
    Positional arguments, in order: ridb, out_model, out_log, min_residuals,
    range_mod, pvalue, rsquared, method, plot and, only when plotting is
    enabled, model_graphics. File paths are single-quoted so that paths
    containing spaces or shell metacharacters reach the script as one argument.
  -->
  <command interpreter="Rscript">Rscripts/ridb-regression.R
    '$ridb'
    '$out_model'
    '$out_log'
    $min_residuals
    $range_mod
    $pvalue
    $rsquared
    $method
    $plot
    #if $plot
      '$model_graphics'
    #end if
  </command>

  <inputs>
    <!--
      The option list is produced at runtime by get_directory_files(), which is
      not defined in this file (presumably provided by match_library.py, loaded
      through the code element below; verify).
      NOTE(review): format="tabular" is presumably ignored on a select
      parameter; kept unchanged to avoid any behavior change. Confirm and remove.
    -->
    <param format="tabular" name="ridb" type="select"
           label="Retention Index (RI) and GC columns Library file"
           help="Select the RI library file of which all GC columns and their RI values will be used to create a model"
           dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>

    <!-- Regression type forwarded to the script: "poly" (default) or "linear". -->
    <param name="method" type="select" label="Select regression method"
           help="Method to use for calculating the model">
      <option value="poly" selected="True">Polynomial (3rd degree)</option>
      <option value="linear">Linear</option>
    </param>

    <param name="min_residuals" type="integer" value="10" optional="False"
           label="Minimum number of residuals"
           help="The minimum number of residuals (datapoints) that both columns should have in common when calculating the model"/>

    <param name="range_mod" type="integer" value="0" optional="False"
           label="Range modifier"
           help="Moves the range of the usable RI space by the given percentage. Set to 0 to use the full range of available data."/>

    <!-- Both statistical thresholds are restricted to the closed interval [0, 1]. -->
    <param name="pvalue" type="float" value="0.05" optional="False" min="0" max="1"
           label="Pvalue to filter on"
           help="Set the upper limit for the pvalue (calculated by performing an ANOVA analysis on the created model). All models with higher pvalues are discarded."/>

    <param name="rsquared" type="float" value="0.95" optional="False" min="0" max="1"
           label="R-squared to filter on"
           help="Set the lower limit for the R-squared, all models with lower values are discarded."/>

    <!-- Controls both the extra command-line argument and the model_graphics output. -->
    <param name="plot" type="boolean" label="Create a separate plot for each model"
           help="This will create a ZIP file in the history containing PDF plots"/>
  </inputs>

  <!-- Python helper file loaded alongside this tool definition. -->
  <code file="match_library.py"/>

  <outputs>
    <!-- Only created when the "plot" option is enabled. -->
    <data format="zip" label="Model Graphics of ${on_string}" name="model_graphics">
      <filter>(plot)</filter>
    </data>
    <data format="tabular" label="Regression logfile of ${on_string}" name="out_log"/>
    <data format="tabular" label="Regression model of ${on_string}" name="out_model"/>
  </outputs>

  <!-- Help text is reStructuredText; it is kept at column 0 because leading whitespace is significant there. -->
  <help>
Calculates regression models for a permutation of all GC columns contained in the selected
RI database file. The method used for creating the model is either based on a 3rd degree
polynomial or a standard linear model.

The *Minimum number of residuals* option will only allow regression if the columns it is based
on have at least that number of datapoints on the same compound.

Filtering is possible by setting an upper limit for the *p-value* and / or a lower limit for
the *R squared* value. The produced logfile will state how many models have been discarded due
to this filtering. The output model file also includes the p-value and R squared value for
each created model.

Graphical output of the models is available by selecting the plot option which shows the
data points used for the model as well as the fit itself and the range of data that will
be usable.

.. class:: infomark

**Notes**

The output file produced by this tool is required as input for the CasLookup tool when
selecting to apply regression when finding hits in the RIDB.
  </help>
</tool>