view rankfilterGCMS_tabular.xml @ 62:9bd2597c8851 default tip

r
author pieter.lukasse@wur.nl
date Fri, 06 Feb 2015 15:49:26 +0100
parents 19d8fd10248e
children
line wrap: on
line source

<tool id="rankfilterGCMS_tabular" name="RIQC-RankFilter GC-MS from tabular file" version="1.0.2">
  <description>Convert Retention Time to Retention Index</description>
  <!-- Runs rankfilter.py with a single argument: the path of the file rendered from the
       configfile named "input_file" declared further down in this tool definition. -->
  <command interpreter="python">rankfilter_GCMS/rankfilter.py $input_file</command>
  <inputs>
    <!-- Sample identifications to be processed, supplied as a tabular dataset. -->
    <param name="sample"
           type="data"
           format="tabular"
           label="Sample File"
           help="Select a tab delimited NIST metabolite identifications file (converted from PDF)"/>

    <!-- Open question: is the calibration file column specific, given that it includes RT information? -->
    <!-- Earlier variant, kept for reference: the calibration file was offered as a select list filled by
         get_directory_files(). For now it has to be supplied as an input file instead.
    <param name="calibration" type="select" label="Calibration File"
           help="Calibration file with reference masses (e.g. alkanes) with their RT and RI values"
           dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RankFilter_Calibration_Files")'/>
    -->
    <!-- NOTE(review): format="any" may not be a registered Galaxy datatype; confirm how the
         target Galaxy version resolves it before changing it. -->
    <param name="calibration"
           type="data"
           format="any"
           label="Calibration File"
           help="Calibration file containing reference masses (e.g. alkanes) with their respective RT and RI values"/>

    <!-- Program that produced the sample file. -->
    <!-- NOTE(review): format="text" on a select parameter is presumably ignored (format normally
         applies to data parameters); confirm before removing it. -->
    <param name="analysis_type"
           type="select"
           format="text"
           label="Analysis Type"
           help="Select the type of analysis that has been used to generate the sample file">
      <option value="NIST">NIST</option>
      <option value="AMDIS">AMDIS</option>
    </param>

    <!-- Model choice: linear or polynomial. The same review note about format="text" applies here. -->
    <param name="model"
           type="select"
           format="text"
           label="Select a model to be used "
           help="Both linear and (3rd degree) polynomial models are available ">
      <option value="linear">Linear</option>
      <option value="poly">Polynomial</option>
    </param>

    <!-- Lookup library; the choices are produced at runtime by get_directory_files(). -->
    <param name="lib_data"
           type="select"
           label="Library"
           help="Reference global lookup library file with CAS numbers and respective (previously calculated) RIsvr values"
           dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RankFilter_lookup_libraries")'/>

    <!-- Numeric window setting, default 10.56. NOTE(review): its unit and meaning are not stated in this file. -->
    <param name="window"
           type="float"
           label="Window"
           value="10.56"/>
  </inputs>
  <outputs>
    <!-- Single tabular result dataset; its label is taken from the tool name. -->
    <data name="onefile"
          format="tabular"
          label="${tool.name}"/>
  </outputs>
  <!-- match_library.py implements get_directory_files(), the function called by the
       dynamic_options attribute of the "lib_data" parameter in the inputs section. -->
  <code file="match_library.py" />
  <configfiles>
    <!-- Template for the settings file handed to the command as $input_file. It holds one
         "key = value" line per tool parameter, plus the fixed flag "tabular = True" and the
         output path "onefile". The element body is template text, so this comment is kept
         outside of it.
         NOTE(review): how rankfilter.py handles the leading indentation and the order of these
         lines is not visible here; leave the body untouched unless that is confirmed. -->
    <configfile name="input_file">
      sample = ${sample}
      calibration = ${calibration}
      lib_data = ${lib_data}
      window = ${window}
      analysis_type = ${analysis_type}
      tabular = True
      onefile = ${onefile}
      model = ${model}
    </configfile>
  </configfiles>
  <help>
Estimates the experimental retention index (RIexp) by building an RI(RT) function from the
given calibration file.

It also determines the estimated RI (RIsvr): for each entry in the given input file (Sample File),
its CAS number is used to look up the corresponding RIsvr value in the given global lookup library
(this step is also called the "RankFilter analysis" - see reference below; the Sample File may come from either NIST or AMDIS).
This yields a prediction of the RI of a compound according to the "RankFilter procedure" (RIsvr).

Output is a tab-separated file in which four columns are added:

	- **Rank** Calculated rank
	- **RIexp** Experimental Retention Index (RI)
	- **RIsvr** Calculated RI based on support vector regression (SVR)
	- **%rel.err** Relative RI error (%rel.err = 100 * (RIsvr − RIexp) / RIexp)

.. class:: infomark

**Notes**

	- The layout of the Calibration file should include the following columns: 'MW', 'R.T.' and 'RI'.
	- Selecting 'Polynomial' in the model parameter will calculate a 3rd degree polynomial model that will
	  be used to convert from retention time (RT) to retention index (RI).

-----

**References**

    - **RankFilter**: Mihaleva et al. (2009) *Automated procedure for candidate compound selection in GC-MS 
      metabolomics based on prediction of Kovats retention index*. Bioinformatics, 25, pp. 787–794
  </help>
</tool>