view masscomb_dbsearch_xtandem.xml @ 4:b34fb7461546

Putting back old fix
author pieter.lukasse@wur.nl
date Fri, 10 Jan 2014 17:35:38 +0100
parents d6001e8d7441
children 153e9eb5f2ff
line wrap: on
line source

<tool name="X!Tandem" id="masscomb_xtandem200" version="2.0.0">
	<description>MS/MS DB search</description>
	<!-- Runtime dependencies: the XTANDEM_12_10_01_PATH environment variable (referenced in the
	     parametersFile configfile below) and the xtandem package.
	     NOTE(review): the package version "12.10011" does not obviously match the 12_10_01 in the
	     environment variable name; confirm against the dependency definition. -->
	<requirements>
		<requirement type="set_environment">XTANDEM_12_10_01_PATH</requirement>
		<requirement type="package" version="12.10011">xtandem</requirement>
	</requirements>
	<!-- 
	   For remote debugging start your listener on port 8000 and use the following as command interpreter:
	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
	-->    
	<!-- Runs MassComb.jar with the XTANDEMSEARCH action. The $-placeholders refer to the
	     inputs, the parametersFile configfile and the outputs declared further down. -->
	<command interpreter="java -jar">
		MassComb.jar
		-action XTANDEMSEARCH
		-outputFile $outputFile
		-fileGrouping $fileType.type
		-inputFile $fileType.inputFile
		-parametersFile $parametersFile
		-outputTsv $outTsv
		-outReport $htmlReportFile
		-outReportPicturesPath $htmlReportFile.files_path
	</command>
<inputs>
  <!-- Input mode: either one mzml dataset or a prims.fileset.zip dataset.
       The selected mode is passed to the command line as the fileGrouping argument. -->
  <conditional name="fileType">
    <param name="type" type="select" label="select MS/MS input type">
      <option value="single" selected="true">single-File</option>
      <option value="fileSet">fileSet</option>
    </param>
    <when value="single">
      <param name="inputFile" type="data" format="mzml" label="MS/MS input file (mzml)"/>
    </when>
    <when value="fileSet">
      <param name="inputFile" type="data" format="prims.fileset.zip" label="input file"/>
    </when>
  </conditional>

   <!-- Precursor and fragment mass tolerances (value plus unit) and the FASTA protein database.
        Each value is substituted into the attribute of the same name in the parametersFile configfile. -->
   <param name="precursor_mass_tolerance_lower" type="text" size="30" label="precursor monoisotopic mass_tolerance_lower" value="100" help=""/>
   <param name="precursor_mass_tolerance_upper" type="text" size="30" label="precursor monoisotopic mass_tolerance_upper" value="100" help=""/>
   <param name="precursor_error_units" type="select" label="precursor_error_units" help="">
      <option value="ppm" selected="true">ppm</option>
      <option value="Daltons">Daltons</option>
   </param>
   <param name="fragment_mass_tolerance" type="text" size="30" label="fragment_mass_tolerance" value="0.4" help=""/>
   <param name="fragment_error_units" type="select" label="fragment_error_units" help="">
      <option value="ppm">ppm</option>
      <option value="Daltons" selected="true">Daltons</option>
   </param>
   <param name="database" type="data" format="fasta" label="Protein sequences DB (FASTA)"/>
   <!-- 
   <param name="min_precursor_charge" type="text" size="30" label="min_precursor_charge" value="1" help=""/>
   <param name="max_precursor_charge" type="text" size="30" label="max_precursor_charge" value="4" help=""/>-->
   <!-- Fixed ("complete") modifications. Every option value is a comma separated list of
        mass@residue entries; the selection is written to the "residue, modification mass"
        attribute of the parametersFile configfile.
        Each option value appears only once in this list. -->
   <param name="fixed_modifications" type="select" display="checkboxes" multiple="true" label="Complete modifications" help="">
      <option value="57.021464@C">Carbamidomethyl (C)</option>
      <option value="57.021464@C,10.008269@R,8.014199@K">Cam+SILAC (8@K,10@R)</option>
      <option value="57.021464@C,4.025107@K,6.020129@R">Cam+SILAC (4@K,6@R)</option>
      <option value="57.021464@C,4.025107@K,6.020129@R,6.020129@L">Cam+SILAC (4@K,6@R,6@L)</option>
      <option value="58.005479@C">Carboxymethyl (C)</option>
      <option value="45.987721@C">Methylthio (C)</option>
      <option value="47.984744@C">Trioxidation (C)</option>
      <option value="442.224991@C">ICAT-D (C)</option>
      <option value="450.275205@C">ICAT-D:2H(8) (C)</option>
      <option value="227.126991@C">ICAT-C (C)</option>
      <option value="236.157185@C">ICAT-C:13C(9) (C)</option>
      <option value="105.057849@C">Pyridylethyl (C)</option>
      <option value="71.037114@C">Propionamide (C)</option>
      <option value="125.047679@C">Nethylmaleimide (C)</option>
      <option value="144.102063@[,144.102063@K">iTRAQ (N-term,K)</option>
      <option value="57.021464@C,144.102063@[,144.102063@K">Cam + iTRAQ (C,N-term,K)</option>
      <option value="57.021464@C,224.152478@K,224.152478@[">Cam + TMT (C,K,nt)</option>
      <option value="57.021464@C,225.155833@K,225.155833@[">Cam + TMT2plex (C,K,nt)</option>
      <option value="57.021464@C,229.1629328@K,229.1629328@[">Cam + TMT6plex (C,K,nt)</option>
      <option value="57.021464@C,28.0313@[,28.0313@K">Cam + Dimethyl (C,28@N-term,K)</option>
      <option value="57.021464@C,32.0564@[,32.0564@K">Cam + Dimethyl (C,32@N-term,K)</option>
      <option value="57.021464@C,36.0757@[,36.0757@K">Cam + Dimethyl (C,36@N-term,K)</option>
      <option value="45.987721@C,144.102063@[,144.102063@K">Methylthio + iTRAQ (C,N-term,K)</option>
      <option value="45.987721@C,224.152478@K,224.152478@[">Methylthio + TMT (C,K,nt)</option>
      <option value="45.987721@C,225.155833@K,225.155833@[">Methylthio + TMT2plex (C,K,nt)</option>
      <option value="45.987721@C,229.1629328@K,229.1629328@[">Methylthio + TMT6plex (C,K,nt)</option>
      <option value="14.0156@],14.0156@D,14.0156@E">Methyl +14Da (D,E,C-term)</option>
   </param>
      <!-- Variable ("potential") modifications for the main search. Option values are
           mass@residue entries; the selection is written to the
           "residue, potential modification mass" attribute of the parametersFile configfile.
           Each option value appears only once in this list. -->
      <param name="potential_modifications" type="select" display="checkboxes" multiple="true" label="Potential modifications" help="">
        <option value="15.994915@M">Oxidation (M)</option>
        <option value="15.994915@W">Oxidation (W)</option>
        <option value="0.984016@N">Deamidation (N)</option>
        <option value="0.984016@Q">Deamidation (Q)</option>
        <option value="79.966331@S">Phospho (S)</option>
        <option value="79.966331@T">Phospho (T)</option>
        <option value="79.966331@Y">Phospho (Y)</option>
        <option value="79.956815@Y">Sulfo (Y)</option>
        <option value="42.010565@K">Acetyl (K)</option>
        <option value="43.005814@[">Carbamyl (nt)</option>
        <option value="43.005814@K">Carbamyl (K)</option>
        <option value="72.021129@[">Carboxyethyl (nt)</option>
        <option value="72.021129@K">Carboxyethyl (K)</option>
        <option value="57.021464@[">Carbamidomethyl (nt)</option>
        <option value="57.021464@K">Carbamidomethyl (K)</option>
        <option value="57.021464@C">Carbamidomethyl (C)</option>
        <option value="58.005479@C">Carboxymethyl (C)</option>
        <option value="45.987721@C">Methylthio (C)</option>
        <option value="125.047679@C">Nethylmaleimide (C)</option>
        <option value="31.989829@C">Dioxidation (C)</option>
        <option value="47.984744@C">Trioxidation (C)</option>
        <option value="27.994915@K">formyl (K)</option>
        <option value="27.994915@[">formyl (nt)</option>
        <option value="114.042927@K">GlyGly (K)</option>
        <option value="8.0502@C">ICAT-D:2H(8) (C)</option>
        <option value="9.0302@C">ICAT-C:13C(9) (C)</option>
        <option value="144.102063@[">iTRAQ (N-term)</option>
        <option value="144.102063@K">iTRAQ (K)</option>
        <option value="6.020129@L">Label:13C(6) (L)</option>
        <option value="6.020129@K">Label:13C(6) (K)</option>
        <option value="8.014199@K">Label:13C(6)15N(2) (K)</option>
        <option value="6.020129@R">Label:13C(6) (R)</option>
        <option value="4.025107@K">Label:2H(4) (K)</option>
        <option value="31.005814@C">Sulfinamide (C)</option>
        <option value="224.152478@K,224.152478@[">TMT (K,nt)</option>
        <option value="225.155833@K,225.155833@[">TMT2plex (K,nt)</option>
        <option value="229.1629328@K,229.1629328@[">TMT6plex (K,nt)</option>
      </param>
      <!-- 
      <param name="missed_cleavages" type="text" size="30" label="missed_cleavages" value="1" help="Nr. of possible cleavage sites missed by the enzyme"/>-->
      <!-- Lowest fragment m/z to consider and the enzyme cleavage rule; each option label
           repeats the rule that is sent as the option value. -->
      <param name="minimum_fragment_mz" type="text" size="30" label="minimum_fragment_mz" value="150" help=""/>
      <param name="cleavage_site" type="select" label="cleavage_site" help="">
        <option value="[RK]|{P}" selected="true">trypsin, [RK]|{P}</option>
        <option value="[R]|[X]">endo-arg-C, [R]|[X]</option>
        <option value="[K]|[X]">endo-lys-C, [K]|[X]</option>
        <option value="[E]|[X]">endo-glu-C, [E]|[X]</option>
        <option value="[X]|[D]">endo-asp-N, [X]|[D]</option>
        <option value="[ED]|[X]">V8, [ED]|[X]</option>
        <option value="[FYWL]|{P}">chymotrypsin, [FYWL]|{P}</option>
      </param>
      <!-- Integer parameter (default 1) written to the "scoring, maximum missed cleavage sites"
           attribute of the parametersFile configfile. -->
      <param name="maximum_missed_cleavage_sites" type="integer" size="10" value="1" 
      label="maximum missed cleavage sites" 
      help="maximum number of missed cleavage sites allowed within a peptide. For a specific, 
      aggressive enzyme such as trypsin, the number of missed sites will be low: a value of 1 or 2 is appropriate. 
      For a non-specific enzyme, such as pepsin, then a value of 50 is more appropriate."/>
      <!-- E-value cut-off for reported hits (default 0.1); written to the
           "output, maximum valid expectation value" attribute of the parametersFile configfile. -->
      <param name="max_valid_expect" type="text" size="30" label="maximum valid expectation value" value="0.1" 
      help="Max E-Value of a hit to be reported. All results with expectation values less than this value 
      are considered to be statistically significant and are recorded. "/>
      <!-- Optional second-pass ("refine") search. The nested parameters exist only when
           refinement is "yes", which is also the only case in which the parametersFile
           configfile emits the refine_* attributes.
           Each option value in the modification list appears only once. -->
      <conditional name="refinementOpt">
        <param name="refinement" type="select" label="Refine search" help="Select this to enable a second round of more 
        detailed searching, using only the set of proteins found by the constraints above. E.g. 
        Rather than entering the 'potential modifications' in the options above, try entering them here only. This is 
        faster and limits this more thorough searching to a set of proteins for which there is already some evidence.">
          <option value="yes">Yes</option>
          <option value="no" selected="true">No</option>
        </param>
        <when value="yes">
          <param name="refine_point_mutation" type="select" label="Allow for point mutations (substitutions)" 
          help="Test the selected sequences for the possibility of a point mutation in each one of the 
          peptides generated with the initial cleavage chemistry.">
            <option value="yes" selected="true">Yes</option>
            <option value="no">No</option>
          </param>
          <param name="refine_potential_modifications" type="select" display="checkboxes" multiple="true" label="Potential modifications to look for in refined search" help="">
            <option value="15.994915@M">Oxidation (M)</option>
            <option value="15.994915@W">Oxidation (W)</option>
            <option value="0.984016@N">Deamidation (N)</option>
            <option value="0.984016@Q">Deamidation (Q)</option>
            <option value="79.966331@S">Phospho (S)</option>
            <option value="79.966331@T">Phospho (T)</option>
            <option value="79.966331@Y">Phospho (Y)</option>
            <option value="79.956815@Y">Sulfo (Y)</option>
            <option value="42.010565@K">Acetyl (K)</option>
            <option value="43.005814@[">Carbamyl (nt)</option>
            <option value="43.005814@K">Carbamyl (K)</option>
            <option value="72.021129@[">Carboxyethyl (nt)</option>
            <option value="72.021129@K">Carboxyethyl (K)</option>
            <option value="57.021464@[">Carbamidomethyl (nt)</option>
            <option value="57.021464@K">Carbamidomethyl (K)</option>
            <option value="57.021464@C">Carbamidomethyl (C)</option>
            <option value="58.005479@C">Carboxymethyl (C)</option>
            <option value="45.987721@C">Methylthio (C)</option>
            <option value="125.047679@C">Nethylmaleimide (C)</option>
            <option value="31.989829@C">Dioxidation (C)</option>
            <option value="47.984744@C">Trioxidation (C)</option>
            <option value="27.994915@K">formyl (K)</option>
            <option value="27.994915@[">formyl (nt)</option>
            <option value="114.042927@K">GlyGly (K)</option>
            <option value="8.0502@C">ICAT-D:2H(8) (C)</option>
            <option value="9.0302@C">ICAT-C:13C(9) (C)</option>
            <option value="144.102063@[">iTRAQ (N-term)</option>
            <option value="144.102063@K">iTRAQ (K)</option>
            <option value="6.020129@L">Label:13C(6) (L)</option>
            <option value="6.020129@K">Label:13C(6) (K)</option>
            <option value="8.014199@K">Label:13C(6)15N(2) (K)</option>
            <option value="6.020129@R">Label:13C(6) (R)</option>
            <option value="4.025107@K">Label:2H(4) (K)</option>
            <option value="31.005814@C">Sulfinamide (C)</option>
            <option value="224.152478@K,224.152478@[">TMT (K,nt)</option>
            <option value="225.155833@K,225.155833@[">TMT2plex (K,nt)</option>
            <option value="229.1629328@K,229.1629328@[">TMT6plex (K,nt)</option>
          </param>
          <param name="refine_max_valid_expect" type="text" size="30" label="maximum valid expectation value for identifications coming from refine step" value="0.01" 
          help="Max E-Value of a 'refine based' hit to be reported. Notice that the default value here is stricter than
          the same parameter for 'non-refine based' identifications above. "/>
        </when>
      </conditional>
      <!-- Controls scoring of reversed sequences (yes / no / only); written to the
           "scoring, include reverse" attribute of the parametersFile configfile. -->
      <param name="reverse_scoring"
             type="select"
             label="Scoring, include reverse"
             help=" Use the X! Tandem protein sequence reverse method (sequences are reversed in memory and searched again, the tag ':reversed' is added to the protein description).">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
        <option value="only">Only</option>
      </param>

</inputs>
<configfiles>
<!-- parametersFile: template for the XML parameter document handed to MassComb.jar through the
     parametersFile command line argument. The body is escaped XML, so its whitespace and entity
     escapes are part of the generated file and must not be reformatted.
     The backslash in front of ${XTANDEM_12_10_01_PATH} keeps that placeholder literal in the output;
     presumably it is resolved at run time by the wrapper (TODO confirm).
     The three refine_* attributes are emitted only when refinementOpt.refinement equals "yes".
     NOTE(review): for the multiple="true" selects, confirm what string is rendered when no
     checkbox is ticked and that the wrapper accepts it. -->
<configfile name="parametersFile">&lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;tns:Program xmlns:tns="http://masscomb.pri.com/toolparameters/" name="XTandemWrapper" program="XTandemWrapper"&gt;
	&lt;Files/&gt;
	&lt;Parameters&gt;  
		  &lt;Attribute attributeName="xtandemLocation" value="\${XTANDEM_12_10_01_PATH}/bin" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="database" value="${database}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="precursor_mass_tolerance_lower" toolSpecificName="spectrum, parent monoisotopic mass error minus" value="${precursor_mass_tolerance_lower}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="precursor_mass_tolerance_upper" toolSpecificName="spectrum, parent monoisotopic mass error plus" value="${precursor_mass_tolerance_upper}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="precursor_error_units" toolSpecificName="spectrum, parent monoisotopic mass error units" value="${precursor_error_units}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="fragment_mass_tolerance" toolSpecificName="spectrum, fragment monoisotopic mass error" value="${fragment_mass_tolerance}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="fragment_error_units" toolSpecificName="spectrum, fragment monoisotopic mass error units" value="${fragment_error_units}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="fixed_modifications" toolSpecificName="residue, modification mass" value="${fixed_modifications}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="potential_modifications" toolSpecificName="residue, potential modification mass" value="${potential_modifications}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="minimum_fragment_mz" toolSpecificName="spectrum, minimum fragment mz" value="${minimum_fragment_mz}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="cleavage_site" toolSpecificName="protein, cleavage site" value="${cleavage_site}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="maximum_missed_cleavage_sites" toolSpecificName="scoring, maximum missed cleavage sites" value="${maximum_missed_cleavage_sites}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="max_valid_expect" toolSpecificName="output, maximum valid expectation value" value="${max_valid_expect}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="refinement" toolSpecificName="refine" value="${refinementOpt.refinement}" type="Unknown" description=""/&gt;
	#if $refinementOpt.refinement == "yes"
		  &lt;Attribute attributeName="refine_point_mutation" toolSpecificName="refine, point mutations" value="${refinementOpt.refine_point_mutation}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="refine_potential_modifications" toolSpecificName="refine, potential modification mass" value="${refinementOpt.refine_potential_modifications}" type="Unknown" description=""/&gt;
		  &lt;Attribute attributeName="refine_max_valid_expect" toolSpecificName="refine, maximum valid expectation value" value="${refinementOpt.refine_max_valid_expect}" type="Unknown" description=""/&gt;
	#end if
		  &lt;Attribute attributeName="reverse_scoring" toolSpecificName="scoring, include reverse" value="${reverse_scoring}" type="Unknown" description=""/&gt;
	&lt;/Parameters&gt;
&lt;/tns:Program&gt;  
</configfile>
</configfiles>
<outputs>
	<!-- Three datasets: the results XML (bioml, switched to prims.fileset.zip when the input
	     is a file set), an HTML report and a tabular report. -->
	<data name="outputFile" format="bioml" label="${tool.name} on ${on_string} - Results XML">
		<change_format>
			<when input="fileType.type" value="fileSet" format="prims.fileset.zip"/>
		</change_format>
	</data>
	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"/>
	<data name="outTsv" format="tabular" label="${tool.name} on ${on_string} - TSV report"/>
</outputs>
<tests>
	<!-- NOTE(review): this test element is empty, so it sets no inputs and checks no outputs.
	     TODO: add input param values and expected output files once test data is available. -->
	<test>
	</test>
</tests>
<help>

.. class:: infomark
  
This tool searches MS/MS spectra against a database using X!Tandem.

For a complete set of parameters and their default values see `the X!Tandem parameters documentation page`_ . 
Parameters that are not
made available in the UI above but are listed in the given link are submitted with their
default values.

For more information on the refine step see: `Why should I use "refinement" to find modifications?`_ .

For more information on the expectation value calculation see: 
`A Method for Assessing the Statistical Significance of Mass Spectrometry-Based Protein Identifications Using General Scoring Schemes`_
, David Fenyö and Ronald C. Beavis, Anal. Chem., 2003, 75, 768-774.
This reference describes how peptides are scored by X!Tandem. 
The expectation values on the individual peptides are calculated using this method. 
<!-- Add this from Ron's email ? :
They are an estimate of the spectrum-to-peptide match E-value associated with the 
null-hypothesis "all spectrum-to-peptide matches are stochastic".
-->

.. _the X!Tandem parameters documentation page: http://www.thegpm.org/tandem/api/index.html

.. _Why should I use "refinement" to find modifications?: http://www.thegpm.org/GPM/refine.html

.. _A Method for Assessing the Statistical Significance of Mass Spectrometry-Based Protein Identifications Using General Scoring Schemes: http://www.ncbi.nlm.nih.gov/pubmed/12622365

-----

**Output**

This tool returns the X!Tandem XML output which can be converted to MzIdentML using the DBSearch converter tool.

It also returns an HTML file with the list of peptides and the option to visualize the peptide to spectrum match
using an embedded spectrum viewer. 

.. image:: $PATH_TO_IMAGES/xtandem_results_viewer.png 

Last but not least, it returns the list of identifications in TSV (tab separated values) format for users that are satisfied with this
and do not need further processing steps like protein inference. 

For the GPM web UI of X!Tandem see:
http://ppp.thegpm.org/tandem/thegpm_ppp.html

</help>
</tool>