diff masscomb_dbsearch_xtandem.xml @ 0:d6001e8d7441

Push to main toolshed
author pieter.lukasse@wur.nl
date Wed, 08 Jan 2014 11:34:51 +0100
parents
children b34fb7461546
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/masscomb_dbsearch_xtandem.xml	Wed Jan 08 11:34:51 2014 +0100
@@ -0,0 +1,293 @@
+<tool name="X!Tandem" id="masscomb_xtandem200" version="2.0.0">
+	<description>MS/MS DB search</description>
+	<!-- 
+	   For remote debugging start your listener on port 8000 and use the following as command interpreter:
+	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
+	-->    
+	<!-- Runs MassComb.jar through "java -jar" with the XTANDEMSEARCH action. The selected input file, the generated parameters file and the three output datasets are passed as options. -->
+	<command interpreter="java -jar">
+		MassComb.jar
+		-action XTANDEMSEARCH
+		-outputFile $outputFile
+		-fileGrouping $fileType.type
+		-inputFile $fileType.inputFile
+		-parametersFile $parametersFile
+		-outputTsv $outTsv
+		-outReport $htmlReportFile
+		-outReportPicturesPath $htmlReportFile.files_path
+	</command>
+<inputs>
+  <!-- Input selector: either a single mzML dataset or a "prims.fileset.zip" file set. The chosen type is also passed to the command as -fileGrouping. -->
+  <conditional name="fileType">
+    <param name="type" type="select" label="select MS/MS input type">
+      <option value="single" selected="true">single-File</option>
+      <option value="fileSet">fileSet</option>
+    </param>
+    <when value="single">
+      <param name="inputFile" type="data" format="mzml" label="MS/MS input file (mzml)"/>
+    </when>
+    <when value="fileSet">
+      <param name="inputFile" type="data" format="prims.fileset.zip" label="input file"/>
+    </when>
+  </conditional>
+
+   <!-- Precursor (parent ion) mass tolerance: lower and upper bound plus the unit they are expressed in. -->
+   <param name="precursor_mass_tolerance_lower" type="text" size="30" label="precursor monoisotopic mass_tolerance_lower" value="100" help=""/>
+   <param name="precursor_mass_tolerance_upper" type="text" size="30" label="precursor monoisotopic mass_tolerance_upper" value="100" help=""/>
+   <param name="precursor_error_units" type="select" label="precursor_error_units" help="">
+     <option value="ppm" selected="true">ppm</option>
+     <option value="Daltons">Daltons</option>
+   </param>
+   <!-- Fragment ion mass tolerance and its unit. -->
+   <param name="fragment_mass_tolerance" type="text" size="30" label="fragment_mass_tolerance" value="0.4" help=""/>
+   <param name="fragment_error_units" type="select" label="fragment_error_units" help="">
+     <option value="ppm">ppm</option>
+     <option value="Daltons" selected="true">Daltons</option>
+   </param>
+   <!-- FASTA dataset with the protein sequences to search against. -->
+   <param name="database" type="data" format="fasta" label="Protein sequences DB (FASTA)"/>
+   <!-- 
+   <param name="min_precursor_charge" type="text" size="30" label="min_precursor_charge" value="1" help=""/>
+   <param name="max_precursor_charge" type="text" size="30" label="max_precursor_charge" value="4" help=""/>-->
+   <!-- Fixed ("complete") modifications, forwarded to the parameters file as "residue, modification mass". Each value is a comma-separated list of mass@residue entries; going by the option labels, "[" stands for the N-terminus and "]" for the C-terminus. Every value appears only once so that no checkbox is duplicated. -->
+   <param name="fixed_modifications" type="select" display="checkboxes" multiple="true" label="Complete modifications" help="">
+     <option value="57.021464@C">Carbamidomethyl (C)</option>
+     <option value="57.021464@C,10.008269@R,8.014199@K">Cam+SILAC (8@K,10@R)</option>
+     <option value="57.021464@C,4.025107@K,6.020129@R">Cam+SILAC (4@K,6@R)</option>
+     <option value="57.021464@C,4.025107@K,6.020129@R,6.020129@L">Cam+SILAC (4@K,6@R,6@L)</option>
+     <option value="58.005479@C">Carboxymethyl (C)</option>
+     <option value="45.987721@C">Methylthio (C)</option>
+     <option value="47.984744@C">Trioxidation (C)</option>
+     <option value="442.224991@C">ICAT-D (C)</option>
+     <option value="450.275205@C">ICAT-D:2H(8) (C)</option>
+     <option value="227.126991@C">ICAT-C (C)</option>
+     <option value="236.157185@C">ICAT-C:13C(9) (C)</option>
+     <option value="105.057849@C">Pyridylethyl (C)</option>
+     <option value="71.037114@C">Propionamide (C)</option>
+     <option value="125.047679@C">Nethylmaleimide (C)</option>
+     <option value="144.102063@[,144.102063@K">iTRAQ (N-term,K)</option>
+     <option value="57.021464@C,144.102063@[,144.102063@K">Cam + iTRAQ (C,N-term,K)</option>
+     <option value="57.021464@C,224.152478@K,224.152478@[">Cam + TMT (C,K,nt)</option>
+     <option value="57.021464@C,225.155833@K,225.155833@[">Cam + TMT2plex (C,K,nt)</option>
+     <option value="57.021464@C,229.1629328@K,229.1629328@[">Cam + TMT6plex (C,K,nt)</option>
+     <option value="57.021464@C,28.0313@[,28.0313@K">Cam + Dimethyl (C,28@N-term,K)</option>
+     <option value="57.021464@C,32.0564@[,32.0564@K">Cam + Dimethyl (C,32@N-term,K)</option>
+     <option value="57.021464@C,36.0757@[,36.0757@K">Cam + Dimethyl (C,36@N-term,K)</option>
+     <option value="45.987721@C,144.102063@[,144.102063@K">Methylthio + iTRAQ (C,N-term,K)</option>
+     <option value="45.987721@C,224.152478@K,224.152478@[">Methylthio + TMT (C,K,nt)</option>
+     <option value="45.987721@C,225.155833@K,225.155833@[">Methylthio + TMT2plex (C,K,nt)</option>
+     <option value="45.987721@C,229.1629328@K,229.1629328@[">Methylthio + TMT6plex (C,K,nt)</option>
+     <option value="14.0156@],14.0156@D,14.0156@E">Methyl +14Da (D,E,C-term)</option>
+   </param>
+      <!-- Variable ("potential") modifications, forwarded to the parameters file as "residue, potential modification mass". Values use the same mass@residue notation as the fixed modifications. Every value appears only once so that no checkbox is duplicated. -->
+      <param name="potential_modifications" type="select" display="checkboxes" multiple="true" label="Potential modifications" help="">
+        <option value="15.994915@M">Oxidation (M)</option>
+        <option value="15.994915@W">Oxidation (W)</option>
+        <option value="0.984016@N">Deamidation (N)</option>
+        <option value="0.984016@Q">Deamidation (Q)</option>
+        <option value="79.966331@S">Phospho (S)</option>
+        <option value="79.966331@T">Phospho (T)</option>
+        <option value="79.966331@Y">Phospho (Y)</option>
+        <option value="79.956815@Y">Sulfo (Y)</option>
+        <option value="42.010565@K">Acetyl (K)</option>
+        <option value="43.005814@[">Carbamyl (nt)</option>
+        <option value="43.005814@K">Carbamyl (K)</option>
+        <option value="72.021129@[">Carboxyethyl (nt)</option>
+        <option value="72.021129@K">Carboxyethyl (K)</option>
+        <option value="57.021464@[">Carbamidomethyl (nt)</option>
+        <option value="57.021464@K">Carbamidomethyl (K)</option>
+        <option value="57.021464@C">Carbamidomethyl (C)</option>
+        <option value="58.005479@C">Carboxymethyl (C)</option>
+        <option value="45.987721@C">Methylthio (C)</option>
+        <option value="125.047679@C">Nethylmaleimide (C)</option>
+        <option value="31.989829@C">Dioxidation (C)</option>
+        <option value="47.984744@C">Trioxidation (C)</option>
+        <option value="27.994915@K">formyl (K)</option>
+        <option value="27.994915@[">formyl (nt)</option>
+        <option value="114.042927@K">GlyGly (K)</option>
+        <option value="8.0502@C">ICAT-D:2H(8) (C)</option>
+        <option value="9.0302@C">ICAT-C:13C(9) (C)</option>
+        <option value="144.102063@[">iTRAQ (N-term)</option>
+        <option value="144.102063@K">iTRAQ (K)</option>
+        <option value="6.020129@L">Label:13C(6) (L)</option>
+        <option value="6.020129@K">Label:13C(6) (K)</option>
+        <option value="8.014199@K">Label:13C(6)15N(2) (K)</option>
+        <option value="6.020129@R">Label:13C(6) (R)</option>
+        <option value="4.025107@K">Label:2H(4) (K)</option>
+        <option value="31.005814@C">Sulfinamide (C)</option>
+        <option value="224.152478@K,224.152478@[">TMT (K,nt)</option>
+        <option value="225.155833@K,225.155833@[">TMT2plex (K,nt)</option>
+        <option value="229.1629328@K,229.1629328@[">TMT6plex (K,nt)</option>
+      </param>
+      <!-- 
+      <param name="missed_cleavages" type="text" size="30" label="missed_cleavages" value="1" help="Nr. of possible cleavage sites missed by the enzyme"/>-->
+      <!-- Lowest fragment m/z taken into account, the enzyme cleavage rule, and the number of missed cleavage sites tolerated per peptide. -->
+      <param name="minimum_fragment_mz" type="text" size="30" label="minimum_fragment_mz" value="150" help=""/>
+      <param name="cleavage_site" type="select" label="cleavage_site" help="">
+        <option value="[RK]|{P}" selected="true">trypsin, [RK]|{P}</option>
+        <option value="[R]|[X]">endo-arg-C, [R]|[X]</option>
+        <option value="[K]|[X]">endo-lys-C, [K]|[X]</option>
+        <option value="[E]|[X]">endo-glu-C, [E]|[X]</option>
+        <option value="[X]|[D]">endo-asp-N, [X]|[D]</option>
+        <option value="[ED]|[X]">V8, [ED]|[X]</option>
+        <option value="[FYWL]|{P}">chymotrypsin, [FYWL]|{P}</option>
+      </param>
+      <param name="maximum_missed_cleavage_sites" type="integer" size="10" value="1" 
+      label="maximum missed cleavage sites" 
+      help="maximum number of missed cleavage sites allowed within a peptide. For a specific, 
+      aggressive enzyme such as trypsin, the number of missed sites will be low: a value of 1 or 2 is appropriate. 
+      For a non-specific enzyme, such as pepsin, then a value of 50 is more appropriate."/>
+      <!-- E-value cut-off for reported hits; written to the parameters file as "output, maximum valid expectation value". -->
+      <param name="max_valid_expect" type="text" size="30" label="maximum valid expectation value" value="0.1" 
+      help="Max E-Value of a hit to be reported. All results with expectation values less than this value 
+      are considered to be statistically significant and are recorded. "/>
+      <!-- Optional second-pass ("refine") search. The refine_* parameters are only shown, and only written to the parameters file, when "Refine search" is set to Yes. The modification list mirrors the "Potential modifications" select; every value appears only once. -->
+      <conditional name="refinementOpt">
+        <param name="refinement" type="select" label="Refine search" help="Select this to enable a second round of more
+        detailed searching, using only the set of proteins found by the constraints above. E.g.
+        Rather than entering the 'potential modifications' in the options above, try entering them here only. This is
+        faster and limits this more thorough searching to a set of proteins for which there is already some evidence.">
+          <option value="yes">Yes</option>
+          <option value="no" selected="true">No</option>
+        </param>
+        <when value="yes">
+          <param name="refine_point_mutation" type="select" label="Allow for point mutations (substitutions)"
+          help="Test the selected sequences for the possibility of a point mutation in each one of the
+          peptides generated with the initial cleavage chemistry.">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+          </param>
+          <param name="refine_potential_modifications" type="select" display="checkboxes" multiple="true" label="Potential modifications to look for in refined search" help="">
+            <option value="15.994915@M">Oxidation (M)</option>
+            <option value="15.994915@W">Oxidation (W)</option>
+            <option value="0.984016@N">Deamidation (N)</option>
+            <option value="0.984016@Q">Deamidation (Q)</option>
+            <option value="79.966331@S">Phospho (S)</option>
+            <option value="79.966331@T">Phospho (T)</option>
+            <option value="79.966331@Y">Phospho (Y)</option>
+            <option value="79.956815@Y">Sulfo (Y)</option>
+            <option value="42.010565@K">Acetyl (K)</option>
+            <option value="43.005814@[">Carbamyl (nt)</option>
+            <option value="43.005814@K">Carbamyl (K)</option>
+            <option value="72.021129@[">Carboxyethyl (nt)</option>
+            <option value="72.021129@K">Carboxyethyl (K)</option>
+            <option value="57.021464@[">Carbamidomethyl (nt)</option>
+            <option value="57.021464@K">Carbamidomethyl (K)</option>
+            <option value="57.021464@C">Carbamidomethyl (C)</option>
+            <option value="58.005479@C">Carboxymethyl (C)</option>
+            <option value="45.987721@C">Methylthio (C)</option>
+            <option value="125.047679@C">Nethylmaleimide (C)</option>
+            <option value="31.989829@C">Dioxidation (C)</option>
+            <option value="47.984744@C">Trioxidation (C)</option>
+            <option value="27.994915@K">formyl (K)</option>
+            <option value="27.994915@[">formyl (nt)</option>
+            <option value="114.042927@K">GlyGly (K)</option>
+            <option value="8.0502@C">ICAT-D:2H(8) (C)</option>
+            <option value="9.0302@C">ICAT-C:13C(9) (C)</option>
+            <option value="144.102063@[">iTRAQ (N-term)</option>
+            <option value="144.102063@K">iTRAQ (K)</option>
+            <option value="6.020129@L">Label:13C(6) (L)</option>
+            <option value="6.020129@K">Label:13C(6) (K)</option>
+            <option value="8.014199@K">Label:13C(6)15N(2) (K)</option>
+            <option value="6.020129@R">Label:13C(6) (R)</option>
+            <option value="4.025107@K">Label:2H(4) (K)</option>
+            <option value="31.005814@C">Sulfinamide (C)</option>
+            <option value="224.152478@K,224.152478@[">TMT (K,nt)</option>
+            <option value="225.155833@K,225.155833@[">TMT2plex (K,nt)</option>
+            <option value="229.1629328@K,229.1629328@[">TMT6plex (K,nt)</option>
+          </param>
+          <param name="refine_max_valid_expect" type="text" size="30" label="maximum valid expectation value for identifications coming from refine step" value="0.01"
+          help="Max E-Value of a 'refine based' hit to be reported. Notice that the default value here is stricter than
+          the same parameter for 'non-refine based' identifications above. "/>
+        </when>
+        <!-- No extra parameters when refinement is switched off; declared explicitly so every select option has a matching case. -->
+        <when value="no"/>
+      </conditional>
+      <!-- Controls whether reversed protein sequences are searched as well; written to the parameters file as "scoring, include reverse". -->
+      <param name="reverse_scoring" type="select" label="Scoring, include reverse" help=" Use the X! Tandem protein sequence reverse method (sequences are reversed in memory and searched again, the tag ':reversed' is added to the protein description).">
+        <option value="yes">Yes</option>
+        <option value="no" selected="true">No</option>
+        <option value="only">Only</option>
+      </param>
+
+</inputs>
+<configfiles>
+<!-- Template for the MassComb parameters XML that is handed to the command via $parametersFile. Each Attribute maps one tool input (attributeName) to a tool-specific setting name (toolSpecificName). The three refine_* attributes are only emitted when refinementOpt.refinement is "yes".
+     NOTE(review): xtandemLocation is hard-coded to an absolute path under /home/lukas007; this looks installation-specific and presumably has to be adapted on every other Galaxy server. TODO confirm. -->
+<configfile name="parametersFile">&lt;?xml version="1.0" encoding="UTF-8"?&gt;
+&lt;tns:Program xmlns:tns="http://masscomb.pri.com/toolparameters/" name="XTandemWrapper" program="XTandemWrapper"&gt;
+	&lt;Files/&gt;
+	&lt;Parameters&gt;  
+		  &lt;Attribute attributeName="xtandemLocation" value="/home/lukas007/galaxy-dist/tool-data/prims/tandem-linux-12-10-01-1/bin/" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="database" value="${database}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="precursor_mass_tolerance_lower" toolSpecificName="spectrum, parent monoisotopic mass error minus" value="${precursor_mass_tolerance_lower}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="precursor_mass_tolerance_upper" toolSpecificName="spectrum, parent monoisotopic mass error plus" value="${precursor_mass_tolerance_upper}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="precursor_error_units" toolSpecificName="spectrum, parent monoisotopic mass error units" value="${precursor_error_units}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="fragment_mass_tolerance" toolSpecificName="spectrum, fragment monoisotopic mass error" value="${fragment_mass_tolerance}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="fragment_error_units" toolSpecificName="spectrum, fragment monoisotopic mass error units" value="${fragment_error_units}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="fixed_modifications" toolSpecificName="residue, modification mass" value="${fixed_modifications}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="potential_modifications" toolSpecificName="residue, potential modification mass" value="${potential_modifications}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="minimum_fragment_mz" toolSpecificName="spectrum, minimum fragment mz" value="${minimum_fragment_mz}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="cleavage_site" toolSpecificName="protein, cleavage site" value="${cleavage_site}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="maximum_missed_cleavage_sites" toolSpecificName="scoring, maximum missed cleavage sites" value="${maximum_missed_cleavage_sites}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="max_valid_expect" toolSpecificName="output, maximum valid expectation value" value="${max_valid_expect}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="refinement" toolSpecificName="refine" value="${refinementOpt.refinement}" type="Unknown" description=""/&gt;
+	#if $refinementOpt.refinement == "yes"
+		  &lt;Attribute attributeName="refine_point_mutation" toolSpecificName="refine, point mutations" value="${refinementOpt.refine_point_mutation}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="refine_potential_modifications" toolSpecificName="refine, potential modification mass" value="${refinementOpt.refine_potential_modifications}" type="Unknown" description=""/&gt;
+		  &lt;Attribute attributeName="refine_max_valid_expect" toolSpecificName="refine, maximum valid expectation value" value="${refinementOpt.refine_max_valid_expect}" type="Unknown" description=""/&gt;
+	#end if
+		  &lt;Attribute attributeName="reverse_scoring" toolSpecificName="scoring, include reverse" value="${reverse_scoring}" type="Unknown" description=""/&gt;
+	&lt;/Parameters&gt;
+&lt;/tns:Program&gt;  
+</configfile>
+</configfiles>
+<outputs>
+	<!-- Search results: "bioml" for a single input file, switched to "prims.fileset.zip" when a file set was selected. -->
+	<data name="outputFile" format="bioml" label="${tool.name} on ${on_string} - Results XML">
+		<change_format>
+			<when input="fileType.type" value="fileSet" format="prims.fileset.zip"/>
+		</change_format>
+	</data>
+	<!-- HTML report and tab-separated report. -->
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"/>
+	<data name="outTsv" format="tabular" label="${tool.name} on ${on_string} - TSV report"/>
+</outputs>
+<tests>
+	<!-- NOTE(review): placeholder only; this test declares no input parameters and no expected outputs. -->
+	<test>
+	</test>
+</tests>
+<help>
+
+.. class:: infomark
+  
+This tool searches MS/MS spectra against a database using X!Tandem.
+
+For a complete set of parameters and their default values see `the X!Tandem parameters documentation page`_ . 
+Parameters that are not
+made available in the UI above but are listed in the given link are submitted with their
+default values.
+
+For more information on the refine step see: `Why should I use "refinement" to find modifications?`_ .
+
+For more information on the expectation value calculation see: 
+`A Method for Assessing the Statistical Significance of Mass Spectrometry-Based Protein Identifications Using General Scoring Schemes`_
+, David Fenyö and Ronald C. Beavis, Anal. Chem., 2003, 75, 768-774.
+This reference describes how peptides are scored by X!Tandem. 
+The expectation values on the individual peptides are calculated using this method. 
+<!-- Add this from Ron's email ? :
+They are an estimate of the spectrum-to-peptide match E-value associated with the 
+null-hypothesis "all spectrum-to-peptide matches are stochastic".
+-->
+
+.. _the X!Tandem parameters documentation page: http://www.thegpm.org/tandem/api/index.html
+
+.. _Why should I use "refinement" to find modifications?: http://www.thegpm.org/GPM/refine.html
+
+.. _A Method for Assessing the Statistical Significance of Mass Spectrometry-Based Protein Identifications Using General Scoring Schemes: http://www.ncbi.nlm.nih.gov/pubmed/12622365
+
+-----
+
+**Output**
+
+This tool returns the X!Tandem XML output which can be converted to MzIdentML using the DBSearch converter tool.
+
+It also returns an HTML file with the list of peptides and the option to visualize the peptide to spectrum match
+using an embedded spectrum viewer. 
+
+.. image:: $PATH_TO_IMAGES/xtandem_results_viewer.png 
+
+Last but not least, it returns the list of identifications in TSV (tab separated values) format for users that are satisfied with this
+and do not need further processing steps like protein inference. 
+
+For the GPM web UI of X!Tandem see:
+http://ppp.thegpm.org/tandem/thegpm_ppp.html
+
+</help>
+</tool>
\ No newline at end of file