changeset 9:e0a1e8a0500a draft

Uploaded
author iracooke
date Fri, 13 Jun 2014 18:36:33 -0400
parents 4f6cbe948065
children 86daefc0e88d
files README README.md msgfplus_search.xml repository_dependencies.xml tool-data/msgfplus_mods.loc.sample tool-data/pepxml_databases.loc.sample tool_dependencies.xml
diffstat 7 files changed, 8 insertions(+), 324 deletions(-) [+]
line wrap: on
line diff
--- a/README	Sun Jun 09 08:17:57 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-This package is a galaxy wrapper for the MSGF+ search tool.
-
-Requirements:
-This package depends on the galaxy_protk, protk_msgfplus, protk_proteowizard packages
-Please see instructions for those packages before installing
-
-In addition to basic requirements you must also have unzip and java 6 runtime (or greater) installed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Fri Jun 13 18:36:33 2014 -0400
@@ -0,0 +1,8 @@
+## What is it?
+Galaxy tool definition file and wrapper scripts for the [MSGF+ Search Engine](http://proteomics.ucsd.edu/Software/MSGFPlus.html).
+
+## Installation
+Install from the main Galaxy Tool Shed at http://toolshed.g2.bx.psu.edu/
+
+Depends on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk). 
+
--- a/msgfplus_search.xml	Sun Jun 09 08:17:57 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,230 +0,0 @@
-<tool id="proteomics_search_msgfplus_1" name="MSGF+ MSMS Search" version="1.0.1">
-
-	<requirements>
-    	<requirement type="package" version="1.2.2">galaxy_protk</requirement>
-    	<requirement type="package" version="20130227">msgfplus</requirement>
-    	<requirement type="package" version="3_0_4388">proteowizard</requirement>
-   	</requirements>
-
-
-	<description>Run an MSGF+ Search</description>
-
-	<command>
-		rvm 1.9.3@protk-1.2.2 do msgfplus_search.rb
-		#if $database.source_select=="built_in":
-		 --galaxy -d $database.dbkey
-		#else
-		--galaxy -d $database.fasta_file
-		#end if
-
-		--var-mods='
-		$variable_mods
-		#for $custom_variable_mod in $custom_variable_mods:
-		,${custom_variable_mod.custom_mod}
-		#end for
-		'
-
-		--fix-mods='
-		$fixed_mods
-		#for $custom_fix_mod in $custom_fix_mods:
-		,${custom_fix_mod.custom_mod}
-		#end for
-		'
-
-		$input_file 
-
-		-o $output 
-
-		-r 
-
-		--enzyme=$enzyme 
-
-		--precursor-ion-tol-units=$precursor_tolu 
-
-		-v $missed_cleavages 
-
-		-f $fragment_ion_tol 
-
-		-p $precursor_ion_tol 
-
-		--instrument=$instrument
-		
-		--isotope-error-range=$isotope_error_range
-
-		--fragment-method=$fragment_method
-
-		--protocol=$protocol
-
-		--min-pep-len=$min_pep_len
-		--max-pep-len=$max_pep_len
-		--max-pep-charge=$max_pep_charge
-		--min-pep-charge=$min_pep_charge
-		--num-reported-matches=$num_reported_matches
-
-		--java-mem=$java_mem
-
-		#if $pepxml_output_use
-
-		#else
-		--no-pepxml
-		#end if
-
-	</command>
-
-	<inputs>	
-		<conditional name="database">
-			<param name="source_select" type="select" label="Database source">
-				<option value="built_in">Built-In</option>
-				<option value="input_ref">Your Upload File</option>
-			</param>
-			<when value="built_in">
-				<param name="dbkey" type="select" format="text" >
-					<label>Database</label>
-					<options from_file="pepxml_databases.loc">
-						<column name="name" index="0" />
-						<column name="value" index="2" />
-					</options>
-				</param>
-			</when>
-			<when value="input_ref">
-				<param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />
-			</when>
-		</conditional>
-		
-		<param name="input_file" type="data" format="mzml" multiple="false" label="MSMS File" help="An mzML file with MS/MS data"/>
-
-
-		<param name="variable_mods" format="text" type="select" multiple="true" label="Variable Modifications" help="Multiple Selection Allowed">
-			<options from_file="msgfplus_mods.loc">
-				<column name="name" index="0" />
-				<column name="value" index="2" />
-			</options>
-		</param>
-
-		<repeat name="custom_variable_mods" title="Custom Variable Modifications" help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">
-			<param name="custom_mod" type="text">
-			</param>
-		</repeat>
-		
-		
-		<param name="fixed_mods" format="text" type="select" multiple="true" label="Fixed Modifications" help="Multiple Selection Allowed">
-			<options from_file="msgfplus_mods.loc">
-				<column name="name" index="0" />
-				<column name="value" index="2" />
-			</options>
-		</param>
-
-		<repeat name="custom_fix_mods" title="Custom Fixed Modifications" help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">
-			<param name="custom_mod" type="text">
-			</param>
-		</repeat>
-		
-		
-
-		<param name="missed_cleavages" type="select" format="text" help="Allow peptides to contain up to this many missed enzyme cleavage sites">
-			<label>Missed Cleavages Allowed</label>
-		    <option value="0">0</option>		
-			<option value="1">1</option>
-			<option value="2">2</option>
-		</param>
-		
-		<param name="enzyme" type="select" format="text">
-		    <label>Enzyme</label>
-		    <option value="0">unspecific cleavage</option>
-		    <option value="1">Trypsin</option>
-		    <option value="2">Chymotrypsin</option>
-		    <option value="3">Lys-C</option>
-		   	<option value="4">Lys-N</option>
-			<option value="5">glutamyl endopeptidase</option>
-			<option value="6">Arg-C</option>
-			<option value="7">Asp-N</option>
-			<option value="8">alphaLP</option>
-			<option value="9">no cleavage</option>
-		</param>
-		
-		<param name="instrument" type="select" format="text">
-	    	<label>Instrument Type</label>
-			<option value="2">TOF</option>
-			<option value="0">Low-res LCQ/LTQ</option>
-			<option value="1">High-res LTQ</option>
-		</param>
-
-		<param name="fragment_method" type="select" format="text">
-	    	<label>Fragmentation Method</label>
-			<option value="0">Respect Input File</option>
-			<option value="1">CID</option>
-			<option value="2">ETD</option>
-			<option value="3">HCD</option>
-			<option value="4">Merge spectra from same precursor</option>
-		</param>
-
-		<param name="protocol" type="select" format="text">
-	    	<label>Protocol</label>
-			<option value="0">NoProtocol</option>
-			<option value="1">Phosphorylation</option>
-			<option value="2">iTRAQ</option>
-			<option value="3">iTRAQPhospho</option>
-		</param>
-
-		<param name="fragment_ion_tol" help="Fragment Ion Tolerance in Daltons" type="float" value="0.65" min="0" max="10000" label="Fragment ion tolerance"/>
-
-		<param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance"/>
-		<param name="precursor_tolu" type="select" format="text">
-		    <label>Precursor Ion Tolerance Units</label>
-		    <option value="ppm">ppm</option>		
-			<option value="Da">Da</option>
-		</param>
-		
-		<param name="isotope_error_range" help="Accounts for the error introduced by choosing a non-monoisotopic peak for fragmentation." type="text" size="80" value="0,1" label="Isotope Error Range"/>
-
-		<param name="min_pep_len" help="" type="integer" value="6" label="Minimum Peptide Length"/>
-		<param name="max_pep_len" help="" type="integer" value="40" label="Maximum Peptide Length"/>
-		<param name="min_pep_charge" help="" type="integer" value="2" label="Minimum Peptide Charge"/>
-		<param name="max_pep_charge" help="" type="integer" value="3" label="Maximum Peptide Charge"/>
-		<param name="num_reported_matches" help="Number of matches per spectrum to be reported" type="integer" value="1" label="Num reported matches"/>
-		<param name="java_mem" help="Increase this value if you get out of memory errors" type="text" size="80" value="3500M" label="Java Memory Limit"/>
-
-
-		<param name="pepxml_output_use" type="boolean" label="Convert results to pepXML" help="" truevalue="true" falsevalue="false" />
-
-	</inputs>
-
-
-<!-- 	<outputs>
-		<data format="raw_pepxml" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}.pepXML"/>
-	</outputs>
- -->
-	<outputs>
-    	<data format="mzid" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}">
-      <change_format>
-        <when input="pepxml_output_use" value="true" format="raw_pepxml" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}"/>
-      </change_format>
-    </data>
-  </outputs>
-
-
-	<tests>
-    	<test>
-    		<param name="source_select" value="input_ref"/>
-	      	<param name="fasta_file" value="bsa.fasta"/>
-   		   	<param name="input_file" value="bsa.mzML"/>
-      		<output name="output" file="bsa.mzid" compare="sim_size" delta="600" /> 
-    	</test>
-  	</tests>
-
-
-  <help>
-
-**What it does**
-
-Runs an MS/MS database search using the MSGFPlus search engine. Output is an mzIdentML (mzid) file or, when "Convert results to pepXML" is selected, a pepXML file containing identified peptides along with their raw search scores.
-
-----
-
-**References**
-
-Please see http://proteomics.ucsd.edu/Software/MSGFPlus.html for details of the MSGFPlus search engine and references describing its algorithm
-
-  </help>
-
-</tool>
--- a/repository_dependencies.xml	Sun Jun 09 08:17:57 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<repositories description="Proteomics datatypes, MSGF+ and Protk">
-    
-	<repository toolshed="http://toolshed.g2.bx.psu.edu" name="proteomics_datatypes" owner="iracooke" changeset_revision="09b89b345de2"/>
-
- </repositories>
--- a/tool-data/msgfplus_mods.loc.sample	Sun Jun 09 08:17:57 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-#This file lists the names of inbuilt chemical modifications accepted by msgfplus
-#Each entry consists of 4 tab separated fields like this
-#<Displayed Name>	<DBKey>	<Modification String>	<DBKey>
-#
-#Modification strings should conform to the standard MSGFPlus syntax with the following exception
-#The ModType field is overridden by the msgfplus_search.rb tool. In other words any of these mods
-#May be passed to the tool as a variable or fixed mod and the tool will substitute the appropriate ModType
-#value
-#
-#Standard MSGFPlus syntax is
-#
-# To input a modification, use the following command:
-# Mass or CompositionStr, Residues, ModType, Position, Name (all the five fields are required).
-# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num])
-# 	- C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur) and P (Phosphorus) are allowed.
-# 	- Any atom can be omitted, but the atoms that are present must appear in the order given above.
-# 	- Negative numbers are allowed.
-# 	- E.g. C2H2O1 (valid), H2C1O1 (invalid) 
-# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient).
-# 	- E.g. 15.994915 
-# Residues: affected amino acids (must be upper-case letters)
-# 	- Must be upper-case letters or *
-# 	- Use * if this modification is applicable to any residue. 
-# 	- * should not be "anywhere" modification (e.g. "15.994915, *, opt, any, Oxidation" is not allowed.) 
-# 	- E.g. NQ, *
-# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive)
-# Position: position in the peptide where the modification can be attached. 
-# 	- One of the following five values should be used:
-# 	- any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term) 
-# 	- Case insensitive
-# 	- "-" can be omitted
-# 	- E.g. any, Any, Prot-n-Term, ProtNTerm => all valid
-# Name: name of the modification (Unimod PSI-MS name)
-# 	- For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name
-# 	- E.g. Phospho, Acetyl
-#C2H3N1O1,C,fix,any,Carbamidomethyl 		# Fixed Carbamidomethyl C
-# Variable Modifications (default: none)
-#O1,M,opt,any,Oxidation				# Oxidation M
-#15.994915,M,opt,any,Oxidation			# Oxidation M (mass is used instead of CompositionStr)
-#H-1N-1O1,NQ,opt,any,Deamidated			# Negative numbers are allowed.
-#C2H3NO,*,opt,N-term,Carbamidomethyl		# Variable Carbamidomethyl N-term
-#H-2O-1,E,opt,N-term,Pyro_glu			# Pyro-glu from E
-#H-3N-1,Q,opt,N-term,Pyro-glu			# Pyro-glu from Q
-#C2H2O,*,opt,Prot-N-term,Acetyl			# Acetylation Protein N-term
-#C2H2O1,K,opt,any,Acetyl			# Acetylation K
-#CH2,K,opt,any,Methyl				# Methylation K
-#HO3P,STY,opt,any,Phospho			# Phosphorylation STY
-
-Carbamidomethyl C	carbamidomethyl_c_	C2H3N1O1,C,opt,any,Carbamidomethyl	carbamidomethyl_c_
-Oxidation M	oxidation_m_	O1,M,opt,any,Oxidation	oxidation_m_
\ No newline at end of file
--- a/tool-data/pepxml_databases.loc.sample	Sun Jun 09 08:17:57 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#This file lists the names of protein databases installed locally in protk. 
-# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool
-# In order to combine search results with Interprophet, all searches must be run against an identical database
-#
-# Entries should be structured as follows
-# Display_name omssa_tandem_dbname dbkey
-#
-#
-Swissprot	spall_	spall	spall_
-Combined PlasmboDB (falciparum) and Swissprot Human	plasmodb_pfalciparum_sphuman_	plasmodb_pfalciparum_sphuman	plasmodb_pfalciparum_sphuman_
-Swissprot Human	sphuman_	sphuman	sphuman_
-Combined Swissprot/TRembl Human	sptrhuman_	sptrhuman	sptrhuman_
-Swissprot Mouse	spmouse_	spmouse	spmouse_
--- a/tool_dependencies.xml	Sun Jun 09 08:17:57 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-
-
-    <package name="galaxy_protk" version="1.2.2">
-         <repository toolshed="http://toolshed.g2.bx.psu.edu" name="galaxy_protk" owner="iracooke" changeset_revision="c25df71f7b68" prior_installation_required="True"/>
-    </package>
-
-	<package name="proteowizard" version="3_0_4388">
-	     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="protk_proteowizard" owner="iracooke" changeset_revision="863462ea0187"/>
-	</package>
-
-    <package name="msgfplus" version="20130227">
-	     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="protk_msgfplus" owner="iracooke" changeset_revision="75a2edcb6d0c"/>
-    </package>
-
-
-</tool_dependency>