Galaxy |

Changeset 10:86daefc0e88d (2014-06-13)

Previous changeset 9:e0a1e8a0500a (2014-06-13) Next changeset 11:deb61a965680 (2014-06-14)

Commit message:
Uploaded

added:
README
msgfplus_search.xml
repository_dependencies.xml
tool-data/msgfplus_mods.loc.sample
tool-data/pepxml_databases.loc.sample

removed:
README.md

diff -r e0a1e8a0500a -r 86daefc0e88d README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Fri Jun 13 18:36:52 2014 -0400

@@ -0,0 +1,6 @@
+This package is a Galaxy wrapper for the MSGF+ search tool.
+
+Requirements:
+This package uses protk, msgfplus and proteowizard, which must be installed separately.
+
+For instructions please see: https://github.com/iracooke/protk/#galaxy-integration
\ No newline at end of file

diff -r e0a1e8a0500a -r 86daefc0e88d README.md
--- a/README.md Fri Jun 13 18:36:33 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,8 +0,0 @@
-## What is it?
-Galaxy tool definition file and wrapper scripts for the [MSGF+ Search Engine](http://proteomics.ucsd.edu/Software/MSGFPlus.html).
-
-## Installation
-Install from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/
-
-Depends on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk).
-

diff -r e0a1e8a0500a -r 86daefc0e88d msgfplus_search.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/msgfplus_search.xml Fri Jun 13 18:36:52 2014 -0400

b'@@ -0,0 +1,188 @@\n+<tool id="proteomics_search_msgfplus_1" name="MSGF+ MSMS Search" version="1.0.3">\n+ <description>Run an MSGF+ Search</description>\n+ <requirements>\n+ <requirement type="package" version="1.3">protk</requirement>\n+ <requirement type="package" version="20140210">msgfplus</requirement>\n+ <requirement type="package" version="3_0_4388">proteowizard</requirement>\n+ </requirements>\n+ <command>\n+ msgfplus_search.rb\n+ #if $database.source_select=="built_in":\n+ --galaxy -d $database.dbkey\n+ #else\n+ --galaxy -d $database.fasta_file\n+ #end if\n+\n+ --var-mods=\'\n+ $variable_mods\n+ #for $custom_variable_mod in $custom_variable_mods:\n+ ,${custom_variable_mod.custom_mod}\n+ #end for\n+ \'\n+\n+ --fix-mods=\'\n+ $fixed_mods\n+ #for $custom_fix_mod in $custom_fix_mods:\n+ ,${custom_fix_mod.custom_mod}\n+ #end for\n+ \'\n+\n+ $input_file \n+ -o $output \n+ -r \n+ --enzyme=$enzyme \n+ --precursor-ion-tol-units=$precursor_tolu \n+ -v $missed_cleavages\n+ $cleavage_semi\n+ -f $fragment_ion_tol \n+ -p $precursor_ion_tol \n+ --instrument=$instrument\n+ --isotope-error-range=$isotope_error_range\n+ --fragment-method=$fragment_method\n+ --protocol=$protocol\n+ --min-pep-len=$min_pep_len\n+ --max-pep-len=$max_pep_len\n+ --max-pep-charge=$max_pep_charge\n+ --min-pep-charge=$min_pep_charge\n+ --num-reported-matches=$num_reported_matches\n+ --java-mem=$java_mem\n+ #unless $pepxml_output_use:\n+ --no-pepxml\n+ #end unless\n+ </command>\n+\n+ <inputs>\n+ <conditional name="database">\n+ <param name="source_select" type="select" label="Database source">\n+ <option value="built_in">Built-In</option>\n+ <option value="input_ref" selected="true">Your Upload File</option>\n+ </param>\n+ <when value="built_in">\n+ <param name="dbkey" type="select" format="text" >\n+ <label>Database</label>\n+ <options from_file="pepxml_databases.loc">\n+ <column name="name" index="0" />\n+ <column name="value" index="2" />\n+ </options>\n+ </param>\n+ </when>\n+ <when 
value="input_ref">\n+ <param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />\n+ </when>\n+ </conditional>\n+ <param name="input_file" type="data" format="mzml" multiple="false" label="MSMS File" help="An mzML file with MS/MS data"/>\n+ <param name="variable_mods" format="text" type="select" multiple="true" label="Variable Modifications" help="Multiple Selection Allowed">\n+ <options from_file="msgfplus_mods.loc">\n+ <column name="name" index="0" />\n+ <column name="value" index="2" />\n+ </options>\n+ </param>\n+ <repeat name="custom_variable_mods" title="Custom Variable Modifications" \n+ help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">\n+ <param name="custom_mod" type="text" />\n+ </repeat>\n+ <param name="fixed_mods" format="text" type="select" multiple="true" label="Fixed Modifications" help="Multiple Selection Allowed">\n+ <options from_file="msgfplus_mods.loc">\n+ <column name="name" index="0" />\n+ <column name="value" index="2" />\n+ </options>\n+ </param>\n+ <repeat name="custom_fix_mods" title="Custom Fixed Modifications" help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">\n+ <param name="custom_mod" type="text">\n+ </param>\n+ </repeat>\n+ '..b' name="instrument" type="select" format="text">\n+ <label>Instrument Type</label>\n+ <option value="2">TOF</option>\n+ <option value="0">Low-res LCQ/LTQ</option>\n+ <option value="1">High-res LTQ</option>\n+ </param>\n+\n+ <param name="fragment_method" type="select" format="text">\n+ <label>Fragmentation Method</label>\n+ <option value="0">Respect Input File</option>\n+ <option value="1">CID</option>\n+ <option value="2">ETD</option>\n+ <option value="3">HCD</option>\n+ <option value="4">Merge spectra from same precursor</option>\n+ </param>\n+\n+ <param name="protocol" type="select" format="text">\n+ <label>Protocol</label>\n+ <option value="0">NoProtocol</option>\n+ 
<option value="1">Phosphorylation</option>\n+ <option value="2">iTRAQ</option>\n+ <option value="3">iTRAQPhospho</option>\n+ </param>\n+\n+ <param name="fragment_ion_tol" help="Fragment Ion Tolerance in Daltons" type="float" value="0.65" min="0" max="10000" label="Fragment ion tolerance"/>\n+\n+ <param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance"/>\n+ <param name="precursor_tolu" type="select" format="text">\n+ <label>Precursor Ion Tolerance Units</label>\n+ <option value="ppm">ppm</option>\n+ <option value="Da">Da</option>\n+ </param>\n+\n+ <param name="isotope_error_range" help="Takes into account of the error introduced by chooosing a non-monoisotopic peak for fragmentation." type="text" size="80" value="0,1" label="Isotope Error Range"/>\n+ <param name="min_pep_len" help="" type="integer" value="6" label="Minimum Peptide Length"/>\n+ <param name="max_pep_len" help="" type="integer" value="40" label="Maximum Peptide Length"/>\n+ <param name="min_pep_charge" help="" type="integer" value="2" label="Minimum Peptide Charge"/>\n+ <param name="max_pep_charge" help="" type="integer" value="3" label="Maximum Peptide Charge"/>\n+ <param name="num_reported_matches" help="Number of matches per spectrum to be reported" type="integer" value="1" label="Num reported matches"/>\n+ <param name="java_mem" help="Increase this value if you get out of memory errors" type="text" size="80" value="3500M" label="Java Memory Limit"/>\n+ <param name="pepxml_output_use" type="boolean" label="Convert results to pepXML" help="" truevalue="true" falsevalue="false" />\n+ </inputs>\n+ <outputs>\n+ <data format="mzid" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key(\'dbkey\') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}">\n+ <change_format>\n+ <when input="pepxml_output_use" value="true" 
format="raw_pepxml" metadata_source="input_file" \n+ label="MSGF+_vs_${database.dbkey if $database.has_key(\'dbkey\') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}"/>\n+ </change_format>\n+ </data>\n+ </outputs>\n+ <tests>\n+ <test>\n+ <param name="source_select" value="input_ref"/>\n+ <param name="fasta_file" value="bsa.fasta"/>\n+ <param name="input_file" value="bsa.mzML"/>\n+ <output name="output" file="bsa.mzid" compare="sim_size" delta="600" /> \n+ </test>\n+ </tests>\n+ <help>\n+\n+**What it does**\n+\n+Runs an MS/MS database search using the MSGFPlus search engine. Output is in the form of a pepXML file containing identified peptides along with their raw search scores.\n+\n+----\n+\n+**References**\n+\n+Please see http://proteomics.ucsd.edu/Software/MSGFPlus.html for details of the MSGFPlus search engine and references describing its algorithm\n+\n+ </help>\n+</tool>\n'

diff -r e0a1e8a0500a -r 86daefc0e88d repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Fri Jun 13 18:36:52 2014 -0400

@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="Proteomics datatypes">
+ <repository changeset_revision="f66f8ca7b7b9" name="proteomics_datatypes" owner="iracooke" toolshed="http://toolshed.g2.bx.psu.edu" />
+ </repositories>

diff -r e0a1e8a0500a -r 86daefc0e88d tool-data/msgfplus_mods.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/msgfplus_mods.loc.sample Fri Jun 13 18:36:52 2014 -0400

[

@@ -0,0 +1,50 @@
+#This file lists the names of inbuilt chemical modifications accepted by msgfplus
+#Each entry consists of 4 tab separated fields like this
+#<Displayed Name> <DBKey> <Modification String> <DBKey>
+#
+#Modification strings should conform to the standard MSGFPlus syntax with the following exception
+#The ModType field is overridden by the msgfplus_search.rb tool. In other words, any of these mods
+#may be passed to the tool as a variable or fixed mod, and the tool will substitute the appropriate ModType
+#value.
+#
+#Standard MSGFPlus syntax is
+#
+# To input a modification, use the following command:
+# Mass or CompositionStr, Residues, ModType, Position, Name (all the five fields are required).
+# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num])
+# - C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur) and P (Phosphorus) are allowed.
+# - An atom can be omitted, but the atoms that are present must appear in the order given above.
+# - Negative numbers are allowed.
+# - E.g. C2H2O1 (valid), H2C1O1 (invalid)
+# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient).
+# - E.g. 15.994915
+# Residues: affected amino acids (must be upper-case letters)
+# - Must be upper-case letters or *
+# - Use * if this modification is applicable to any residue.
+# - * should not be used for an "anywhere" modification (e.g. "15.994915, *, opt, any, Oxidation" is not allowed.)
+# - E.g. NQ, *
+# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive)
+# Position: position in the peptide where the modification can be attached.
+# - One of the following five values should be used:
+# - any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term)
+# - Case insensitive
+# - "-" can be omitted
+# - E.g. any, Any, Prot-n-Term, ProtNTerm => all valid
+# Name: name of the modification (Unimod PSI-MS name)
+# - For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name
+# - E.g. Phospho, Acetyl
+#C2H3N1O1,C,fix,any,Carbamidomethyl # Fixed Carbamidomethyl C
+# Variable Modifications (default: none)
+#O1,M,opt,any,Oxidation # Oxidation M
+#15.994915,M,opt,any,Oxidation # Oxidation M (mass is used instead of CompositionStr)
+#H-1N-1O1,NQ,opt,any,Deamidated # Negative numbers are allowed.
+#C2H3NO,*,opt,N-term,Carbamidomethyl # Variable Carbamidomethyl N-term
+#H-2O-1,E,opt,N-term,Pyro_glu # Pyro-glu from E
+#H-3N-1,Q,opt,N-term,Pyro-glu # Pyro-glu from Q
+#C2H2O,*,opt,Prot-N-term,Acetyl # Acetylation Protein N-term
+#C2H2O1,K,opt,any,Acetyl # Acetylation K
+#CH2,K,opt,any,Methyl # Methylation K
+#HO3P,STY,opt,any,Phospho # Phosphorylation STY
+
+Carbamidomethyl C carbamidomethyl_c_ C2H3N1O1,C,opt,any,Carbamidomethyl carbamidomethyl_c_
+Oxidation M oxidation_m_ O1,M,opt,any,Oxidation oxidation_m_
\ No newline at end of file

diff -r e0a1e8a0500a -r 86daefc0e88d tool-data/pepxml_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pepxml_databases.loc.sample Fri Jun 13 18:36:52 2014 -0400

@@ -0,0 +1,13 @@
+#This file lists the names of protein databases installed locally in protk.
+# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool
+# In order to combine search results with Interprophet, searches must be run against an identical database
+#
+# Entries should be structured as follows
+# Display_name omssa_tandem_dbname dbkey
+#
+#
+Swissprot spall_ spall spall_
+Combined PlasmboDB (falciparum) and Swissprot Human plasmodb_pfalciparum_sphuman_ plasmodb_pfalciparum_sphuman plasmodb_pfalciparum_sphuman_
+Swissprot Human sphuman_ sphuman sphuman_
+Combined Swissprot/TRembl Human sptrhuman_ sptrhuman sptrhuman_
+Swissprot Mouse spmouse_ spmouse spmouse_