Repository 'msgfplus'
hg clone https://toolshed.g2.bx.psu.edu/repos/iracooke/msgfplus

Changeset 15:6c751c59ce18 (2015-03-26)
Previous changeset 14:32365fec702c (2014-06-26) Next changeset 16:66a149ef79f7 (2015-10-20)
Commit message:
Docker support and update for protk 1.4
modified:
msgfplus_search.xml
repository_dependencies.xml
added:
README.rst
test-data/testdb.fasta
test-data/tiny.mzML
removed:
README
b
diff -r 32365fec702c -r 6c751c59ce18 README
--- a/README Thu Jun 26 11:39:21 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-This package is a galaxy wrapper for the MSGF+ search tool.
-
-Requirements:
-This package uses protk, msgfplus and proteowizard, which must be installed separately. 
-
-For instructions please see: https://github.com/iracooke/protk/#galaxy-integration
\ No newline at end of file
b
diff -r 32365fec702c -r 6c751c59ce18 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu Mar 26 20:04:30 2015 -0400
b
@@ -0,0 +1,43 @@
+MS-GF+
+======
+
+Galaxy wrapper for the `MS-GF+`__ tandem ms search tool
+
+.. _msgfplus: http://proteomics.ucsd.edu/software-tools/ms-gf/
+__ msgfplus_
+
+Requirements
+------------
+
+This package uses protk_, msgfplus_ and the ``idconvert`` tool from Proteowizard_, all of which need to be present in order for the tool to work.
+
+.. _protk: https://github.com/iracooke/protk
+.. _Proteowizard: http://proteowizard.sourceforge.net/
+
+
+There are two ways you can satisfy these dependencies (choose one):
+
+1. **Manual Install:** Details on how to install protk_, msgfplus_ and Proteowizard_ manually are available here_.
+
+2. **Use Docker:** These tools are designed to run inside a docker_ container. If your galaxy supports `running tools within a docker container`__ you don't need to worry about dependencies. Simply install and things should just work.  The docker container itself is versioned and new versions of this tool will automatically download an update to the container if needed.
+
+.. _docker: https://www.docker.com/
+.. _here: https://github.com/iracooke/protk/#galaxy-integration
+.. _container: https://wiki.galaxyproject.org/Admin/Tools/Docker
+__ container_
+
+
+Further Info
+------------
+
+The source code for this tool and other protk galaxy tools is on github_.  Please visit the GitHub page to contribute to the project or to `report an issue`__.
+
+.. _github: https://github.com/iracooke/protk-galaxytools
+.. _issue: https://github.com/iracooke/protk-galaxytools/issues
+__ issue_
+
+
+Information on the MS-GF+ search tool itself can be found on the `MS-GF+ homepage`_.
+
+.. _MS-GF+ homepage: http://proteomics.ucsd.edu/software-tools/ms-gf/
+
b
diff -r 32365fec702c -r 6c751c59ce18 msgfplus_search.xml
--- a/msgfplus_search.xml Thu Jun 26 11:39:21 2014 -0400
+++ b/msgfplus_search.xml Thu Mar 26 20:04:30 2015 -0400
b
@@ -1,7 +1,8 @@
-<tool id="proteomics_search_msgfplus_1" name="MSGF+ MSMS Search" version="1.0.3">
+<tool id="proteomics_search_msgfplus_1" name="MSGF+ MSMS Search" version="1.1.0">
     <description>Run an MSGF+ Search</description>
     <requirements>
-        <requirement type="package" version="1.3">protk</requirement>
+        <container type="docker">iracooke/protk-1.4.1</container>
+        <requirement type="package" version="1.4">protk</requirement>
         <requirement type="package" version="20140210">msgfplus</requirement>
         <requirement type="package" version="3_0_4388">proteowizard</requirement>
     </requirements>
@@ -45,6 +46,9 @@
         --min-pep-charge=$min_pep_charge
         --num-reported-matches=$num_reported_matches
         --java-mem=$java_mem
+
+        --threads $threads
+
         #if $pepxml_output_use:
         --pepxml
         #end if
@@ -94,7 +98,7 @@
             <label>Missed Cleavages Allowed</label>
             <option value="0">0</option>
             <option value="1">1</option>
-            <option value="2">2</option>
+            <option selected="true" value="2">2</option>
         </param>
         
         <param name="cleavage_semi" type="boolean" label="Allow semi-cleaved peptides" help="This can increase search time dramatically" truevalue="--cleavage-semi" falsevalue="" />
@@ -116,7 +120,7 @@
         <param name="instrument" type="select" format="text">
             <label>Instrument Type</label>
             <option value="2">TOF</option>
-            <option value="0">Low-res LCQ/LTQ</option>
+            <option value="0" selected="true">Low-res LCQ/LTQ</option>
             <option value="1">High-res LTQ</option>
         </param>
 
@@ -137,7 +141,7 @@
             <option value="3">iTRAQPhospho</option>
         </param>
 
-        <param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance"/>
+        <param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="10" min="0" max="10000" label="Precursor ion tolerance"/>
         <param name="precursor_tolu" type="select" format="text">
             <label>Precursor Ion Tolerance Units</label>
             <option value="ppm">ppm</option>
@@ -151,24 +155,33 @@
         <param name="max_pep_charge" help="" type="integer" value="3" label="Maximum Peptide Charge"/>
         <param name="num_reported_matches" help="Number of matches per spectrum to be reported" type="integer" value="1" label="Num reported matches"/>
         <param name="java_mem" help="Increase this value if you get out of memory errors" type="text" size="80" value="3500M" label="Java Memory Limit"/>
-        <param name="pepxml_output_use" type="boolean" label="Convert results to pepXML" help="" truevalue="true" falsevalue="false" />
+        <param name="threads" type="integer" value="1" label="Threads" help="Number of threads to use for search."/>        
+        <param name="pepxml_output_use" checked="true" type="boolean" label="Convert results to pepXML" help="" truevalue="true" falsevalue="false" />
     </inputs>
     <outputs>
-        <data format="mzid" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}">
+        <data format="mzid" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}.${'pepXML' if $pepxml_output_use else 'mzid'}">
             <change_format>
-                <when input="pepxml_output_use" value="true" format="raw_pepxml" metadata_source="input_file" 
-                    label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.${input_file.display_name}"/>
+                <when input="pepxml_output_use" value="true" format="raw_pepxml" metadata_source="input_file"/>
             </change_format>
         </data>
     </outputs>
+
     <tests>
+        <!-- Just test that the tool runs and produces vaguely correct output -->
         <test>
             <param name="source_select" value="input_ref"/>
-              <param name="fasta_file" value="bsa.fasta"/>
-                  <param name="input_file" value="bsa.mzML"/>
-              <output name="output" file="bsa.mzid" compare="sim_size" delta="600" /> 
+            <param name="fasta_file" value="testdb.fasta" format="fasta"/>
+            <param name="input_file" value="tiny.mzML" format="mzml"/>
+            <param name="precursor_ion_tol" value="200"/>
+            <param name="pepxml_output_use" value="true"/>
+            <output name="output" format="raw_pepxml">
+                <assert_contents>
+                    <has_text text="FALPQYLKTVYQHQKAMKPWIQPKTKVIPYVRYL" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
+
     <help>
 
 **What it does**
b
diff -r 32365fec702c -r 6c751c59ce18 repository_dependencies.xml
--- a/repository_dependencies.xml Thu Jun 26 11:39:21 2014 -0400
+++ b/repository_dependencies.xml Thu Mar 26 20:04:30 2015 -0400
b
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="Proteomics datatypes">
-    <repository changeset_revision="f66f8ca7b7b9" name="proteomics_datatypes" owner="iracooke" toolshed="http://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="ac51d9dbfb4d" name="proteomics_datatypes" owner="iracooke" toolshed="https://toolshed.g2.bx.psu.edu" />
  </repositories>
b
diff -r 32365fec702c -r 6c751c59ce18 test-data/testdb.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testdb.fasta Thu Mar 26 20:04:30 2015 -0400
b
@@ -0,0 +1,38 @@
+>sp|ALBU_BOVIN|
+MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEHFKGLVLIA
+FSQYLQQCPFDEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCK
+VASLRETYGDMADCCEKQEPERNECFLSHKDDSPDLPKLKPDPNTLCDEF
+KADEKKFWGKYLYEIARRHPYFYAPELLYYANKYNGVFQECCQAEDKGAC
+LLPKIETMREKVLASSARQRLRCASIQKFGERALKAWSVARLSQKFPKAE
+FVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKE
+CCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFL
+GSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKL
+KHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVS
+RSLGKVGTRCCTKPESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCC
+TESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQT
+ALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVV
+STQTALA
+>sp|AMYS_HUMAN|
+MKLFWLLFTIGFCWAQYSSNTQQGRTSIVHLFEWRWVDIALECERYLAPK
+GFGGVQVSPPNENVAIHNPFRPWWERYQPVSYKLCTRSGNEDEFRNMVTR
+CNNVGVRIYVDAVINHMCGNAVSAGTSSTCGSYFNPGSRDFPAVPYSGWD
+FNDGKCKTGSGDIENYNDATQVRDCRLSGLLDLALGKDYVRSKIAEYMNH
+LIDIGVAGFRIDASKHMWPGDIKAILDKLHNLNSNWFPEGSKPFIYQEVI
+DLGGEPIKSSDYFGNGRVTEFKYGAKLGTVIRKWNGEKMSYLKNWGEGWG
+FMPSDRALVFVDNHDNQRGHGAGGASILTFWDARLYKMAVGFMLAHPYGF
+TRVMSSYRWPRYFENGKDVNDWVGPPNDNGVTKEVTINPDTTCGNDWVCE
+HRWRQIRNMVNFRNVVDGQPFTNWYDNGSNQVAFGRGNRGFIVFNNDDWT
+FSLTLQTGLPAGTYCDVISGDKINGNCTGIKIYVSDDGKAHFSISNSAED
+PFIAIHAESKL
+>sp|CAS1_BOVIN|
+MKLLILTCLVAVALARPKHPIKHQGLPQEVLNENLLRFFVAPFPEVFGKE
+KVNELSKDIGSESTEDQAMEDIKQMEAESISSSEEIVPNSVEQKHIQKED
+VPSERYLGYLEQLLRLKKYKVPQLEIVPNSAEERLHSMKEGIHAQQKEPM
+IGVNQELAYFYPELFRQFYQLDAYPSGAWYYVPLGTQYTDAPSFSDIPNP
+IGSENSEKTTMPLW
+>sp|CAS2_BOVIN|
+MKFFIFTCLLAVALAKNTMEHVSSSEESIISQETYKQEKNMAINPSKENL
+CSTFCKEVVRNANEEEYSIGSSSEESAEVATEEVKITVDDKHYQKALNEI
+NQFYQKFPQYLQYLYQGPIVLNPWDQVKRNAVPITPTLNREQLSTSEENS
+KKTVDMESTEVFTKKTKLTEEEKNRLNFLKKISQRYQKFALPQYLKTVYQ
+HQKAMKPWIQPKTKVIPYVRYL
\ No newline at end of file
b
diff -r 32365fec702c -r 6c751c59ce18 test-data/tiny.mzML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tiny.mzML Thu Mar 26 20:04:30 2015 -0400
b
b'@@ -0,0 +1,317 @@\n+<?xml version="1.0" encoding="ISO-8859-1"?>\n+<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.1_idx.xsd">\n+  <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="tiny" version="1.1.0">\n+    <cvList count="2">\n+      <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.7.3" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>\n+      <cv id="UO" fullName="Unit Ontology" version="18:03:2011" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/>\n+    </cvList>\n+    <fileDescription>\n+      <fileContent>\n+        <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/>\n+        <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>\n+        <cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/>\n+      </fileContent>\n+      <sourceFileList count="2">\n+        <sourceFile id="sourceFile" name="ANALYSIS.BAF" location="Q:/Data/MALDI/1110/MT573/Tilo%20Knobs%20S2_BAF.d">\n+          <cvParam cvRef="MS" accession="MS:1000772" name="Bruker BAF nativeID format" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000815" name="Bruker BAF file" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="2107bc8ebc6160293e3c95b1848d4afa96435c39"/>\n+        </sourceFile>\n+        <sourceFile id="tiny.mzML" name="tiny.mzML" location="file://.">\n+          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="c52beeb6ca5b6865b55e67b919317a96a064854b"/>\n+        </sourceFile>\n+      </sourceFileList>\n+      <contact>\n+        
<cvParam cvRef="MS" accession="MS:1000586" name="contact name" value=" pfaou"/>\n+        <cvParam cvRef="MS" accession="MS:1000590" name="contact organization" value=""/>\n+      </contact>\n+    </fileDescription>\n+    <sampleList count="1">\n+      <sample id="sample" name="">\n+      </sample>\n+    </sampleList>\n+    <softwareList count="3">\n+      <software id="exportSoftware" version="3.0.1">\n+        <cvParam cvRef="MS" accession="MS:1000717" name="CompassXport" value=""/>\n+      </software>\n+      <software id="instrumentSoftware" version="3.3.85.0">\n+        <cvParam cvRef="MS" accession="MS:1000692" name="Bruker software" value=""/>\n+        <userParam name="AcquisitionProgram" value="flexControl"/>\n+      </software>\n+      <software id="pwiz_2.2.0_x0020__x0028_TPP_x0020_v4.6_x0020_OCCUPY_x0020_rev_x0020_1_x002c__x0020_Build_x0020_201210300824_x0020__x0028_linux_x0029__x0029_" version="2.2.0 (TPP v4.6 OCCUPY rev 1, Build 201210300824 (linux))">\n+        <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard" value=""/>\n+      </software>\n+    </softwareList>\n+    <instrumentConfigurationList count="1">\n+      <instrumentConfiguration id="instrument">\n+        <cvParam cvRef="MS" accession="MS:1000705" name="ultraflex III TOF/TOF" value=""/>\n+        <componentList count="3">\n+          <source order="1">\n+            <cvParam cvRef="MS" accession="MS:1000075" name="matrix-assisted laser desorption ionization" value=""/>\n+          </source>\n+          <analyzer order="2">\n+            <cvParam cvRef="MS" accession="MS:1000084" name="time-of-flight" value=""/>\n+          </analyzer>\n+          <detector order="3">\n+            <cvParam cvRef="MS" accession="MS:1000026" name="detector type" value=""/>\n+          </detector>\n+        </componentList>\n+        <softwareRef ref="instrumentSoftware"/>\n+      </instrumentConfiguration>\n+    </instrumentConfigurationList>\n+    <dataProcessingList count="2">\n+      
<dataProcessing id="exportation">\n+        <processingMethod order="1" software'..b'85" name="total ion current" value="31088.32173543"/>\n+          <cvParam cvRef="MS" accession="MS:1000504" name="base peak m/z" value="1488.98220680" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>\n+          <cvParam cvRef="MS" accession="MS:1000505" name="base peak intensity" value="9322.68373807" unitCvRef="MS" unitAccession="MS:1000131" unitName="number of counts"/>\n+          <cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="2"/>\n+          <cvParam cvRef="MS" accession="MS:1000527" name="highest observed m/z" value="1491.24548135" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>\n+          <cvParam cvRef="MS" accession="MS:1000528" name="lowest observed m/z" value="59.96272840" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>\n+          <scanList count="1">\n+            <cvParam cvRef="MS" accession="MS:1000795" name="no combination" value=""/>\n+            <scan>\n+              <cvParam cvRef="MS" accession="MS:1000016" name="scan start time" value="87.8333" unitCvRef="UO" unitAccession="UO:0000031" unitName="minute"/>\n+            </scan>\n+          </scanList>\n+          <precursorList count="1">\n+            <precursor>\n+              <selectedIonList count="1">\n+                <selectedIon>\n+                  <cvParam cvRef="MS" accession="MS:1000744" name="selected ion m/z" value="1489.86" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>\n+                </selectedIon>\n+              </selectedIonList>\n+              <activation>\n+                <cvParam cvRef="MS" accession="MS:1000044" name="dissociation method" value=""/>\n+              </activation>\n+            </precursor>\n+          </precursorList>\n+          <binaryDataArrayList count="2">\n+            <binaryDataArray encodedLength="512">\n+              <cvParam cvRef="MS" accession="MS:1000523" name="64-bit float" 
value=""/>\n+              <cvParam cvRef="MS" accession="MS:1000576" name="no compression" value=""/>\n+              <cvParam cvRef="MS" accession="MS:1000514" name="m/z array" value="" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/>\n+              <binary>AAAAoDr7TUAAAABA4U5TQAAAAKDOjFRAAAAAYBR9VUAAAACATqJYQAAAAICzeFlAAAAAoEN6W0AAAABgNflbQAAAAKDet1xAAAAAYM0WX0AAAABg/RtgQAAAAGDGVGBAAAAAQDQ5YUAAAACgC1piQAAAAICxOGNAAAAAAP7YY0AAAABAxxhlQAAAAKB6d2VAAAAAgDnYZUAAAACA/TdmQAAAACCTlmhAAAAAYF6UakAAAADglTRsQAAAAIDVk2xAAAAAQKAJcEAAAABAE6pxQAAAAKD3x3FAAAAAQLb4cUAAAABg4dV0QAAAAEDh03lAAAAAgBoBgEAAAACgj96DQAAAACB8o5VAAAAAwJp/lkAAAABg9o2WQAAAAGBQl5ZAAAAAgJ2glkAAAAAgIr+WQAAAAIDZ5JZAAAAAwL/wlkAAAACgFvqWQAAAAIDlBJdAAAAAINQQl0AAAACgzB6XQAAAAAAFLJdAAAAAYLg1l0AAAADA7UOXQAAAAGD7TJdA</binary>\n+            </binaryDataArray>\n+            <binaryDataArray encodedLength="256">\n+              <cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" value=""/>\n+              <cvParam cvRef="MS" accession="MS:1000576" name="no compression" value=""/>\n+              <cvParam cvRef="MS" accession="MS:1000515" name="intensity array" value="" unitCvRef="MS" unitAccession="MS:1000131" unitName="number of counts"/>\n+              <binary>DykLQ5jS4kJ0ggxDC8pKQ/sT+0JBi/xC6V0RQ+8nhUOldBFD3dHoQm+cI0N9sgxDumcAQ3Zc8UKzwLNDiQUTQ8XNIUONE1lD3Q+gQ9aVJkPvReFD/Q4uQ2eyZkMvRitDeOMTQ8E0cUNzSotDWOkdQ83tfkP1u6xDDTHOQ575v0P4BVlDlp+dQ1Fkq0PCvJVDx2SfQ3ams0PnGgpENKcMRDawDkTCaiJEuCkyRLUbA0WRVBtFWFIdRbyqEUbw0T9F</binary>\n+            </binaryDataArray>\n+          </binaryDataArrayList>\n+        </spectrum>\n+      </spectrumList>\n+    </run>\n+  </mzML>\n+  <indexList count="2">\n+    <index name="spectrum">\n+      <offset idRef="scan=1">4983</offset>\n+      <offset idRef="scan=2">7468</offset>\n+      <offset idRef="scan=3">9985</offset>\n+      <offset idRef="scan=4">12475</offset>\n+      <offset idRef="scan=5">17113</offset>\n+      <offset idRef="scan=6">21331</offset>\n+    </index>\n+  
</indexList>\n+  <indexListOffset>25171</indexListOffset>\n+  <fileChecksum>0a78dcf8705de44cf7673d1a237899b2eec9bafe</fileChecksum>\n+</indexedmzML>\n'