Mercurial > repos > jjohnson > encyclopedia_searchtolib

--- a/encyclopedia_searchtolib.xml	Fri Jun 19 10:21:31 2020 -0400
+++ b/encyclopedia_searchtolib.xml	Tue Jun 30 11:43:03 2020 -0400
@@ -1,5 +1,5 @@
 <tool id="encyclopedia_searchtolib" name="SearchToLib" version="@VERSION@.0">
-    <description>PeCAn-based Peptide Detection Directly from Data-Independent Acquisition (DIA) MS/MS Data</description>
+    <description>Build a Chromatogram Library from Data-Independent Acquisition (DIA) MS/MS Data</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -36,23 +36,16 @@
         @FASTA_INPUT@
         @TARGET_FASTA@
         @LIB_INPUT@
-        ## @COMMON_OPTIONS@
-        ## @MASS_LIBRARY_TOLERANCE@
-        ## @PERCOLATOR_OPTIONS@
-        ## @PEAK_OPTIONS@
-        ## @WINDOW_OPTIONS@
-        ## @MODIFICATION_OPTIONS@
-        ## @SEARCH_OPTIONS@
         -a $a
         -o chromatogram_library.elib
         | tee -a search2lib.log
     ]]></command>
     <inputs>
         <expand macro="scan_inputs"/>
-        <expand macro="lib_input" token_optional="true" token_help="If a prosit dlib is supplied, use EncycopeDIA, else use walnut"/>
+        <expand macro="lib_input" optional="true" libhelp="Use a Prosit dlib spectral library to make a chromatogram elib using EncyclopeDIA, or else leave blank to make a Chromatogram library from just the fasta using Walnut"/>
         <expand macro="fasta_input"/>
         <expand macro="target_fasta"/>
-        <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="match between runs"/>
+        <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="align between files" help="retention-time alignment of peptides is generally not needed when using narrow-window spectrums"/>
         <expand macro="common_options"/>
         <expand macro="mass_library_tolerance"/>
         <expand macro="percolator_options"/>
@@ -63,11 +56,13 @@
         <param name="select_outputs" type="select" label="Select outputs" multiple="true">
             <option value="log" selected="true">log</option>
             <option value="elib" selected="true">elib</option>
-            <option value="features" selected="true">concatenated_features.txt</option>
-            <option value="results" selected="true">concatenated_results.txt</option>
+            <option value="features" selected="false">concatenated_features.txt</option>
+            <option value="results" selected="false">concatenated_results.txt</option>
             <option value="decoy" selected="false">concatenated_decoy.txt</option>
-            <option value="peptides" selected="true">peptides.txt (requires match between runs)</option>
-            <option value="proteins" selected="true">proteins.txt (requires match between runs)</option>
+            <!--
+            <option value="peptides" selected="false">peptides.txt (requires align between files)</option>
+            <option value="proteins" selected="false">proteins.txt (requires align between files)</option>
+            -->
         </param>
     </inputs>
     <outputs>
@@ -95,8 +90,9 @@
                 <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" />
             </actions>
         </data>
+        <!--
         <data name="peptides" format="tabular" label="${tool.name} ${on_string} peptides.txt" from_work_dir="chromatogram_library.peptides.txt">
-            <filter>a and peptides' in select_outputs</filter>
+            <filter>a and 'peptides' in select_outputs</filter>
             <actions>
                 <action name="column_names" type="metadata" default="Peptide,Protein,numFragments" />
             </actions>
@@ -107,56 +103,39 @@
                 <action name="column_names" type="metadata" default="Protein,NumPeptides,PeptideSequences" />
             </actions>
         </data>
-<!--
--->
+        -->
     </outputs>
     <help><![CDATA[
-SearchToLIB uses EncyclopeDIA, Walnut (Pecan), or XCorDIA to create an DIA Elib for EncyclopeDIA DIA search
-Walnut is a FASTA database search engine for DIA data that uses PECAN-style scoring.
-You should prefix your arguments with a high memory setting, e.g. "-Xmx8g" for 8gb
+
+**SearchToLIB**
+
+@ENCYCLOPEDIA_WIKI@
+
+SearchToLIB uses either the EncyclopeDIA algorithm or the Walnut (Pecan) algorithm to search Data-Independent Acquisition (DIA) MS/MS spectrum files and create a DIA elib chromatogram library for EncyclopeDIA DIA quantitation search.

-Required Parameters:
-	-i	input .DIA or .MZML file
-	-f	background FASTA file
+SearchToLIB can also quantify peptides from the chromatogram library.
+
+@MSCONVERT_HELP@
+
+**Typical DIA Workflow**
+
+Two sets of Mass Spec MS/MS DIA data are collected for the experiment.  In addition to collecting wide-window DIA experiments on each quantitative replicate, a pool containing peptides from every condition is measured using several staggered narrow-window DIA experiments.

-Other Parameters:
-	-t	target FASTA file (default: background FASTA file)
-	-tp	true/false target FASTA file contains peptides (default: false)
-	-o	output report file (default: [input file].pecan.txt)
-	-acquisition                          (default: overlapping dia)
-	-addDecoysToBackground                (default: false)
-	-alpha                                (default: 1.8)
-	-beta                                 (default: 0.4)
-	-dontRunDecoys                        (default: false)
-	-enzyme                               (default: trypsin)
-	-filterPeaklists                      (default: false)
-	-fixed                                (default: C=57.0214635)
-	-foffset                              (default: 0)
-	-frag                                 (default: YONLY)
-	-ftol                                 (default: 10)
-	-ftolunits                            (default: ppm)
-	-maxCharge                            (default: 3)
-	-maxLength                            (default: 100)
-	-maxMissedCleavage                    (default: 1)
-	-minCharge                            (default: 2)
-	-minEluteTime                         (default: 12)
-	-minIntensity                         (default: -1.0)
-	-minLength                            (default: 5)
-	-minNumOfQuantitativePeaks            (default: 3)
-	-minQuantitativeIonNumber             (default: 3)
-	-numberOfQuantitativePeaks            (default: 5)
-	-numberOfReportedPeaks                (default: 1)
-	-numberOfThreadsUsed                  (default: 12)
-	-percolatorProteinThreshold           (default: 0.01)
-	-percolatorThreshold                  (default: 0.01)
-	-percolatorVersionNumber              (default: 3)
-	-poffset                              (default: 0)
-	-precursorIsolationMargin             (default: 0)
-	-precursorWindowSize                  (default: -1)
-	-ptol                                 (default: 10)
-	-ptolunits                            (default: ppm)
-	-requireVariableMods                  (default: false)
-	-variable                             (default: -)
+  1. SearchToLib is first run with the pooled narrow-window mzML files to create a combined DIA elib chromatogram library.
+     If a Spectral library argument is provided, for example from **Prosit**, SearchToLIB uses EncyclopeDIA to search each input spectrum mzML file.
+     Otherwise, SearchToLIB uses Walnut, a FASTA database search engine for DIA data that uses PECAN-style scoring.
+
+
+       * Prosit_ generates a predicted spectrum library of fragmentation patterns and retention times for every +2H and +3H tryptic peptide in a FASTA database, with up to one missed cleavage.
+
+
+  2. EncyclopeDIA Quantify is then run on the wide-window quantitative replicate mzML files using that chromatogram library to produce quantification results.
+
+.. image:: SearchToLib_Workflow.png
+  :height: 439
+  :width: 768
+
+.. _Prosit: https://www.proteomicsdb.org/prosit

     ]]></help>
     <expand macro="citations" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/encyclopedia_searchtolib.xml.bak	Tue Jun 30 11:43:03 2020 -0400
@@ -0,0 +1,138 @@
+<tool id="encyclopedia_searchtolib" name="SearchToLib" version="@VERSION@.0">
+    <description>Build a Chromatogram Library or quantify samples from Data-Independent Acquisition (DIA) MS/MS Data</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        @CMD_IMPORTS@
+        @LINK_SCAN_INPUTS@
+        @LINK_FASTA_INPUT@
+        @LINK_TARGET_FASTA@
+        @LINK_LIB_INPUT@
+        for SCAN_FILE in `ls -1 inputs/*`; do
+        echo "\$SCAN_FILE" &&
+        EncyclopeDIA -Djava.awt.headless=true -Xmx20g
+        #if not $l
+            -walnut
+        #end if
+        -i \$SCAN_FILE
+        @FASTA_INPUT@
+        @TARGET_FASTA@
+        @LIB_INPUT@
+        @COMMON_OPTIONS@
+        @MASS_LIBRARY_TOLERANCE@
+        @PERCOLATOR_OPTIONS@
+        @PEAK_OPTIONS@
+        @WINDOW_OPTIONS@
+        @MODIFICATION_OPTIONS@
+        @SEARCH_OPTIONS@ | tee -a search2lib.log
+        ; done &&
+        EncyclopeDIA -Djava.awt.headless=true -Xmx12g -libexport
+        #if not $l
+            -pecan
+        #end if
+        @SCAN_INPUTS@
+        @FASTA_INPUT@
+        @TARGET_FASTA@
+        @LIB_INPUT@
+        -a $a
+        -o chromatogram_library.elib
+        | tee -a search2lib.log
+    ]]></command>
+    <inputs>
+        <expand macro="scan_inputs"/>
+        <expand macro="lib_input" optional="true" libhelp="Use a Chromatogram elib for quantification, or a Prosit dlib spectral library to make a chromatogram elib using EncyclopeDIA, or else leave blank to make a Chromatogram library from just the fasta using Walnut"/>
+        <expand macro="fasta_input"/>
+        <expand macro="target_fasta"/>
+        <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="align between files" help="retention-time alignment of peptides should be enabled when quantifying samples"/>
+        <expand macro="common_options"/>
+        <expand macro="mass_library_tolerance"/>
+        <expand macro="percolator_options"/>
+        <expand macro="peak_options"/>
+        <expand macro="window_options"/>
+        <expand macro="modification_options"/>
+        <expand macro="search_options"/>
+        <param name="select_outputs" type="select" label="Select outputs" multiple="true">
+            <option value="log" selected="true">log</option>
+            <option value="elib" selected="true">elib</option>
+            <option value="features" selected="true">concatenated_features.txt</option>
+            <option value="results" selected="true">concatenated_results.txt</option>
+            <option value="decoy" selected="false">concatenated_decoy.txt</option>
+            <option value="peptides" selected="true">peptides.txt (requires match between runs)</option>
+            <option value="proteins" selected="true">proteins.txt (requires match between runs)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="log" format="txt" label="${tool.name} ${on_string} log" from_work_dir="search2lib.log">
+            <filter>'log' in select_outputs</filter>
+        </data>
+        <data name="elib" format="elib" label="${tool.name} ${on_string} elib" from_work_dir="chromatogram_library.elib">
+            <filter>'elib' in select_outputs</filter>
+        </data>
+        <data name="features" format="tabular" label="${tool.name} ${on_string} concatenated_features.txt" from_work_dir="inputs/chromatogram_library_concatenated_features.txt">
+            <filter>'features' in select_outputs</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="id,TD,ScanNr,topN,rank,peakZScore,peakCalibratedScore,deltaSn,avgIdotp,midIdotp,peakScore,peakWeightedScore,NCI,CIMassErrMean,CIMassErrVar,precursorMassErrMean,precursorMassErrVar,peakSimilarity,sampledTimes,midTime,spectraNorm,pepLength,charge2,charge3,precursorMz,sequence,protein" />
+            </actions>
+        </data>
+        <data name="results" format="tabular" label="${tool.name} ${on_string} concatenated_results.txt" from_work_dir="inputs/chromatogram_library_concatenated_results.txt">
+            <filter>'results' in select_outputs</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" />
+            </actions>
+        </data>
+        <data name="decoy" format="tabular" label="${tool.name} ${on_string} concatenated_decoy.txt" from_work_dir="inputs/chromatogram_library_concatenated_decoy.txt">
+            <filter>'decoy' in select_outputs</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" />
+            </actions>
+        </data>
+        <data name="peptides" format="tabular" label="${tool.name} ${on_string} peptides.txt" from_work_dir="chromatogram_library.peptides.txt">
+            <filter>a and 'peptides' in select_outputs</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="Peptide,Protein,numFragments" />
+            </actions>
+        </data>
+        <data name="proteins" format="tabular" label="${tool.name} ${on_string} proteins.txt" from_work_dir="chromatogram_library.proteins.txt">
+            <filter>a and 'proteins' in select_outputs</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="Protein,NumPeptides,PeptideSequences" />
+            </actions>
+        </data>
+    </outputs>
+    <help><![CDATA[
+
+**SearchToLIB**
+
+@ENCYCLOPEDIA_WIKI@
+
+SearchToLIB uses either the EncyclopeDIA algorithm or the Walnut (Pecan) algorithm to search Data-Independent Acquisition (DIA) MS/MS spectrum files and create a DIA elib chromatogram library for EncyclopeDIA DIA quantitation search.
+
+SearchToLIB can also quantify peptides from the chromatogram library.
+
+@MSCONVERT_HELP@
+
+**Typical DIA SearchToLib Workflow**
+
+Two sets of Mass Spec MS/MS DIA data are collected for the experiment.  In addition to collecting wide-window DIA experiments on each quantitative replicate, a pool containing peptides from every condition is measured using several staggered narrow-window DIA experiments.
+
+  1. SearchToLib is first run with the pooled narrow-window mzML files to create a combined DIA elib chromatogram library.
+     If a Spectral library argument is provided, for example from **Prosit**, SearchToLIB uses EncyclopeDIA to search each input spectrum mzML file.
+     Otherwise, SearchToLIB uses Walnut, a FASTA database search engine for DIA data that uses PECAN-style scoring.
+
+
+       * Prosit_ generates a predicted spectrum library of fragmentation patterns and retention times for every +2H and +3H tryptic peptide in a FASTA database, with up to one missed cleavage.
+
+
+  2. SearchToLib is then run on the wide-window quantitative replicate mzML files using that chromatogram library, with the *align between files* option, to produce quantification results.
+
+.. image:: SearchToLib_Workflow.png
+  :height: 439
+  :width: 768
+
+.. _Prosit: https://www.proteomicsdb.org/prosit
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- a/macros.xml	Fri Jun 19 10:21:31 2020 -0400
+++ b/macros.xml	Tue Jun 30 11:43:03 2020 -0400
@@ -30,11 +30,17 @@
             <yield/>
         </requirements>
     </xml>
+    <token name="@ENCYCLOPEDIA_WIKI@">
+EncyclopeDIA_ is a library search engine comprising several algorithms for DIA data analysis; it can search for peptides using either DDA-based spectrum libraries or DIA-based chromatogram libraries.
+
+.. _EncyclopeDIA: https://bitbucket.org/searleb/encyclopedia/wiki/Home
+    </token>
     <xml name="citations">
         <citations>
             <citation type="doi">10.1038/s41467-018-07454-w</citation>
             <citation type="doi">10.1038/s41467-020-15346-1</citation>
-            <yield />
+            <citation type="doi">10.1074/mcp.P119.001913</citation>
+            <yield/>
         </citations>
     </xml>
     <token name="@CMD_IMPORTS@">
@@ -77,7 +83,9 @@
     </token>

     <xml name="scan_input">
-        <param argument="-i" type="data" format="imzml,mzml" label="Spectrum file, .mzml or .dia"/>
+        <param argument="-i" type="data" format="imzml,mzml" label="Spectrum file in mzML format">
+            <help>@MSCONVERT_RAW@</help>
+        </param>
     </xml>
     <token name="@LINK_SCAN_INPUT@"><![CDATA[
     #set $i_name = $ln_name($i)
@@ -88,7 +96,9 @@
     </token>

     <xml name="scan_inputs">
-        <param argument="-i" type="data" format="imzml,mzml" multiple="true" label="Spectrum file, .mzml or .dia"/>
+        <param argument="-i" type="data" format="imzml,mzml" multiple="true" label="Spectrum files in  mzML format">
+            <help>@MSCONVERT_RAW@</help>
+        </param>
     </xml>
     <token name="@LINK_SCAN_INPUTS@"><![CDATA[
     #set $inputs_dir = 'inputs'
@@ -103,7 +113,9 @@
     </token>

     <xml name="fasta_input">
-        <param argument="-f" type="data" format="fasta" label="Background protein fasta database"/>
+        <param argument="-f" type="data" format="fasta" label="Background proteome protein fasta database">
+            <help>provides the necessary peptide-to-protein links not specified in the spectrum library</help>
+        </param>
     </xml>
     <token name="@LINK_FASTA_INPUT@"><![CDATA[
     #set $f_name = $ln_name($f)
@@ -114,7 +126,7 @@
     </token>

     <xml name="target_fasta">
-        <param argument="-t" type="data" format="fasta" label="target FASTA file" optional="true"/>
+        <param argument="-t" type="data" format="fasta" label="Target fasta database" optional="true"/>
         <param argument="-tp" type="boolean" truevalue="true" falsevalue="false" checked="false" label="target FASTA file contains peptides"/>
     </xml>
     <token name="@LINK_TARGET_FASTA@"><![CDATA[
@@ -132,9 +144,9 @@
     #end if
     </token>

-    <xml name="lib_input" token_optional="true" token_help="">
+    <xml name="lib_input" token_optional="true" token_libhelp="">
         <param argument="-l" type="data" format="elib,dlib" optional="@OPTIONAL@" label="Library: Chromatagram .ELIB or Spectrum .DLIB">
-            <help>@HELP@</help>
+            <help>@LIBHELP@</help>
         </param>
     </xml>
     <token name="@LINK_LIB_INPUT@"><![CDATA[
@@ -484,106 +496,25 @@
         ## -dontRunDecoys $search.dontRunDecoys
     #end if
     </token>
-    <!--
-minNumOfQuantitativePeaks minQuantitativeIonNumber numberOfQuantitativePeaks numberOfReportedPeaksu
-	+acquisition                (default: overlapping dia)
-	+addDecoysToBackground      (default: false)
-	+alpha                      (default: 1.8)
-	+beta                       (default: 0.4)
-	+dontRunDecoys              (default: false)
-	+enzyme                     (default: trypsin)
-	+filterPeaklists            (default: false)
-	+fixed                      (default: C=57.0214635)
-	+foffset                    (default: 0)
-	=frag                       (default: YONLY)
-	+ftol                       (default: 10)
-	+ftolunits                  (default: ppm)
-	+maxCharge                  (default: 3)
-	+ftolunits                  (default: ppm)
-	+maxCharge                  (default: 3)
-	+maxLength                  (default: 100)
-	+maxMissedCleavage          (default: 1)
-	+minCharge                  (default: 2)
-	+minEluteTime               (default: 12)
-	+minIntensity               (default: -1.0)
-	+minLength                  (default: 5)
-	+minNumOfQuantitativePeaks  (default: 3)
-	+minQuantitativeIonNumber   (default: 3)
-	+numberOfQuantitativePeaks  (default: 5)
-	-numberOfReportedPeaks      (default: 1)
-	-numberOfThreadsUsed        (default: 12)
-	+percolatorProteinThreshol  (default: 0.01)
-	+percolatorThreshold        (default: 0.01)
-	+percolatorVersionNumber    (default: 3)
-	+poffset                    (default: 0)
-	+precursorIsolationMargin   (default: 0)
-	+precursorWindowSize        (default: -1)
-	+ptol                       (default: 10)
-	+ptolunits                  (default: ppm)
-	-requireVariableMods        (default: false)
-	-variable                   (default: -)
-   -->
     <xml name="libexport">
         <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="align between files"/>
     </xml>
-</macros>
-<!--
-e w t x l	param
-+:+:+:+:+	i
-+:+:+:+:+	l
-+:+:+:+:+	f
-
-+:+:+:+:+	t
--:+:-:+:-	tp
--:+:-:+:+	a
-
-+:+:+:+:+	o
+    <token name="@MSCONVERT_CMD@"><![CDATA[
+      msconvert  --zlib --64 --mzML --simAsSpectra --filter "peakPicking true 1-" --filter "demultiplex optimization=overlap_only" *.raw
+]]>
+    </token>
+    <token name="@MSCONVERT_RAW@"><![CDATA[
+mzML conversion from RAW requires special options: @MSCONVERT_CMD@
+]]>
+    </token>
+    <token name="@MSCONVERT_HELP@"><![CDATA[

-+:+:+:+:-	acquisition
--:+:-:+:-	addDecoysToBackground
--:+:-:+:-	alpha
--:+:-:+:-	beta
--:-:-:-:+	blib
--:+:-:+:-	dontRunDecoys
-+:+:+:+:-	enzyme
-+:-:+:-:-	expectedPeakWidth
-+:+:+:+:-	filterPeaklists
-+:+:+:+:+	fixed
-+:+:+:+:+	foffset
-+:+:+:+:-	frag
-+:+:+:+:+	ftol
-+:+:+:+:+	ftolunits
-+:-:+:-:-	lftol
-+:-:+:-:-	lftolunits
-+:-:-:-:-	libexport
-+:-:+:-:+	localizationModification
--:+:-:+:-	maxCharge
--:+:-:+:-	maxLength
--:+:-:+:-	maxMissedCleavage
--:+:-:+:-	minCharge
--:+:-:+:-	minEluteTime
-+:+:+:+:-	minIntensity
--:+:-:+:-	minLength
-+:+:+:+:+	minNumOfQuantitativePeaks
-+:+:+:+:+	minQuantitativeIonNumber
-+:-:+:-:+	numberOfExtraDecoyLibrariesSearched
-+:+:+:+:+	numberOfQuantitativePeaks
--:+:-:+:-	numberOfReportedPeaks
--:+:-:+:-	numberOfThreadsUsed
--:-:-:-:+	percolatorLocation
--:+:-:+:-	percolatorProteinThreshol
-+:-:+:-:+	percolatorProteinThreshold
-+:+:+:+:+	percolatorThreshold
-+:+:+:+:-	percolatorVersionNumber
--:-:-:-:+	phospho
-+:+:+:+:-	poffset
-+:+:+:+:-	precursorIsolationMargin
-+:+:+:+:-	precursorWindowSize
-+:+:+:+:-	ptol
-+:+:+:+:-	ptolunits
--:+:-:+:-	requireVariableMods
-+:-:+:-:-	rtWindowInMin
-+:-:+:-:-	scoringBreadthType
--:+:-:+:-	variable
-+:-:+:-:-	verifyModificationIons
--->
+    The MSConvert command can be used to deconvolute DIA raw files. You need to use these options:
+
+    ::
+
+      @MSCONVERT_CMD@
+
+]]>
+    </token>
+</macros>
Binary file static/images/SearchToLib_Workflow.png has changed