Mercurial > repos > jjohnson > encyclopedia_searchtolib
changeset 1:a7d52df6763f draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/encyclopedia/tools/encyclopedia commit 81e7c4d3d6066b99ad50374292f340302dc4f02d"
author | jjohnson |
---|---|
date | Tue, 30 Jun 2020 11:43:03 -0400 |
parents | 683b98838cf0 |
children | a74d446c8f5f |
files | encyclopedia_searchtolib.xml encyclopedia_searchtolib.xml.bak macros.xml static/images/SearchToLib_Workflow.png |
diffstat | 4 files changed, 215 insertions(+), 167 deletions(-) [+] |
line wrap: on
line diff
--- a/encyclopedia_searchtolib.xml Fri Jun 19 10:21:31 2020 -0400 +++ b/encyclopedia_searchtolib.xml Tue Jun 30 11:43:03 2020 -0400 @@ -1,5 +1,5 @@ <tool id="encyclopedia_searchtolib" name="SearchToLib" version="@VERSION@.0"> - <description>PeCAn-based Peptide Detection Directly from Data-Independent Acquisition (DIA) MS/MS Data</description> + <description>Build a Chromatogram Library from Data-Independent Acquisition (DIA) MS/MS Data</description> <macros> <import>macros.xml</import> </macros> @@ -36,23 +36,16 @@ @FASTA_INPUT@ @TARGET_FASTA@ @LIB_INPUT@ - ## @COMMON_OPTIONS@ - ## @MASS_LIBRARY_TOLERANCE@ - ## @PERCOLATOR_OPTIONS@ - ## @PEAK_OPTIONS@ - ## @WINDOW_OPTIONS@ - ## @MODIFICATION_OPTIONS@ - ## @SEARCH_OPTIONS@ -a $a -o chromatogram_library.elib | tee -a search2lib.log ]]></command> <inputs> <expand macro="scan_inputs"/> - <expand macro="lib_input" token_optional="true" token_help="If a prosit dlib is supplied, use EncycopeDIA, else use walnut"/> + <expand macro="lib_input" optional="true" libhelp="Use a Prosit dlib spectral library to make a chromatogram elib using EncyclopeDIA, or else leave blank to make a Chromatogram library from just the fasta using Walnut"/> <expand macro="fasta_input"/> <expand macro="target_fasta"/> - <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="match between runs"/> + <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="align between files" help="retention-time alignment of peptides is generally not needed when using narrow-window spectrums"/> <expand macro="common_options"/> <expand macro="mass_library_tolerance"/> <expand macro="percolator_options"/> @@ -63,11 +56,13 @@ <param name="select_outputs" type="select" label="Select outputs" multiple="true"> <option value="log" selected="true">log</option> <option value="elib" selected="true">elib</option> - <option value="features" selected="true">concatenated_features.txt</option> - <option 
value="results" selected="true">concatenated_results.txt</option> + <option value="features" selected="false">concatenated_features.txt</option> + <option value="results" selected="false">concatenated_results.txt</option> <option value="decoy" selected="false">concatenated_decoy.txt</option> - <option value="peptides" selected="true">peptides.txt (requires match between runs)</option> - <option value="proteins" selected="true">proteins.txt (requires match between runs)</option> + <!-- + <option value="peptides" selected="false">peptides.txt (requires align between files)</option> + <option value="proteins" selected="false">proteins.txt (requires align between files)</option> + --> </param> </inputs> <outputs> @@ -95,8 +90,9 @@ <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" /> </actions> </data> + <!-- <data name="peptides" format="tabular" label="${tool.name} ${on_string} peptides.txt" from_work_dir="chromatogram_library.peptides.txt"> - <filter>a and peptides' in select_outputs</filter> + <filter>a and 'peptides' in select_outputs</filter> <actions> <action name="column_names" type="metadata" default="Peptide,Protein,numFragments" /> </actions> @@ -107,56 +103,39 @@ <action name="column_names" type="metadata" default="Protein,NumPeptides,PeptideSequences" /> </actions> </data> -<!-- ---> + --> </outputs> <help><![CDATA[ -SearchToLIB uses EncyclopeDIA, Walnut (Pecan), or XCorDIA to create an DIA Elib for EncyclopeDIA DIA search -Walnut is a FASTA database search engine for DIA data that uses PECAN-style scoring. -You should prefix your arguments with a high memory setting, e.g. "-Xmx8g" for 8gb + +**SearchToLIB** + +@ENCYCLOPEDIA_WIKI@ + +SearchToLIB uses the EncyclopeDIA algorithm, or the Walnut (Pecan) algorithm to search Data-Independent Acquisition (DIA) MS/MS spectrum files and creates a DIA elib chromatogram library for EncyclopeDIA DIA quantitation search. 
-Required Parameters: - -i input .DIA or .MZML file - -f background FASTA file +SearchToLIB can also quantify peptides from the chromatogram library. + +@MSCONVERT_HELP@ + +**Typical DIA Workflow** + +Two sets of Mass Spec MS/MS DIA data are collected for the experiment. In addition to collecting wide-window DIA experiments on each quantitative replicate, a pool containing peptides from every condition is measured using several staggered narrow-window DIA experiments. -Other Parameters: - -t target FASTA file (default: background FASTA file) - -tp true/false target FASTA file contains peptides (default: false) - -o output report file (default: [input file].pecan.txt) - -acquisition (default: overlapping dia) - -addDecoysToBackground (default: false) - -alpha (default: 1.8) - -beta (default: 0.4) - -dontRunDecoys (default: false) - -enzyme (default: trypsin) - -filterPeaklists (default: false) - -fixed (default: C=57.0214635) - -foffset (default: 0) - -frag (default: YONLY) - -ftol (default: 10) - -ftolunits (default: ppm) - -maxCharge (default: 3) - -maxLength (default: 100) - -maxMissedCleavage (default: 1) - -minCharge (default: 2) - -minEluteTime (default: 12) - -minIntensity (default: -1.0) - -minLength (default: 5) - -minNumOfQuantitativePeaks (default: 3) - -minQuantitativeIonNumber (default: 3) - -numberOfQuantitativePeaks (default: 5) - -numberOfReportedPeaks (default: 1) - -numberOfThreadsUsed (default: 12) - -percolatorProteinThreshold (default: 0.01) - -percolatorThreshold (default: 0.01) - -percolatorVersionNumber (default: 3) - -poffset (default: 0) - -precursorIsolationMargin (default: 0) - -precursorWindowSize (default: -1) - -ptol (default: 10) - -ptolunits (default: ppm) - -requireVariableMods (default: false) - -variable (default: -) + 1. SearchToLib is first run with the pooled narrow-window mzML files to create a combined DIA elib chromatogram library. 
+ If a Spectral library argument is provided, for example from **Prosit**, SearchToLIB uses EncyclopeDIA to search each input spectrum mzML file. + Otherwise, SearchToLIB uses Walnut, a FASTA database search engine for DIA data that uses PECAN-style scoring. + + + * Prosit_ generates a predicted spectrum library of fragmentation patterns and retention times for every +2H and +3H tryptic peptide in a FASTA database, with up to one missed cleavage. + + + 2. EncyclopeDIA Quantify is then run on the wide-window quantitative replicate mzML files using that chromatogram library to produce quantification results. + +.. image:: SearchToLib_Workflow.png + :height: 439 + :width: 768 + +.. _Prosit: https://www.proteomicsdb.org/prosit ]]></help> <expand macro="citations" />
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/encyclopedia_searchtolib.xml.bak Tue Jun 30 11:43:03 2020 -0400 @@ -0,0 +1,138 @@ +<tool id="encyclopedia_searchtolib" name="SearchToLib" version="@VERSION@.0"> + <description>Build a Chromatogram Library or quantify samples from Data-Independent Acquisition (DIA) MS/MS Data</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <command detect_errors="exit_code"><![CDATA[ + @CMD_IMPORTS@ + @LINK_SCAN_INPUTS@ + @LINK_FASTA_INPUT@ + @LINK_TARGET_FASTA@ + @LINK_LIB_INPUT@ + for SCAN_FILE in `ls -1 inputs/*`; do + echo "\$SCAN_FILE" && + EncyclopeDIA -Djava.awt.headless=true -Xmx20g + #if not $l + -walnut + #end if + -i \$SCAN_FILE + @FASTA_INPUT@ + @TARGET_FASTA@ + @LIB_INPUT@ + @COMMON_OPTIONS@ + @MASS_LIBRARY_TOLERANCE@ + @PERCOLATOR_OPTIONS@ + @PEAK_OPTIONS@ + @WINDOW_OPTIONS@ + @MODIFICATION_OPTIONS@ + @SEARCH_OPTIONS@ | tee -a search2lib.log + ; done && + EncyclopeDIA -Djava.awt.headless=true -Xmx12g -libexport + #if not $l + -pecan + #end if + @SCAN_INPUTS@ + @FASTA_INPUT@ + @TARGET_FASTA@ + @LIB_INPUT@ + -a $a + -o chromatogram_library.elib + | tee -a search2lib.log + ]]></command> + <inputs> + <expand macro="scan_inputs"/> + <expand macro="lib_input" optional="true" libhelp="Use a Chromatogram elib for quantification, or a Prosit dlib spectral library to make a chromatogram elib using EncyclopeDIA, or else leave blank to make a Chromatogram library from just the fasta using Walnut"/> + <expand macro="fasta_input"/> + <expand macro="target_fasta"/> + <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="align between files" help="retention-time alignment of peptides should be enabled when quantifying samples"/> + <expand macro="common_options"/> + <expand macro="mass_library_tolerance"/> + <expand macro="percolator_options"/> + <expand macro="peak_options"/> + <expand macro="window_options"/> + <expand 
macro="modification_options"/> + <expand macro="search_options"/> + <param name="select_outputs" type="select" label="Select outputs" multiple="true"> + <option value="log" selected="true">log</option> + <option value="elib" selected="true">elib</option> + <option value="features" selected="true">concatenated_features.txt</option> + <option value="results" selected="true">concatenated_results.txt</option> + <option value="decoy" selected="false">concatenated_decoy.txt</option> + <option value="peptides" selected="true">peptides.txt (requires match between runs)</option> + <option value="proteins" selected="true">proteins.txt (requires match between runs)</option> + </param> + </inputs> + <outputs> + <data name="log" format="txt" label="${tool.name} ${on_string} log" from_work_dir="search2lib.log"> + <filter>'log' in select_outputs</filter> + </data> + <data name="elib" format="elib" label="${tool.name} ${on_string} elib" from_work_dir="chromatogram_library.elib"> + <filter>'elib' in select_outputs</filter> + </data> + <data name="features" format="tabular" label="${tool.name} ${on_string} concatenated_features.txt" from_work_dir="inputs/chromatogram_library_concatenated_features.txt"> + <filter>'features' in select_outputs</filter> + <actions> + <action name="column_names" type="metadata" default="id,TD,ScanNr,topN,rank,peakZScore,peakCalibratedScore,deltaSn,avgIdotp,midIdotp,peakScore,peakWeightedScore,NCI,CIMassErrMean,CIMassErrVar,precursorMassErrMean,precursorMassErrVar,peakSimilarity,sampledTimes,midTime,spectraNorm,pepLength,charge2,charge3,precursorMz,sequence,protein" /> + </actions> + </data> + <data name="results" format="tabular" label="${tool.name} ${on_string} concatenated_results.txt" from_work_dir="inputs/chromatogram_library_concatenated_results.txt"> + <filter>'results' in select_outputs</filter> + <actions> + <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" /> + </actions> + </data> 
+ <data name="decoy" format="tabular" label="${tool.name} ${on_string} concatenated_decoy.txt" from_work_dir="inputs/chromatogram_library_concatenated_decoy.txt"> + <filter>'decoy' in select_outputs</filter> + <actions> + <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" /> + </actions> + </data> + <data name="peptides" format="tabular" label="${tool.name} ${on_string} peptides.txt" from_work_dir="chromatogram_library.peptides.txt"> + <filter>a and 'peptides' in select_outputs</filter> + <actions> + <action name="column_names" type="metadata" default="Peptide,Protein,numFragments" /> + </actions> + </data> + <data name="proteins" format="tabular" label="${tool.name} ${on_string} proteins.txt" from_work_dir="chromatogram_library.proteins.txt"> + <filter>a and 'proteins' in select_outputs</filter> + <actions> + <action name="column_names" type="metadata" default="Protein,NumPeptides,PeptideSequences" /> + </actions> + </data> + </outputs> + <help><![CDATA[ + +**SearchToLIB** + +@ENCYCLOPEDIA_WIKI@ + +SearchToLIB uses the EncyclopeDIA algorithm, or the Walnut (Pecan) algorithm to search Data-Independent Acquisition (DIA) MS/MS spectrum files and creates a DIA elib chromatogram library for EncyclopeDIA DIA quantitation search. + +SearchToLIB can also quantify peptides from the chromatogram library. + +@MSCONVERT_HELP@ + +**Typical DIA SearchToLib Workflow** + +Two sets of Mass Spec MS/MS DIA data are collected for the experiment. In addition to collecting wide-window DIA experiments on each quantitative replicate, a pool containing peptides from every condition is measured using several staggered narrow-window DIA experiments. + + 1. SearchToLib is first run with the pooled narrow-window mzML files to create a combined DIA elib chromatogram library. + If a Spectral library argument is provided, for example from **Prosit**, SearchToLIB uses EncyclopeDIA to search each input spectrum mzML file. 
+ Otherwise, SearchToLIB uses Walnut, a FASTA database search engine for DIA data that uses PECAN-style scoring. + + + * Prosit_ generates a predicted spectrum library of fragmentation patterns and retention times for every +2H and +3H tryptic peptide in a FASTA database, with up to one missed cleavage. + + + 2. SearchToLib is then run on the wide-window quantitative replicate mzML files using that chromatogram library, with the *align between files* option, to produce quantification results. + +.. image:: SearchToLib_Workflow.png + :height: 439 + :width: 768 + +.. _Prosit: https://www.proteomicsdb.org/prosit + + ]]></help> + <expand macro="citations" /> +</tool>
--- a/macros.xml Fri Jun 19 10:21:31 2020 -0400 +++ b/macros.xml Tue Jun 30 11:43:03 2020 -0400 @@ -30,11 +30,17 @@ <yield/> </requirements> </xml> + <token name="@ENCYCLOPEDIA_WIKI@"> +EncyclopeDIA_ is a library search engine comprised of several algorithms for DIA data analysis and can search for peptides using either DDA-based spectrum libraries or DIA-based chromatogram libraries. + +.. _EncyclopeDIA: https://bitbucket.org/searleb/encyclopedia/wiki/Home + </token> <xml name="citations"> <citations> <citation type="doi">10.1038/s41467-018-07454-w</citation> <citation type="doi">10.1038/s41467-020-15346-1</citation> - <yield /> + <citation type="doi">10.1074/mcp.P119.001913</citation> + <yield/> </citations> </xml> <token name="@CMD_IMPORTS@"> @@ -77,7 +83,9 @@ </token> <xml name="scan_input"> - <param argument="-i" type="data" format="imzml,mzml" label="Spectrum file, .mzml or .dia"/> + <param argument="-i" type="data" format="imzml,mzml" label="Spectrum file in mzML format"> + <help>@MSCONVERT_RAW@</help> + </param> </xml> <token name="@LINK_SCAN_INPUT@"><![CDATA[ #set $i_name = $ln_name($i) @@ -88,7 +96,9 @@ </token> <xml name="scan_inputs"> - <param argument="-i" type="data" format="imzml,mzml" multiple="true" label="Spectrum file, .mzml or .dia"/> + <param argument="-i" type="data" format="imzml,mzml" multiple="true" label="Spectrum files in mzML format"> + <help>@MSCONVERT_RAW@</help> + </param> </xml> <token name="@LINK_SCAN_INPUTS@"><![CDATA[ #set $inputs_dir = 'inputs' @@ -103,7 +113,9 @@ </token> <xml name="fasta_input"> - <param argument="-f" type="data" format="fasta" label="Background protein fasta database"/> + <param argument="-f" type="data" format="fasta" label="Background proteome protein fasta database"> + <help>provides the necessary peptide-to-protein links not specified in the spectrum library</help> + </param> </xml> <token name="@LINK_FASTA_INPUT@"><![CDATA[ #set $f_name = $ln_name($f) @@ -114,7 +126,7 @@ </token> <xml name="target_fasta"> - 
<param argument="-t" type="data" format="fasta" label="target FASTA file" optional="true"/> + <param argument="-t" type="data" format="fasta" label="Target fasta database" optional="true"/> <param argument="-tp" type="boolean" truevalue="true" falsevalue="false" checked="false" label="target FASTA file contains peptides"/> </xml> <token name="@LINK_TARGET_FASTA@"><![CDATA[ @@ -132,9 +144,9 @@ #end if </token> - <xml name="lib_input" token_optional="true" token_help=""> + <xml name="lib_input" token_optional="true" token_libhelp=""> <param argument="-l" type="data" format="elib,dlib" optional="@OPTIONAL@" label="Library: Chromatagram .ELIB or Spectrum .DLIB"> - <help>@HELP@</help> + <help>@LIBHELP@</help> </param> </xml> <token name="@LINK_LIB_INPUT@"><![CDATA[ @@ -484,106 +496,25 @@ ## -dontRunDecoys $search.dontRunDecoys #end if </token> - <!-- -minNumOfQuantitativePeaks minQuantitativeIonNumber numberOfQuantitativePeaks numberOfReportedPeaksu - +acquisition (default: overlapping dia) - +addDecoysToBackground (default: false) - +alpha (default: 1.8) - +beta (default: 0.4) - +dontRunDecoys (default: false) - +enzyme (default: trypsin) - +filterPeaklists (default: false) - +fixed (default: C=57.0214635) - +foffset (default: 0) - =frag (default: YONLY) - +ftol (default: 10) - +ftolunits (default: ppm) - +maxCharge (default: 3) - +ftolunits (default: ppm) - +maxCharge (default: 3) - +maxLength (default: 100) - +maxMissedCleavage (default: 1) - +minCharge (default: 2) - +minEluteTime (default: 12) - +minIntensity (default: -1.0) - +minLength (default: 5) - +minNumOfQuantitativePeaks (default: 3) - +minQuantitativeIonNumber (default: 3) - +numberOfQuantitativePeaks (default: 5) - -numberOfReportedPeaks (default: 1) - -numberOfThreadsUsed (default: 12) - +percolatorProteinThreshol (default: 0.01) - +percolatorThreshold (default: 0.01) - +percolatorVersionNumber (default: 3) - +poffset (default: 0) - +precursorIsolationMargin (default: 0) - +precursorWindowSize (default: 
-1) - +ptol (default: 10) - +ptolunits (default: ppm) - -requireVariableMods (default: false) - -variable (default: -) - --> <xml name="libexport"> <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="align between files"/> </xml> -</macros> -<!-- -e w t x l param -+:+:+:+:+ i -+:+:+:+:+ l -+:+:+:+:+ f - -+:+:+:+:+ t --:+:-:+:- tp --:+:-:+:+ a - -+:+:+:+:+ o + <token name="@MSCONVERT_CMD@"><![CDATA[ + msconvert --zlib --64 --mzML --simAsSpectra --filter "peakPicking true 1-" --filter "demultiplex optimization=overlap_only" *.raw +]]> + </token> + <token name="@MSCONVERT_RAW@"><![CDATA[ +mzML conversion from RAW requires special options: @MSCONVERT_CMD@ +]]> + </token> + <token name="@MSCONVERT_HELP@"><![CDATA[ -+:+:+:+:- acquisition --:+:-:+:- addDecoysToBackground --:+:-:+:- alpha --:+:-:+:- beta --:-:-:-:+ blib --:+:-:+:- dontRunDecoys -+:+:+:+:- enzyme -+:-:+:-:- expectedPeakWidth -+:+:+:+:- filterPeaklists -+:+:+:+:+ fixed -+:+:+:+:+ foffset -+:+:+:+:- frag -+:+:+:+:+ ftol -+:+:+:+:+ ftolunits -+:-:+:-:- lftol -+:-:+:-:- lftolunits -+:-:-:-:- libexport -+:-:+:-:+ localizationModification --:+:-:+:- maxCharge --:+:-:+:- maxLength --:+:-:+:- maxMissedCleavage --:+:-:+:- minCharge --:+:-:+:- minEluteTime -+:+:+:+:- minIntensity --:+:-:+:- minLength -+:+:+:+:+ minNumOfQuantitativePeaks -+:+:+:+:+ minQuantitativeIonNumber -+:-:+:-:+ numberOfExtraDecoyLibrariesSearched -+:+:+:+:+ numberOfQuantitativePeaks --:+:-:+:- numberOfReportedPeaks --:+:-:+:- numberOfThreadsUsed --:-:-:-:+ percolatorLocation --:+:-:+:- percolatorProteinThreshol -+:-:+:-:+ percolatorProteinThreshold -+:+:+:+:+ percolatorThreshold -+:+:+:+:- percolatorVersionNumber --:-:-:-:+ phospho -+:+:+:+:- poffset -+:+:+:+:- precursorIsolationMargin -+:+:+:+:- precursorWindowSize -+:+:+:+:- ptol -+:+:+:+:- ptolunits --:+:-:+:- requireVariableMods -+:-:+:-:- rtWindowInMin -+:-:+:-:- scoringBreadthType --:+:-:+:- variable -+:-:+:-:- verifyModificationIons ---> + The 
MSConvert command can be used to deconvolute DIA raw files. You need to use these options: + + :: + + @MSCONVERT_CMD@ + +]]> + </token> +</macros>