Repository 'flashlfq'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/flashlfq

Changeset 0:6d3560a3a548 (2018-01-25)
Next changeset 1:1e2fc34b1f20 (2018-01-25)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 7fafb40376213a4c59322517e6a6aaac38d7e376
added:
flashlfq.xml
test-data/aggregatePSMs_5ppmAroundZero.psmtsv
test-data/sliced-mzml.mzML
b
diff -r 000000000000 -r 6d3560a3a548 flashlfq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/flashlfq.xml Thu Jan 25 16:10:58 2018 -0500
[
b'@@ -0,0 +1,153 @@\n+<tool id="flashlfq" name="FlashLFQ" version="0.1.99">\n+    <description>ultrafast label-free quantification for mass-spectrometry proteomics</description>\n+    <requirements>\n+        <requirement type="package" version="0.1.99">flashlfq</requirement>\n+    </requirements>\n+    <command><![CDATA[\n+        #import re\n+        #set $idt_path = $re.sub(\'\\s\',\'_\',$re.sub(\'[.][^.]*$\',\'\',$idt.display_name.split(\'/\')[-1])) + ".psmtsv"\n+        ln -s \'${idt}\' \'${idt_path}\' &&\n+        #for $peak_list in $peak_lists:\n+            #set $input_name = $re.sub(\'[.][^.]*$\',\'\',$peak_list.display_name.split(\'/\')[-1]) + ".mzML"\n+            ln -s \'${peak_list}\' \'${input_name}\' &&\n+        #end for\n+\n+        FlashLFQ \n+        --idt \'$idt_path\'\n+        --rep `pwd`\n+        --ppm $ppm\n+        --iso $iso\n+        --nis $nis\n+        #if $intensity == \'integrate\':\n+            --int true\n+        #end if\n+        #if $charge == \'precursor\':\n+            --chg true\n+        #end if\n+        $rmm $mbr\n+        --pau false\n+        && cat *_FlashLFQ_Log.txt | sed \'s/\\(Analysis summary for:\\).*working./\\1 /\' > \'$log\' \n+        && cp *_FlashLFQ_QuantifiedBaseSequences.tsv \'$quantifiedBaseSequences\'\n+        && cp *_FlashLFQ_QuantifiedModifiedSequences.tsv \'$quantifiedModifiedSequences\'\n+        && cp *_FlashLFQ_QuantifiedPeaks.tsv \'$quantifiedPeaks\'\n+        && cp *_FlashLFQ_QuantifiedProteins.tsv \'$quantifiedProteins\'\n+    ]]></command>\n+    <inputs>\n+        <param name="idt" type="data" format="tabular" label="identification file" \n+             help="MetaMorpheus,Morpheus"/>\n+        <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/>\n+        <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/>\n+        <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/>\n+        <param name="nis" type="integer" value="2" min="1" max="30" label="number of isotopes required to be observed"/>\n+        <param name="intensity" type="select" label="intensity">\n+            <option value="apex" selected="true">use the apex intensity</option>\n+            <option value="integrate">integrate chromatographic peak intensity</option>\n+        </param>\n+        <param name="charge" type="select" label="charge">\n+            <option value="all" selected="true">use all identification detected charge states</option>\n+            <option value="precursor">use precursor charge</option>\n+        </param>\n+        <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true" \n+            label="require observed monoisotopic mass peak"/>\n+        <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false" \n+            label="match between runs"/>\n+    </inputs>\n+    <outputs>\n+        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" />\n+        <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv">\n+            <actions>\n+                <action name="column_names" type="metadata" \n+                 default="File Name,Base Sequence,Full Sequence,Protein Group,Peptide Monoisotopic Mass,MS2 Retention Time,Precursor Charge,Theoretical MZ,Peak intensity,Peak RT Start,Peak RT Apex,Peak RT End,Peak MZ,Peak Charge,Num Charge States Observed,Peak Detection Type,PSMs Mapped,Base Sequences Mapped,Full Sequences Mapped,Peak Split Valley RT,Peak Apex Mass Error (ppm)"/>\n+            </actions>\n+        </data>\n+        <data name="quantifiedBaseSequences" format="tabular" label="${tool.name} on ${on_string}: QuantifiedBaseSequences.tsv">\n+            <actions>\n+                <action name="column_names" type="metadata" \n+                 default="Sequence,Protein Group,${\',\'.join([\'Intensity_\' + i.name for i in $peak_lists])},${\',\'.join([\'Detection Type_\' + i.name for i in $peak_lists])}"/>\n+   '..b'proteomics.\n+\n+**Accepted command-line arguments:**\n+\n+::\n+\n+    --idt [string | identification file path (TSV format)]\n+    --raw [string | MS data file (.raw or .mzML)]\n+    --rep [string | repository containing MS data files]\n+    --ppm [double | monoisotopic ppm tolerance] (default = 10)\n+    --iso [double | isotopic distribution tolerance in ppm] (default = 5)\n+    --sil [boolean | silent mode; no console output] (default = false)\n+    --pau [boolean | pause at end of run] (default = true)\n+    --int [boolean | integrate chromatographic peak intensity instead of using \n+          the apex intensity] (default = false)\n+    --chg [boolean | use only precursor charge state; when set to false, FlashLFQ looks \n+          for all charge states detected in the MS/MS identification file for each peptide] (default = false)\n+    --mbr [bool|match between runs]\n+    --rmm [bool|require observed monoisotopic mass peak]\n+    --nis [int|number of isotopes required to be observed]\n+\n+\n+**Tab-Delimited Identification Text File**\n+\n+The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line).\n+\n+The following headers are required in the list of MS/MS identifications:\n+\n+  - **File Name** - File extensions should be tolerated, but no extension is tested more extensively (e.g. use MyFile and not MyFile.mzML)\n+  - **Base Sequence** - Should only contain amino acid sequences, or it will likely result in a crash\n+  - **Full Sequence** - Modified sequence. Can contain any letters, but must be consistent between the same peptidoform to get accurate results\n+  - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass\n+  - **Scan Retention Time** - MS/MS identification scan retention time\n+  - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification\n+  - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary\n+\n+\n+**Outputs**:\n+\n+  - **QuantifiedProteins.tsv** - Protein intensities are summed here within a run. \n+\n+  - **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as number of PSMs mapped, start/apex/end retention times, ppm error, etc are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times, etc). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate more than one peptide mapped to that peak.\n+\n+  - **QuantifiedModifiedSequences.tsv** - Similar to QuantifiedBaseSequences, but instead of being summed by Base Sequence, peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.\n+\n+  - **QuantifiedBaseSequences.tsv** - Peptide intensities are summed here within a run (including differently-modified forms of the same amino acid sequence) and displayed in a convenient format for comparing across runs. The identification type (MS/MS or MBR) is also indicated. A peptide with more than 30% of its intensity coming from ambiguous peak(s) is considered not quantifiable and is given an intensity of -1.\n+\n+\n+  - **Log.txt** - Log of the FlashLFQ run. Includes timestamps and quantification time for each file, total analysis time, directories used, and settings.\n+\n+\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1021/acs.jproteome.7b00608</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 6d3560a3a548 test-data/aggregatePSMs_5ppmAroundZero.psmtsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aggregatePSMs_5ppmAroundZero.psmtsv Thu Jan 25 16:10:58 2018 -0500
[
@@ -0,0 +1,5 @@
+File Name Scan Number Scan Retention Time Num Experimental Peaks Total Ion Current Precursor Scan Number Precursor Charge Precursor MZ Precursor Intensity Precursor Mass Score Notch Quantification Intensity Ambiguous Matches Protein Accession Protein Name Peptide Description Start and End Residues In Protein Previous Amino Acid Next Amino Acid Base Sequence Full Sequence Variable Mods Matched Ion Counts Matched Ion Masses Localized Scores Missed Cleavages Peptide Monoisotopic Mass Decoy/Contaminant/Target Improvement Possible Mass Diff (Da) Mass Diff (ppm)Cumulative Target Cumulative Decoy QValue Cumulative Target Notch Cumulative Decoy Notch QValue Notch
+sliced-raw 36 94.12193 13.00000 2929925.75000 34 2.00000 676.33484 3555206.50000 1350.65512 1.098 0.000 0 0.00000 P34223 UBX1_YEAST UBX domain-containing protein 1 full [242 to 253] K Y EGFQVADGPLYR EGFQVADGPLYR 0 0;1 [[];[604.33330];] [1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098] 0 1350.65681 T 0.000 -0.00169 -1.251201 0 0.000000 1 0 0.000000
+sliced-mzml 36 94.12193 13.00000 2929925.75000 34 2.00000 676.33484 3555188.00000 1350.65512 1.098 0.000 0 0.00000 P34223 UBX1_YEAST UBX domain-containing protein 1 full [242 to 253] K Y EGFQVADGPLYR EGFQVADGPLYR 0 0;1 [[];[604.33330];] [1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098,1.098] 0 1350.65681 T 0.000 -0.00169 -1.251202 0 0.000000 2 0 0.000000
+sliced-raw 25 94.05811 12.00000 2664486.50000 23 2.00000 676.33521 2498743.50000 1350.65586 1.094 0.000 0 0.00000 P34223 UBX1_YEAST UBX domain-containing protein 1 full [242 to 253] K Y EGFQVADGPLYR EGFQVADGPLYR 0 0;1 [[];[604.33330];] [1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094] 0 1350.65681 T 0.000 -0.00096 -0.708933 0 0.000000 3 0 0.000000
+sliced-mzml 25 94.05811 12.00000 2664486.50000 23 2.00000 676.33484 2493913.00000 1350.65512 1.094 0.000 0 0.00000 P34223 UBX1_YEAST UBX domain-containing protein 1 full [242 to 253] K Y EGFQVADGPLYR EGFQVADGPLYR 0 0;1 [[];[604.33330];] [1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094,1.094] 0 1350.65681 T 0.000 -0.00169 -1.251204 0 0.000000 4 0 0.000000
b
diff -r 000000000000 -r 6d3560a3a548 test-data/sliced-mzml.mzML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sliced-mzml.mzML Thu Jan 25 16:10:58 2018 -0500
b
b'@@ -0,0 +1,6123 @@\n+<?xml version="1.0" encoding="utf-8"?>\r\n+<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.2_idx.xsd">\r\n+  <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="sliced-raw" version="1.1.0">\r\n+    <cvList count="2">\r\n+      <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="4.0.1" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>\r\n+      <cv id="UO" fullName="Unit Ontology" version="12:10:2011" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/>\r\n+    </cvList>\r\n+    <fileDescription>\r\n+      <fileContent>\r\n+        <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/>\r\n+        <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>\r\n+      </fileContent>\r\n+      <sourceFileList count="1">\r\n+        <sourceFile id="RAW1" name="sliced-raw.raw" location="file:///C:\\Users\\rmillikin\\Documents\\Data\\Yeast">\r\n+          <cvParam cvRef="MS" accession="MS:1000768" name="Thermo nativeID format" value=""/>\r\n+          <cvParam cvRef="MS" accession="MS:1000563" name="Thermo RAW format" value=""/>\r\n+          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="47b7a9a4b60c07f8229910014aea4b93ee226fec"/>\r\n+        </sourceFile>\r\n+      </sourceFileList>\r\n+    </fileDescription>\r\n+    <referenceableParamGroupList count="1">\r\n+      <referenceableParamGroup id="CommonInstrumentParams">\r\n+        <cvParam cvRef="MS" accession="MS:1001742" name="LTQ Orbitrap Velos" value=""/>\r\n+        <cvParam cvRef="MS" accession="MS:1000529" name="instrument serial number" value="SN03001B"/>\r\n+      </referenceableParamGroup>\r\n+    </referenceableParamGroupList>\r\n+    <softwareList count="2">\r\n+      <software id="Xcalibur" version="2.6.0">\r\n+        <cvParam cvRef="MS" accession="MS:1000532" name="Xcalibur" value=""/>\r\n+      </software>\r\n+      <software id="pwiz" version="3.0.10875">\r\n+        <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard software" value=""/>\r\n+      </software>\r\n+    </softwareList>\r\n+    <instrumentConfigurationList count="2">\r\n+      <instrumentConfiguration id="IC1">\r\n+        <referenceableParamGroupRef ref="CommonInstrumentParams"/>\r\n+        <componentList count="3">\r\n+          <source order="1">\r\n+            <cvParam cvRef="MS" accession="MS:1000398" name="nanoelectrospray" value=""/>\r\n+            <cvParam cvRef="MS" accession="MS:1000485" name="nanospray inlet" value=""/>\r\n+          </source>\r\n+          <analyzer order="2">\r\n+            <cvParam cvRef="MS" accession="MS:1000484" name="orbitrap" value=""/>\r\n+          </analyzer>\r\n+          <detector order="3">\r\n+            <cvParam cvRef="MS" accession="MS:1000624" name="inductive detector" value=""/>\r\n+          </detector>\r\n+        </componentList>\r\n+        <softwareRef ref="Xcalibur"/>\r\n+      </instrumentConfiguration>\r\n+      <instrumentConfiguration id="IC2">\r\n+        <referenceableParamGroupRef ref="CommonInstrumentParams"/>\r\n+        <componentList count="3">\r\n+          <source order="1">\r\n+            <cvParam cvRef="MS" accession="MS:1000398" name="nanoelectrospray" value=""/>\r\n+            <cvParam cvRef="MS" accession="MS:1000485" name="nanospray inlet" value=""/>\r\n+          </source>\r\n+          <analyzer order="2">\r\n+            <cvParam cvRef="MS" accession="MS:1000083" name="radial ejection linear ion trap" value=""/>\r\n+          </analyzer>\r\n+          <detector order="3">\r\n+            <cvParam cvRef="MS" accession="MS:1000253" name="electron multiplier" value=""/>\r\n+          </detector>\r\n+        </componentList>\r\n+        <softwareRef ref="Xcalib'..b'ollerNumber=1 scan=56">498704</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=57">537542</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=58">543051</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=59">548389</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=60">553862</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=61">559312</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=62">564842</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=63">570236</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=64">575637</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=65">580983</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=66">586404</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=67">591788</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=68">629259</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=69">634776</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=70">640137</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=71">645669</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=72">651127</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=73">656609</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=74">662034</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=75">667530</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=76">672916</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=77">678349</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=78">683756</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=79">718399</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=80">724044</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=81">729438</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=82">734759</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=83">740252</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=84">745671</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=85">751140</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=86">756489</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=87">761864</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=88">767281</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=89">772658</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=90">802518</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=91">807971</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=92">813831</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=93">819186</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=94">824778</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=95">830079</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=96">835649</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=97">840996</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=98">846520</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=99">851890</offset>\r\n+      <offset idRef="controllerType=0 controllerNumber=1 scan=100">857257</offset>\r\n+    </index>\r\n+    <index name="chromatogram">\r\n+      <offset idRef="TIC">889680</offset>\r\n+    </index>\r\n+  </indexList>\r\n+  <indexListOffset>891911</indexListOffset>\r\n+  <fileChecksum>06271877a4ad725a6043b27b9940a707026448dd</fileChecksum>\r\n+</indexedmzML>\r\n'