Mercurial > repos > galaxyp > flashlfq
view flashlfq.xml @ 4:908ab13490dc draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 047addefbac7468a92ba4fc04899bd5136c58ea4
author | galaxyp |
---|---|
date | Wed, 30 Jan 2019 05:39:17 -0500 |
parents | ff1148892ce3 |
children | cb346052c5de |
line wrap: on
line source
<!--
  Galaxy wrapper for FlashLFQ (label-free quantification of mass-spectrometry data).

  The command stages the identification file and the spectrum files under
  predictable names in the job working directory, runs FlashLFQ on them and
  copies the three result tables plus a cleaned-up log into the Galaxy outputs.
-->
<tool id="flashlfq" name="FlashLFQ" version="0.1.111">
    <description>ultrafast label-free quantification for mass-spectrometry proteomics</description>
    <requirements>
        <requirement type="package" version="0.1.111">flashlfq</requirement>
    </requirements>
    <command><![CDATA[
        #import re
        ## Link the identification file into the working directory: display name with
        ## whitespace replaced by underscores and its extension swapped for .psmtsv.
        #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv"
        ln -s '${idt}' '${idt_path}' &&
        mkdir spectrum_dir &&
        ## Link every spectrum file into spectrum_dir under its display name with the
        ## extension swapped for .mzML.
        #for $peak_list in $peak_lists:
            #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + ".mzML"
            ln -s '${peak_list}' 'spectrum_dir/${input_name}' &&
        #end for
        #if $normalize.nor == 'true':
            ## The design dataset lives inside the "normalize" conditional, so it has to
            ## be referenced through it. It is linked under the fixed name
            ## ExperimentalDesign.tsv next to the spectra, independent of what the
            ## dataset happens to be called in the history.
            ln -s '${normalize.experimental_design}' 'spectrum_dir/ExperimentalDesign.tsv' &&
        #end if
        FlashLFQ
            --idt '$idt_path'
            --rep "./spectrum_dir"
            --ppm $ppm
            --iso $iso
            --nis $nis
            #if $normalize.nor == 'true':
                --nor true
            #end if
            #if $intensity == 'integrate':
                --int true
            #end if
            #if $charge == 'precursor':
                --chg true
            #end if
            $rmm
            $mbr
            ## Forward the user-selected match-between-runs window.
            --mrt $mrt
            $pro
            --out .
            > logfile.txt &&
        ## Copy the log to the output, replacing whatever follows "Analysis summary for:"
        ## up to and including "working" plus one character with a single space.
        sed 's/\(Analysis summary for:\).*working./\1 /' logfile.txt > '$log' &&
        cp *_FlashLFQ_QuantifiedProteins.tsv '$quantifiedProteins' &&
        cp *_FlashLFQ_QuantifiedPeptides.tsv '$quantifiedPeptides' &&
        cp *_FlashLFQ_QuantifiedPeaks.tsv '$quantifiedPeaks'
    ]]></command>
    <inputs>
        <param name="idt" type="data" format="tabular" label="identification file"
               help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/>
        <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/>
        <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/>
        <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/>
        <param name="nis" type="integer" value="2" min="1" max="30" label="number of isotopes required to be observed"/>
        <param name="intensity" type="select" label="intensity">
            <option value="apex" selected="true">use the apex intensity</option>
            <option value="integrate">integrate chromatographic peak intensity</option>
        </param>
        <param name="charge" type="select" label="charge">
            <option value="all" selected="true">use all identification detected charge states</option>
            <option value="precursor">use precursor charge</option>
        </param>
        <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true"
               label="require observed monoisotopic mass peak"/>
        <!-- Normalization needs an experimental design table, so it is only requested when enabled. -->
        <conditional name="normalize">
            <param name="nor" type="select" label="normalize intensity results">
                <option value="false">No</option>
                <option value="true">Yes</option>
            </param>
            <when value="false"/>
            <when value="true">
                <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/>
            </when>
        </conditional>
        <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false"
               label="match between runs"/>
        <param name="mrt" type="float" value="1.5" min="0" max="30" label="maximum MBR window in minutes"/>
        <param name="pro" type="boolean" truevalue="--pro true" falsevalue="--pro false" checked="false"
               label="advanced protein quantification"/>
    </inputs>
    <outputs>
        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" />
        <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv" />
        <data name="quantifiedPeptides" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv" />
        <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv" />
    </outputs>
    <tests>
        <test>
            <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/>
            <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/>
            <param name="ppm" value="12"/>
            <param name="iso" value="6"/>
            <output name="quantifiedPeaks">
                <assert_contents>
                    <has_text text="EGFQVADGPLYR" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**FlashLFQ** is an ultrafast label-free quantification for mass-spectrometry proteomics.

**Accepted command-line arguments:**

::

    --idt [string | identification file path (TSV format)]
    --raw [string | MS data file (.raw or .mzML)]
    --rep [string | repository containing MS data files]
    --ppm [double | monoisotopic ppm tolerance] (default = 10)
    --iso [double | isotopic distribution tolerance in ppm] (default = 5)
    --sil [boolean | silent mode; no console output] (default = false)
    --pau [boolean | pause at end of run] (default = true)
    --int [boolean | integrate chromatographic peak intensity instead of using the apex intensity] (default = false)
    --chg [boolean | use only precursor charge state; when set to false, FlashLFQ looks for all charge states detected in the MS/MS identification file for each peptide] (default = false)
    --mbr [bool|match between runs]
    --mrt [double|maximum MBR window in minutes]
    --rmm [bool|require observed monoisotopic mass peak]
    --nis [int|number of isotopes required to be observed]
    --nor [bool|normalize intensity results]
    --pro [bool|advanced protein quantification]

**Tab-Delimited Identification Text File**

The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line).

The following headers are required in the list of MS/MS identifications:

- **File Name** - File extensions should be tolerated, but no extension is tested more extensively (e.g. use MyFile and not MyFile.mzML)
- **Base Sequence** - Should only contain amino acid sequences, or it will likely result in a crash
- **Full Sequence** - Modified sequence. Can contain any letters, but must be consistent between the same peptidoform to get accurate results
- **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass
- **Scan Retention Time** - MS/MS identification scan retention time
- **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification
- **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary

**Outputs**:

- **QuantifiedProteins.tsv** - Lists protein accession and in the future will include gene and organism if the TSV contains it. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted-average of the intensities of the peptides assigned to the protein. The weights are determined by how well the peptide co-varies with the other peptides assigned to that protein.
- **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as number of PSMs mapped, start/apex/end retention times, ppm error, etc are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times, etc). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate more than one peptide mapped to that peak.
- **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.
- **Log.txt** - Log of the FlashLFQ run.
    ]]></help>
    <citations>
        <citation type="doi">10.1021/acs.jproteome.7b00608</citation>
    </citations>
</tool>