comparison flashlfq.xml @ 5:cb346052c5de draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 7f1c24d169a610aa910ca717fd698fe782b42699"
author galaxyp
date Thu, 24 Oct 2019 15:51:49 -0400
parents 908ab13490dc
children c8ae97f37bf3
comparison
equal deleted inserted replaced
4:908ab13490dc 5:cb346052c5de
1 <tool id="flashlfq" name="FlashLFQ" version="0.1.111"> 1 <tool id="flashlfq" name="FlashLFQ" version="1.0.0.0">
2 <description>ultrafast label-free quantification for mass-spectrometry proteomics</description> 2 <description>ultrafast label-free quantification for mass-spectrometry proteomics</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.1.111">flashlfq</requirement> 4 <requirement type="package" version="1.0.0">flashlfq</requirement>
5 </requirements> 5 </requirements>
6 <command><![CDATA[ 6 <command><![CDATA[
7 #import re 7 #import re
8 #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv" 8 #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv"
9 ln -s '${idt}' '${idt_path}' && 9 ln -s '${idt}' '${idt_path}' &&
10 mkdir spectrum_dir && 10 mkdir spectrum_dir &&
11 #for $peak_list in $peak_lists: 11 #for $peak_list in $peak_lists:
12 #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + ".mzML" 12 #set $ext = '.mzML'
13 #if $peak_list.extension.endswith('raw')
14 #set $ext = '.RAW'
15 #end if
16 #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + $ext
13 ln -s '${peak_list}' 'spectrum_dir/${input_name}' && 17 ln -s '${peak_list}' 'spectrum_dir/${input_name}' &&
14 #end for 18 #end for
15 #if $normalize.nor == 'true': 19 #if $experiment.use_design == 'true':
16 #set $input_name = $re.sub('[.][^.]*$','',$experimental_design.display_name.split('/')[-1]) + ".tsv" 20 ln -s '${experiment.experimental_design}' 'spectrum_dir/ExperimentalDesign.tsv' &&
17 ln -s '${experimental_design}' 'spectrum_dir/${input_name}' &&
18 #end if 21 #end if
22 echo 'y' |
19 FlashLFQ 23 FlashLFQ
20 --idt '$idt_path' 24 --idt '$idt_path'
21 --rep "./spectrum_dir" 25 --rep "./spectrum_dir"
22 --ppm $ppm 26 --ppm $ppm
23 --iso $iso 27 --iso $iso
24 --nis $nis 28 --nis $nis
25 #if $normalize.nor == 'true': 29 $int
26 --nor true 30 $chg
31 $mbr
32 #if $experiment.use_design == 'true':
33 $experiment.nor
34 #if $experiment.bayesian.calculate == 'true':
35 --bay true
36 --ctr '$experiment.bayesian.ctr'
37 #if str($experiment.bayesian.fcc):
38 -fcc $experiment.bayesian.fcc
39 #end if
40 $experiment.bayesian.sha
41 $experiment.bayesian.rmc
42 --mcm '$experiment.bayesian.mcm'
43 #if str($experiment.bayesian.rns):
44 --rns $experiment.bayesian.rns
45 #end if
46 #end if
27 #end if 47 #end if
28 #if $intensity == 'integrate': 48 --out out > logfile.txt
29 --int true
30 #end if
31 #if $charge == 'precursor':
32 --chg true
33 #end if
34 $rmm $mbr $pro
35 --out . > logfile.txt
36 && cat logfile.txt | sed 's/\(Analysis summary for:\).*working./\1 /' > '$log'
37 && cp *_FlashLFQ_QuantifiedProteins.tsv '$quantifiedProteins'
38 && cp *_FlashLFQ_QuantifiedPeptides.tsv '$quantifiedPeptides'
39 && cp *_FlashLFQ_QuantifiedPeaks.tsv '$quantifiedPeaks'
40 ]]></command> 49 ]]></command>
41
42 <inputs> 50 <inputs>
43 <param name="idt" type="data" format="tabular" label="identification file" 51 <param argument="--idt" type="data" format="tabular" label="identification file"
44 help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/> 52 help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/>
45 <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/> 53 <param name="peak_lists" type="data" format="mzml,raw,thermo.raw" multiple="true" label="spectrum files"/>
46 <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/> 54 <param argument="--ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/>
47 <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/> 55 <param argument="--iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/>
48 <param name="nis" type="integer" value="2" min="1" max="30" label="number of isotopes required to be observed"/> 56 <param argument="--nis" type="integer" value="2" min="2" max="30" label="number of isotopes required to be observed"/>
49 <param name="intensity" type="select" label="intensity"> 57 <param argument="--int" type="boolean" truevalue="--int true" falsevalue="--int false" checked="false"
50 <option value="apex" selected="true">use the apex intensity</option> 58 label="integrate peak areas (not recommended)"/>
51 <option value="integrate">integrate chromatographic peak intensity</option> 59 <param argument="--chg" type="boolean" truevalue="--chg true" falsevalue="--chg false" checked="false"
52 </param> 60 label="use only precursor charge state"/>
53 <param name="charge" type="select" label="charge"> 61 <param argument="--mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false"
54 <option value="all" selected="true">use all identification detected charge states</option> 62 label="match between runs"/>
55 <option value="precursor">use precursor charge</option> 63 <param argument="--mrt" type="float" value="2.5" min=".01" max="60" label="maximum MBR window in minutes"/>
56 </param> 64 <conditional name="experiment">
57 <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true" 65 <param name="use_design" type="select" label="Use experimental design for normalization or protein fold-change analysis">
58 label="require observed monoisotopic mass peak"/>
59 <conditional name="normalize">
60 <param name="nor" type="select" label="normalize intensity results">
61 <option value="false">No</option> 66 <option value="false">No</option>
62 <option value="true">Yes</option> 67 <option value="true">Yes</option>
63 </param> 68 </param>
64 <when value="false"/> 69 <when value="false"/>
65 <when value="true"> 70 <when value="true">
66 <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/> 71 <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/>
72 <param argument="--nor" type="boolean" truevalue="--nor true" falsevalue="--nor false" checked="true"
73 label="normalize intensity results"/>
74 <conditional name="bayesian">
75 <param name="calculate" type="select" label="Perform Bayesian protein fold-change analysis">
76 <option value="false">No</option>
77 <option value="true">Yes</option>
78 </param>
79 <when value="false"/>
80 <when value="true">
81 <param argument="--ctr" type="select" value="" label="control condition for Bayesian protein fold-change analysis">
82 <options from_dataset="experimental_design">
83 <column name="name" index="1"/>
84 <column name="value" index="1"/>
85 <filter type="static_value" name="heading_ctr" column="1" value="Condition" keep="False"/>
86 <filter type="unique_value" name="unique_ctr" column="1"/>
87 <filter type="sort_by" name="sorted_ctr" column="1"/>
88 </options>
89 </param>
90 <param argument="--fcc" type="float" value="" min="0.01" label="fold-change cutoff" optional="true"
91 help="Leave blank to determine empirically from data."/>
92 <param argument="--sha" type="boolean" truevalue="--sha true" falsevalue="--sha false" checked="false"
93 label="use shared peptides for protein quantification"/>
94 <param argument="--rmc" type="boolean" truevalue="--rmc true" falsevalue="--rmc false" checked="false"
95 label="require MS/MS ID in condition"/>
96 <param argument="--mcm" type="integer" value="500" min="500" label="number of markov-chain monte carlo iterations"/>
97 <param argument="--rns" type="integer" value="" optional="true" label="random seed"/>
98 </when>
99 </conditional>
67 </when> 100 </when>
68 </conditional> 101 </conditional>
69 <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false"
70 label="match between runs"/>
71 <param name="mrt" type="float" value="1.5" min="0" max="30" label="maximum MBR window in minutes"/>
72 <param name="pro" type="boolean" truevalue="--pro true" falsevalue="--pro false" checked="false"
73 label="advanced protein quantification"/>
74 </inputs> 102 </inputs>
75 103
76 <outputs> 104 <outputs>
77 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" /> 105 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="logfile.txt"/>
78 <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv" /> 106 <data name="toml" format="txt" label="${tool.name} on ${on_string}: FlashLfqSettings.toml" from_work_dir="out/FlashLfqSettings.toml"/>
79 <data name="quantifiedPeptides" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv" /> 107 <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv" from_work_dir="out/QuantifiedPeaks.tsv"/>
80 <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv" /> 108 <data name="quantifiedPeptides" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv" from_work_dir="out/QuantifiedPeptides.tsv"/>
81 109 <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv" from_work_dir="out/QuantifiedProteins.tsv"/>
110 <data name="foldChange" format="tabular" label="${tool.name} on ${on_string}: BayesianFoldChangeAnalysis.tsv" from_work_dir="out/BayesianFoldChangeAnalysis.tsv">
111 <filter>'bayesian' in experiment and 'ctr' in experiment['bayesian']</filter>
112 </data>
82 </outputs> 113 </outputs>
83 <tests> 114 <tests>
84 <test> 115 <test>
85 <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/> 116 <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/>
86 <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/> 117 <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/>
92 </assert_contents> 123 </assert_contents>
93 </output> 124 </output>
94 </test> 125 </test>
95 </tests> 126 </tests>
96 <help><![CDATA[ 127 <help><![CDATA[
97
98 **FlashLFQ** is an ultrafast label-free quantification for mass-spectrometry proteomics. 128 **FlashLFQ** is an ultrafast label-free quantification for mass-spectrometry proteomics.
129 https://github.com/smith-chem-wisc/FlashLFQ/wiki
130
99 131
100 **Accepted command-line arguments:** 132 **Accepted command-line arguments:**
101 133
102 :: 134 ::
103 135
104 --idt [string | identification file path (TSV format)] 136 --idt [string|identification file path]
105 --raw [string | MS data file (.raw or .mzML)] 137 --rep [string|directory containing spectral data files]
106 --rep [string | repository containing MS data files] 138 --out [string|output directory]
107 --ppm [double | monoisotopic ppm tolerance] (default = 10) 139 --ppm [double|ppm tolerance]
108 --iso [double | isotopic distribution tolerance in ppm] (default = 5) 140 --nor [bool|normalize intensity results]
109 --sil [boolean | silent mode; no console output] (default = false)
110 --pau [boolean | pause at end of run] (default = true)
111 --int [boolean | integrate chromatographic peak intensity instead of using
112 the apex intensity] (default = false)
113 --chg [boolean | use only precursor charge state; when set to false, FlashLFQ looks
114 for all charge states detected in the MS/MS identification file for each peptide] (default = false)
115 --mbr [bool|match between runs] 141 --mbr [bool|match between runs]
116 --rmm [bool|require observed monoisotopic mass peak] 142 --sha [bool|use shared peptides for protein quantification]
143 --bay [bool|Bayesian protein fold-change analysis]
144 --ctr [string|control condition for Bayesian protein fold-change analysis]
145 --fcc [double|fold-change cutoff for Bayesian protein fold-change analysis]
146
147 **Advanced settings:**
148
149 ::
150
151 --sil [bool|silent mode]
152 --int [bool|integrate peak areas (not recommended)]
153 --iso [double|isotopic distribution tolerance in ppm]
154 --mrt [double|maximum MBR window in minutes]
155 --chg [bool|use only precursor charge state]
117 --nis [int|number of isotopes required to be observed] 156 --nis [int|number of isotopes required to be observed]
157 --rmc [bool|require MS/MS ID in condition]
158 --mcm [int|number of markov-chain monte carlo iterations for the Bayesian protein fold-change analysis]
159 --rns [int|random seed for the Bayesian protein fold-change analysis]
118 160
119 161
120 **Tab-Delimited Identification Text File** 162 **Tab-Delimited Identification Text File**
121 163
122 The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line). 164 The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line).
129 - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass 171 - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass
130 - **Scan Retention Time** - MS/MS identification scan retention time 172 - **Scan Retention Time** - MS/MS identification scan retention time
131 - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification 173 - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification
132 - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary 174 - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary
133 175
176 **ExperimentalDesign File**
177
178 The ExperimentalDesign_ File should have 5 columns separated by TAB characters:
179
180 - SpectrumFileName - Without the file extension
181 - Condition - Cannot be blank
182 - Sample - an integer, at least 1. Each condition must have continuous sample numbers starting at 1. For example, samples 1, 3, and 4 are not valid because sample 2 is missing. In this case you would label the samples as 1, 2, and 3.
183 - Fraction - an integer, at least 1. Each sample must have continuous fraction numbers starting at 1. If your data is not fractionated, just enter 1 for all fractions. It is OK for two samples to have different total numbers of fractions. It is NOT recommended to use a sample if it is missing a fraction with significant peptide intensity (e.g., if sample 2 is missing fraction #5 out of 10 total fractions).
184 - Replicate - an integer, at least 1. Each fraction must have continuous replicate numbers starting at 1.
185
186 ::
187
188 For example, with spectrum files named:
189
190 - 20130510_EXQ1_IgPa_QC_UPS1_01.mzml
191 - 20130510_EXQ1_IgPa_QC_UPS1_02.mzml
192 - 20130510_EXQ1_IgPa_QC_UPS2_01.mzml
193 - 20130510_EXQ1_IgPa_QC_UPS2_02.mzml
194
195 The ExperimentalDesign File:
196
197 FileName Condition Biorep Fraction Techrep
198 20130510_EXQ1_IgPa_QC_UPS1_01 S1 1 1 1
199 20130510_EXQ1_IgPa_QC_UPS1_02 S1 2 1 1
200 20130510_EXQ1_IgPa_QC_UPS2_01 S2 1 1 1
201 20130510_EXQ1_IgPa_QC_UPS2_02 S2 2 1 1
202
134 203
135 **Outputs**: 204 **Outputs**:
136 205
137 - **QuantifiedProteins.tsv** - Lists protein accession and in the future will include gene and organism if the TSV contains it. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted-average of the intensities of the peptides assigned to the protein. The weights are determined by how well the peptide co-varies with the other peptides assigned to that protein. 206 - **QuantifiedProteins.tsv** - Lists protein accession and in the future will include gene and organism if the TSV contains it. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted-average of the intensities of the peptides assigned to the protein. The weights are determined by how well the peptide co-varies with the other peptides assigned to that protein.
138 207
141 - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs. 210 - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.
142 211
143 - **Log.txt** - Log of the FlashLFQ run. 212 - **Log.txt** - Log of the FlashLFQ run.
144 213
145 214
215 .. _FlashLFQ: https://github.com/smith-chem-wisc/FlashLFQ/wiki
216 .. _ExperimentalDesign: https://github.com/smith-chem-wisc/FlashLFQ/wiki/Experimental-Design
146 217
147 ]]></help> 218 ]]></help>
148 <citations> 219 <citations>
149 <citation type="doi">10.1021/acs.jproteome.7b00608</citation> 220 <citation type="doi">10.1021/acs.jproteome.7b00608</citation>
150 </citations> 221 </citations>