Mercurial > repos > galaxyp > flashlfq
comparison flashlfq.xml @ 5:cb346052c5de draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 7f1c24d169a610aa910ca717fd698fe782b42699"
author | galaxyp |
---|---|
date | Thu, 24 Oct 2019 15:51:49 -0400 |
parents | 908ab13490dc |
children | c8ae97f37bf3 |
comparison
equal
deleted
inserted
replaced
4:908ab13490dc | 5:cb346052c5de |
---|---|
1 <tool id="flashlfq" name="FlashLFQ" version="0.1.111"> | 1 <tool id="flashlfq" name="FlashLFQ" version="1.0.0.0"> |
2 <description>ultrafast label-free quantification for mass-spectrometry proteomics</description> | 2 <description>ultrafast label-free quantification for mass-spectrometry proteomics</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="0.1.111">flashlfq</requirement> | 4 <requirement type="package" version="1.0.0">flashlfq</requirement> |
5 </requirements> | 5 </requirements> |
6 <command><![CDATA[ | 6 <command><![CDATA[ |
7 #import re | 7 #import re |
8 #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv" | 8 #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv" |
9 ln -s '${idt}' '${idt_path}' && | 9 ln -s '${idt}' '${idt_path}' && |
10 mkdir spectrum_dir && | 10 mkdir spectrum_dir && |
11 #for $peak_list in $peak_lists: | 11 #for $peak_list in $peak_lists: |
12 #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + ".mzML" | 12 #set $ext = '.mzML' |
13 #if $peak_list.extension.endswith('raw') | |
14 #set $ext = '.RAW' | |
15 #end if | |
16 #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + $ext | |
13 ln -s '${peak_list}' 'spectrum_dir/${input_name}' && | 17 ln -s '${peak_list}' 'spectrum_dir/${input_name}' && |
14 #end for | 18 #end for |
15 #if $normalize.nor == 'true': | 19 #if $experiment.use_design == 'true': |
16 #set $input_name = $re.sub('[.][^.]*$','',$experimental_design.display_name.split('/')[-1]) + ".tsv" | 20 ln -s '${experiment.experimental_design}' 'spectrum_dir/ExperimentalDesign.tsv' && |
17 ln -s '${experimental_design}' 'spectrum_dir/${input_name}' && | |
18 #end if | 21 #end if |
22 echo 'y' | | |
19 FlashLFQ | 23 FlashLFQ |
20 --idt '$idt_path' | 24 --idt '$idt_path' |
21 --rep "./spectrum_dir" | 25 --rep "./spectrum_dir" |
22 --ppm $ppm | 26 --ppm $ppm |
23 --iso $iso | 27 --iso $iso |
24 --nis $nis | 28 --nis $nis |
25 #if $normalize.nor == 'true': | 29 $int |
26 --nor true | 30 $chg |
31 $mbr | |
32 #if $experiment.use_design == 'true': | |
33 $experiment.nor | |
34 #if $experiment.bayesian.calculate == 'true': | |
35 --bay true | |
36 --ctr '$experiment.bayesian.ctr' | |
37 #if str($experiment.bayesian.fcc): | |
38 -fcc $experiment.bayesian.fcc | |
39 #end if | |
40 $experiment.bayesian.sha | |
41 $experiment.bayesian.rmc | |
42 --mcm '$experiment.bayesian.mcm' | |
43 #if str($experiment.bayesian.rns): | |
44 --rns $experiment.bayesian.rns | |
45 #end if | |
46 #end if | |
27 #end if | 47 #end if |
28 #if $intensity == 'integrate': | 48 --out out > logfile.txt |
29 --int true | |
30 #end if | |
31 #if $charge == 'precursor': | |
32 --chg true | |
33 #end if | |
34 $rmm $mbr $pro | |
35 --out . > logfile.txt | |
36 && cat logfile.txt | sed 's/\(Analysis summary for:\).*working./\1 /' > '$log' | |
37 && cp *_FlashLFQ_QuantifiedProteins.tsv '$quantifiedProteins' | |
38 && cp *_FlashLFQ_QuantifiedPeptides.tsv '$quantifiedPeptides' | |
39 && cp *_FlashLFQ_QuantifiedPeaks.tsv '$quantifiedPeaks' | |
40 ]]></command> | 49 ]]></command> |
41 | |
42 <inputs> | 50 <inputs> |
43 <param name="idt" type="data" format="tabular" label="identification file" | 51 <param argument="--idt" type="data" format="tabular" label="identification file" |
44 help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/> | 52 help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/> |
45 <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/> | 53 <param name="peak_lists" type="data" format="mzml,raw,thermo.raw" multiple="true" label="spectrum files"/> |
46 <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/> | 54 <param argument="--ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/> |
47 <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/> | 55 <param argument="--iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/> |
48 <param name="nis" type="integer" value="2" min="1" max="30" label="number of isotopes required to be observed"/> | 56 <param argument="--nis" type="integer" value="2" min="2" max="30" label="number of isotopes required to be observed"/> |
49 <param name="intensity" type="select" label="intensity"> | 57 <param argument="--int" type="boolean" truevalue="--int true" falsevalue="--int false" checked="false" |
50 <option value="apex" selected="true">use the apex intensity</option> | 58 label="integrate peak areas (not recommended)"/> |
51 <option value="integrate">integrate chromatographic peak intensity</option> | 59 <param argument="--chg" type="boolean" truevalue="--chg true" falsevalue="--chg false" checked="false" |
52 </param> | 60 label="use only precursor charge state"/> |
53 <param name="charge" type="select" label="charge"> | 61 <param argument="--mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false" |
54 <option value="all" selected="true">use all identification detected charge states</option> | 62 label="match between runs"/> |
55 <option value="precursor">use precursor charge</option> | 63 <param argument="--mrt" type="float" value="2.5" min=".01" max="60" label="maximum MBR window in minutes"/> |
56 </param> | 64 <conditional name="experiment"> |
57 <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true" | 65 <param name="use_design" type="select" label="Use experimental design for normalization or protein fold-change analysis"> |
58 label="require observed monoisotopic mass peak"/> | |
59 <conditional name="normalize"> | |
60 <param name="nor" type="select" label="normalize intensity results"> | |
61 <option value="false">No</option> | 66 <option value="false">No</option> |
62 <option value="true">Yes</option> | 67 <option value="true">Yes</option> |
63 </param> | 68 </param> |
64 <when value="false"/> | 69 <when value="false"/> |
65 <when value="true"> | 70 <when value="true"> |
66 <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/> | 71 <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/> |
72 <param argument="--nor" type="boolean" truevalue="--nor true" falsevalue="--nor false" checked="true" | |
73 label="normalize intensity results"/> | |
74 <conditional name="bayesian"> | |
75 <param name="calculate" type="select" label="Perform Bayesian protein fold-change analysis"> | |
76 <option value="false">No</option> | |
77 <option value="true">Yes</option> | |
78 </param> | |
79 <when value="false"/> | |
80 <when value="true"> | |
81 <param argument="--ctr" type="select" value="" label="control condition for Bayesian protein fold-change analysis"> | |
82 <options from_dataset="experimental_design"> | |
83 <column name="name" index="1"/> | |
84 <column name="value" index="1"/> | |
85 <filter type="static_value" name="heading_ctr" column="1" value="Condition" keep="False"/> | |
86 <filter type="unique_value" name="unique_ctr" column="1"/> | |
87 <filter type="sort_by" name="sorted_ctr" column="1"/> | |
88 </options> | |
89 </param> | |
90 <param argument="--fcc" type="float" value="" min="0.01" label="fold-change cutoff" optional="true" | |
91 help="Leave blank to determine empirically from data."/> | |
92 <param argument="--sha" type="boolean" truevalue="--sha true" falsevalue="--sha false" checked="false" | |
93 label="use shared peptides for protein quantification"/> | |
94 <param argument="--rmc" type="boolean" truevalue="--rmc true" falsevalue="--rmc false" checked="false" | |
95 label="require MS/MS ID in condition"/> | |
96 <param argument="--mcm" type="integer" value="500" min="500" label="number of markov-chain monte carlo iterations"/> | |
97 <param argument="--rns" type="integer" value="" optional="true" label="random seed"/> | |
98 </when> | |
99 </conditional> | |
67 </when> | 100 </when> |
68 </conditional> | 101 </conditional> |
69 <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false" | |
70 label="match between runs"/> | |
71 <param name="mrt" type="float" value="1.5" min="0" max="30" label="maximum MBR window in minutes"/> | |
72 <param name="pro" type="boolean" truevalue="--pro true" falsevalue="--pro false" checked="false" | |
73 label="advanced protein quantification"/> | |
74 </inputs> | 102 </inputs> |
75 | 103 |
76 <outputs> | 104 <outputs> |
77 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" /> | 105 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="logfile.txt"/> |
78 <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv" /> | 106 <data name="toml" format="txt" label="${tool.name} on ${on_string}: FlashLfqSettings.toml" from_work_dir="out/FlashLfqSettings.toml"/> |
79 <data name="quantifiedPeptides" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv" /> | 107 <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv" from_work_dir="out/QuantifiedPeaks.tsv"/> |
80 <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv" /> | 108 <data name="quantifiedPeptides" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv" from_work_dir="out/QuantifiedPeptides.tsv"/> |
81 | 109 <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv" from_work_dir="out/QuantifiedProteins.tsv"/> |
110 <data name="foldChange" format="tabular" label="${tool.name} on ${on_string}: BayesianFoldChangeAnalysis.tsv" from_work_dir="out/BayesianFoldChangeAnalysis.tsv"> | |
111 <filter>'bayesian' in experiment and 'ctr' in experiment['bayesian']</filter> | |
112 </data> | |
82 </outputs> | 113 </outputs> |
83 <tests> | 114 <tests> |
84 <test> | 115 <test> |
85 <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/> | 116 <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/> |
86 <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/> | 117 <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/> |
92 </assert_contents> | 123 </assert_contents> |
93 </output> | 124 </output> |
94 </test> | 125 </test> |
95 </tests> | 126 </tests> |
96 <help><![CDATA[ | 127 <help><![CDATA[ |
97 | |
98 **FlashLFQ** is an ultrafast label-free quantification for mass-spectrometry proteomics. | 128 **FlashLFQ** is an ultrafast label-free quantification for mass-spectrometry proteomics. |
129 https://github.com/smith-chem-wisc/FlashLFQ/wiki | |
130 | |
99 | 131 |
100 **Accepted command-line arguments:** | 132 **Accepted command-line arguments:** |
101 | 133 |
102 :: | 134 :: |
103 | 135 |
104 --idt [string | identification file path (TSV format)] | 136 --idt [string|identification file path] |
105 --raw [string | MS data file (.raw or .mzML)] | 137 --rep [string|directory containing spectral data files] |
106 --rep [string | repository containing MS data files] | 138 --out [string|output directory] |
107 --ppm [double | monoisotopic ppm tolerance] (default = 10) | 139 --ppm [double|ppm tolerance] |
108 --iso [double | isotopic distribution tolerance in ppm] (default = 5) | 140 --nor [bool|normalize intensity results] |
109 --sil [boolean | silent mode; no console output] (default = false) | |
110 --pau [boolean | pause at end of run] (default = true) | |
111 --int [boolean | integrate chromatographic peak intensity instead of using | |
112 the apex intensity] (default = false) | |
113 --chg [boolean | use only precursor charge state; when set to false, FlashLFQ looks | |
114 for all charge states detected in the MS/MS identification file for each peptide] (default = false) | |
115 --mbr [bool|match between runs] | 141 --mbr [bool|match between runs] |
116 --rmm [bool|require observed monoisotopic mass peak] | 142 --sha [bool|use shared peptides for protein quantification] |
143 --bay [bool|Bayesian protein fold-change analysis] | |
144 --ctr [string|control condition for Bayesian protein fold-change analysis] | |
145 --fcc [double|fold-change cutoff for Bayesian protein fold-change analysis] | |
146 | |
147 **Advanced settings:** | |
148 | |
149 :: | |
150 | |
151 --sil [bool|silent mode] | |
152 --int [bool|integrate peak areas (not recommended)] | |
153 --iso [double|isotopic distribution tolerance in ppm] | |
154 --mrt [double|maximum MBR window in minutes] | |
155 --chg [bool|use only precursor charge state] | |
117 --nis [int|number of isotopes required to be observed] | 156 --nis [int|number of isotopes required to be observed] |
157 --rmc [bool|require MS/MS ID in condition] | |
158 --mcm [int|number of markov-chain monte carlo iterations for the Bayesian protein fold-change analysis] | |
159 --rns [int|random seed for the Bayesian protein fold-change analysis] | |
118 | 160 |
119 | 161 |
120 **Tab-Delimited Identification Text File** | 162 **Tab-Delimited Identification Text File** |
121 | 163 |
122 The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line). | 164 The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line). |
129 - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass | 171 - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass |
130 - **Scan Retention Time** - MS/MS identification scan retention time | 172 - **Scan Retention Time** - MS/MS identification scan retention time |
131 - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification | 173 - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification |
132 - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary | 174 - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary |
133 | 175 |
176 **ExperimentalDesign File** | |
177 | |
178 The ExperimentalDesign_ File should have 5 columns separated by TAB characters: | |
179 | |
180 - SpectrumFileName - Without the file extension | |
181 - Condition - Cannot be blank | |
182 - Sample - an integer, at least 1. Each condition must have continuous sample numbers starting at 1. For example, samples 1, 3, and 4 are not valid because sample 2 is missing. In this case you would label the samples as 1, 2, and 3. | |
183 - Fraction - an integer, at least 1. Each sample must have continuous fraction numbers starting at 1. If your data is not fractionated, just enter 1 for all fractions. It is OK for two samples to have different total numbers of fractions. It is NOT recommended to use a sample if it is missing a fraction with significant peptide intensity (e.g., if sample 2 is missing fraction #5 out of 10 total fractions). | |
184 - Replicate - an integer, at least 1. Each fraction must have continuous replicate numbers starting at 1. | |
185 | |
186 :: | |
187 | |
188 For example, with spectrum files named: | |
189 | |
190 - 20130510_EXQ1_IgPa_QC_UPS1_01.mzml | |
191 - 20130510_EXQ1_IgPa_QC_UPS1_02.mzml | |
192 - 20130510_EXQ1_IgPa_QC_UPS2_01.mzml | |
193 - 20130510_EXQ1_IgPa_QC_UPS2_02.mzml | |
194 | |
195 The ExperimentalDesign File: | |
196 | |
197 FileName Condition Biorep Fraction Techrep | |
198 20130510_EXQ1_IgPa_QC_UPS1_01 S1 1 1 1 | |
199 20130510_EXQ1_IgPa_QC_UPS1_02 S1 2 1 1 | |
200 20130510_EXQ1_IgPa_QC_UPS2_01 S2 1 1 1 | |
201 20130510_EXQ1_IgPa_QC_UPS2_02 S2 2 1 1 | |
202 | |
134 | 203 |
135 **Outputs**: | 204 **Outputs**: |
136 | 205 |
137 - **QuantifiedProteins.tsv** - Lists protein accession and in the future will include gene and organism if the TSV contains it. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted-average of the intensities of the peptides assigned to the protein. The weights are determined by how well the peptide co-varies with the other peptides assigned to that protein. | 206 - **QuantifiedProteins.tsv** - Lists protein accession and in the future will include gene and organism if the TSV contains it. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted-average of the intensities of the peptides assigned to the protein. The weights are determined by how well the peptide co-varies with the other peptides assigned to that protein. |
138 | 207 |
141 - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs. | 210 - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs. |
142 | 211 |
143 - **Log.txt** - Log of the FlashLFQ run. | 212 - **Log.txt** - Log of the FlashLFQ run. |
144 | 213 |
145 | 214 |
215 .. _FlashLFQ: https://github.com/smith-chem-wisc/FlashLFQ/wiki | |
216 .. _ExperimentalDesign: https://github.com/smith-chem-wisc/FlashLFQ/wiki/Experimental-Design | |
146 | 217 |
147 ]]></help> | 218 ]]></help> |
148 <citations> | 219 <citations> |
149 <citation type="doi">10.1021/acs.jproteome.7b00608</citation> | 220 <citation type="doi">10.1021/acs.jproteome.7b00608</citation> |
150 </citations> | 221 </citations> |