comparison flashlfq.xml @ 4:908ab13490dc draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 047addefbac7468a92ba4fc04899bd5136c58ea4
author galaxyp
date Wed, 30 Jan 2019 05:39:17 -0500
parents ff1148892ce3
children cb346052c5de
comparison
equal deleted inserted replaced
3:ff1148892ce3 4:908ab13490dc
1 <tool id="flashlfq" name="FlashLFQ" version="0.1.100"> 1 <tool id="flashlfq" name="FlashLFQ" version="0.1.111">
2 <description>ultrafast label-free quantification for mass-spectrometry proteomics</description> 2 <description>ultrafast label-free quantification for mass-spectrometry proteomics</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.1.100">flashlfq</requirement> 4 <requirement type="package" version="0.1.111">flashlfq</requirement>
5 </requirements> 5 </requirements>
6 <command><![CDATA[ 6 <command><![CDATA[
7 #import re 7 #import re
8 #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv" 8 #set $idt_path = $re.sub('\s','_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv"
9 ln -s '${idt}' '${idt_path}' && 9 ln -s '${idt}' '${idt_path}' &&
10 mkdir spectrum_dir && 10 mkdir spectrum_dir &&
11 #for $peak_list in $peak_lists: 11 #for $peak_list in $peak_lists:
12 #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + ".mzML" 12 #set $input_name = $re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1]) + ".mzML"
13 ln -s '${peak_list}' 'spectrum_dir/${input_name}' && 13 ln -s '${peak_list}' 'spectrum_dir/${input_name}' &&
14 #end for 14 #end for
15 15 #if $normalize.nor == 'true':
16 #set $input_name = $re.sub('[.][^.]*$','',$experimental_design.display_name.split('/')[-1]) + ".tsv"
17 ln -s '${experimental_design}' 'spectrum_dir/${input_name}' &&
18 #end if
16 FlashLFQ 19 FlashLFQ
17 --idt '$idt_path' 20 --idt '$idt_path'
18 --rep spectrum_dir 21 --rep "./spectrum_dir"
19 --ppm $ppm 22 --ppm $ppm
20 --iso $iso 23 --iso $iso
21 --nis $nis 24 --nis $nis
25 #if $normalize.nor == 'true':
26 --nor true
27 #end if
22 #if $intensity == 'integrate': 28 #if $intensity == 'integrate':
23 --int true 29 --int true
24 #end if 30 #end if
25 #if $charge == 'precursor': 31 #if $charge == 'precursor':
26 --chg true 32 --chg true
27 #end if 33 #end if
28 $rmm $mbr 34 $rmm $mbr $pro
29 --pau false 35 --out . > logfile.txt
30 && cat *_FlashLFQ_Log.txt | sed 's/\(Analysis summary for:\).*working./\1 /' > '$log' 36 && cat logfile.txt | sed 's/\(Analysis summary for:\).*working./\1 /' > '$log'
31 && cp *_FlashLFQ_QuantifiedBaseSequences.tsv '$quantifiedBaseSequences' 37 && cp *_FlashLFQ_QuantifiedProteins.tsv '$quantifiedProteins'
32 && cp *_FlashLFQ_QuantifiedModifiedSequences.tsv '$quantifiedModifiedSequences' 38 && cp *_FlashLFQ_QuantifiedPeptides.tsv '$quantifiedPeptides'
33 && cp *_FlashLFQ_QuantifiedPeaks.tsv '$quantifiedPeaks' 39 && cp *_FlashLFQ_QuantifiedPeaks.tsv '$quantifiedPeaks'
34 ## create issue for FlashLFQ to name column headers correctly
35 && grep -v '^test$' *_FlashLFQ_QuantifiedProteins.tsv > '$quantifiedProteins'
36 ]]></command> 40 ]]></command>
41
37 <inputs> 42 <inputs>
38 <param name="idt" type="data" format="tabular" label="identification file" 43 <param name="idt" type="data" format="tabular" label="identification file"
39 help="MetaMorpheus,Morpheus"/> 44 help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/>
40 <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/> 45 <param name="peak_lists" type="data" format="mzml" multiple="true" label="spectrum files"/>
41 <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/> 46 <param name="ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/>
42 <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/> 47 <param name="iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/>
43 <param name="nis" type="integer" value="2" min="1" max="30" label="number of isotopes required to be observed"/> 48 <param name="nis" type="integer" value="2" min="1" max="30" label="number of isotopes required to be observed"/>
44 <param name="intensity" type="select" label="intensity"> 49 <param name="intensity" type="select" label="intensity">
47 </param> 52 </param>
48 <param name="charge" type="select" label="charge"> 53 <param name="charge" type="select" label="charge">
49 <option value="all" selected="true">use all identification detected charge states</option> 54 <option value="all" selected="true">use all identification detected charge states</option>
50 <option value="precursor">use precursor charge</option> 55 <option value="precursor">use precursor charge</option>
51 </param> 56 </param>
52 <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true" 57 <param name="rmm" type="boolean" truevalue="--rmm true" falsevalue="--rmm false" checked="true"
53 label="require observed monoisotopic mass peak"/> 58 label="require observed monoisotopic mass peak"/>
54 <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false" 59 <conditional name="normalize">
60 <param name="nor" type="select" label="normalize intensity results">
61 <option value="false">No</option>
62 <option value="true">Yes</option>
63 </param>
64 <when value="false"/>
65 <when value="true">
66 <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/>
67 </when>
68 </conditional>
69 <param name="mbr" type="boolean" truevalue="--mbr true" falsevalue="--mbr false" checked="false"
55 label="match between runs"/> 70 label="match between runs"/>
71 <param name="mrt" type="float" value="1.5" min="0" max="30" label="maximum MBR window in minutes"/>
72 <param name="pro" type="boolean" truevalue="--pro true" falsevalue="--pro false" checked="false"
73 label="advanced protein quantification"/>
56 </inputs> 74 </inputs>
75
57 <outputs> 76 <outputs>
58 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" /> 77 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" />
59 <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv"> 78 <data name="quantifiedPeaks" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv" />
60 <actions> 79 <data name="quantifiedPeptides" format="tabular" label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv" />
61 <action name="column_names" type="metadata" 80 <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv" />
62 default="File Name,Base Sequence,Full Sequence,Protein Group,Peptide Monoisotopic Mass,MS2 Retention Time,Precursor Charge,Theoretical MZ,Peak intensity,Peak RT Start,Peak RT Apex,Peak RT End,Peak MZ,Peak Charge,Num Charge States Observed,Peak Detection Type,PSMs Mapped,Base Sequences Mapped,Full Sequences Mapped,Peak Split Valley RT,Peak Apex Mass Error (ppm)"/> 81
63 </actions>
64 </data>
65 <data name="quantifiedBaseSequences" format="tabular" label="${tool.name} on ${on_string}: QuantifiedBaseSequences.tsv">
66 <actions>
67 <action name="column_names" type="metadata"
68 default="Sequence,Protein Group,${','.join(['Intensity_' + i.name for i in $peak_lists])},${','.join(['Detection Type_' + i.name for i in $peak_lists])}"/>
69 </actions>
70 </data>
71 <data name="quantifiedModifiedSequences" format="tabular" label="${tool.name} on ${on_string}: QuantifiedModifiedSequences.tsv">
72 <actions>
73 <action name="column_names" type="metadata"
74 default="Sequence,Protein Group,${','.join(['Intensity_' + i.name for i in $peak_lists])},${','.join(['Detection Type_' + i.name for i in $peak_lists])}"/>
75 </actions>
76 </data>
77 <data name="quantifiedProteins" format="tabular" label="${tool.name} on ${on_string}: QuantifiedProteins.tsv">
78 <actions>
79 <action name="column_names" type="metadata"
80 default="Protein,${','.join([i.name for i in $peak_lists])}"/>
81 </actions>
82 </data>
83 </outputs> 82 </outputs>
84 <tests> 83 <tests>
85 <test> 84 <test>
86 <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/> 85 <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/>
87 <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/> 86 <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/>
88 <param name="ppm" value="12"/> 87 <param name="ppm" value="12"/>
89 <param name="iso" value="6"/> 88 <param name="iso" value="6"/>
90 <output name="log"> 89 <output name="quantifiedPeaks">
91 <assert_contents> 90 <assert_contents>
92 <has_text text="ppmTolerance = 12" /> 91 <has_text text="EGFQVADGPLYR" />
93 <has_text text="isotopePpmTolerance = 6" />
94 </assert_contents> 92 </assert_contents>
95 </output> 93 </output>
96 </test> 94 </test>
97 </tests> 95 </tests>
98 <help><![CDATA[ 96 <help><![CDATA[
134 - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary 132 - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary
135 133
136 134
137 **Outputs**: 135 **Outputs**:
138 136
139 - **QuantifiedProteins.tsv** - Protein intensities are summed here within a run. 137 - **QuantifiedProteins.tsv** - Lists the protein accession and, in the future, will include gene and organism if the TSV contains them. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted average of the intensities of the peptides assigned to the protein. The weights are determined by how well each peptide co-varies with the other peptides assigned to that protein.
140 138
141 - **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as number of PSMs mapped, start/apex/end retention times, ppm error, etc., are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate more than one peptide mapped to that peak. 139 - **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as number of PSMs mapped, start/apex/end retention times, ppm error, etc., are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate more than one peptide mapped to that peak.
142 140
143 - **QuantifiedModifiedSequences.tsv** - Similar to QuantifiedBaseSequences, but instead of being summed by Base Sequence, peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs. 141 - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.
144 142
145 - **QuantifiedBaseSequences.tsv** - Peptide intensities are summed here within a run (including differently-modified forms of the same amino acid sequence) and displayed in a convenient format for comparing across runs. The identification type (MS/MS or MBR) is also indicated. A peptide with more than 30% of its intensity coming from ambiguous peak(s) is considered not quantifiable and is given an intensity of -1. 143 - **Log.txt** - Log of the FlashLFQ run.
146 144
147
148 - **Log.txt** - Log of the FlashLFQ run. Includes timestamps and quantification time for each file, total analysis time, directories used, and settings.
149 145
150 146
151 ]]></help> 147 ]]></help>
152 <citations> 148 <citations>
153 <citation type="doi">10.1021/acs.jproteome.7b00608</citation> 149 <citation type="doi">10.1021/acs.jproteome.7b00608</citation>