comparison calisp.xml @ 1:867f17ede7f3 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
author galaxyp
date Thu, 14 Sep 2023 12:49:19 +0000
parents 6d93529d19d4
children
comparison
equal deleted inserted replaced
0:6d93529d19d4 1:867f17ede7f3
1 <tool id="calisp" name="calisp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> 1 <tool id="calisp" name="calisp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>Estimate isotopic composition of peptides from proteomics mass spectrometry data</description> 2 <description>Estimate isotopic composition of peptides from proteomics mass spectrometry data</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">3.0.10</token> 4 <token name="@TOOL_VERSION@">3.0.13</token>
5 <token name="@VERSION_SUFFIX@">0</token> 5 <token name="@VERSION_SUFFIX@">0</token>
6 <token name="@CALISP_REPO@">https://raw.githubusercontent.com/kinestetika/Calisp/208d495674e2b52fe56cf23457c833d1c2527242</token> 6 <token name="@CALISP_REPO@">https://raw.githubusercontent.com/kinestetika/Calisp/208d495674e2b52fe56cf23457c833d1c2527242</token>
7 <xml name="input_macro" tokens="multiple"> 7 <xml name="input_macro" tokens="multiple">
8 <!-- According to readme mzid input is not yet implented --> 8 <!-- According to readme mzid input is not yet implented -->
9 </xml> 9 </xml>
28 --output_file calisp-output/ 28 --output_file calisp-output/
29 --mass_accuracy $mass_accuracy 29 --mass_accuracy $mass_accuracy
30 --bin_delimiter '$bin_delimiter' 30 --bin_delimiter '$bin_delimiter'
31 --threads "\${GALAXY_SLOTS:-1}" 31 --threads "\${GALAXY_SLOTS:-1}"
32 --isotope $isotope 32 --isotope $isotope
33 $compute_clumps && 33 $compute_clumps
34 '$__tool_directory__/feather2tsv.py' --calisp_output calisp-output/ 34 #if $isotope_abundance_matrix
35 --isotope_abundance_matrix '$isotope_abundance_matrix'
36 #end if
37
38 #if $isotope_abundance_matrix
39 && ISOTOPE_ABUNDANCE_MATRIX="$isotope_abundance_matrix"
40 #else
41 && ISOTOPE_ABUNDANCE_MATRIX="\$(python -c 'import site; print(f"{site.getsitepackages()[0]}/calisp/isotope_matrix.txt")')"
42 #end if
43
44 && python '$__tool_directory__/benchmarking.py'
45 --input calisp-output/
46 --isotope_abundance_matrix "\$ISOTOPE_ABUNDANCE_MATRIX"
47 --isotope $isotope
48 #if $benchmark_cond.benchmark == 'yes'
49 --out_filtered '$filtered'
50 --out_summary '$summary'
51 #if $benchmark_cond.nominal_values
52 --nominal_values '$benchmark_cond.nominal_values'
53 #end if
54 #end if
35 ]]></command> 55 ]]></command>
36 <inputs> 56 <inputs>
37 <param argument="--spectrum_file" type="data" multiple="false" format="mzml" label="Spectrum file"/> 57 <param argument="--spectrum_file" type="data" multiple="false" format="mzml" label="Spectrum file"/>
38 <param argument="--peptide_file" type="data" multiple="false" format="tabular" label="Peptide file" help="Psm file" /> 58 <param argument="--peptide_file" type="data" multiple="false" format="tabular" label="Peptide file" help="Psm file" />
39 <param argument="--mass_accuracy" type="float" value="10" label="Mass accuracy" help="The maximum mass difference between theoretical mass and experimental mass of a peptide" /> 59 <param argument="--mass_accuracy" type="float" value="10" label="Mass accuracy" help="The maximum mass difference between theoretical mass and experimental mass of a peptide" />
57 <option value="33S">33S</option> 77 <option value="33S">33S</option>
58 <option value="34S">34S</option> 78 <option value="34S">34S</option>
59 <option value="36S">36S</option> 79 <option value="36S">36S</option>
60 </param> 80 </param>
61 <param argument="--compute_clumps" type="boolean" truevalue="--compute_clumps" falsevalue="" checked="false" label="Compute clumps" help="To compute clumpiness of carbon assimilation. Only use when samples are labeled tosaturation. Estimation of clumpiness takes much additional time." /> 81 <param argument="--compute_clumps" type="boolean" truevalue="--compute_clumps" falsevalue="" checked="false" label="Compute clumps" help="To compute clumpiness of carbon assimilation. Only use when samples are labeled tosaturation. Estimation of clumpiness takes much additional time." />
82 <param argument="--isotope_abundance_matrix" type="data" format="tabular" optional="true" label="Custom isotope abundance matrix" help="If not given the built in matrix will be used" />
83 <conditional name="benchmark_cond">
84 <param name="benchmark" type="select" label="Run benchmarking">
85 <option value="yes">Yes</option>
86 <option value="no">No</option>
87 </param>
88 <when value="yes">
89 <param name="nominal_values" type="data" format="tabular" optional="true" label="Nominal values" help="A table containing ms_run and their nominal value (1, 5, or 10)"/>
90 </when>
91 <when value="no"/>
92 </conditional>
62 </inputs> 93 </inputs>
63 <outputs> 94 <outputs>
64 <collection name="output" type="list"> 95 <collection name="output" type="list">
65 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tsv" format="tabular" directory="calisp-output"/> 96 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tsv" format="tabular" directory="calisp-output"/>
66 </collection> 97 </collection>
98 <data name="filtered" format="tabular" label="${tool.name} on ${on_string}: filtered">
99 <filter>benchmark_cond['benchmark'] == 'yes'</filter>
100 </data>
101 <data name="summary" format="tabular" label="${tool.name} on ${on_string}: peptide summary">
102 <filter>benchmark_cond['benchmark'] == 'yes'</filter>
103 </data>
67 </outputs> 104 </outputs>
68 <tests> 105 <tests>
69 <!-- TODO test data to large, avilable from here: https://github.com/kinestetika/Calisp/tree/master/test 106 <!-- TODO test data to large, avilable from here: https://github.com/kinestetika/Calisp/tree/master/test
70 if possible inlcude via location in the future 107 if possible inlcude via location in the future -->
71 <test expect_num_outputs="1"> 108 <!-- <test expect_num_outputs="3">
72 <param name="spectrum_file" value="calisp_test_data.mzML" ftype="mzml"/> 109 <param name="spectrum_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data.mzML" ftype="mzml"/>
73 <param name="peptide_file" value="calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/> 110 <param name="peptide_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/>
111 <param name="benchmark" value="true"/>
74 <output_collection name="output" count="1"> 112 <output_collection name="output" count="1">
75 <element name="calisp_test_data"> 113 <element name="calisp_test_data">
76 <assert_contents> 114 <assert_contents>
77 <has_text text="experiment"/> 115 <has_text text="experiment"/>
78 <has_text text="MKH_260min_1800ng"/> 116 <has_text text="MKH_260min_1800ng"/>
79 <has_text text="HOMO"/> 117 <has_text text="HOMO"/>
80 <has_text text="P13645"/> 118 <has_text text="P13645"/>
81 <has_text text="NHEEEMKDLR"/> 119 <has_text text="NHEEEMKDLR"/>
82 <has_text text="Oxidation"/> 120 <has_text text="Oxidation"/>
83 <has_n_columns n="85"/> 121 <has_n_columns n="84"/>
84 <has_n_lines n="24"/> 122 <has_n_lines n="24"/>
85 </assert_contents> 123 </assert_contents>
86 </element> 124 </element>
87 </output_collection> 125 </output_collection>
88 </test> 126 <output name="filtered" ftype="tabular">
89 --> 127 <assert_contents>
128 <has_n_lines n="24"/>
129 <has_n_columns n="87"/>
130 </assert_contents>
131 </output>
132 <output name="summary" value="summary.tsv" ftype="tabular"/>
133 </test> -->
134
135 <!-- same test, but with isotope abundance matrix supplied by the user
136 (using the same as the built in => same results)
137
138 TODO: test will only work with 23.1 tool-utils package available -->
139 <!-- <test expect_num_outputs="3">
140 <param name="spectrum_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data.mzML" ftype="mzml"/>
141 <param name="peptide_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/>
142 <param name="isotope_abundance_matrix" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/src/calisp/isotope_matrix.txt" ftype="tabular"/>
143 <param name="benchmark" value="true"/>
144 <output_collection name="output" count="1">
145 <element name="calisp_test_data">
146 <assert_contents>
147 <has_text text="experiment"/>
148 <has_text text="MKH_260min_1800ng"/>
149 <has_text text="HOMO"/>
150 <has_text text="P13645"/>
151 <has_text text="NHEEEMKDLR"/>
152 <has_text text="Oxidation"/>
153 <has_n_columns n="84"/>
154 <has_n_lines n="24"/>
155 </assert_contents>
156 </element>
157 </output_collection>
158 <output name="filtered" ftype="tabular">
159 <assert_contents>
160 <has_n_lines n="24"/>
161 <has_n_columns n="87"/>
162 </assert_contents>
163 </output>
164 <output name="summary" value="summary.tsv" ftype="tabular"/>
165 </test> -->
166
167 <!-- test output filters for no benchmarking -->
168 <!-- <test expect_num_outputs="1">
169 <param name="spectrum_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data.mzML" ftype="mzml"/>
170 <param name="peptide_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/>
171 <conditional name="benchmark_cond">
172 <param name="benchmark" value="no"/>
173 </conditional>
174 <output_collection name="output" count="1">
175 <element name="calisp_test_data">
176 <assert_contents>
177 <has_text text="experiment"/>
178 <has_text text="MKH_260min_1800ng"/>
179 <has_text text="HOMO"/>
180 <has_text text="P13645"/>
181 <has_text text="NHEEEMKDLR"/>
182 <has_text text="Oxidation"/>
183 <has_n_columns n="84"/>
184 <has_n_lines n="24"/>
185 </assert_contents>
186 </element>
187 </output_collection>
188 </test> -->
90 </tests> 189 </tests>
91 <help><![CDATA[ 190 <help><![CDATA[
92 Calisp (Calgary approach to isotopes in proteomics) is a program that estimates 191 Calisp (Calgary approach to isotopes in proteomics) is a program that estimates
93 isotopic composition (e.g. 13C/12C, delta13C, 15N/14N etc) of peptides from 192 isotopic composition (e.g. 13C/12C, delta13C, 15N/14N etc) of peptides from
94 proteomics mass spectrometry data. Input data consist of mzML files and files 193 proteomics mass spectrometry data. Input data consist of mzML files and files
163 pattern_precursor_id id of the ms1 spectrum that was the source of the pattern 262 pattern_precursor_id id of the ms1 spectrum that was the source of the pattern
164 pattern_total_intensity total intensity of the pattern 263 pattern_total_intensity total intensity of the pattern
165 pattern_peak_count # of peaks in the pattern 264 pattern_peak_count # of peaks in the pattern
166 pattern_median_peak_spacing medium mass difference between a pattern's peaks 265 pattern_median_peak_spacing medium mass difference between a pattern's peaks
167 spectrum_mass_irregularity a measure for the standard deviation in the mass difference between a pattern's peaks 266 spectrum_mass_irregularity a measure for the standard deviation in the mass difference between a pattern's peaks
168 ratio_na the estimated isotope ratio inferred from neutron abundance (sip experiments) 267 ratio_na the estimated isotope ratio (in percent) inferred from neutron abundance (sip experiments)
169 ratio_fft the estimated isotope ratio inferred by the fft method (natural isotope abundances) 268 ratio_fft the estimated isotope ratio (in percent) inferred by the fft method (natural isotope abundances)
170 error_fft the remaining error after fitting the pattern with fft 269 error_fft the remaining error after fitting the pattern with fft
171 error_clumpy the remaining error after fitting the pattern with the clumpy carbon method 270 error_clumpy the remaining error after fitting the pattern with the clumpy carbon method
172 flag_peptide_contains_sulfur true if peptide contains sulfur 271 flag_peptide_contains_sulfur true if peptide contains sulfur
173 flag_peptide_has_modifications true if peptide has no modifications 272 flag_peptide_has_modifications true if peptide has no modifications
174 flag_peptide_assigned_to_multiple_bins true if peptide is associated with multiple proteins from different bins/mags 273 flag_peptide_assigned_to_multiple_bins true if peptide is associated with multiple proteins from different bins/mags
181 flag_peak_at_minus_one_pos true if a peak was detected immediately before the monoisotopic peak, could indicate overlap with another pattern 280 flag_peak_at_minus_one_pos true if a peak was detected immediately before the monoisotopic peak, could indicate overlap with another pattern
182 i0 - i19 the intensities of the first 20 peaks of the pattern 281 i0 - i19 the intensities of the first 20 peaks of the pattern
183 m0 - m19 the masses of the first 20 peaks of the pattern 282 m0 - m19 the masses of the first 20 peaks of the pattern
184 c1 - c6 contributions of clumps of 1-6 carbon to ratio_na. These are the outcomes of the clumpy carbon model. These results are only meaningful if the biomass was labeled to saturation. 283 c1 - c6 contributions of clumps of 1-6 carbon to ratio_na. These are the outcomes of the clumpy carbon model. These results are only meaningful if the biomass was labeled to saturation.
185 ========================================== =================== 284 ========================================== ===================
285
286 Benchmarking
287 ============
288
289 If the user chooses to run the additional benchmarking script two additional
290 outputs are created as follows.
291
292 Load data:
293 ----------
294
295 - Concatenate calisp result tables
296 - add column ``delta_na`` = 1000 * ``ratio_na`` / (1/factor-2)
297 - add column ``delta_fft`` = 1000 * ``ratio_fft`` / (1/factor-2)
298
299 Filter data:
300 ------------
301
302 Rows are removed for which any of the following criteria applies
303
304 - flag_peak_at_minus_one_pos
305 - flag_pattern_is_wobbly
306 - flag_psm_has_low_confidence
307 - flag_psm_is_ambiguous
308 - flag_pattern_is_contaminated
309 - flag_peptide_assigned_to_multiple_bins
310
311 Furthermore in the ``peptide`` column the strings ``"Oxidation"``, ``"Carbamidomethyl"``,
312 and text in brackets (i.e. ``[]``) preceded by any number of spaces
313 is removed.
314
315 Benchmarking:
316 -------------
317
318 Iterate through all combinations of unique peptides, proteins, and samples
319 and output the following tabular information
320
321 =================== ===========================
322 Column Content
323 =================== ===========================
324 file The name of the mzML spectrum file comprising the peptide
325 bin bin/mag ids, separated by commas. Calisp expects the protein ids in the psm file to consist of two parts, separated by a delimiter (_ by default). The first part is the bin/mag id, and the second part is the protein id
326 %label The label percentage (≠ 0 if labelled components used during experiments)
327 ratio The natural abundance ratio of the target element (C, H, N, O, S)
328 peptide The labeled peptides
329 psm_mz psm m over z
330 n(patterns) The number of iterations of the same pattern for the peptides has been repeated
331 mean intensity The mean of the total intensity of the pattern
332 ratio_NA median The median of the estimated isotope ratio inferred from neutron abundance (sip experiments)
333 N mean The mean of the number of neutrons inferred from custom 'neutron' modifications
334 ratio_NA SEM The standard error of the mean of the estimated isotope ratio inferred from neutron abundance (sip experiments)
335 ratio_FFT median The median of the estimated isotope ratio inferred by the fft method (natural isotope abundances)
336 ratio_FFT SEM The standard error of the mean of the estimated isotope ratio inferred by the fft method (natural isotope abundances)
337 False Positive Any false positive indications
338 =================== ===========================
339
340 Mean, median, and standard error values are computed for
341 all entries of this sample and that have the same peptide.
342
343 **Isotope abundance matrix**:
344
345 The isotope abundance matrix gives the background unlabeled fraction.
346 The default matrix implemented in calisp is given here:
347 https://github.com/kinestetika/Calisp/blob/v@TOOL_VERSION@/src/calisp/isotope_matrix.txt.
348 Rows specify the atom of interest and the columns the isotope, i.e.
349 rows 1-5 correspond to C, N, O, H, S. For instance
350 13C is in the 2nd column of the 1st row and 14C in the 3rd column
351 of the same row.
352
353 **Benchmarking without nominal values**:
354
355 If no nominal values, i.e. percentages of labeled atoms, are given,
356 nominal values of 0 are assumed.
357
358 The values in the `ratio` column are computed as `background_isotope / background_unlabelled * 100`
359 where `background_unlabelled` is taken from the isotope abundance matrix
360 according to the chosen target isotope.
361 Then `background_isotope` is given by `1 - background_unlabelled`
362
363 All entries of the table are considered not false positive.
364
365 **Benchmarking with nominal values**:
366
367 The `%label` (the nominal value) of a sample is either 0 (the default),
368 1, 5, or 10 and can be provided for each sample by a tabular dataset
369 (column 1 should give the sample names and column 2 the nominal value).
370
371 The `ratio = I / U * 100` is given by
372 `U = unlabeled_fraction * background_unlabelled` and
373 `I = nominal_value / 100 + unlabeled_fraction * background_isotope`
374 where
375 `unlabeled_fraction = 1 - nominal_value / 100`
376 `background_isotope = 1 - background_unlabelled`
377 and `background_unlabelled` is given by the isotope abundance matrix.
378
379 A peptide is considered false positive if it's not a contaminant (at the moment only K12)
380 and the median of `ratio_na` values for the same peptide and sample
381 is greater than a threshold depending on the nominal value:
382
383 "For false positive discovery rates we set the threshold at the
384 isotope/unlabelled associated with 1/4 of a generation of labeling. The E.
385 coli values (1.7, 4.2 and 7.1) are for 1 generation at 1, 5 and 10% label, and
386 we take the background (1.07) into account as well."
387
388 ============= =========
389 nominal value threshold
390 ============= =========
391 1 `1.07 + (1.7 - 1.07) / 4`
392 5 `1.07 + (4.2 - 1.07) / 4`
393 10 `1.07 + (7.1 - 1.07) / 4`
394 ============= =========
395
396 File an issue at https://github.com/galaxyproteomics/tools-galaxyp/issues if
397 different contaminants or thresholds should be considered.
186 ]]></help> 398 ]]></help>
187 <citations> 399 <citations>
188 <citation type="doi">10.1186/s40168-022-01454-1</citation> 400 <citation type="doi">10.1186/s40168-022-01454-1</citation>
189 <citation type="doi">10.1073/pnas.1722325115</citation> 401 <citation type="doi">10.1073/pnas.1722325115</citation>
190 <citation type="doi">10.1101/2021.03.29.437612</citation>
191 <citation type="doi">10.1093/bioinformatics/bty046</citation> 402 <citation type="doi">10.1093/bioinformatics/bty046</citation>
192 </citations> 403 </citations>
193 </tool> 404 </tool>