Mercurial > repos > galaxyp > calisp
comparison calisp.xml @ 1:867f17ede7f3 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
author | galaxyp |
---|---|
date | Thu, 14 Sep 2023 12:49:19 +0000 |
parents | 6d93529d19d4 |
children |
comparison
equal
deleted
inserted
replaced
0:6d93529d19d4 | 1:867f17ede7f3 |
---|---|
1 <tool id="calisp" name="calisp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> | 1 <tool id="calisp" name="calisp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> |
2 <description>Estimate isotopic composition of peptides from proteomics mass spectrometry data</description> | 2 <description>Estimate isotopic composition of peptides from proteomics mass spectrometry data</description> |
3 <macros> | 3 <macros> |
4 <token name="@TOOL_VERSION@">3.0.10</token> | 4 <token name="@TOOL_VERSION@">3.0.13</token> |
5 <token name="@VERSION_SUFFIX@">0</token> | 5 <token name="@VERSION_SUFFIX@">0</token> |
6 <token name="@CALISP_REPO@">https://raw.githubusercontent.com/kinestetika/Calisp/208d495674e2b52fe56cf23457c833d1c2527242</token> | 6 <token name="@CALISP_REPO@">https://raw.githubusercontent.com/kinestetika/Calisp/208d495674e2b52fe56cf23457c833d1c2527242</token> |
7 <xml name="input_macro" tokens="multiple"> | 7 <xml name="input_macro" tokens="multiple"> |
8 <!-- According to readme mzid input is not yet implemented --> | 8 <!-- According to readme mzid input is not yet implemented --> |
9 </xml> | 9 </xml> |
28 --output_file calisp-output/ | 28 --output_file calisp-output/ |
29 --mass_accuracy $mass_accuracy | 29 --mass_accuracy $mass_accuracy |
30 --bin_delimiter '$bin_delimiter' | 30 --bin_delimiter '$bin_delimiter' |
31 --threads "\${GALAXY_SLOTS:-1}" | 31 --threads "\${GALAXY_SLOTS:-1}" |
32 --isotope $isotope | 32 --isotope $isotope |
33 $compute_clumps && | 33 $compute_clumps |
34 '$__tool_directory__/feather2tsv.py' --calisp_output calisp-output/ | 34 #if $isotope_abundance_matrix |
35 --isotope_abundance_matrix '$isotope_abundance_matrix' | |
36 #end if | |
37 | |
38 #if $isotope_abundance_matrix | |
39 && ISOTOPE_ABUNDANCE_MATRIX="$isotope_abundance_matrix" | |
40 #else | |
41 && ISOTOPE_ABUNDANCE_MATRIX="\$(python -c 'import site; print(f"{site.getsitepackages()[0]}/calisp/isotope_matrix.txt")')" | |
42 #end if | |
43 | |
44 && python '$__tool_directory__/benchmarking.py' | |
45 --input calisp-output/ | |
46 --isotope_abundance_matrix "\$ISOTOPE_ABUNDANCE_MATRIX" | |
47 --isotope $isotope | |
48 #if $benchmark_cond.benchmark == 'yes' | |
49 --out_filtered '$filtered' | |
50 --out_summary '$summary' | |
51 #if $benchmark_cond.nominal_values | |
52 --nominal_values '$benchmark_cond.nominal_values' | |
53 #end if | |
54 #end if | |
35 ]]></command> | 55 ]]></command> |
36 <inputs> | 56 <inputs> |
37 <param argument="--spectrum_file" type="data" multiple="false" format="mzml" label="Spectrum file"/> | 57 <param argument="--spectrum_file" type="data" multiple="false" format="mzml" label="Spectrum file"/> |
38 <param argument="--peptide_file" type="data" multiple="false" format="tabular" label="Peptide file" help="Psm file" /> | 58 <param argument="--peptide_file" type="data" multiple="false" format="tabular" label="Peptide file" help="Psm file" /> |
39 <param argument="--mass_accuracy" type="float" value="10" label="Mass accuracy" help="The maximum mass difference between theoretical mass and experimental mass of a peptide" /> | 59 <param argument="--mass_accuracy" type="float" value="10" label="Mass accuracy" help="The maximum mass difference between theoretical mass and experimental mass of a peptide" /> |
57 <option value="33S">33S</option> | 77 <option value="33S">33S</option> |
58 <option value="34S">34S</option> | 78 <option value="34S">34S</option> |
59 <option value="36S">36S</option> | 79 <option value="36S">36S</option> |
60 </param> | 80 </param> |
61 <param argument="--compute_clumps" type="boolean" truevalue="--compute_clumps" falsevalue="" checked="false" label="Compute clumps" help="To compute clumpiness of carbon assimilation. Only use when samples are labeled tosaturation. Estimation of clumpiness takes much additional time." /> | 81 <param argument="--compute_clumps" type="boolean" truevalue="--compute_clumps" falsevalue="" checked="false" label="Compute clumps" help="To compute clumpiness of carbon assimilation. Only use when samples are labeled tosaturation. Estimation of clumpiness takes much additional time." /> |
82 <param argument="--isotope_abundance_matrix" type="data" format="tabular" optional="true" label="Custom isotope abundance matrix" help="If not given the built in matrix will be used" /> | |
83 <conditional name="benchmark_cond"> | |
84 <param name="benchmark" type="select" label="Run benchmarking"> | |
85 <option value="yes">Yes</option> | |
86 <option value="no">No</option> | |
87 </param> | |
88 <when value="yes"> | |
89 <param name="nominal_values" type="data" format="tabular" optional="true" label="Nominal values" help="A table containing ms_run and their nominal value (1, 5, or 10)"/> | |
90 </when> | |
91 <when value="no"/> | |
92 </conditional> | |
62 </inputs> | 93 </inputs> |
63 <outputs> | 94 <outputs> |
64 <collection name="output" type="list"> | 95 <collection name="output" type="list"> |
65 <discover_datasets pattern="(?P<designation>.*)\.tsv" format="tabular" directory="calisp-output"/> | 96 <discover_datasets pattern="(?P<designation>.*)\.tsv" format="tabular" directory="calisp-output"/> |
66 </collection> | 97 </collection> |
98 <data name="filtered" format="tabular" label="${tool.name} on ${on_string}: filtered"> | |
99 <filter>benchmark_cond['benchmark'] == 'yes'</filter> | |
100 </data> | |
101 <data name="summary" format="tabular" label="${tool.name} on ${on_string}: peptide summary"> | |
102 <filter>benchmark_cond['benchmark'] == 'yes'</filter> | |
103 </data> | |
67 </outputs> | 104 </outputs> |
68 <tests> | 105 <tests> |
69 <!-- TODO test data too large, available from here: https://github.com/kinestetika/Calisp/tree/master/test | 106 <!-- TODO test data too large, available from here: https://github.com/kinestetika/Calisp/tree/master/test |
70 if possible include via location in the future | 107 if possible include via location in the future --> |
71 <test expect_num_outputs="1"> | 108 <!-- <test expect_num_outputs="3"> |
72 <param name="spectrum_file" value="calisp_test_data.mzML" ftype="mzml"/> | 109 <param name="spectrum_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data.mzML" ftype="mzml"/> |
73 <param name="peptide_file" value="calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/> | 110 <param name="peptide_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/> |
111 <param name="benchmark" value="true"/> | |
74 <output_collection name="output" count="1"> | 112 <output_collection name="output" count="1"> |
75 <element name="calisp_test_data"> | 113 <element name="calisp_test_data"> |
76 <assert_contents> | 114 <assert_contents> |
77 <has_text text="experiment"/> | 115 <has_text text="experiment"/> |
78 <has_text text="MKH_260min_1800ng"/> | 116 <has_text text="MKH_260min_1800ng"/> |
79 <has_text text="HOMO"/> | 117 <has_text text="HOMO"/> |
80 <has_text text="P13645"/> | 118 <has_text text="P13645"/> |
81 <has_text text="NHEEEMKDLR"/> | 119 <has_text text="NHEEEMKDLR"/> |
82 <has_text text="Oxidation"/> | 120 <has_text text="Oxidation"/> |
83 <has_n_columns n="85"/> | 121 <has_n_columns n="84"/> |
84 <has_n_lines n="24"/> | 122 <has_n_lines n="24"/> |
85 </assert_contents> | 123 </assert_contents> |
86 </element> | 124 </element> |
87 </output_collection> | 125 </output_collection> |
88 </test> | 126 <output name="filtered" ftype="tabular"> |
89 --> | 127 <assert_contents> |
128 <has_n_lines n="24"/> | |
129 <has_n_columns n="87"/> | |
130 </assert_contents> | |
131 </output> | |
132 <output name="summary" value="summary.tsv" ftype="tabular"/> | |
133 </test> --> | |
134 | |
135 <!-- same test, but with isotope abundance matrix supplied by the user | |
136 (using the same as the built in => same results) | |
137 | |
138 TODO: test will only work with 23.1 tool-utils package available --> | |
139 <!-- <test expect_num_outputs="3"> | |
140 <param name="spectrum_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data.mzML" ftype="mzml"/> | |
141 <param name="peptide_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/> | |
142 <param name="isotope_abundance_matrix" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/src/calisp/isotope_matrix.txt" ftype="tabular"/> | |
143 <param name="benchmark" value="true"/> | |
144 <output_collection name="output" count="1"> | |
145 <element name="calisp_test_data"> | |
146 <assert_contents> | |
147 <has_text text="experiment"/> | |
148 <has_text text="MKH_260min_1800ng"/> | |
149 <has_text text="HOMO"/> | |
150 <has_text text="P13645"/> | |
151 <has_text text="NHEEEMKDLR"/> | |
152 <has_text text="Oxidation"/> | |
153 <has_n_columns n="84"/> | |
154 <has_n_lines n="24"/> | |
155 </assert_contents> | |
156 </element> | |
157 </output_collection> | |
158 <output name="filtered" ftype="tabular"> | |
159 <assert_contents> | |
160 <has_n_lines n="24"/> | |
161 <has_n_columns n="87"/> | |
162 </assert_contents> | |
163 </output> | |
164 <output name="summary" value="summary.tsv" ftype="tabular"/> | |
165 </test> --> | |
166 | |
167 <!-- test output filters for no benchmarking --> | |
168 <!-- <test expect_num_outputs="1"> | |
169 <param name="spectrum_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data.mzML" ftype="mzml"/> | |
170 <param name="peptide_file" location="https://raw.githubusercontent.com/kinestetika/Calisp/v@TOOL_VERSION@/test/calisp_test_data_TargetPeptideSpectrumMatch.txt" ftype="tabular"/> | |
171 <conditional name="benchmark_cond"> | |
172 <param name="benchmark" value="no"/> | |
173 </conditional> | |
174 <output_collection name="output" count="1"> | |
175 <element name="calisp_test_data"> | |
176 <assert_contents> | |
177 <has_text text="experiment"/> | |
178 <has_text text="MKH_260min_1800ng"/> | |
179 <has_text text="HOMO"/> | |
180 <has_text text="P13645"/> | |
181 <has_text text="NHEEEMKDLR"/> | |
182 <has_text text="Oxidation"/> | |
183 <has_n_columns n="84"/> | |
184 <has_n_lines n="24"/> | |
185 </assert_contents> | |
186 </element> | |
187 </output_collection> | |
188 </test> --> | |
90 </tests> | 189 </tests> |
91 <help><![CDATA[ | 190 <help><![CDATA[ |
92 Calisp (Calgary approach to isotopes in proteomics) is a program that estimates | 191 Calisp (Calgary approach to isotopes in proteomics) is a program that estimates |
93 isotopic composition (e.g. 13C/12C, delta13C, 15N/14N etc) of peptides from | 192 isotopic composition (e.g. 13C/12C, delta13C, 15N/14N etc) of peptides from |
94 proteomics mass spectrometry data. Input data consist of mzML files and files | 193 proteomics mass spectrometry data. Input data consist of mzML files and files |
163 pattern_precursor_id id of the ms1 spectrum that was the source of the pattern | 262 pattern_precursor_id id of the ms1 spectrum that was the source of the pattern |
164 pattern_total_intensity total intensity of the pattern | 263 pattern_total_intensity total intensity of the pattern |
165 pattern_peak_count # of peaks in the pattern | 264 pattern_peak_count # of peaks in the pattern |
166 pattern_median_peak_spacing median mass difference between a pattern's peaks | 265 pattern_median_peak_spacing median mass difference between a pattern's peaks |
167 spectrum_mass_irregularity a measure for the standard deviation in the mass difference between a pattern's peaks | 266 spectrum_mass_irregularity a measure for the standard deviation in the mass difference between a pattern's peaks |
168 ratio_na the estimated isotope ratio inferred from neutron abundance (sip experiments) | 267 ratio_na the estimated isotope ratio (in percent) inferred from neutron abundance (sip experiments) |
169 ratio_fft the estimated isotope ratio inferred by the fft method (natural isotope abundances) | 268 ratio_fft the estimated isotope ratio (in percent) inferred by the fft method (natural isotope abundances) |
170 error_fft the remaining error after fitting the pattern with fft | 269 error_fft the remaining error after fitting the pattern with fft |
171 error_clumpy the remaining error after fitting the pattern with the clumpy carbon method | 270 error_clumpy the remaining error after fitting the pattern with the clumpy carbon method |
172 flag_peptide_contains_sulfur true if peptide contains sulfur | 271 flag_peptide_contains_sulfur true if peptide contains sulfur |
173 flag_peptide_has_modifications true if peptide has no modifications | 272 flag_peptide_has_modifications true if peptide has no modifications |
174 flag_peptide_assigned_to_multiple_bins true if peptide is associated with multiple proteins from different bins/mags | 273 flag_peptide_assigned_to_multiple_bins true if peptide is associated with multiple proteins from different bins/mags |
181 flag_peak_at_minus_one_pos true if a peak was detected immediately before the monoisotopic peak, could indicate overlap with another pattern | 280 flag_peak_at_minus_one_pos true if a peak was detected immediately before the monoisotopic peak, could indicate overlap with another pattern |
182 i0 - i19 the intensities of the first 20 peaks of the pattern | 281 i0 - i19 the intensities of the first 20 peaks of the pattern |
183 m0 - m19 the masses of the first 20 peaks of the pattern | 282 m0 - m19 the masses of the first 20 peaks of the pattern |
184 c1 - c6 contributions of clumps of 1-6 carbon to ratio_na. These are the outcomes of the clumpy carbon model. These results are only meaningful if the biomass was labeled to saturation. | 283 c1 - c6 contributions of clumps of 1-6 carbon to ratio_na. These are the outcomes of the clumpy carbon model. These results are only meaningful if the biomass was labeled to saturation. |
185 ========================================== =================== | 284 ========================================== =================== |
285 | |
286 Benchmarking | |
287 ============ | |
288 | |
289 If the user chooses to run the additional benchmarking script two additional | |
290 outputs are created as follows. | |
291 | |
292 Load data: | |
293 ---------- | |
294 | |
295 - Concatenate calisp result tables | |
296 - add column ``delta_na`` = 1000 * ``ratio_na`` / (1/factor-2) | |
297 - add column ``delta_fft`` = 1000 * ``ratio_fft`` / (1/factor-2) | |
298 | |
299 Filter data: | |
300 ------------ | |
301 | |
302 Rows are removed for which any of the following criteria applies | |
303 | |
304 - flag_peak_at_minus_one_pos | |
305 - flag_pattern_is_wobbly | |
306 - flag_psm_has_low_confidence | |
307 - flag_psm_is_ambiguous | |
308 - flag_pattern_is_contaminated | |
309 - flag_peptide_assigned_to_multiple_bins | |
310 | |
311 Furthermore in the ``peptide`` column the strings ``"Oxidation"``, ``"Carbamidomethyl"``, | |
312 and text in brackets (i.e. ``[]``) preceded by any number of spaces | |
313 is removed. | |
314 | |
315 Benchmarking: | |
316 ------------- | |
317 | |
318 Iterate through all combinations of unique peptides, proteins, and samples | |
319 and output the following tabular information | |
320 | |
321 =================== =========================== | |
322 Column Content | |
323 =================== =========================== | |
324 file The name of the mzML spectrum file comprising the peptide | |
325 bin bin/mag ids, separated by commas. Calisp expects the protein ids in the psm file to consist of two parts, separated by a delimiter (_ by default). The first part is the bin/mag id, and the second part is the protein id | |
326 %label The label percentage (≠ 0 if labelled components used during experiments) | |
327 ratio The natural abundance ratio of the target element (C, H, N, O, S) | |
328 peptide The labeled peptides | |
329 psm_mz psm m over z | |
330 n(patterns) The number of times the same pattern has been repeated for the peptide | |
331 mean intensity The mean of the total intensity of the pattern | |
332 ratio_NA median The median of the estimated isotope ratio inferred from neutron abundance (sip experiments) | |
333 N mean The mean of the number of neutrons inferred from custom 'neutron' modifications | |
334 ratio_NA SEM The standard error of the mean of the estimated isotope ratio inferred from neutron abundance (sip experiments) | |
335 ratio_FFT median The median of the estimated isotope ratio inferred by the fft method (natural isotope abundances) | |
336 ratio_FFT SEM The standard error of the mean of the estimated isotope ratio inferred by the fft method (natural isotope abundances) | |
337 False Positive Any false positive indications | |
338 =================== =========================== | |
339 | |
340 Mean, median, and standard error values are computed for | |
341 all entries of this sample and that have the same peptide. | |
342 | |
343 **Isotope abundance matrix**: | |
344 | |
345 The isotope abundance matrix gives the background unlabeled fraction. | |
346 The default matrix implemented in calisp is given here: | |
347 https://github.com/kinestetika/Calisp/blob/v@TOOL_VERSION@/src/calisp/isotope_matrix.txt. | |
348 Columns specify the atom of interest and the rows the isotope, i.e. | |
349 rows 1-5 correspond to C, N, O, H, S. For instance | |
350 13C is in the 2nd column of the 1st row and 14C in the 3rd column | |
351 of the same row. | |
352 | |
353 **Benchmarking without nominal values**: | |
354 | |
355 If no nominal values, i.e. percentage of labeled atoms are given, | |
356 nominal values of 0 are assumed. | |
357 | |
358 The values in the `ratio` column are computed as `background_isotope / background_unlabelled * 100` | |
359 where `background_unlabelled` is taken from the isotope abundance matrix | |
360 according to the chosen target isotope. | |
361 Then `background_isotope` is given by `1 - background_unlabelled` | |
362 | |
363 All entries of the table are considered not false positive. | |
364 | |
365 **Benchmarking with nominal values**: | |
366 | |
367 The `%label` (the nominal value) of a sample is either 0 (the default), | |
368 1, 5, or 10 and can be provided for each sample by a tabular dataset | |
369 (column 1 should give the sample names and column 2 the nominal value). | |
370 | |
371 The `ratio = I / U * 100` is given by | |
372 `U = unlabeled_fraction * background_unlabelled` and | |
373 `I = nominal_value / 100 + unlabeled_fraction * background_isotope` | |
374 where | |
375 `unlabeled_fraction = 1 - nominal_value / 100` | |
376 `background_isotope = 1 - background_unlabelled` | |
377 and `background_unlabelled` is given by the isotope abundance matrix. | |
378 | |
379 A peptide is considered false positive if it's not a contaminant (at the moment only K12) | |
380 and the median of `ratio_na` values for the same peptide and sample | |
381 is greater than a threshold depending on the nominal value: | |
382 | |
383 "For false positive discovery rates we set the threshold at the | |
384 isotope/unlabelled associated with 1/4 of a generation of labeling. The E. | |
385 coli values (1.7, 4.2 and 7.1) are for 1 generation at 1, 5 and 10% label, and | |
386 we take the background (1.07) into account as well." | |
387 | |
388 ============= ========= | |
389 nominal value threshold | |
390 ============= ========= | |
391 1 `1.07 + (1.7 - 1.07) / 4` | |
392 5 `1.07 + (4.2 - 1.07) / 4` | |
393 10 `1.07 + (7.1 - 1.07) / 4` | |
394 ============= ========= | |
395 | |
396 File an issue at https://github.com/galaxyproteomics/tools-galaxyp/issues if | |
397 different contaminants or thresholds should be considered. | |
186 ]]></help> | 398 ]]></help> |
187 <citations> | 399 <citations> |
188 <citation type="doi">10.1186/s40168-022-01454-1</citation> | 400 <citation type="doi">10.1186/s40168-022-01454-1</citation> |
189 <citation type="doi">10.1073/pnas.1722325115</citation> | 401 <citation type="doi">10.1073/pnas.1722325115</citation> |
190 <citation type="doi">10.1101/2021.03.29.437612</citation> | |
191 <citation type="doi">10.1093/bioinformatics/bty046</citation> | 402 <citation type="doi">10.1093/bioinformatics/bty046</citation> |
192 </citations> | 403 </citations> |
193 </tool> | 404 </tool> |