changeset 0:84e4b5d4b7ad draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
author galaxyp
date Fri, 15 Jan 2021 15:58:54 +0000
parents
children a475c1906e0b
files mztab2tsv.xml mztab_reader.py test-data/1.mztab test-data/2.mztab
diffstat 4 files changed, 357 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mztab2tsv.xml	Fri Jan 15 15:58:54 2021 +0000
@@ -0,0 +1,98 @@
+<tool id="mztab2tsv" name="pyteomics" version="@TOOL_VERSION@" profile="20.01" license="MIT">
+    <description>convert mztab to tabular</description>
+    <macros>
+        <token name="@TOOL_VERSION@">4.4.1</token>
+        <xml name="output" token_type="" token_label="">
+            <data name="out_@TYPE@" format="tabular" from_work_dir="@TYPE@.tsv" label="${tool.name} on ${on_string}: @LABEL@">
+                <filter>"@TYPE@" in out_select</filter>
+            </data>
+        </xml>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">pyteomics</xref>
+    </xrefs>
+    <edam_topics>
+        <edam_topic>topic_0121</edam_topic><!-- proteomics -->
+        <edam_topic>topic_3520</edam_topic><!-- proteomics experiment-->
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_3434</edam_operation><!--	Convert a data set from one form to another -->
+    </edam_operations>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">pyteomics</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+    ## make sure that selected outputs exist even if the tool does not generate them
+    #for o in $out_select
+        touch '$o'.tsv &&
+    #end for
+    python '$__tool_directory__/mztab_reader.py' --path_in '$path_in'
+    ]]></command>
+    <inputs>
+        <param argument="--path_in" type="data" format="mztab,mztab2" label="mztab or mztab2 data set" help="" />
+        <param name="out_select" type="select" label="Select desired tables" multiple="true" help="">
+            <option value="mtd" selected="true">Metadata (v1,v2)</option>
+            <option value="prt">Protein table (v1)</option>
+            <option value="pep">Peptide table (v1)</option>
+            <option value="psm">Peptide spectrum match table (v1)</option>
+            <option value="sml">Small molecule table (v1,v2)</option>
+            <option value="smf">Small molecule feature table (v2)</option>
+            <option value="sme">Small molecule evidence table (v2)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <expand macro="output" type="mtd" label="Metadata"/>
+        <expand macro="output" type="prt" label="Proteins"/>
+        <expand macro="output" type="pep" label="Peptides"/>
+        <expand macro="output" type="psm" label="Peptide spectrum matches"/>
+        <expand macro="output" type="sml" label="Small molecules"/>
+        <expand macro="output" type="smf" label="Small molecule feature"/>
+        <expand macro="output" type="sme" label="Small molecule evidence"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="5">
+            <param name="path_in" ftype="mztab" value="1.mztab"/>
+            <param name="out_select" value="mtd,prt,pep,psm,sml"/>
+            <output name="out_mtd" ftype="tabular">
+                <assert_contents><has_text text="mzTab-version"/></assert_contents>
+            </output>
+            <output name="out_prt" ftype="tabular">
+                <assert_contents><has_text text="accession"/></assert_contents>
+            </output>
+            <output name="out_pep" ftype="tabular">
+                <assert_contents><has_text text="&quot;&quot;"/></assert_contents>
+            </output>
+            <output name="out_psm" ftype="tabular">
+                <assert_contents><has_text text="PSM_ID"/></assert_contents>
+            </output>
+            <output name="out_sml" ftype="tabular">
+                <assert_contents><has_text text="&quot;&quot;"/></assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="path_in" ftype="mztab" value="2.mztab"/>
+            <param name="out_select" value="mtd,sml,smf,sme"/>
+            <output name="out_mtd" ftype="tabular">
+                <assert_contents><has_text text="mzTab-version"/></assert_contents>
+            </output>
+            <output name="out_sml" ftype="tabular">
+                <assert_contents><has_text text="SML_ID"/></assert_contents>
+            </output>
+            <output name="out_smf" ftype="tabular">
+                <assert_contents><has_text text="SMF_ID"/></assert_contents>
+            </output>
+            <output name="out_sme" ftype="tabular">
+                <assert_contents><has_text text="SME_ID"/></assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Convert an mztab or mztab2 file to tabular files (one per selected table) using the pyteomics library https://pyteomics.readthedocs.io/en/latest/.
+
+        The specifications of the mztab and mztab2 formats can be found at https://github.com/HUPO-PSI/mzTab/.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1007/s13361-012-0516-6</citation>
+        <citation type="doi">10.1021/acs.jproteome.8b00717</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mztab_reader.py	Fri Jan 15 15:58:54 2021 +0000
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+
+import pandas as pd
+from pyteomics.mztab import MzTab
+
+
+def read_mztab(input_path, output_path):
+    """
+    Write the tables of the mztab file input_path as TSV files into output_path
+    """
+    mztab = MzTab(input_path)
+    writers = {'P': read_mztab_p, 'M': read_mztab_m}
+    if mztab.variant not in writers:  # fail loudly instead of writing nothing
+        raise ValueError("Unsupported mztab variant: {!r}".format(mztab.variant))
+    return writers[mztab.variant](mztab, output_path)
+
+
+def read_mztab_p(mztab, output_path):
+    """
+    Export a variant "P" mztab: the metadata, protein, peptide, spectrum
+    match and small molecule tables are each written as a tab separated
+    file (mtd, prt, pep, psm, sml) inside the folder output_path
+    """
+    def dump(table, filename):
+        table.to_csv(os.path.join(output_path, filename), sep="\t")
+
+    dump(pd.DataFrame.from_dict(mztab.metadata, orient='index'), "mtd.tsv")
+    dump(mztab.protein_table, "prt.tsv")
+    dump(mztab.peptide_table, "pep.tsv")
+    dump(mztab.spectrum_match_table, "psm.tsv")
+    dump(mztab.small_molecule_table, "sml.tsv")
+
+
+def read_mztab_m(mztab, output_path):
+    """
+    Export a variant "M" mztab: the metadata and the small molecule, feature
+    and evidence tables become mtd/sml/smf/sme .tsv files in output_path
+    """
+    def dump(table, filename):
+        table.to_csv(os.path.join(output_path, filename), sep="\t")
+
+    dump(pd.DataFrame.from_dict(mztab.metadata, orient='index'), "mtd.tsv")
+    dump(mztab.small_molecule_table, "sml.tsv")
+    dump(mztab.small_molecule_feature_table, "smf.tsv")
+    dump(mztab.small_molecule_evidence_table, "sme.tsv")
+
+
+if __name__ == "__main__":
+    # Build the command line interface
+    cli = argparse.ArgumentParser(description='List of paths')
+    # Mandatory input file
+    cli.add_argument(
+        '--path_in',
+        metavar='path', type=str, required=True,
+        help='the path of input .mztab file',
+    )
+    # Optional output folder, the current working directory unless given
+    cli.add_argument(
+        '--path_out',
+        metavar='path', type=str, default=os.getcwd(),
+        help='the path of folder for output .tsv file',
+    )
+
+    # Parse the command line and convert the file
+    options = cli.parse_args()
+    read_mztab(options.path_in, options.path_out)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.mztab	Fri Jan 15 15:58:54 2021 +0000
@@ -0,0 +1,104 @@
+COM	This	line	serves	as	a	size	and	separator	hint	for	spreadsheet	applications.	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
+COM	Report of a minimal "Complete Quantification report" label free experiment, quantification on 2 study variables (control/treatment), 3+3 assays (replicates) reported,identifications reported.
+MTD	mzTab-version	1.0.0
+MTD	mzTab-mode	Complete
+MTD	mzTab-type	Quantification
+MTD	description	mzTab example file for reporting a summary report of quantification data quantified on the protein level
+MTD	protein_search_engine_score[1]	[MS,MS:1001171,Mascot:score,]
+MTD	psm_search_engine_score[1]	[MS,MS:1001171,Mascot:score,]
+MTD	ms_run[1]-location	file://C:/path/to/my/file1.mzML
+MTD	ms_run[2]-location	file://C:/path/to/my/file2.mzML
+MTD	ms_run[3]-location	file://C:/path/to/my/file3.mzML
+MTD	ms_run[4]-location	file://C:/path/to/my/file4.mzML
+MTD	ms_run[5]-location	file://C:/path/to/my/file5.mzML
+MTD	ms_run[6]-location	file://C:/path/to/my/file6.mzML
+MTD	protein-quantification_unit	[PRIDE, PRIDE:0000393, Relative quantification unit,]
+MTD	software[1]	[MS, MS:1000752, TOPP software,]
+MTD	fixed_mod[1]	[UNIMOD, UNIMOD:4, Carbamidomethyl, ]
+MTD	variable_mod[1]	[UNIMOD, UNIMOD:35, Oxidation, ]
+MTD	quantification_method	[MS, MS:1002038, unlabeled sample, ]
+MTD	assay[1]-quantification_reagent	[MS, MS:1002038, unlabeled sample, ]
+MTD	assay[2]-quantification_reagent	[MS, MS:1002038, unlabeled sample, ]
+MTD	assay[3]-quantification_reagent	[MS, MS:1002038, unlabeled sample, ]
+MTD	assay[4]-quantification_reagent	[MS, MS:1002038, unlabeled sample, ]
+MTD	assay[5]-quantification_reagent	[MS, MS:1002038, unlabeled sample, ]
+MTD	assay[6]-quantification_reagent	[MS, MS:1002038, unlabeled sample, ]
+MTD	assay[1]-ms_run_ref	ms_run[1]
+MTD	assay[2]-ms_run_ref	ms_run[2]
+MTD	assay[3]-ms_run_ref	ms_run[3]
+MTD	assay[4]-ms_run_ref	ms_run[4]
+MTD	assay[5]-ms_run_ref	ms_run[5]
+MTD	assay[6]-ms_run_ref	ms_run[6]
+MTD	study_variable[1]-assay_refs	assay[1], assay[2], assay[3]
+MTD	study_variable[2]-assay_refs	assay[4], assay[5], assay[6]
+MTD	study_variable[1]-description	heat shock response of control
+MTD	study_variable[2]-description	heat shock response of treatment
+
+PRH	accession	description	taxid	species	database	database_version	search_engine	best_search_engine_score[1]	search_engine_score[1]_ms_run[1]	search_engine_score[1]_ms_run[2]	search_engine_score[1]_ms_run[3]	search_engine_score[1]_ms_run[4]	search_engine_score[1]_ms_run[5]	search_engine_score[1]_ms_run[6]	num_psms_ms_run[1]	num_psms_ms_run[2]	num_psms_ms_run[3]	num_psms_ms_run[4]	num_psms_ms_run[5]	num_psms_ms_run[6]	num_peptides_distinct_ms_run[1]	num_peptides_distinct_ms_run[2]	num_peptides_distinct_ms_run[3]	num_peptides_distinct_ms_run[4]	num_peptides_distinct_ms_run[5]	num_peptides_distinct_ms_run[6]	num_peptides_unique_ms_run[1]	num_peptides_unique_ms_run[2]	num_peptides_unique_ms_run[3]	num_peptides_unique_ms_run[4]	num_peptides_unique_ms_run[5]	num_peptides_unique_ms_run[6]	ambiguity_members	modifications	protein_coverage	protein_abundance_assay[1]	protein_abundance_assay[2]	protein_abundance_assay[3]	protein_abundance_assay[4]	protein_abundance_assay[5]	protein_abundance_assay[6]	protein_abundance_study_variable[1]	protein_abundance_stdev_study_variable[1]	protein_abundance_std_error_study_variable[1]	protein_abundance_study_variable[2]	protein_abundance_stdev_study_variable[2]	protein_abundance_std_error_study_variable[2]
+COM	Accession	Description	Taxonomie ID	Species	Database	Version	Search Engine	best Mascot score	Mascot score (HSPControlRep1)	Mascot score (HSPControlRep2)	Mascot score (HSPControlRep3)	Mascot score (HSPTreatmentRep1)	Mascot score (HSPTreatmentRep2)	Mascot score (HSPTreatmentRep3)	PSMs (HSPControlRep1)	PSMs (HSPControlRep2)	PSMs (HSPControlRep3)	PSMs (HSPTreatmentRep4)	PSMs (HSPTreatmentRep5)	PSMs (HSPTreatmentRep6)	Distinct Peptides (HSPControlRep1)	Distinct Peptides (HSPControlRep2)	Distinct Peptides (HSPControlRep3)	Distinct Peptides (HSPTreatmentRep4)	Distinct Peptides (HSPTreatmentRep5)	Distinct Peptides (HSPTreatmentRep6)	Unique Peptides (HSPControlRep1)	Unique Peptides (HSPControlRep2)	Unique Peptides (HSPControlRep3)	Unique Peptides (HSPTreatmentRep4)	Unique Peptides (HSPTreatmentRep5)	Unique Peptides (HSPTreatmentRep6)	Ambiguity Members	Modifications	Protein Coverage (fraction)	Abundance (HSPTreatmentRep1)	Abundance (HSPTreatmentRep2)	Abundance (HSPTreatmentRep3)	Abundance (HSPTreatmentRep4)	Abundance (HSPTreatmentRep5)	Abundance (HSPTreatmentRep6)	Abundance (HSPControl)	Standard Deviation (HSPControl)	Standard Error (HSPControl)	Abundance (HSPTreatment)	Standard Deviation (HSPTreatment)	Standard Error (HSPTreatment)
+PRT	P63017	Heat shock cognate 71 kDa protein	10090	Mus musculus	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	46	46	26	36	-3	-1	null	1	1	1	1	1	0	1	1	1	1	1	0	1	1	1	1	1	0	null	0	0.34	34.3	40.43507695	41.12124635	266.9554147	234.4	271.0324163	38.61877444	3.755870949	2.168453103	257.4626103	20.07656548	11.59121048
+PRT	P14602	Heat shock protein beta-1	10090	Mus musculus	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	100	100	-9	20	100	4	3	3	1	2	3	2	3	3	1	2	3	2	2	2	1	2	2	1	Q340U4,Q5K0U2,P8L901	0	0.12	98588.4	114212.9033	100070.7061	4709.411242	4345.7	6704.588342	104290.6698	8624.809914	4979.536326	5253.233195	1269.998146	733.2337713
+PRT	Q8K0U4	Heat shock 70 kDa protein 12A	10090	Mus musculus	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	120	120	null	-2	39	36	-7	1	0	1	1	1	1	1	0	1	1	1	1	1	0	1	1	1	1	null	0	0.14	43.4	86.09123822	54.98032306	459.4934179	375.5	609.3477328	61.49052043	22.0776461	12.74653492	481.4470502	118.4595375	68.39264584
+PRT	Q61699	Heat shock protein 105 kDa	10090	Mus musculus	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	36	30	31	36	-2	31	24	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	null	0	0.08	3432.54	3847.349077	3838.448278	11372.30364	9587.5	10303.56594	3706.112452	236.9624883	136.8103564	10421.12319	898.190283	518.5704017
+PRT	P07901	Heat shock protein HSP 90-alpha	10090	Mus musculus	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	45	45	6	-2	35	8	3	4	3	4	3	2	4	4	3	4	3	2	4	4	3	4	3	2	4	null	12-UNIMOD:35, 98-UNIMOD:35,727-UNIMOD:35	0.21	3242354.3	3284123.069	3404460.592	633072.591	552426.4	618457.6276	3310312.654	84166.6994	48593.66656	601318.8729	42968.06623	24807.6246
+
+PSH	sequence	PSM_ID	accession	unique	database	database_version	search_engine	search_engine_score[1]	modifications	spectra_ref	retention_time	charge	exp_mass_to_charge	calc_mass_to_charge	pre	post	start	end
+COM	Sequence	PSM identifier	accession	Unqiue	Database	Database Version	Search Engine	Mascot score	Modifications	Spectra Reference	Retention Time	Charge	Experimental m/z	Calculated m/z	Pre	Post	Start	End
+PSM	QTQTFTTYSDNQPGVL	1	P63017	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	46	null	ms_run[1]:scan=1296	1336.62	3	600.6006697	600.6197	K	I	424	439
+PSM	AVVNGYSASDTVGAGFAQAK	2	Q8K0U4	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	120	null	ms_run[1]:scan=1300	1327.08	2	956.9464833	956.9736	K	E	261	281
+PSM	ALLRLHQECEKLK	3	Q61699	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	30	9-UNIMOD:4	ms_run[1]:scan=845	885.62	3	527.6406579	527.6362	R	K	262	274
+PSM	DWYPAHSR	4	P14602	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[1]:scan=544	571.08	2	516.21	516.2383	R	L	21	28
+PSM	DWYPAHSR	4	Q340U4	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[1]:scan=544	571.08	2	516.21	516.2383	K	E	143	150
+PSM	DWYPAHSR	4	P16627	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[1]:scan=544	571.08	2	516.21	516.2383	R	M	240	247
+PSM	MNQSNASPTLDGLFR	5	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	14	null	ms_run[1]:scan=1155	1195.62	3	550.9282794	550.935	-	R	1	15
+PSM	LWPFQVINEAGKPK	6	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	45	null	ms_run[1]:scan=1064	1104.62	3	542.9688356	542.9716	K	V	91	104
+PSM	MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	7	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	45	null	ms_run[1]:scan=2849	2876.08	2	1974.400793	1974.3984	R	M	692	728
+PSM	LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	8	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	120	23-UNIMOD:35	ms_run[1]:scan=2584	2611.08	2	1788.281997	1788.2886	K	M	695	728
+PSM	TLTIVDTGIGMTK	9	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	76	11-UNIMOD:35	ms_run[1]:scan=1092	1132.62	3	450.5920214	450.583	R	A	88	100
+PSM	MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK	10	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	87	0-UNIMOD:35	ms_run[1]:scan=3157	3184.08	2	2405.587318	2405.6084	-	E	1	41
+PSM	QTQTFTTYSDNQPGVL	11	P63017	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	26	null	ms_run[2]:scan=1530	1336.62	3	600.6265518	600.6197	K	I	424	439
+PSM	ALLRLHQECEKLK	12	Q61699	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	36	9-UNIMOD:4	ms_run[2]:scan=1079	885.62	3	527.6362432	527.6362	R	K	262	274
+PSM	DWYPAHSR	13	P14602	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[2]:scan=778	571.08	2	516.21	516.2383	R	L	21	28
+PSM	DWYPAHSR	13	Q340U4	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[2]:scan=778	571.08	2	516.21	516.2383	K	E	143	150
+PSM	DWYPAHSR	13	P16627	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[2]:scan=778	571.08	2	516.21	516.2383	R	M	240	247
+PSM	MNQSNASPTLDGLFR	14	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	40	null	ms_run[2]:scan=1389	1195.62	3	550.9468571	550.935	-	R	1	15
+PSM	LWPFQVINEAGKPK	15	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	16	null	ms_run[2]:scan=1298	1104.62	3	542.9666503	542.9716	K	V	91	104
+PSM	MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	16	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	6	null	ms_run[2]:scan=3083	2876.08	2	1974.399035	1974.3984	R	M	692	728
+PSM	TLTIVDTGIGMTK	17	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	21	null	ms_run[2]:scan=1326	1132.62	3	450.5400013	450.583	R	A	88	100
+PSM	MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK	18	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-5	null	ms_run[2]:scan=3391	3184.08	2	2405.599817	2405.6084	-	E	1	41
+PSM	QTQTFTTYSDNQPGVL	19	P63017	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	36	null	ms_run[3]:scan=1062	1336.62	3	600.6484541	600.6197	K	I	424	439
+PSM	AVVNGYSASDTVGAGFAQAK	20	Q8K0U4	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-2	null	ms_run[3]:scan=1066	1327.08	2	956.9766608	956.9736	K	E	261	281
+PSM	ALLRLHQECEKLK	21	Q61699	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	36	9-UNIMOD:4	ms_run[3]:scan=611	885.62	3	527.6486368	527.6362	R	K	262	274
+PSM	MNQSNASPTLDGLFR	22	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-9	null	ms_run[3]:scan=921	1195.62	3	550.9336303	550.935	-	R	1	15
+PSM	MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	23	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-2	null	ms_run[3]:scan=2615	2876.08	2	1974.392219	1974.3984	R	M	692	728
+PSM	LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	24	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-3	23-UNIMOD:35	ms_run[3]:scan=2350	2611.08	2	1788.28771	1788.2886	K	M	695	728
+PSM	TLTIVDTGIGMTK	25	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	37	null	ms_run[3]:scan=858	1132.62	3	450.5960917	450.583	R	A	88	100
+PSM	MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK	26	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	6	12-UNIMOD:35	ms_run[3]:scan=2923	3184.08	2	2405.604605	2405.6084	-	E	1	41
+PSM	QTQTFTTYSDNQPGVL	27	P63017	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-3	null	ms_run[4]:scan=2731	1336.62	3	600.6123009	600.6197	K	I	424	439
+PSM	AVVNGYSASDTVGAGFAQAK	28	Q8K0U4	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	39	null	ms_run[4]:scan=2735	1327.08	2	956.9765302	956.9736	K	E	261	281
+PSM	ALLRLHQECEKLK	29	Q61699	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-2	9-UNIMOD:4	ms_run[4]:scan=2280	885.62	3	527.6343404	527.6362	R	K	262	274
+PSM	MNQSNASPTLDGLFR	30	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	20	null	ms_run[4]:scan=2590	1195.62	3	550.9284574	550.935	-	R	1	15
+PSM	LWPFQVINEAGKPK	31	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-3	null	ms_run[4]:scan=2499	1104.62	3	542.9715699	542.9716	K	V	91	104
+PSM	MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	32	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	35	null	ms_run[4]:scan=4284	2876.08	2	1974.40429	1974.3984	R	M	692	728
+PSM	LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	33	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	0	23-UNIMOD:35	ms_run[4]:scan=4019	2611.08	2	1788.289062	1788.2886	K	M	695	728
+PSM	MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK	34	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	11	0-UNIMOD:35	ms_run[4]:scan=4592	3184.08	2	2405.57421	2405.6084	-	E	1	41
+PSM	QTQTFTTYSDNQPGVL	35	P63017	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-1	null	ms_run[5]:scan=2031	1336.62	3	600.5900228	600.6197	K	I	424	439
+PSM	AVVNGYSASDTVGAGFAQAK	36	Q8K0U4	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	36	null	ms_run[5]:scan=2035	1327.08	2	956.9477197	956.9736	K	E	261	281
+PSM	ALLRLHQECEKLK	37	Q61699	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	31	9-UNIMOD:4	ms_run[5]:scan=1580	885.62	3	527.6254449	527.6362	R	K	262	274
+PSM	DWYPAHSR	38	P14602	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[5]:scan=1279	571.08	2	516.21	516.2383	R	L	21	28
+PSM	DWYPAHSR	38	Q340U4	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[5]:scan=1279	571.08	2	516.21	516.2383	K	E	143	150
+PSM	DWYPAHSR	38	P16627	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	100	null	ms_run[5]:scan=1279	571.08	2	516.21	516.2383	R	M	240	247
+PSM	MNQSNASPTLDGLFR	39	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	32	null	ms_run[5]:scan=1890	1195.62	3	550.9120992	550.935	-	R	1	15
+PSM	LWPFQVINEAGKPK	40	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	21	null	ms_run[5]:scan=1799	1104.62	3	542.9599424	542.9716	K	V	91	104
+PSM	TLTIVDTGIGMTK	41	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	8	11-UNIMOD:35	ms_run[5]:scan=1827	1132.62	3	450.5534561	450.583	R	A	88	100
+PSM	MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK	42	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-5	0-UNIMOD:35	ms_run[5]:scan=3892	3184.08	2	2405.594573	2405.6084	-	E	1	41
+PSM	AVVNGYSASDTVGAGFAQAK	43	Q8K0U4	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	-7	null	ms_run[6]:scan=1331	1327.08	2	956.9880766	956.9736	K	E	261	281
+PSM	ALLRLHQECEKLK	44	Q61699	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	24	9-UNIMOD:4	ms_run[6]:scan=876	885.62	3	527.64539	527.6362	R	K	262	274
+PSM	DWYPAHSR	45	P14602	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	4	null	ms_run[6]:scan=575	571.08	2	516.21	516.2383	R	L	21	28
+PSM	DWYPAHSR	45	Q340U4	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	40	null	ms_run[6]:scan=575	571.08	2	516.21	516.2383	K	E	143	150
+PSM	DWYPAHSR	45	P16627	0	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	9	null	ms_run[6]:scan=575	571.08	2	516.21	516.2383	R	M	240	247
+PSM	MNQSNASPTLDGLFR	46	P14602	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	32	null	ms_run[6]:scan=1186	1195.62	3	550.9319012	550.935	-	R	1	15
+PSM	MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	47	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	3	null	ms_run[6]:scan=2880	2876.08	2	1974.377816	1974.3984	R	M	692	728
+PSM	LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR	48	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	29	23-UNIMOD:35	ms_run[6]:scan=2615	2611.08	2	1788.294771	1788.2886	K	M	695	728
+PSM	TLTIVDTGIGMTK	49	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	39	11-UNIMOD:35	ms_run[6]:scan=1123	1132.62	3	450.6038036	450.583	R	A	88	100
+PSM	MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK	50	P07901	1	UniProtKB	2013_08	[MS,MS:1001207,Mascot,]	33	0-UNIMOD:35	ms_run[6]:scan=3188	3184.08	2	2405.605739	2405.6084	-	E	1	41
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.mztab	Fri Jan 15 15:58:54 2021 +0000
@@ -0,0 +1,86 @@
+COM	Meta data section																				
+MTD	mzTab-version	2.0.0-M																			
+MTD	mzTab-ID	ISAS-2018-1234																			
+MTD	description	Minimal proposed sample file for identification and quantification of lipids																			
+MTD	publication[1]	pubmed:29039908 | doi:10.1021/acs.analchem.7b03576																			
+MTD	cv[1]-label	MS																			
+MTD	cv[1]-full_name	PSI-MS controlled vocabulary																			
+MTD	cv[1]-version	4.0.18																			
+MTD	cv[1]-uri	https://github.com/HUPO-PSI/psi-ms-CV/blob/master/psi-ms.obo																			
+MTD	cv[2]-label	MSIO
+MTD	cv[2]-uri	https://www.ebi.ac.uk/ols/ontologies/msio
+MTD	cv[2]-version	1.0.1
+MTD	cv[2]-full_name	Metabolomics Standards Initiative Ontology (MSIO)																			
+MTD	cv[3]-label	UO																			
+MTD	cv[3]-full_name	Units of Measurement Ontology																			
+MTD	cv[3]-version	 2017-09-25																			
+MTD	cv[3]-uri	http://purl.obolibrary.org/obo/uo.owl																			
+MTD	quantification_method	[MS, MS:1001838, SRM quantitation analysis, ]																			
+MTD	sample_processing[1]	[MSIO, MSIO:0000148, high performance liquid chromatography, ]
+MTD	instrument[1]-name	[MS, MS:1001911, Q Exactive , ]																			
+MTD	instrument[1]-source	[MS, MS:1000073, electrospray ionization, ]																			
+MTD	instrument[1]-analyzer[1]	[MS, MS:1000081, quadrupole, ]																			
+MTD	instrument[1]-analyzer[2]	[MS, MS:1000484, orbitrap, ]																			
+MTD	instrument[1]-detector	[MS, MS:1000624, inductive detector, ]																			
+MTD	software[1]	[MS, MS:1000532, Xcalibur,2.8-280502/2.8.1.2806]																			
+MTD	software[1]-setting[1]	ScheduledSRMWindow: 2 min																			
+MTD	software[1]-setting[2]	CycleTime: 2 s																			
+MTD	software[2]	[MS, MS:1000922, Skyline, 3.5.0.9319]																			
+MTD	software[2]-setting[1]	MSMSmassrange: (50.0, 1800.0)																			
+MTD	sample[1]	QEx-1273-prm-sp1																			
+MTD	sample[1]-description	Sphingolipids with concentration reported as picomolar per mg of protein, abundances are reported after calibration correction.																			
+MTD	ms_run[1]-location	file:///C:/data/QEx-1273-prm-sp1.mzML																			
+MTD	ms_run[1]-format	[MS, MS:1000584, mzML file, ]																			
+MTD	ms_run[1]-id_format	[MS, MS:1000768, Thermo nativeID format, ]																			
+MTD	ms_run[1]-scan_polarity[1]	[MS, MS:1000130, positive scan, ]																			
+MTD	ms_run[1]-instrument_ref	instrument[1]																			
+MTD	assay[1]	Description of assay 1																			
+MTD	assay[1]-sample_ref	sample[1]																			
+MTD	assay[1]-ms_run_ref	ms_run[1]																			
+MTD	study_variable[1]	Sphingolipid SRM Quantitation																			
+MTD	study_variable[1]-assay_refs	assay[1]																			
+MTD	study_variable[1]-description	sphingolipid srm quantitation																			
+MTD	study_variable[1]-average_function	[MS, MS:1002883, median, ]																			
+MTD	study_variable[1]-variation_function	[MS, MS:1002885, standard error, ]																			
+MTD	small_molecule-quantification_unit	[UO, UO:0000072, picomolal, ]																			
+MTD	small_molecule_feature-quantification_unit	[UO, UO:0000072, picomolal, ]																			
+MTD	small_molecule-identification_reliability	[MS, MS:1002896, compound identification confidence level, ]																			
+MTD	database[1]	[,, Pubchem, ]																			
+MTD	database[1]-prefix	PUBCHEM-CPD																			
+MTD	database[1]-version	02.12.2017																			
+MTD	database[1]-uri	https://www.ncbi.nlm.nih.gov/pccompound																			
+MTD	database[2]	[,, LipidMaps, ]																			
+MTD	database[2]-prefix	LM																			
+MTD	database[2]-version	2017-12																			
+MTD	database[2]-uri	http://www.lipidmaps.org/																			
+MTD	database[3]	[,, LipidCreator Transitions, ]																			
+MTD	database[3]-prefix	LCTR																			
+MTD	database[3]-version	2018-07																			
+MTD	database[3]-uri	https://lifs.isas.de/lipidcreator																			
+COM	MTD	colunit-small_molecule	retention_time=[UO, UO:0000010, second, ]																		
+MTD	colunit-small_molecule_evidence	opt_global_mass_error=[UO, UO:0000169, parts per million, ]																			
+MTD	id_confidence_measure[1]	[MS, MS:1002890, fragmentation score, ]																			
+MTD	external_study_uri[1]	file:///C:/data/prm.sky.zip																			
+																					
+COM	"MzTab 2.0.0-M ""proposed"" specification"																				
+COM	Summary rows. 																				
+COM	Evidences (e.g. multiple modifications, adducts incl. charge variants are summarized). 																				
+COM	For most use cases this summary lines may be sufficient.																				
+COM	Negative and positive scan polarities are currently not explicitly included, this is still under debate in the mzTAB community.																				
+SMH	SML_ID	SMF_ID_REFS	chemical_name	database_identifier	chemical_formula	smiles	inchi	uri	theoretical_neutral_mass	adduct_ions	reliability	best_id_confidence_measure	best_id_confidence_value	abundance_assay[1]	abundance_study_variable[1]	abundance_variation_study_variable[1]	opt_global_lipid_category	opt_global_lipid_species	opt_global_lipid_best_id_level
+SML	1	1 | 2 | 3 | 4	Cer(d18:1/24:0)	LM:LMSP02010012	C42H83NO3	CCCCCCCCCCCCCCCCCCCCCCCC(=O)N[C@@H](CO)[C@H](O)/C=C/CCCCCCCCCCCCC	InChI=1S/C42H83NO3/c1-3-5-7-9-11-13-15-17-18-19-20-21-22-23-24-26-28-30-32-34-36-38-42(46)43-40(39-44)41(45)37-35-33-31-29-27-25-16-14-12-10-8-6-4-2/h35,37,40-41,44-45H,3-34,36,38-39H2,1-2H3,(H,43,46)/b37-35+/t40-,41+/m0/s1	http://www.lipidmaps.org/data/LMSDRecord.php?LM_ID=LMSP02010012	649.6373	[M+H]+	2	[,, qualifier ions exact mass,]	0.958	4.448784E-05	4.448784E-05	0	Sphingolipids	Cer 42:1	Cer d18:1/24:0
+																					
+COM	MS feature rows , used to report m/z and individual abundance information for quantification																				
+SFH	SMF_ID	SME_ID_REFS	SME_ID_REF_ambiguity_code	adduct_ion	isotopomer	exp_mass_to_charge	charge	retention_time_in_seconds	retention_time_in_seconds_start	retention_time_in_seconds_end	abundance_assay[1]	opt_global_quantifiers_SMF_ID_REFS									
+SMF	1	1	null	[M+H]1+	null	650.6432	1	821.2341	756.0000	954.0000	4.448784E-05	3									
+SMF	2	2	null	null	null	252.2677	1	821.2341	756.0000	954.0000	6.673176E-06	null									
+SMF	3	3	null	null	null	264.2689	1	821.2341	756.0000	954.0000	1.3346352E-05	null									
+SMF	4	4	null	null	null	282.2788	1	821.2341	756.0000	954.0000	9.831813E-06	null									
+																					
+COM	Evidence rows for parent / fragment ions.																				
+COM	Primary use case: report single hits from spectral library or accurate mass searches without quantification. -> Qualification																				
+SEH	SME_ID	evidence_input_id	database_identifier	chemical_formula	smiles	inchi	chemical_name	uri	derivatized_form	adduct_ion	exp_mass_to_charge	charge	theoretical_mass_to_charge	opt_global_mass_error	spectra_ref	identification_method	ms_level	id_confidence_measure[1]	rank	opt_global_qualifiers_evidence_grouping_ID_REFS
+SME	1	1	LM:LMSP0501AB02	C42H83NO3	CCCCCCCCCCCCCCCCCCCCCCCC(=O)N[C@@H](CO)[C@H](O)/C=C/CCCCCCCCCCCCC	InChI=1S/C42H83NO3/c1-3-5-7-9-11-13-15-17-18-19-20-21-22-23-24-26-28-30-32-34-36-38-42(46)43-40(39-44)41(45)37-35-33-31-29-27-25-16-14-12-10-8-6-4-2/h35,37,40-41,44-45H,3-34,36,38-39H2,1-2H3,(H,43,46)/b37-35+/t40-,41+/m0/s1	LacCer d18:1/12:0	http://www.lipidmaps.org/data/LMSDRecord.php?LM_ID=LMSP02010012	null	[M+H]1+	650.6432	1	650.6446	-2.1517	ms_run[1]:controllerType=0 controllerNumber=1 scan=731	[,, qualifier ions exact mass,]	[MS,MS:1000511, ms level, 1]	0.958	1	2
+SME	2	2	LCTR:LCTR0809812	C17H33N	null	null	Cer d18:1/24:0 W' - CHO	null	null	null	252.2677	1	252.2686	-3.5676	ms_run[1]:controllerType=0 controllerNumber=1 scan=732	[,, exact mass, ]	[MS,MS:1000511, ms level, 2]	0.9780	1	null
+SME	3	2	LCTR:LCTR0871245	C18H33N	null	null	Cer d18:1/24:0 W''	null	null	null	264.2689	1	264.2686	-1.1352	ms_run[1]:controllerType=0 controllerNumber=1 scan=732	[,, exact mass, ]	[MS,MS:1000511, ms level, 2]	0.7500	1	null
+SME	4	2	LCTR:LCTR0809711	C18H35NO	null	null	Cer d18:1/24:0 W'	null	null	null	282.2788	1	282.2791	-1.0628	ms_run[1]:controllerType=0 controllerNumber=1 scan=732	[,, exact mass, ]	[MS,MS:1000511, ms level, 2]	0.8760	1	null
\ No newline at end of file