annotate mztab_reader.py @ 0:84e4b5d4b7ad draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
author galaxyp
date Fri, 15 Jan 2021 15:58:54 +0000
parents
children a475c1906e0b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
2
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
3 import argparse
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
4 import os
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
5
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
6 import pandas as pd
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
7 from pyteomics.mztab import MzTab
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
8
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
9
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
def read_mztab(input_path, output_path):
    """
    Parse an mzTab file and export its sections as .tsv files.

    The file at *input_path* is loaded with ``MzTab`` and handed to the
    exporter that matches its ``variant`` attribute: ``read_mztab_p`` for
    'P' and ``read_mztab_m`` for 'M'.

    :param input_path: path of the input mzTab file
    :param output_path: folder passed on to the exporter for the .tsv files
    :return: whatever the selected exporter returns; ``None`` (and nothing
        is written) when the variant is neither 'P' nor 'M'
    """
    parsed = MzTab(input_path)
    variant = parsed.variant

    if variant == 'P':
        return read_mztab_p(parsed, output_path)
    if variant == 'M':
        return read_mztab_m(parsed, output_path)
    # Any other variant is ignored, exactly as before.
    return None
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
19
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
20
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
def read_mztab_p(mztab, output_path):
    """
    Processing mztab "P"

    Writes the metadata and the protein, peptide, spectrum-match and
    small-molecule tables of *mztab* as tab-separated files named
    ``mtd.tsv``, ``prt.tsv``, ``pep.tsv``, ``psm.tsv`` and ``sml.tsv``
    inside *output_path*.

    :param mztab: parsed mzTab object; must expose ``metadata`` (a mapping)
        and the attributes ``protein_table``, ``peptide_table``,
        ``spectrum_match_table`` and ``small_molecule_table``, each of which
        provides a ``to_csv`` method
    :param output_path: folder receiving the .tsv files; created (including
        parents) when it does not exist yet
    :return: None
    """
    # Make sure the target folder exists so the first write cannot fail on a
    # fresh output location. An empty path means "current directory" for
    # os.path.join, so there is nothing to create in that case.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    # Metadata is a flat mapping: one row per key, values in a single column.
    mtd = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    mtd.to_csv(os.path.join(output_path, "mtd.tsv"), sep="\t")

    # (attribute on the mzTab object, output file name), in export order.
    sections = (
        ("protein_table", "prt.tsv"),
        ("peptide_table", "pep.tsv"),
        ("spectrum_match_table", "psm.tsv"),
        ("small_molecule_table", "sml.tsv"),
    )
    for attribute, file_name in sections:
        table = getattr(mztab, attribute)
        table.to_csv(os.path.join(output_path, file_name), sep="\t")
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
35
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
36
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
def read_mztab_m(mztab, output_path):
    """
    Processing mztab "M"

    Writes the metadata and the small-molecule, small-molecule-feature and
    small-molecule-evidence tables of *mztab* as tab-separated files named
    ``mtd.tsv``, ``sml.tsv``, ``smf.tsv`` and ``sme.tsv`` inside
    *output_path*.

    :param mztab: parsed mzTab object; must expose ``metadata`` (a mapping)
        and the attributes ``small_molecule_table``,
        ``small_molecule_feature_table`` and
        ``small_molecule_evidence_table``, each of which provides a
        ``to_csv`` method
    :param output_path: folder receiving the .tsv files; created (including
        parents) when it does not exist yet
    :return: None
    """
    # Make sure the target folder exists so the first write cannot fail on a
    # fresh output location. An empty path means "current directory" for
    # os.path.join, so there is nothing to create in that case.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    # Metadata is a flat mapping: one row per key, values in a single column.
    mtd = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    mtd.to_csv(os.path.join(output_path, "mtd.tsv"), sep="\t")

    # (attribute on the mzTab object, output file name), in export order.
    sections = (
        ("small_molecule_table", "sml.tsv"),
        ("small_molecule_feature_table", "smf.tsv"),
        ("small_molecule_evidence_table", "sme.tsv"),
    )
    for attribute, file_name in sections:
        table = getattr(mztab, attribute)
        table.to_csv(os.path.join(output_path, file_name), sep="\t")
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
49
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
50
84e4b5d4b7ad "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: one required input file and an optional
    # output folder that defaults to the current working directory.
    cli = argparse.ArgumentParser(description='List of paths')
    cli.add_argument(
        '--path_in',
        metavar='path',
        type=str,
        required=True,
        help='the path of input .mztab file',
    )
    cli.add_argument(
        '--path_out',
        metavar='path',
        type=str,
        default=os.getcwd(),
        help='the path of folder for output .tsv file',
    )

    options = cli.parse_args()

    # Convert the given mzTab file into .tsv files in the chosen folder.
    read_mztab(options.path_in, options.path_out)