Mercurial > repos > galaxyp > pyteomics_mztab2tsv
annotate mztab_reader.py @ 0:84e4b5d4b7ad draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
author | galaxyp |
---|---|
date | Fri, 15 Jan 2021 15:58:54 +0000 |
parents | |
children | a475c1906e0b |
rev | line source |
---|---|
0
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
2 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
3 import argparse |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
4 import os |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
5 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
6 import pandas as pd |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
7 from pyteomics.mztab import MzTab |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
8 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
9 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
def read_mztab(input_path, output_path):
    """
    Read an mzTab file and export its sections as TSV files.

    The file is parsed with pyteomics' MzTab and handed to the exporter
    that matches its variant: "P" -> read_mztab_p, "M" -> read_mztab_m.

    :param input_path: path of the input mzTab file
    :param output_path: folder handed to the exporter for its .tsv output
    :return: whatever the selected exporter returns
    :raises ValueError: if the variant is neither "P" nor "M"
    """
    mztab = MzTab(input_path)
    variant = mztab.variant
    if variant == 'P':
        return read_mztab_p(mztab, output_path)
    if variant == 'M':
        return read_mztab_m(mztab, output_path)
    # An unrecognised variant used to fall through silently, so the caller
    # finished "successfully" without a single output file being written.
    raise ValueError("Unsupported mzTab variant: {!r}".format(variant))
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
19 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
20 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
def read_mztab_p(mztab, output_path):
    """
    Export the sections of a variant "P" mzTab object as TSV files.

    Writes mtd.tsv, prt.tsv, pep.tsv, psm.tsv and sml.tsv into output_path.
    """
    def tsv_target(file_name):
        # Every output file lives directly inside the output folder.
        return os.path.join(output_path, file_name)

    # The metadata is converted to a frame with orient='index' before writing.
    metadata_frame = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    metadata_frame.to_csv(tsv_target("mtd.tsv"), sep="\t")

    # The remaining sections are fetched and written one by one, in order.
    sections = (
        ("prt.tsv", "protein_table"),
        ("pep.tsv", "peptide_table"),
        ("psm.tsv", "spectrum_match_table"),
        ("sml.tsv", "small_molecule_table"),
    )
    for file_name, attribute in sections:
        getattr(mztab, attribute).to_csv(tsv_target(file_name), sep="\t")
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
35 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
36 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
def read_mztab_m(mztab, output_path):
    """
    Export the sections of a variant "M" mzTab object as TSV files.

    Writes mtd.tsv, sml.tsv, smf.tsv and sme.tsv into output_path.
    """
    def tsv_target(file_name):
        # Every output file lives directly inside the output folder.
        return os.path.join(output_path, file_name)

    # The metadata is converted to a frame with orient='index' before writing.
    metadata_frame = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    metadata_frame.to_csv(tsv_target("mtd.tsv"), sep="\t")

    # The remaining sections are fetched and written one by one, in order.
    sections = (
        ("sml.tsv", "small_molecule_table"),
        ("smf.tsv", "small_molecule_feature_table"),
        ("sme.tsv", "small_molecule_evidence_table"),
    )
    for file_name, attribute in sections:
        getattr(mztab, attribute).to_csv(tsv_target(file_name), sep="\t")
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
49 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
50 |
84e4b5d4b7ad
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line interface: a required input file and an optional
    # output folder.
    cli = argparse.ArgumentParser(description='List of paths')
    cli.add_argument(
        '--path_in',
        required=True,
        type=str,
        metavar='path',
        help='the path of input .mztab file',
    )
    cli.add_argument(
        '--path_out',
        type=str,
        metavar='path',
        # Evaluated once, when the argument is registered.
        default=os.getcwd(),
        help='the path of folder for output .tsv file',
    )
    options = cli.parse_args()

    # Convert the mzTab file into its per-section TSV tables.
    read_mztab(options.path_in, options.path_out)