annotate splitMSP.py @ 0:ae0263faa819 draft default tip

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
author recetox
date Fri, 25 Mar 2022 15:38:00 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
1 import argparse
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
2 import os
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
3 from typing import List
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
4
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
5 from matchms import Spectrum
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
6 from matchms.exporting import save_as_msp
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
7 from matchms.importing import load_from_msp
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
8
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
9
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def read_spectra(filename: str) -> List[Spectrum]:
    """Load all spectra stored in an MSP file.

    Args:
        filename (str): Path to the .msp file to read.

    Returns:
        List[Spectrum]: Every spectrum yielded by the loader, in file order.
    """
    # The second positional argument is forwarded as-is; presumably it is
    # matchms' metadata-harmonization switch -- confirm against the installed
    # matchms version.
    loaded = load_from_msp(filename, True)
    # Materialize the loader's iterable so callers can index and re-iterate.
    return list(loaded)
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
20
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
21
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def get_spectra_names(spectra: list) -> List[str]:
    """Collect the 'compound_name' entry of each spectrum.

    Args:
        spectra (list): Spectra exposing a ``get(key)`` accessor.

    Returns:
        List[str]: One 'compound_name' value per spectrum, in input order
            (whatever ``get`` returns when the key is absent is kept as-is).
    """
    names = []
    for spectrum in spectra:
        names.append(spectrum.get("compound_name"))
    return names
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
32
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
33
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def make_outdir(outdir: str):
    """Create the destination directory if it does not exist yet.

    Missing parent directories are created as well, and an already existing
    directory is accepted silently so the tool can be re-run against the same
    destination.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Raises:
        FileExistsError: If ``outdir`` exists but is not a directory.
    """
    # os.mkdir would fail on an existing directory or on missing parents.
    os.makedirs(outdir, exist_ok=True)
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
41
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
42
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def write_spectra(filename, outdir):
    """Write each spectrum of an MSP file to its own MSP file.

    The output path of every spectrum is derived from its 'compound_name'
    by ``assemble_outpath``.

    Args:
        filename (str): MSP file that contains the spectra.
        outdir (str): Path to destination directory.
    """
    spectra = read_spectra(filename)
    # Names are collected up front, then paired with their spectra in order.
    for name, spectrum in zip(get_spectra_names(spectra), spectra):
        save_as_msp(spectrum, assemble_outpath(name, outdir))
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
55
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
56
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def assemble_outpath(name, outdir: str) -> str:
    """Build the output path '<outdir>/<sanitized name>.msp'.

    All non-alphanumeric characters are stripped from ``name`` so that it is
    safe to use as a file name.

    Args:
        name (str): Name to be filtered; ``None`` is tolerated.
        outdir (str): Path to destination directory.

    Returns:
        str: Path of the MSP file inside ``outdir``.
    """
    # A spectrum without a 'compound_name' yields None, which cannot be filtered.
    raw_name = "" if name is None else str(name)
    stem = ''.join(filter(str.isalnum, raw_name))
    if not stem:
        # Avoid producing a bare, hidden '.msp' file when nothing usable remains.
        stem = "unnamed"
    return os.path.join(outdir, stem + ".msp")
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
68
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
69
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def split_spectra(filename, outdir):
    """Split an MSP file into one MSP file per spectrum.

    The destination directory is created first, then the individual spectra
    are written into it.

    Args:
        filename (str): MSP file that contains the spectra.
        outdir (str): Path to destination directory where split spectra files are saved.

    Returns:
        Whatever ``write_spectra`` returns.
    """
    make_outdir(outdir)
    outcome = write_spectra(filename, outdir)
    return outcome
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
79
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
80
ae0263faa819 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def parse_arguments(argv=None):
    """Parse the command-line options of the splitter.

    Args:
        argv (list, optional): Argument strings to parse; ``None`` makes
            argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Namespace with the attributes ``filename`` and ``outdir``.
    """
    listarg = argparse.ArgumentParser()
    # Both options are mandatory: without them the script has nothing to do,
    # and a usage error is clearer than a failure further down on None.
    listarg.add_argument('--filename', type=str, required=True)
    listarg.add_argument('--outdir', type=str, required=True)
    return listarg.parse_args(argv)


def main(argv=None):
    """Entry point: parse the arguments and split the given MSP file.

    Args:
        argv (list, optional): Argument strings; ``None`` means ``sys.argv[1:]``.
    """
    args = parse_arguments(argv)
    split_spectra(args.filename, args.outdir)


# Parsing happens only when run as a script, so importing this module
# no longer consumes the importing program's command line.
if __name__ == "__main__":
    main()