comparison matchms_split.py @ 0:169c72b2ce79 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
author recetox
date Thu, 27 Apr 2023 12:02:44 +0000
parents
children 0cf68b536cd1
comparison
equal deleted inserted replaced
-1:000000000000 0:169c72b2ce79
1 import argparse
2 import itertools
3 import os
4 from typing import List
5
6 from matchms import Spectrum
7 from matchms.exporting import save_as_msp
8 from matchms.importing import load_from_msp
9
10
def read_spectra(filename: str) -> List[Spectrum]:
    """Load all spectra stored in an MSP file into a list.

    Args:
        filename (str): Path to the .msp file to read.

    Returns:
        List[Spectrum]: Every spectrum the loader yields, in the order it yields them.
    """
    # NOTE(review): the second positional argument (True) is forwarded as-is;
    # presumably it toggles metadata harmonization - confirm against the matchms API.
    loader = load_from_msp(filename, True)
    return [*loader]
21
22
def get_spectra_names(spectra: list) -> List[str]:
    """Collect the 'compound_name' entry of every spectrum.

    Args:
        spectra (list): Individual spectra; each one must offer a ``get`` lookup.

    Returns:
        List[str]: The 'compound_name' values, in the same order as the input.
    """
    names = []
    for spectrum in spectra:
        names.append(spectrum.get("compound_name"))
    return names
33
34
def make_outdir(outdir: str):
    """Create the destination directory if it does not exist yet.

    Missing parent directories are created as well, and a directory that is
    already present is accepted, so the call can safely be repeated.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.
    """
    # makedirs + exist_ok avoids FileExistsError / FileNotFoundError that a
    # plain os.mkdir raises for existing targets or missing parents.
    os.makedirs(outdir, exist_ok=True)
42
43
def write_spectra(spectra, outdir):
    """Write every spectrum to its own MSP file inside ``outdir``.

    The file path of each spectrum is derived from its 'compound_name' by
    ``assemble_outpath``.

    Args:
        spectra (List[Spectrum]): Spectra to write to file. Must be a
            re-iterable sequence, since it is traversed once for the names
            and once for writing.
        outdir (str): Path to destination directory.
    """
    names = get_spectra_names(spectra)
    # NOTE(review): spectra whose names map to the same sanitised file name
    # end up on the same path; how save_as_msp treats an existing file should
    # be confirmed against the matchms documentation.
    # Pair each spectrum with its name instead of indexing two lists in parallel.
    for name, spectrum in zip(names, spectra):
        save_as_msp(spectrum, assemble_outpath(name, outdir))
55
56
def assemble_outpath(name, outdir):
    """Build the output file path for a spectrum from its name.

    Every character of ``name`` that is not alphanumeric is dropped, the
    '.msp' extension is appended and the result is joined onto ``outdir``.

    Args:
        name (str): Name to be sanitised.
        outdir (str): Path to destination directory.

    Returns:
        str: Path of the .msp file inside ``outdir``.
    """
    kept = [char for char in name if str.isalnum(char)]
    return os.path.join(outdir, "".join(kept) + ".msp")
68
69
def split_spectra(filename, outdir):
    """Save individual MSP spectra files in the destination directory.

    Args:
        filename (str): MSP file that contains the spectra.
        outdir (str): Path to destination directory where split spectra files are saved.
    """
    make_outdir(outdir)
    # write_spectra expects loaded spectra, not a path: load the file first.
    return write_spectra(read_spectra(filename), outdir)
79
80
def split_round_robin(iterable, num_chunks):
    """Distribute values over ``num_chunks`` chunks in round-robin order.

    The first value goes to chunk 0, the second to chunk 1 and so on,
    wrapping around after the last chunk. Chunks that received no value are
    left out of the result.

    Args:
        iterable (Iterable): Values to distribute.
        num_chunks (int): Number of chunks to distribute over; must be >= 1.

    Returns:
        Iterator[list]: Lazy iterator over the non-empty chunks.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if num_chunks < 1:
        # Without this guard an empty cycle leaks a bare StopIteration.
        raise ValueError(f"num_chunks must be a positive integer, got {num_chunks}")
    chunks = [[] for _ in range(num_chunks)]
    for position, value in enumerate(iterable):
        chunks[position % num_chunks].append(value)
    # filter(None, ...) drops the empty lists and keeps the lazy return type.
    return filter(None, chunks)
88
89
def _parse_arguments(argv=None):
    """Parse and validate the command line options of the split tool.

    Args:
        argv (list, optional): Argument strings to parse. ``None`` lets
            argparse read them from ``sys.argv``.

    Returns:
        argparse.Namespace: Parsed ``filename``, ``method``, ``outdir`` and ``parameter``.
    """
    listarg = argparse.ArgumentParser()
    listarg.add_argument('--filename', type=str, required=True)
    # Restricting the choices rejects unknown methods up front instead of
    # failing later on an unbound variable.
    listarg.add_argument('--method', type=str, required=True,
                         choices=("one-per-file", "chunk-size", "num-chunks"))
    listarg.add_argument('--outdir', type=str, required=True)
    listarg.add_argument('--parameter', type=int)
    args = listarg.parse_args(argv)

    if args.method == "num-chunks" and args.parameter is None:
        listarg.error("--parameter is required for method 'num-chunks'")
    if args.method != "one-per-file" and args.parameter is not None and args.parameter < 1:
        listarg.error("--parameter must be a positive integer")
    return args


def main(argv=None):
    """Split an MSP file into several MSP files according to ``--method``.

    * ``one-per-file``: one file per spectrum, named after the spectrum.
    * ``chunk-size``: files ``chunk_<i>.msp`` holding up to ``--parameter``
      spectra each (all spectra in one chunk when ``--parameter`` is omitted).
    * ``num-chunks``: spectra distributed round-robin over ``--parameter`` files.

    Args:
        argv (list, optional): Argument strings; ``None`` uses ``sys.argv``.
    """
    args = _parse_arguments(argv)
    # iter() pins a single shared iterator so that every islice call below
    # continues where the previous one stopped.
    spectra = iter(load_from_msp(args.filename))
    make_outdir(args.outdir)

    if args.method == "one-per-file":
        write_spectra(list(spectra), args.outdir)
        return

    if args.method == "chunk-size":
        # Keep pulling slices until an empty one signals exhaustion.
        chunks = iter(lambda: list(itertools.islice(spectra, args.parameter)), [])
    else:  # "num-chunks" - the only remaining choice
        chunks = split_round_robin(spectra, args.parameter)
    for i, x in enumerate(chunks):
        save_as_msp(x, os.path.join(args.outdir, f"chunk_{i}.msp"))


if __name__ == "__main__":
    main()