comparison matchms_split.py @ 0:e5010b19d64d draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
author recetox
date Tue, 27 Jun 2023 14:26:29 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e5010b19d64d
1 import argparse
2 import itertools
3 import os
4 from typing import List
5
6 from matchms.exporting import save_as_msp
7 from matchms.importing import load_from_msp
8
9
def get_spectra_names(spectra: list) -> List[str]:
    """Collect the 'compound_name' entry of every spectrum.

    Args:
        spectra (list): Individual spectra; each one is queried through its
            ``get`` method.

    Returns:
        List[str]: The 'compound_name' value of each spectrum, in input order.
    """
    compound_names = []
    for spectrum in spectra:
        compound_names.append(spectrum.get("compound_name"))
    return compound_names
20
21
def make_outdir(outdir: str) -> None:
    """Create the destination directory if it does not exist yet.

    Missing parent directories are created as well, and a directory that
    already exists is left untouched instead of raising ``FileExistsError``.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.
    """
    # exist_ok=True makes the call idempotent, so re-running the tool against
    # a pre-created output directory no longer aborts before any work is done.
    os.makedirs(outdir, exist_ok=True)
29
30
def write_spectra(spectra, outdir):
    """Write every spectrum to its own MSP file inside ``outdir``.

    The file path of each spectrum is built by ``assemble_outpath`` from the
    spectrum's 'compound_name'.

    Args:
        spectra (List[Spectrum]): Spectra to write to file, one file each.
        outdir (str): Path to destination directory.
    """
    # NOTE(review): spectra whose sanitized names coincide are written to the
    # same path - confirm how save_as_msp treats an already existing file.
    compound_names = get_spectra_names(spectra)
    for compound_name, spectrum in zip(compound_names, spectra):
        destination = assemble_outpath(compound_name, outdir)
        save_as_msp(spectrum, destination)
42
43
def assemble_outpath(name, outdir):
    """Build the output file path for a spectrum name.

    Every character of ``name`` that is not alphanumeric is dropped, the
    ".msp" extension is appended and the result is joined onto ``outdir``.

    Args:
        name (str): Name to be filtered.
        outdir (str): Path to destination directory.

    Returns:
        str: Path of the MSP file inside ``outdir``.
    """
    sanitized = "".join(char for char in name if char.isalnum())
    return os.path.join(outdir, f"{sanitized}.msp")
55
56
def split_round_robin(iterable, num_chunks):
    """Distribute the items of ``iterable`` over ``num_chunks`` lists in turn.

    The first item goes to the first chunk, the second item to the second
    chunk, and so on, wrapping around to the first chunk after the last one.

    Args:
        iterable: Items to distribute.
        num_chunks (int): Number of chunks to distribute the items over.

    Returns:
        A lazy ``filter`` iterator over the chunks that received at least one
        item; chunks that stayed empty are skipped.
    """
    buckets = [[] for _ in range(num_chunks)]
    # Cycling over the bucket lists themselves yields them in round-robin order.
    next_bucket = itertools.cycle(buckets)
    for item in iterable:
        next(next_bucket).append(item)
    # An empty list is falsy, so filter(None, ...) keeps only non-empty buckets.
    return filter(None, buckets)
64
65
def parse_arguments(argv=None):
    """Parse and validate the command line arguments of the split tool.

    Args:
        argv (list, optional): Argument strings to parse. Defaults to None,
            in which case argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Namespace with the attributes ``filename``,
        ``method``, ``outdir`` and ``parameter``.

    Raises:
        SystemExit: Raised by argparse when a required argument is missing,
            ``--method`` is not one of the supported values, or a chunking
            method is requested without a positive ``--parameter``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', type=str, required=True)
    # Restricting the choices rejects unknown methods up front instead of
    # failing later on an unbound variable.
    parser.add_argument('--method', type=str, required=True,
                        choices=["one-per-file", "chunk-size", "num-chunks"])
    parser.add_argument('--outdir', type=str, required=True)
    parser.add_argument('--parameter', type=int)
    args = parser.parse_args(argv)

    # Both chunking methods use --parameter as a count, so it must be >= 1.
    if args.method != "one-per-file" and (args.parameter is None or args.parameter < 1):
        parser.error(f"--parameter must be a positive integer for method '{args.method}'")
    return args


def main(argv=None):
    """Split an MSP file into several MSP files inside the output directory.

    With method "one-per-file" every spectrum is written to its own file.
    With "chunk-size" consecutive groups of ``parameter`` spectra are written
    to ``chunk_<i>.msp``; with "num-chunks" the spectra are distributed
    round-robin over ``parameter`` files named ``chunk_<i>.msp``.

    Args:
        argv (list, optional): Argument strings to parse. Defaults to None,
            in which case the process command line is used.
    """
    args = parse_arguments(argv)
    spectra = load_from_msp(args.filename, metadata_harmonization=True)
    make_outdir(args.outdir)

    if args.method == "one-per-file":
        write_spectra(list(spectra), args.outdir)
        return

    if args.method == "chunk-size":
        # Take slices from one shared iterator so each call continues where
        # the previous one stopped; the empty-list sentinel ends the loop.
        remaining = iter(spectra)
        chunks = iter(lambda: list(itertools.islice(remaining, args.parameter)), [])
    else:
        chunks = split_round_robin(spectra, args.parameter)

    for i, chunk in enumerate(chunks):
        save_as_msp(chunk, os.path.join(args.outdir, f"chunk_{i}.msp"))


if __name__ == "__main__":
    main()