diff matchms_split.py @ 0:80df426e7e47 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
author recetox
date Thu, 30 May 2024 18:07:29 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matchms_split.py	Thu May 30 18:07:29 2024 +0000
@@ -0,0 +1,66 @@
+import argparse
+import itertools
+import os
+
+import matchms
+from matchms.exporting import save_as_msp
+from matchms.importing import load_from_msp
+
+
+# Install an empty key-replacement table on matchms.Metadata at import time,
+# i.e. before any spectra are loaded by this script.
+# NOTE(review): presumably this stops matchms from renaming metadata keys so
+# that fields are written back under their original names — confirm against
+# the matchms Metadata documentation.
+matchms.Metadata.set_key_replacements({})
+
+
+def make_outdir(outdir: str) -> None:
+    """Create the destination directory, including any missing parents.
+
+    Args:
+        outdir (str): Path to destination directory where split spectra files are generated.
+
+    Raises:
+        FileExistsError: If ``outdir`` already exists.
+    """
+    # exist_ok is deliberately left at its default (False): a directory that
+    # already exists may still hold output files from an earlier run.
+    # NOTE(review): save_as_msp is believed to append to existing files by
+    # default, so reusing a directory could duplicate spectra — confirm.
+    os.makedirs(outdir)
+
+
+def write_spectra(spectra, outdir):
+    """Write every spectrum to its own MSP file.
+
+    Files are named ``<index>.msp``, where the index is the zero-based
+    position of the spectrum in ``spectra``.
+
+    Args:
+        spectra (Iterable[Spectrum]): Spectra to write to file. Any iterable
+            is accepted; it does not need to support ``len()`` or indexing.
+        outdir (str): Path to destination directory.
+    """
+    for index, spectrum in enumerate(spectra):
+        save_as_msp(spectrum, os.path.join(outdir, f"{index}.msp"))
+
+
+def split_round_robin(iterable, num_chunks):
+    """Distribute items over ``num_chunks`` lists in round-robin order.
+
+    Item ``k`` of ``iterable`` is appended to chunk ``k % num_chunks``, so the
+    chunk sizes differ by at most one. Chunks that received no item (when
+    there are fewer items than chunks) are dropped from the result.
+
+    Args:
+        iterable (Iterable): Items to distribute; consumed exactly once.
+        num_chunks (int): Maximum number of chunks to produce; must be >= 1.
+
+    Returns:
+        Iterator[list]: Lazy iterator over the non-empty chunks, in order.
+
+    Raises:
+        ValueError: If ``num_chunks`` is smaller than 1.
+    """
+    # Without this guard a non-positive count would leak a bare StopIteration
+    # (or an IndexError) from the distribution loop below.
+    if num_chunks < 1:
+        raise ValueError(f"num_chunks must be a positive integer, got {num_chunks}")
+
+    chunks = [[] for _ in range(num_chunks)]
+    for position, value in enumerate(iterable):
+        chunks[position % num_chunks].append(value)
+
+    # filter(None, ...) keeps only truthy entries, i.e. the non-empty lists.
+    return filter(None, chunks)
+
+
+# Command-line interface.
+# NOTE: the arguments are parsed at import time, not only when the file is run
+# as a script, so no option is marked as required here; a missing option is
+# simply left as None.
+listarg = argparse.ArgumentParser(
+    description="Split an MSP spectral library into several MSP files."
+)
+listarg.add_argument('--filename', type=str, help="Path to the input MSP file.")
+listarg.add_argument(
+    '--method',
+    type=str,
+    # Restricting the values turns a typo into a usage error instead of a
+    # failure later in the script.
+    choices=["one-per-file", "chunk-size", "num-chunks"],
+    help="How to split: one spectrum per file, fixed chunk size, or fixed number of chunks.",
+)
+listarg.add_argument('--outdir', type=str, help="Directory to create for the output files.")
+listarg.add_argument(
+    '--parameter',
+    type=int,
+    help="Chunk size for 'chunk-size' or number of chunks for 'num-chunks'.",
+)
+args = listarg.parse_args()
+outdir = args.outdir
+filename = args.filename
+method = args.method
+parameter = args.parameter
+
+
+if __name__ == "__main__":
+    # Reject unusable options before anything is created on disk.
+    if method not in ("one-per-file", "chunk-size", "num-chunks"):
+        raise ValueError(f"Unknown split method: {method!r}")
+    if method != "one-per-file" and (parameter is None or parameter < 1):
+        # A chunk size of 0 would silently produce no output at all, and a
+        # chunk count of 0 cannot hold any spectrum.
+        raise ValueError(
+            f"--parameter must be a positive integer for method {method!r}, got {parameter!r}"
+        )
+
+    # iter() guarantees a single-pass iterator: the chunk-size branch relies on
+    # every islice() call resuming where the previous one stopped.
+    spectra = iter(load_from_msp(filename, metadata_harmonization=False))
+    make_outdir(outdir)
+
+    if method == "one-per-file":
+        write_spectra(list(spectra), outdir)
+    else:
+        if method == "chunk-size":
+            # Two-argument iter(): keep taking `parameter` spectra at a time
+            # until an empty list signals that the input is exhausted.
+            chunks = iter(lambda: list(itertools.islice(spectra, parameter)), [])
+        else:  # "num-chunks"
+            chunks = split_round_robin(spectra, parameter)
+        for i, chunk in enumerate(chunks):
+            save_as_msp(chunk, os.path.join(outdir, f"chunk_{i}.msp"))