Mercurial > repos > recetox > matchms_add_key
annotate matchms_split.py @ 19:d5a50ec6035d draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
| author | recetox | 
|---|---|
| date | Thu, 14 Aug 2025 12:40:05 +0000 | 
| parents | 3c82adf06cd5 | 
| children | 
| rev | line source | 
|---|---|
| 0 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 1 import argparse | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 2 import itertools | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 3 import os | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 4 | 
| 19 
d5a50ec6035d
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
 recetox parents: 
9diff
changeset | 5 from matchms import Metadata | 
| 0 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 6 from matchms.exporting import save_as_msp | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 7 from matchms.importing import load_from_msp | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 8 | 
| 19 
d5a50ec6035d
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
 recetox parents: 
9diff
# Install an empty key-replacement mapping on matchms' Metadata class.
# NOTE(review): presumably this keeps matchms from renaming metadata keys, so
# the split files carry the same field names as the input - confirm against
# the matchms Metadata documentation.
Metadata.set_key_replacements({})
| 0 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 10 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 11 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
def make_outdir(outdir: str):
    """Create destination directory.

    Missing parent directories are created as well, and a directory that
    already exists is reused instead of raising ``FileExistsError``, so the
    tool can be re-run against the same output location.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Returns:
        None
    """
    # makedirs(..., exist_ok=True) is idempotent; plain os.mkdir fails when the
    # directory is already there or when an intermediate directory is missing.
    os.makedirs(outdir, exist_ok=True)
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 19 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 20 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
def write_spectra(spectra, outdir):
    """Generates MSP files of individual spectra.

    Every spectrum is saved to its own file named ``<index>.msp`` inside
    ``outdir``, where the index is the zero-based position of the spectrum
    in ``spectra``.

    Args:
        spectra (Iterable[Spectrum]): Spectra to write to file. Any iterable
            works, including generators; it is traversed exactly once.
        outdir (str): Path to destination directory.
    """
    # enumerate avoids len()/indexing, so lazily produced spectra are accepted too.
    for index, spectrum in enumerate(spectra):
        save_as_msp(spectrum, os.path.join(outdir, f"{index}.msp"))
| 0 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 30 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 31 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
def split_round_robin(iterable, num_chunks):
    """Distribute items over a fixed number of chunks in round-robin order.

    The item at position ``k`` of ``iterable`` is appended to chunk
    ``k % num_chunks``. Chunks that receive no item (fewer items than
    chunks) are left out of the result.

    Args:
        iterable (Iterable): Items to distribute; traversed exactly once.
        num_chunks (int): Number of chunks to distribute over; must be >= 1.

    Returns:
        Iterator[list]: Lazy iterator over the non-empty chunks, in chunk order.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    # Without this guard a non-positive count leaves no chunk to append to and
    # the failure surfaces as an unrelated-looking low-level error.
    if num_chunks < 1:
        raise ValueError(f"num_chunks must be a positive integer, got {num_chunks}")

    chunks = [[] for _ in range(num_chunks)]
    for position, value in enumerate(iterable):
        chunks[position % num_chunks].append(value)

    # filter(None, ...) keeps only truthy entries, i.e. the non-empty lists.
    return filter(None, chunks)
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 39 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 40 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
# Command-line interface of the splitting script.
# The arguments are parsed at module level and exposed as the globals
# `outdir`, `filename`, `method` and `parameter` used by the script body.
listarg = argparse.ArgumentParser()
listarg.add_argument('--filename', type=str,
                     help="Path to the input MSP file to split.")
# Restricting the choices turns a mistyped method into an immediate usage
# error instead of a failure later in the script.
listarg.add_argument('--method', type=str,
                     choices=["one-per-file", "chunk-size", "num-chunks"],
                     help="How to split: one spectrum per file, chunks of a fixed size, "
                          "or a fixed number of chunks.")
listarg.add_argument('--outdir', type=str,
                     help="Directory the split MSP files are written to.")
listarg.add_argument('--parameter', type=int, required=False,
                     help="Chunk size for 'chunk-size' or number of chunks for 'num-chunks'; "
                          "not used by 'one-per-file'.")
args = listarg.parse_args()
outdir = args.outdir
filename = args.filename
method = args.method
parameter = args.parameter
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 51 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
changeset | 52 | 
| 
ea00a749ec1f
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
 recetox parents: diff
if __name__ == "__main__":
    # Validate the options before anything is created on disk. An unknown
    # method used to end in a NameError on `chunks`, and a chunk size of zero
    # silently produced no output at all.
    if method not in ("one-per-file", "chunk-size", "num-chunks"):
        raise ValueError(
            f"Unknown split method {method!r}; expected 'one-per-file', "
            "'chunk-size' or 'num-chunks'."
        )
    if method != "one-per-file" and (parameter is None or parameter < 1):
        raise ValueError(
            f"Method {method!r} requires --parameter to be a positive integer, "
            f"got {parameter!r}."
        )

    # NOTE(review): metadata_harmonization=False presumably keeps the metadata
    # exactly as written in the input file - confirm against matchms docs.
    spectra = load_from_msp(filename, metadata_harmonization=False)
    make_outdir(outdir)

    if method == "one-per-file":
        # Materialize the spectra before handing them to write_spectra.
        write_spectra(list(spectra), outdir)
    else:
        if method == "chunk-size":
            # A single shared iterator is required so that consecutive islice
            # calls continue where the previous one stopped.
            spectra = iter(spectra)
            # iter(callable, sentinel): keep taking `parameter` spectra until
            # an empty list signals that the input is exhausted.
            chunks = iter(lambda: list(itertools.islice(spectra, parameter)), [])
        else:  # method == "num-chunks"
            chunks = split_round_robin(spectra, parameter)
        for i, x in enumerate(chunks):
            save_as_msp(x, os.path.join(outdir, f"chunk_{i}.msp"))
