Mercurial > repos > recetox > msp_split
annotate splitMSP.py @ 0:ae0263faa819 draft default tip
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
author | recetox |
---|---|
date | Fri, 25 Mar 2022 15:38:00 +0000 |
parents | |
children |
rev | line source |
---|---|
0
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
1 import argparse |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
2 import os |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
3 from typing import List |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
4 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
5 from matchms import Spectrum |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
6 from matchms.exporting import save_as_msp |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
7 from matchms.importing import load_from_msp |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
8 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
9 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
def read_spectra(filename: str) -> List[Spectrum]:
    """Load every spectrum stored in an MSP file.

    Args:
        filename (str): Path to the .msp file to read.

    Returns:
        List[Spectrum]: All spectra yielded by the matchms MSP loader.
    """
    # The loader is lazy; materialize it so callers can index and take len().
    # The second positional argument (True) is forwarded to matchms unchanged.
    loaded = load_from_msp(filename, True)
    spectra = list(loaded)
    return spectra
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
20 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
21 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
def get_spectra_names(spectra: list) -> List[str]:
    """Collect the 'compound_name' metadata value of each spectrum.

    Args:
        spectra (list): Spectra to inspect; each item must provide ``get``.

    Returns:
        List[str]: One 'compound_name' value per spectrum, in input order.
            An entry is whatever ``get`` returns for a missing key.
    """
    names = []
    for spectrum in spectra:
        names.append(spectrum.get("compound_name"))
    return names
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
32 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
33 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
def make_outdir(outdir: str):
    """Create the destination directory if it does not exist yet.

    Missing parent directories are created as well, and an already existing
    directory is left untouched instead of raising, so the call is safe to
    repeat.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Raises:
        FileExistsError: If ``outdir`` exists but is not a directory.
    """
    # exist_ok=True makes re-runs into the same directory succeed.
    return os.makedirs(outdir, exist_ok=True)
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
41 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
42 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
def write_spectra(filename, outdir):
    """Write every spectrum of an MSP file into its own MSP file.

    Each output path is derived from the spectrum's 'compound_name' by
    ``assemble_outpath``.

    Args:
        filename (str): MSP file that contains the spectra.
        outdir (str): Path to destination directory.
    """
    spectra = read_spectra(filename)
    # Names are computed for all spectra up front, then paired positionally.
    for name, spectrum in zip(get_spectra_names(spectra), spectra):
        save_as_msp(spectrum, assemble_outpath(name, outdir))
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
55 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
56 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
def assemble_outpath(name, outdir):
    """Build the output file path for a spectrum from its compound name.

    Only the alphanumeric characters of ``name`` are kept, so the result is
    safe to use as a file name. When nothing usable remains (``name`` is
    None, empty, or has no alphanumeric characters), the file is called
    'unnamed.msp' instead of raising or producing the hidden file '.msp'.

    Args:
        name (str): Name to be filtered; may be None for a spectrum without a name.
        outdir (str): Path to destination directory.

    Returns:
        str: Path of the form '<outdir>/<filtered name>.msp'.
    """
    # A missing name arrives as None, which filter() cannot iterate.
    text = "" if name is None else str(name)
    stem = ''.join(filter(str.isalnum, text))
    if not stem:
        stem = "unnamed"
    return os.path.join(outdir, stem + ".msp")
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
68 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
69 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
def split_spectra(filename, outdir):
    """Split an MSP file into one MSP file per spectrum inside ``outdir``.

    The destination directory is created first, then the individual spectra
    files are written into it.

    Args:
        filename (str): MSP file that contains the spectra.
        outdir (str): Path to destination directory where split spectra files are saved.
    """
    make_outdir(outdir)
    result = write_spectra(filename, outdir)
    return result
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
79 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
80 |
ae0263faa819
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff
changeset
|
def main():
    """Parse the command-line arguments and split the given MSP file.

    Both options are required, so a missing one is reported by argparse
    with a usage message instead of failing later inside the file-system
    calls.
    """
    listarg = argparse.ArgumentParser(
        description="Split an MSP file into one MSP file per spectrum."
    )
    listarg.add_argument('--filename', type=str, required=True,
                         help="MSP file that contains the spectra.")
    listarg.add_argument('--outdir', type=str, required=True,
                         help="Destination directory for the split spectra files.")
    args = listarg.parse_args()
    split_spectra(args.filename, args.outdir)


# Parsing happens only when run as a script, so importing this module
# never reads sys.argv.
if __name__ == "__main__":
    main()