annotate matchms_filtering_wrapper.py @ 3:1f2b13dbc0c8 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f7bab98744e338dcdbdc9cf6f9de287632c76ea2
author recetox
date Tue, 18 Oct 2022 13:22:20 +0000
parents 3d275fbdf741
children 13de8005adba
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
1 import argparse
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
2 import sys
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
3
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
4 from matchms.exporting import save_as_mgf, save_as_msp
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
5 from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
6 add_retention_index, add_retention_time, clean_compound_name
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
7 from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
8 from matchms.importing import load_from_mgf, load_from_msp
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
9
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
10
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
11 def main(argv):
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
12 parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
13 parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
14 parser.add_argument("--spectra_format", type=str, required=True, help="Format of spectra file.")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
15 parser.add_argument("--output", type=str, required=True, help="Filtered mass spectra file.")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
16 parser.add_argument("-normalise_intensities", action='store_true',
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
17 help="Normalize intensities of peaks (and losses) to unit height.")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
18 parser.add_argument("-default_filters", action='store_true',
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
19 help="Collection of filters that are considered default and that do no require any (factory) arguments.")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
20 parser.add_argument("-clean_metadata", action='store_true',
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
21 help="Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata.")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
22 parser.add_argument("-relative_intensity", action='store_true',
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
23 help="Keep only peaks within set relative intensity range (keep if to_intensity >= intensity >= from_intensity).")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
24 parser.add_argument("--from_intensity", type=float, help="Lower bound for intensity filter")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
25 parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
26 parser.add_argument("-mz_range", action='store_true',
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
27 help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
28 parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
29 parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter")
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
30 args = parser.parse_args()
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
31
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
32 if not (args.normalise_intensities
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
33 or args.default_filters
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
34 or args.clean_metadata
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
35 or args.relative_intensity
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
36 or args.mz_range):
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
37 raise ValueError('No filter selected.')
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
38
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
39 if args.spectra_format == 'msp':
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
40 spectra = list(load_from_msp(args.spectra))
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
41 elif args.queries_format == 'mgf':
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
42 spectra = list(load_from_mgf(args.spectra))
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
43 else:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
44 raise ValueError(f'File format {args.spectra_format} not supported for mass spectra file.')
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
45
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
46 filtered_spectra = []
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
47 for spectrum in spectra:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
48 if args.normalise_intensities:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
49 spectrum = normalize_intensities(spectrum)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
50
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
51 if args.default_filters:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
52 spectrum = default_filters(spectrum)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
53
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
54 if args.clean_metadata:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
55 filters = [add_compound_name, add_precursor_mz, add_fingerprint, add_losses, add_parent_mass,
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
56 add_retention_index, add_retention_time, clean_compound_name]
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
57 for metadata_filter in filters:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
58 spectrum = metadata_filter(spectrum)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
59
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
60 if args.relative_intensity:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
61 spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
62
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
63 if args.mz_range:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
64 spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
65
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
66 filtered_spectra.append(spectrum)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
67
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
68 if args.spectra_format == 'msp':
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
69 save_as_msp(filtered_spectra, args.output)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
70 else:
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
71 save_as_mgf(filtered_spectra, args.output)
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
72
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
73 return 0
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
74
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
75
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
76 if __name__ == "__main__":
3d275fbdf741 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff changeset
77 main(argv=sys.argv[1:])