comparison matchms_filtering_wrapper.py @ 11:ae45992f969e draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
author recetox
date Thu, 12 Oct 2023 13:29:16 +0000
parents 1b09315a3f87
children 23d4bc72c505
comparison
equal deleted inserted replaced
10:1b09315a3f87 11:ae45992f969e
2 import sys 2 import sys
3 3
4 from matchms.exporting import save_as_mgf, save_as_msp 4 from matchms.exporting import save_as_mgf, save_as_msp
5 from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\ 5 from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\
6 add_retention_index, add_retention_time, clean_compound_name 6 add_retention_index, add_retention_time, clean_compound_name
7 from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity 7 from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
8 select_by_relative_intensity
8 from matchms.importing import load_from_mgf, load_from_msp 9 from matchms.importing import load_from_mgf, load_from_msp
10
11
12 def require_key(spectrum, key):
13 if spectrum.get(key):
14 return spectrum
15
16 return None
9 17
10 18
11 def main(argv): 19 def main(argv):
12 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") 20 parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
13 parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.") 21 parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.")
25 parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter") 33 parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter")
26 parser.add_argument("-mz_range", action='store_true', 34 parser.add_argument("-mz_range", action='store_true',
27 help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).") 35 help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).")
28 parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter") 36 parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter")
29 parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter") 37 parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter")
38 parser.add_argument("-require_smiles", action='store_true',
39 help="Remove spectra that does not contain SMILES.")
40 parser.add_argument("-require_inchi", action='store_true',
41 help="Remove spectra that does not contain INCHI.")
42 parser.add_argument("-reduce_to_top_n_peaks", action='store_true',
43 help="reduce to top n peaks filter.")
44 parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.")
30 args = parser.parse_args() 45 args = parser.parse_args()
31 46
32 if not (args.normalise_intensities 47 if not (args.normalise_intensities
33 or args.default_filters 48 or args.default_filters
34 or args.clean_metadata 49 or args.clean_metadata
35 or args.relative_intensity 50 or args.relative_intensity
36 or args.mz_range): 51 or args.mz_range
52 or args.require_smiles
53 or args.require_inchi
54 or args.reduce_to_top_n_peaks):
37 raise ValueError('No filter selected.') 55 raise ValueError('No filter selected.')
38 56
39 if args.spectra_format == 'msp': 57 if args.spectra_format == 'msp':
40 spectra = list(load_from_msp(args.spectra)) 58 spectra = list(load_from_msp(args.spectra))
41 elif args.queries_format == 'mgf': 59 elif args.queries_format == 'mgf':
61 spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity) 79 spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity)
62 80
63 if args.mz_range: 81 if args.mz_range:
64 spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz) 82 spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz)
65 83
66 filtered_spectra.append(spectrum) 84 if args.reduce_to_top_n_peaks:
85 spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max)
86
87 if args.require_smiles and spectrum is not None:
88 spectrum = require_key(spectrum, "smiles")
89
90 if args.require_inchi and spectrum is not None:
91 spectrum = require_key(spectrum, "inchi")
92
93 if spectrum is not None:
94 filtered_spectra.append(spectrum)
67 95
68 if args.spectra_format == 'msp': 96 if args.spectra_format == 'msp':
69 save_as_msp(filtered_spectra, args.output) 97 save_as_msp(filtered_spectra, args.output)
70 else: 98 else:
71 save_as_mgf(filtered_spectra, args.output) 99 save_as_mgf(filtered_spectra, args.output)