Mercurial > repos > recetox > matchms_filtering
annotate matchms_filtering_wrapper.py @ 0:3d275fbdf741 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author | recetox |
---|---|
date | Tue, 22 Mar 2022 16:08:03 +0000 |
parents | |
children | 13de8005adba |
rev | line source |
---|---|
0
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
1 import argparse |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
2 import sys |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
3 |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
4 from matchms.exporting import save_as_mgf, save_as_msp |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
5 from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\ |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
6 add_retention_index, add_retention_time, clean_compound_name |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
7 from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
8 from matchms.importing import load_from_mgf, load_from_msp |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
9 |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
10 |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
def main(argv):
    """Filter a mass spectra file with the selected matchms filters.

    Parses the options in ``argv``, loads the spectra from the ``--spectra``
    file (``msp`` or ``mgf``, as named by ``--spectra_format``), applies every
    filter whose flag was given to each spectrum, and writes the result to
    ``--output`` in the same format as the input.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        0 once the filtered spectra have been written.

    Raises:
        ValueError: If no filter flag was given, or if ``--spectra_format``
            is neither ``msp`` nor ``mgf``.
    """
    parser = argparse.ArgumentParser(description="Filter mass spectra using matchms filters")
    parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.")
    parser.add_argument("--spectra_format", type=str, required=True, help="Format of spectra file.")
    parser.add_argument("--output", type=str, required=True, help="Filtered mass spectra file.")
    parser.add_argument("-normalise_intensities", action='store_true',
                        help="Normalize intensities of peaks (and losses) to unit height.")
    parser.add_argument("-default_filters", action='store_true',
                        help="Collection of filters that are considered default and that do no require any (factory) arguments.")
    parser.add_argument("-clean_metadata", action='store_true',
                        help="Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata.")
    parser.add_argument("-relative_intensity", action='store_true',
                        help="Keep only peaks within set relative intensity range (keep if to_intensity >= intensity >= from_intensity).")
    parser.add_argument("--from_intensity", type=float, help="Lower bound for intensity filter")
    parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter")
    parser.add_argument("-mz_range", action='store_true',
                        help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).")
    parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter")
    parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter")
    # Parse the arguments handed to main() rather than implicitly re-reading
    # sys.argv, so the function honours its own parameter.
    args = parser.parse_args(argv)

    if not any((args.normalise_intensities,
                args.default_filters,
                args.clean_metadata,
                args.relative_intensity,
                args.mz_range)):
        raise ValueError('No filter selected.')

    if args.spectra_format == 'msp':
        spectra = list(load_from_msp(args.spectra))
    elif args.spectra_format == 'mgf':
        # The parser only defines --spectra_format; the format check must read
        # that attribute for the mgf branch and the error below to be reachable.
        spectra = list(load_from_mgf(args.spectra))
    else:
        raise ValueError(f'File format {args.spectra_format} not supported for mass spectra file.')

    # The metadata filter chain does not depend on the spectrum, so build it once.
    if args.clean_metadata:
        metadata_filters = (add_compound_name, add_precursor_mz, add_fingerprint, add_losses, add_parent_mass,
                            add_retention_index, add_retention_time, clean_compound_name)
    else:
        metadata_filters = ()

    filtered_spectra = []
    for spectrum in spectra:
        if args.normalise_intensities:
            spectrum = normalize_intensities(spectrum)

        if args.default_filters:
            spectrum = default_filters(spectrum)

        for metadata_filter in metadata_filters:
            spectrum = metadata_filter(spectrum)

        if args.relative_intensity:
            spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity)

        if args.mz_range:
            spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz)

        filtered_spectra.append(spectrum)

    # Output is written in the same format the input was read in; only
    # 'msp' and 'mgf' can reach this point.
    if args.spectra_format == 'msp':
        save_as_msp(filtered_spectra, args.output)
    else:
        save_as_mgf(filtered_spectra, args.output)

    return 0
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
74 |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
75 |
3d275fbdf741
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
if __name__ == "__main__":
    # Run the tool only when executed as a script; the program name is
    # dropped so main() is handed just the user-supplied arguments.
    cli_arguments = sys.argv[1:]
    main(argv=cli_arguments)