Mercurial > repos > recetox > matchms_filtering
diff matchms_filtering_wrapper.py @ 0:3d275fbdf741 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author | recetox |
---|---|
date | Tue, 22 Mar 2022 16:08:03 +0000 |
parents | |
children | 13de8005adba |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matchms_filtering_wrapper.py Tue Mar 22 16:08:03 2022 +0000 @@ -0,0 +1,77 @@ +import argparse +import sys + +from matchms.exporting import save_as_mgf, save_as_msp +from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\ + add_retention_index, add_retention_time, clean_compound_name +from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity +from matchms.importing import load_from_mgf, load_from_msp + + +def main(argv): + parser = argparse.ArgumentParser(description="Compute MSP similarity scores") + parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.") + parser.add_argument("--spectra_format", type=str, required=True, help="Format of spectra file.") + parser.add_argument("--output", type=str, required=True, help="Filtered mass spectra file.") + parser.add_argument("-normalise_intensities", action='store_true', + help="Normalize intensities of peaks (and losses) to unit height.") + parser.add_argument("-default_filters", action='store_true', + help="Collection of filters that are considered default and that do no require any (factory) arguments.") + parser.add_argument("-clean_metadata", action='store_true', + help="Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata.") + parser.add_argument("-relative_intensity", action='store_true', + help="Keep only peaks within set relative intensity range (keep if to_intensity >= intensity >= from_intensity).") + parser.add_argument("--from_intensity", type=float, help="Lower bound for intensity filter") + parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter") + parser.add_argument("-mz_range", action='store_true', + help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).") + parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter") + parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter") + args = parser.parse_args() + + if not (args.normalise_intensities + or args.default_filters + or args.clean_metadata + or args.relative_intensity + or args.mz_range): + raise ValueError('No filter selected.') + + if args.spectra_format == 'msp': + spectra = list(load_from_msp(args.spectra)) + elif args.queries_format == 'mgf': + spectra = list(load_from_mgf(args.spectra)) + else: + raise ValueError(f'File format {args.spectra_format} not supported for mass spectra file.') + + filtered_spectra = [] + for spectrum in spectra: + if args.normalise_intensities: + spectrum = normalize_intensities(spectrum) + + if args.default_filters: + spectrum = default_filters(spectrum) + + if args.clean_metadata: + filters = [add_compound_name, add_precursor_mz, add_fingerprint, add_losses, add_parent_mass, + add_retention_index, add_retention_time, clean_compound_name] + for metadata_filter in filters: + spectrum = metadata_filter(spectrum) + + if args.relative_intensity: + spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity) + + if args.mz_range: + spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz) + + filtered_spectra.append(spectrum) + + if args.spectra_format == 'msp': + save_as_msp(filtered_spectra, args.output) + else: + save_as_mgf(filtered_spectra, args.output) + + return 0 + + +if __name__ == "__main__": + main(argv=sys.argv[1:])