Mercurial > repos > recetox > ipapy2_ms1_annotation
diff ipapy2_MS2_annotation.py @ 0:7f84a8a5edde draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2 commit 64b61ff2823b4f54868c0ab7a4c0dc49eaf2979a
author | recetox |
---|---|
date | Fri, 16 May 2025 08:00:41 +0000 |
parents | |
children |
line wrap: on
line diff
# Reconstructed from the changeset diff; original diff header:
#   --- /dev/null  Thu Jan 01 00:00:00 1970 +0000
#   +++ b/ipapy2_MS2_annotation.py  Fri May 16 08:00:41 2025 +0000
#   @@ -0,0 +1,136 @@
import argparse

from ipaPy2 import ipa
from utils import flattern_annotations, MSArgumentParser


# Accepted spellings for boolean command-line values (compared lower-cased).
# The empty string stays falsy because ``bool("")`` was the only way to obtain
# ``False`` from the command line with the previous ``type=bool`` declaration.
_TRUE_STRINGS = frozenset({"true", "t", "yes", "y", "1"})
_FALSE_STRINGS = frozenset({"false", "f", "no", "n", "0", ""})


def _str_to_bool(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` applies ``type`` to the raw string, and ``bool("False")`` is
    ``True`` because any non-empty string is truthy. This converter parses the
    text instead.

    Args:
        value: the raw option value (a bool is passed through unchanged).

    Returns:
        ``True`` or ``False`` according to the spelling of ``value``.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean
            spelling; argparse reports this as a normal usage error.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got {!r}".format(value)
    )


def main(
    input_dataset_mapped_isotope_patterns,
    input_dataset_MS2,
    input_dataset_adducts,
    input_dataset_MS2_DB,
    ppm,
    ratiosd,
    ppmunk,
    ratiounk,
    ppmthr,
    pRTNone,
    pRTout,
    mzdCS,
    ppmCS,
    CSunk,
    evfilt,
    output_dataset,
    ncores,
):
    """Run ``ipa.MSMSannotation`` and write the flattened result.

    The four ``input_dataset_*`` arguments are passed positionally, in order,
    to ``ipa.MSMSannotation``; every other argument except ``output_dataset``
    is forwarded as the keyword argument of the same name.

    Args:
        input_dataset_mapped_isotope_patterns: MS1 dataset (first positional
            argument of ``ipa.MSMSannotation``).
        input_dataset_MS2: MS2 fragmentation dataset.
        input_dataset_adducts: adducts information dataset.
        input_dataset_MS2_DB: MS2 database dataset.
        ppm, ratiosd, ppmunk, ratiounk, ppmthr, pRTNone, pRTout: forwarded
            unchanged to ``ipa.MSMSannotation``.
        mzdCS: maximum mz difference for cosine similarity scores.
        ppmCS: maximum ppm for cosine similarity scores.
        CSunk: cosine similarity score associated with the 'unknown'
            annotation.
        evfilt: if true, only spectra acquired with the same collision energy
            are considered.
        output_dataset: a ``(write_func, file_path)`` pair; ``write_func`` is
            called as ``write_func(annotations_flat, file_path)``.
        ncores: forwarded unchanged to ``ipa.MSMSannotation``.
    """
    annotations = ipa.MSMSannotation(
        input_dataset_mapped_isotope_patterns,
        input_dataset_MS2,
        input_dataset_adducts,
        input_dataset_MS2_DB,
        ppm=ppm,
        ratiosd=ratiosd,
        ppmunk=ppmunk,
        ratiounk=ratiounk,
        ppmthr=ppmthr,
        pRTNone=pRTNone,
        pRTout=pRTout,
        mzdCS=mzdCS,
        ppmCS=ppmCS,
        CSunk=CSunk,
        evfilt=evfilt,
        ncores=ncores,
    )
    annotations_flat = flattern_annotations(annotations)
    # output_dataset carries both the writer callable and its destination.
    write_func, file_path = output_dataset
    write_func(annotations_flat, file_path)


if __name__ == "__main__":
    # NOTE(review): ppm, ratiosd, ppmunk, ratiounk, ppmthr, pRTNone, pRTout,
    # output_dataset and ncores are read from ``args`` below but not declared
    # here; presumably MSArgumentParser registers them (and the "load_data"
    # action) - confirm in utils.
    parser = MSArgumentParser(
        """Annotation of the dataset base on the MS1 and MS2 information. Prior
        probabilities are based on mass only, while post probabilities are based
        on mass, RT, previous knowledge and isotope patterns."""
    )
    parser.add_argument(
        "--input_dataset_mapped_isotope_patterns",
        nargs=2,
        action="load_data",
        required=True,
        help=(
            "A dataset containing the MS1 data. Ideally obtained from"
            " map_isotope_patterns"
        ),
    )
    parser.add_argument(
        "--input_dataset_MS2",
        nargs=2,
        action="load_data",
        required=True,
        help="A dataset containing the MS2 fragmentation data",
    )
    parser.add_argument(
        "--input_dataset_adducts",
        nargs=2,
        action="load_data",
        required=True,
        help=(
            "A dataset containing the information on all the possible adducts given the"
            " database. Ideally obtained from compute_all_adducts"
        ),
    )
    parser.add_argument(
        "--input_dataset_MS2_DB",
        nargs=2,
        action="load_data",
        required=True,
        help="A dataset containing the MS2 database",
    )
    # NOTE(review): type=int rejects fractional m/z tolerances such as 0.01;
    # left unchanged here - confirm whether float was intended.
    parser.add_argument(
        "--mzdCS",
        type=int,
        default=0,
        help="""maximum mz difference allowed when computing cosine similarity
        scores. If one wants to use this parameter instead of ppmCS, this
        must be set to 0. Default 0.""",
    )
    parser.add_argument(
        "--ppmCS",
        type=int,
        default=10,
        help="""maximum ppm allowed when computing cosine similarity scores.
        If one wants to use this parameter instead of mzdCS, this must be
        set to 0. Default 10.""",
    )
    parser.add_argument(
        "--CSunk",
        type=float,
        default=0.7,
        help="""cosine similarity score associated with the 'unknown' annotation.
        Default 0.7""",
    )
    # type=bool would turn "--evfilt False" into True (non-empty string), so
    # the value is parsed explicitly instead.
    parser.add_argument(
        "--evfilt",
        type=_str_to_bool,
        default=False,
        help="""Default value False. If true, only spectrum acquired with the same
        collision energy are considered.""",
    )
    args = parser.parse_args()
    main(
        args.input_dataset_mapped_isotope_patterns,
        args.input_dataset_MS2,
        args.input_dataset_adducts,
        args.input_dataset_MS2_DB,
        args.ppm,
        args.ratiosd,
        args.ppmunk,
        args.ratiounk,
        args.ppmthr,
        args.pRTNone,
        args.pRTout,
        args.mzdCS,
        args.ppmCS,
        args.CSunk,
        args.evfilt,
        args.output_dataset,
        args.ncores,
    )