Mercurial > repos > computational-metabolomics > cfmid
comparison cfmid.py @ 0:41c4de0ed4ec draft default tip
planemo upload for repository https://github.com/computational-metabolomics/cfm-galaxy/tree/master/tools/cfm commit f0157bb3b01871411f27c1d5bd4ccee2039335d0
| author | computational-metabolomics |
|---|---|
| date | Wed, 15 Nov 2023 16:28:04 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:41c4de0ed4ec |
|---|---|
"""Rank database candidates for MSP spectra with the ``cfm-id`` executable.

For each spectrum in the ``--input`` MSP file, the script looks up compounds
in the tab-separated ``--db_local`` table whose ``MonoisotopicMass`` lies
within ``--ppm_db`` ppm of the spectrum's neutral mass, runs ``cfm-id`` on
those candidates, and merges all per-spectrum outputs into ``--results``.

Example:
    python cfmid.py --abs_mass_tol='0.01' --db_local='test-data/demo_db.tsv' \
        --input='test-data/input.msp' --num_highest='10' --polarity='pos' \
        --ppm_db='10.0' --ppm_mass_tol='10.0' --results='results.txt' \
        --score_type='Jaccard' \
        --tool_directory='/home/rw/Documents/cfm-galaxy/tools/cfm'
"""
import argparse
import csv
import os
import shutil
import subprocess
import sys

# Mass added/removed to convert a precursor m/z into a neutral mass.
PROTON_MASS = 1.007276

# Scratch locations, all relative to the current working directory.
TMP_DIR = "tmp"
SPEC_FILE = "tmpspec.txt"
CAND_FILE = "tmpcand.txt"

# Per-spectrum output files are named OUT_PREFIX + <feature id> + OUT_SUFFIX.
OUT_PREFIX = "cfm_"
OUT_SUFFIX = ".txt"

# Header markers recognised in the MSP input (case-sensitive).
NAME_KEY = "NAME: "
PRECURSOR_KEY = "PRECURSORMZ: "
NUM_PEAKS_KEY = "Num Peaks: "


def build_parser():
    """Return the command-line parser; every option is an optional string."""
    parser = argparse.ArgumentParser()
    for flag in (
        "--input",
        "--db_local",
        "--num_highest",
        "--ppm_db",
        "--ppm_mass_tol",
        "--abs_mass_tol",
        "--polarity",
        "--score_type",
        "--results",
        "--tool_directory",
    ):
        parser.add_argument(flag)
    return parser


def load_database(path):
    """Read the tab-separated compound table at *path*.

    Returns:
        ``(id2info, mz2id)``: a dict mapping each ``Identifier`` to its full
        row, and a list of ``(MonoisotopicMass as float, Identifier)`` tuples
        in file order.

    Raises:
        KeyError: a required column is missing.
        ValueError: a ``MonoisotopicMass`` value is not a number.
    """
    id2info = {}
    mz2id = []
    with open(path, newline="") as csvfile:
        for row in csv.DictReader(csvfile, delimiter="\t"):
            id2info[row["Identifier"]] = row
            mz2id.append((float(row["MonoisotopicMass"]), row["Identifier"]))
    return id2info, mz2id


def parse_msp(lines, polarity):
    """Yield ``(feature_id, neutral_mass, peaks)`` for each spectrum in *lines*.

    *lines* is any iterable of text lines. A spectrum is emitted as soon as
    the number of peak lines announced by ``Num Peaks:`` has been read, so the
    final spectrum is produced even when the input does not end with a blank
    line. Spectra announcing zero peaks are not emitted. Header lines are
    echoed to stdout.

    *peaks* is a list of ``(mz, intensity)`` string tuples; peak fields may be
    separated by tabs or spaces, and only the first two fields are kept.

    Raises:
        ValueError: a peak line has fewer than two fields, or a spectrum has
            no ``NAME``/``PRECURSORMZ`` header of its own.
    """
    featid = None
    neutral_mass = None
    remaining = 0
    peaks = []
    for raw in lines:
        line = raw.strip()
        if remaining == 0:
            # Header mode: pick up the fields we need, ignore everything else.
            print(line)
            if NAME_KEY in line:
                featid = line.split(NAME_KEY, 1)[1]
            if PRECURSOR_KEY in line:
                mz = float(line.split(PRECURSOR_KEY, 1)[1])
                if polarity == "pos":
                    neutral_mass = mz - PROTON_MASS
                else:
                    neutral_mass = mz + PROTON_MASS
            if NUM_PEAKS_KEY in line:
                remaining = int(line.split(NUM_PEAKS_KEY, 1)[1])
                peaks = []
            continue

        fields = line.split()
        if len(fields) < 2:
            raise ValueError(
                "Malformed peak line for spectrum {0!r}: {1!r}".format(featid, line)
            )
        peaks.append((fields[0], fields[1]))
        remaining -= 1
        if remaining == 0:
            if featid is None or neutral_mass is None:
                raise ValueError(
                    "Spectrum is missing a NAME or PRECURSORMZ header "
                    "(NAME={0!r})".format(featid)
                )
            yield featid, neutral_mass, peaks
            # Do not let header values leak into the next spectrum.
            featid = None
            neutral_mass = None


def find_candidates(mz2id, neutral_mass, ppm):
    """Return identifiers whose mass is strictly within *ppm* of *neutral_mass*.

    *mz2id* is an iterable of ``(mass, identifier)`` tuples; the result keeps
    its order.
    """
    tolerance = (ppm * neutral_mass) / 1e6
    lower = neutral_mass - tolerance
    upper = neutral_mass + tolerance
    return [ident for mass, ident in mz2id if lower < mass < upper]


def write_spectrum(path, peaks):
    """Write *peaks* to *path*, repeated under the low/mid/high energy headers."""
    with open(path, "w") as outfile:
        for energy in ("low", "mid", "high"):
            outfile.write(energy + "\n")
            for mz, intensity in peaks:
                outfile.write(mz + "\t" + intensity + "\n")


def write_candidates(path, cand_ids, id2info):
    """Write one ``<identifier> <InChI>`` line per candidate to *path*."""
    with open(path, "w") as outfile:
        for cand in cand_ids:
            # TODO: Use InChI or SMILES
            outfile.write("{0} {1}\n".format(cand, id2info[cand]["InChI"]))


def build_cfm_command(args, featid, out_path):
    """Return the ``cfm-id`` invocation as an argument list.

    Building a list (rather than a shell string) keeps a feature id that
    contains spaces or shell metacharacters as a single, inert argument.
    """
    if args.polarity == "pos":
        model = "positive_metab_se_cfm"
    else:
        model = "negative_metab_se_cfm"
    model_dir = os.path.join(args.tool_directory, "data", model)
    # NOTE(review): the tolerance arguments are passed in the same order as
    # before (ppm_db, ppm_mass_tol, abs_mass_tol); confirm this matches the
    # positional arguments expected by the installed cfm-id version.
    return [
        "cfm-id",
        SPEC_FILE,
        featid,
        CAND_FILE,
        str(args.num_highest),
        str(args.ppm_db),
        str(args.ppm_mass_tol),
        str(args.abs_mass_tol),
        os.path.join(model_dir, "param_output0.log"),
        os.path.join(model_dir, "param_config.txt"),
        str(args.score_type),
        "1",
        out_path,
    ]


def run_cfm_id(command):
    """Run *command* without a shell; report failures on stderr and carry on."""
    print(" ".join(command))
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # E.g. cfm-id is not on PATH. Remaining spectra are still attempted.
        print("cfm-id could not be started: {0}".format(err), file=sys.stderr)
        return
    if completed.returncode != 0:
        print(
            "cfm-id exited with status {0}".format(completed.returncode),
            file=sys.stderr,
        )


def feature_id_from_filename(fname):
    """Recover the feature id from a ``cfm_<feature id>.txt`` file name.

    Only the leading prefix and trailing suffix are removed, so ids that
    themselves contain ``_`` or ``.txt`` survive intact.
    """
    name = os.path.basename(fname)
    if name.startswith(OUT_PREFIX):
        name = name[len(OUT_PREFIX):]
    if name.endswith(OUT_SUFFIX):
        name = name[: -len(OUT_SUFFIX)]
    return name


def merge_outputs(tmp_dir, results_path):
    """Concatenate all files in *tmp_dir* into one tab-separated results table.

    Files are taken in order of modification time. Each line is prefixed with
    the feature id, and spaces inside the line are turned into tabs.
    """
    outfiles = sorted(
        os.listdir(tmp_dir),
        key=lambda name: os.path.getmtime(os.path.join(tmp_dir, name)),
    )
    with open(results_path, "w") as outfile:
        outfile.write("UID\tRank\tScore\tIdentifier\tInChI\n")
        for fname in outfiles:
            fileid = feature_id_from_filename(fname)
            with open(os.path.join(tmp_dir, fname)) as infile:
                for line in infile:
                    line = line.replace(" ", "\t")
                    if not line.endswith("\n"):
                        # Keep rows separate if a file lacks a final newline.
                        line += "\n"
                    outfile.write(fileid + "\t" + line)


def main(argv=None):
    """Run the full pipeline for the given command-line arguments."""
    args = build_parser().parse_args(argv)
    print(args)

    # store DB in dicts
    id2info, mz2id = load_database(args.db_local)

    # Start from an empty scratch directory so stale outputs are not merged.
    if os.path.isdir(TMP_DIR):
        shutil.rmtree(TMP_DIR)
    os.makedirs(TMP_DIR)

    ppm_db = float(args.ppm_db)
    with open(args.input, "r") as infile:
        for featid, neutral_mass, peaks in parse_msp(infile, args.polarity):
            cand_id_list = find_candidates(mz2id, neutral_mass, ppm_db)
            # run only if we got candidates
            if not cand_id_list:
                continue
            write_spectrum(SPEC_FILE, peaks)
            write_candidates(CAND_FILE, cand_id_list, id2info)
            out_path = os.path.join(TMP_DIR, OUT_PREFIX + featid + OUT_SUFFIX)
            run_cfm_id(build_cfm_command(args, featid, out_path))

    # merge outputs
    merge_outputs(TMP_DIR, args.results)


if __name__ == "__main__":
    main()
