comparison cfmid.py @ 0:41c4de0ed4ec draft default tip

planemo upload for repository https://github.com/computational-metabolomics/cfm-galaxy/tree/master/tools/cfm commit f0157bb3b01871411f27c1d5bd4ccee2039335d0
author computational-metabolomics
date Wed, 15 Nov 2023 16:28:04 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:41c4de0ed4ec
import argparse
import csv
import os
import shutil
import subprocess
5
6
# Command-line interface: every option is optional and is kept as the raw
# string given on the command line (no type conversion happens here).
parser = argparse.ArgumentParser()
for option_name in (
    "--input",
    "--db_local",
    "--num_highest",
    "--ppm_db",
    "--ppm_mass_tol",
    "--abs_mass_tol",
    "--polarity",
    "--score_type",
    "--results",
    "--tool_directory",
):
    parser.add_argument(option_name)

args = parser.parse_args()
# Echo the parsed options so they show up in the tool's standard output.
print(args)
21
22 # Example
23 # python cfmid.py --abs_mass_tol='0.01' --db_local='test-data/demo_db.tsv' --input='test-data/input.msp' --num_highest='10' --polarity='pos' --ppm_db='10.0' --ppm_mass_tol='10.0' --results='results.txt' --score_type='Jaccard' --tool_directory='/home/rw/Documents/cfm-galaxy/tools/cfm'
24
# Load the local compound database (tab-separated, with a header row).
#   id2info: "Identifier" value -> complete row (dict of column -> string)
#   mz2id:   list of (MonoisotopicMass as float, Identifier) pairs, in file
#            order, scanned later to find candidates for a precursor mass
id2info = {}
mz2id = []
with open(args.db_local) as csvfile:
    for record in csv.DictReader(csvfile, delimiter="\t"):
        identifier = record["Identifier"]
        id2info[identifier] = record
        mono_mass = float(record["MonoisotopicMass"])
        mz2id.append((mono_mass, identifier))
33
# Scratch directory (relative to the current working directory) that receives
# one cfm-id result file per spectrum. Anything left over from a previous run
# is removed first so that only files from this run get merged later.
name_tmp = "tmp"
leftover_from_previous_run = os.path.isdir(name_tmp)
if leftover_from_previous_run:
    shutil.rmtree(name_tmp)
os.makedirs(name_tmp)
def _model_files(polarity, tool_directory):
    """Return ``(param_file, config_file)`` for the requested polarity.

    ``"pos"`` selects the ``positive_metab_se_cfm`` directory below
    ``<tool_directory>/data``; any other value selects
    ``negative_metab_se_cfm``.
    """
    if polarity == "pos":
        model = "positive_metab_se_cfm"
    else:
        model = "negative_metab_se_cfm"
    model_dir = os.path.join(tool_directory, "data", model)
    return (
        os.path.join(model_dir, "param_output0.log"),
        os.path.join(model_dir, "param_config.txt"),
    )


def _run_cfmid(featid, mz2, peaklist):
    """Score one spectrum against the database candidates with cfm-id.

    Candidates are the database entries whose monoisotopic mass lies strictly
    within ``args.ppm_db`` ppm of ``mz2``. When there is at least one, the
    spectrum and candidate files are written to the working directory and
    ``cfm-id`` is run, writing its result to ``<name_tmp>/cfm_<featid>.txt``.

    Nothing is run when the record had no NAME / PRECURSORMZ or when no
    candidate matches. A cfm-id failure is reported on standard output and
    does not stop the processing of the remaining spectra.
    """
    if featid is None or mz2 is None:
        print(
            "WARNING: skipping spectrum without NAME or PRECURSORMZ "
            "(NAME: {0})".format(featid)
        )
        return

    # check hits
    tolerance = (float(args.ppm_db) * mz2) / 1e6
    lower, upper = mz2 - tolerance, mz2 + tolerance
    cand_id_list = [ident for mass, ident in mz2id if lower < mass < upper]

    # run only if we got candidates
    if not cand_id_list:
        return

    # write spec file: the same peak list is repeated under each of the
    # three section headers
    with open("./tmpspec.txt", "w") as outfile:
        for section in ("low", "mid", "high"):
            outfile.write(section + "\n")
            for peak in peaklist:
                outfile.write(peak[0] + "\t" + peak[1] + "\n")

    # write candidates file
    with open("./tmpcand.txt", "w") as outfile:
        for cand in cand_id_list:
            outfile.write(
                "{0} {1}\n".format(cand, id2info[cand]["InChI"])
            )  # TODO: Use InChI or SMILES

    # create command line call
    outi = os.path.join(name_tmp, "cfm_" + featid + ".txt")
    param_file, config_file = _model_files(args.polarity, args.tool_directory)
    # An argument list (no shell) keeps feature names and paths that contain
    # spaces or shell metacharacters intact and prevents them from being
    # interpreted as shell syntax. The argument order is unchanged.
    cmd = [
        str(part)
        for part in (
            "cfm-id",
            "tmpspec.txt",
            featid,
            "tmpcand.txt",
            args.num_highest,
            args.ppm_db,
            args.ppm_mass_tol,
            args.abs_mass_tol,
            param_file,
            config_file,
            args.score_type,
            "1",
            outi,
        )
    ]

    # run
    print(" ".join(cmd))
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        # e.g. cfm-id is not installed / not on PATH
        print("WARNING: could not run cfm-id: {0}".format(err))
        return
    if status != 0:
        print(
            "WARNING: cfm-id exited with status {0} for {1}".format(
                status, featid
            )
        )


# Parse the MSP-style input record by record.
#
# State:
#   numlines  - number of peak lines announced by "Num Peaks: N"; 0 while the
#               header part of a record is being read
#   linesread - number of peak lines collected so far for the current record
#   featid    - value of the record's "NAME: " line
#   mz2       - PRECURSORMZ shifted by -/+ 1.007276 for pos / other polarity
#               (presumably the proton mass, i.e. [M+H]+ / [M-H]- is assumed
#               -- confirm)
featid = None
mz2 = None
peaklist = []
numlines = 0
linesread = 0
with open(args.input, "r") as infile:
    for raw_line in infile:
        line = raw_line.strip()

        if numlines != 0:
            if linesread < numlines:
                # Peak line; fields may be separated by tabs or spaces.
                peaklist.append(tuple(line.split()))
                linesread += 1
                continue

            # All announced peaks have been read: this line ends the record.
            _run_cfmid(featid, mz2, peaklist)
            numlines = 0
            # Do not let the next record inherit this record's values.
            featid = None
            mz2 = None
            if not line:
                continue
            # A non-blank line here already belongs to the next record
            # (records not separated by a blank line): handle it as a header.

        print(line)
        if "NAME: " in line:
            featid = line.split("NAME: ")[1]
        if "PRECURSORMZ: " in line:
            mz = float(line.split("PRECURSORMZ: ")[1])
            if args.polarity == "pos":
                mz2 = mz - 1.007276
            else:
                mz2 = mz + 1.007276
        if "Num Peaks: " in line:
            numlines = int(line.split("Num Peaks: ")[1])
            linesread = 0
            peaklist = []

# The input may end right after the last peak line (no trailing blank line);
# make sure that final record is scored as well.
if numlines != 0:
    if linesread == numlines:
        _run_cfmid(featid, mz2, peaklist)
    else:
        print(
            "WARNING: input ended after {0} of {1} peaks for {2}; "
            "spectrum skipped".format(linesread, numlines, featid)
        )
135
136
# merge outputs
# Concatenate the per-spectrum cfm-id result files from the scratch directory
# into one tab-separated table. Files are taken in order of modification
# time, and every row is prefixed with the feature id recovered from the file
# name "cfm_<feature id>.txt".
outfiles = os.listdir(name_tmp)
outfiles.sort(key=lambda x: os.path.getmtime(os.path.join(name_tmp, x)))
with open(args.results, "w") as outfile:
    outfile.write("UID\tRank\tScore\tIdentifier\tInChI\n")
    for fname in outfiles:
        # Strip only the fixed prefix and suffix so that feature ids which
        # themselves contain "_" or ".txt" are kept whole.
        fileid = os.path.basename(fname)
        if fileid.startswith("cfm_"):
            fileid = fileid[len("cfm_"):]
        if fileid.endswith(".txt"):
            fileid = fileid[: -len(".txt")]
        with open(os.path.join(name_tmp, fname)) as infile:
            for line in infile:
                # Result columns are space-separated; convert to tabs.
                line = line.replace(" ", "\t")
                # Keep rows of consecutive files apart even when a file does
                # not end with a newline.
                if not line.endswith("\n"):
                    line += "\n"
                outfile.write(fileid + "\t" + line)