annotate reactome_analysis.py @ 8:a58dc5d4b8cd draft

planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
author proteore
date Fri, 28 Jun 2019 05:17:11 -0400
parents 9cc475dcd0f2
children 19195d1a4063
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
a58dc5d4b8cd planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
proteore
parents: 6
diff changeset
1 import os, re, json, argparse, csv
0
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
2
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
# Absolute directory containing this script; write_output() looks for
# template.html in this directory.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
4
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def id_valid(identifiers):
    """
    Split identifiers into those made only of safe characters and the rest.

    Each entry is first cut at its first ";", so only the leading identifier
    of a ";"-separated group is examined. An identifier is accepted when it
    consists solely of ASCII letters, digits, "_" and "-"; the empty string
    is accepted too (unchanged from the previous behaviour).

    identifiers: iterable of strings.
    Returns a tuple (valid, rejected) of two lists, both in input order.
    """
    # Anchor the end with \Z rather than "$": "$" also matches just before a
    # trailing newline, which used to let values such as "ABC\n" through.
    # The pattern is compiled once instead of on every loop iteration.
    allowed = re.compile(r"^[A-Za-z0-9_-]*\Z")
    res = []
    remove = []
    for identifier in identifiers:
        identifier = identifier.split(";")[0]
        if allowed.match(identifier):
            res.append(identifier)
        else:
            remove.append(identifier)
    return res, remove
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
18
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def isnumber(format, n):
    """
    Check whether the string n is written as a number of the requested kind.

    format: "int" or "float"; any other value yields False.
    n: the text to test.

    "int" accepts an optional "-" followed by digits with no leading zero,
    so "0" and "012" are rejected. This rule is kept as it was because
    data_json() relies on it when validating 1-based column numbers.
    "float" accepts an optional "-", an integer part ("0" or digits with no
    leading zero) and an optional "." followed by at least one digit.

    Returns True when n matches, False otherwise.
    """
    # \Z instead of "$" so that a trailing newline is not silently accepted.
    int_format = re.compile(r"^-?[1-9][0-9]*\Z")
    # The decimal point is escaped (a bare "." matched any character, so
    # "1a5" used to count as a float) and the fractional part is optional as
    # a whole, so single-digit values such as "5" are recognised.
    float_format = re.compile(r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?\Z")

    patterns = {"int": int_format, "float": float_format}
    pattern = patterns.get(format)
    if pattern is None:
        return False
    return pattern.match(n) is not None
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
34
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def _split_ids(cells):
    """Flatten ";"-separated cells into a list of stripped, non-empty IDs."""
    ids = []
    for cell in cells:
        for part in cell.split(";"):
            part = part.strip()
            if part:
                ids.append(part)
    return ids


def _read_column_ids(path, header, column):
    """Read the IDs found in one column of a tab-separated file."""
    number = column.replace("c", "")
    if not isnumber("int", number):
        raise ValueError("Invalid column specification: %r" % column)
    index = int(number) - 1
    if index < 0:
        # A negative number passes isnumber() but is not a usable column.
        raise ValueError("Invalid column specification: %r" % column)
    with open(path, "r") as handle:
        rows = list(csv.reader(handle, delimiter="\t"))
    if header == "true":
        rows = rows[1:]
    # Blank or short rows have no such column; skip them instead of failing.
    return _split_ids(row[index] for row in rows if len(row) > index)


def _query_reactome(ids_text):
    """POST ids_text (one ID per line) to Reactome with curl; return the reply body."""
    # Imported here because the module-level import line lies outside this block.
    import subprocess

    # An argument list (no shell) means the ID text can never be interpreted
    # as shell syntax, unlike the previous os.popen() command string.
    command = [
        "curl",
        "-H", "Content-Type: text/plain",
        "-d", ids_text,
        "-X", "POST",
        "--url", "www.reactome.org/AnalysisService/identifiers/?pageSize=1&page=1",
    ]
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,  # captured and discarded; no stray "stderr" file
            universal_newlines=True,
        )
    except OSError:
        # curl could not be started: treat it like an empty reply.
        return ""
    reply, _ = process.communicate()
    return reply


def data_json(identifiers):
    """
    Submit an ID list to the Reactome analysis service and return its reply.

    identifiers is the value of the --json option, one of:
      [ids_text, "list"]             IDs given inline, separated by
                                     whitespace and/or ";"
      [path, "file", header, column] IDs taken from one column of a
                                     tab-separated file; header is "true"
                                     when the first row must be skipped and
                                     column looks like "c1" (1-based)

    In both modes the IDs are checked with id_valid(); only accepted IDs are
    submitted, one per line.

    Returns (json_string, trash): the raw reply text and the list of IDs that
    id_valid() rejected. The reply is returned even when it is not valid JSON
    so that write_output() can report the failure.
    Raises ValueError for an unknown input mode or an invalid column.
    """
    mode = identifiers[1]
    if mode == "list":
        candidates = _split_ids(identifiers[0].split())
    elif mode == "file":
        candidates = _read_column_ids(identifiers[0], identifiers[2], identifiers[3])
    else:
        raise ValueError("Unknown input mode: %r (expected 'list' or 'file')" % mode)

    # Validate once and reuse both halves of the result.
    valid, trash = id_valid(candidates)
    json_string = _query_reactome("\n".join(valid))

    try:
        j = json.loads(json_string)
    except ValueError:
        # Not JSON (service unavailable, curl missing, ...). Skip the summary;
        # write_output() catches the same error and writes the message.
        return json_string, trash

    print ("Identifiers not found: " + str(j["identifiersNotFound"]))
    print ("Pathways found: " + str(j["pathwaysFound"]))
    return json_string, trash
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
70
6
9cc475dcd0f2 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents: 4
diff changeset
def write_output(filename, json_string, species, trash_file, trash):
    """
    Fill the HTML template with the Reactome result and write the outputs.

    Every line of template.html (located in CURRENT_DIR) is copied to
    filename. Lines containing "{token}" first get "{species}" replaced by
    species and "{token}" replaced by the analysis token found at
    json_string["summary"]["token"].

    If json_string is not valid JSON, copying stops at the first "{token}"
    line and an error message is written to filename instead.

    If trash is non-empty, its items are written to trash_file, one per line.

    filename: path of the HTML file to create.
    json_string: raw reply text from the Reactome analysis service.
    species: string substituted for "{species}".
    trash_file: path of the file receiving rejected IDs.
    trash: list of rejected ID strings.
    """
    token = None
    template_path = os.path.join(CURRENT_DIR, "template.html")
    # "with" closes both files even when an unexpected exception escapes.
    with open(template_path) as template, open(filename, "w") as output:
        try:
            for line in template:
                if "{token}" in line:
                    if token is None:
                        # Parse the reply once, and only when actually needed.
                        token = json.loads(json_string)["summary"]["token"]
                    line = line.replace("{species}", species)
                    line = line.replace("{token}", token)
                output.write(line)
        except ValueError:
            output.write("An error occurred due to unavailability of Reactome web service. Please return later.")

    if trash:
        with open(trash_file, "w") as trash_out:
            trash_out.write("\n".join(trash))
0
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
93
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def options():
    """
    Command-line entry point.

    Parses --json (required, one or more values), --output, --trash and
    --species, sends the identifiers to Reactome through data_json() and
    writes the result files through write_output().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--json", nargs="+", required=True)
    optional_flags = (
        ("--output", "output.html"),
        ("--trash", "trash.txt"),
        ("--species", "48887"),
    )
    for flag, fallback in optional_flags:
        parser.add_argument(flag, default=fallback)

    args = parser.parse_args()
    json_string, trash = data_json(args.json)
    write_output(args.output, json_string, args.species, args.trash, trash)
0
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
104
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
if __name__ == "__main__":
    # Run the command-line tool only when executed as a script, not on import.
    options()