Mercurial > repos > proteore > proteore_reactome
comparison reactome_analysis.py @ 11:19195d1a4063 draft default tip
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
author | proteore |
---|---|
date | Mon, 10 May 2021 15:30:34 +0000 |
parents | a58dc5d4b8cd |
children |
Comparison legend: equal | deleted | inserted | replaced
10:ef31b5ac28d7 | 11:19195d1a4063 |
---|---|
1 import os, re, json, argparse, csv | 1 import argparse |
2 import csv | |
3 import json | |
4 import os | |
5 import re | |
2 | 6 |
3 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) | 7 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) |
8 | |
4 | 9 |
5 def id_valid(identifiers): | 10 def id_valid(identifiers): |
6 """ | 11 """ |
7 Validate IDs if they contain special characters | 12 Validate IDs if they contain special characters |
8 """ | 13 """ |
13 if re.match("^[A-Za-z0-9_-]*$", id): | 18 if re.match("^[A-Za-z0-9_-]*$", id): |
14 res.append(id) | 19 res.append(id) |
15 else: | 20 else: |
16 remove.append(id) | 21 remove.append(id) |
17 return res, remove | 22 return res, remove |
18 | 23 |
24 | |
19 def isnumber(format, n): | 25 def isnumber(format, n): |
20 """ | 26 """ |
21 Check if an variable is numeric | 27 Check if an variable is numeric |
22 """ | 28 """ |
23 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") | 29 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") |
30 if test: | 36 if test: |
31 return True | 37 return True |
32 else: | 38 else: |
33 return False | 39 return False |
34 | 40 |
41 | |
35 def data_json(identifiers): | 42 def data_json(identifiers): |
36 """ | 43 """ |
37 Submit IDs list to Reactome and return results in json format | 44 Submit IDs list to Reactome and return results in json format |
38 Return error in HTML format if web service is not available | 45 Return error in HTML format if web service is not available |
39 """ | 46 """ |
40 trash = [] | 47 trash = [] |
41 if identifiers[1] == "list": | 48 if identifiers[1] == "list": |
42 ids = identifiers[0].split() | 49 ids = identifiers[0].split() |
43 ids = [x.split(";") for x in ids] | 50 ids = [x.split(";") for x in ids] |
44 ids = [item.strip() for sublist in ids for item in sublist if item != ''] | 51 ids = [item.strip() for sublist in ids for item in sublist if item != ''] # noqa 501 |
45 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1" % ids).read() | 52 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1" % ids).read() # noqa 501 |
46 if len(id_valid(identifiers[0].split())[1]) > 0: | 53 if len(id_valid(identifiers[0].split())[1]) > 0: |
47 trash = id_valid(identifiers[0].split())[1] | 54 trash = id_valid(identifiers[0].split())[1] |
48 elif identifiers[1] == "file": | 55 elif identifiers[1] == "file": |
49 header = identifiers[2] | 56 header = identifiers[2] |
50 with open(identifiers[0],"r") as mq : | 57 with open(identifiers[0], "r") as mq: |
51 file_content = csv.reader(mq,delimiter="\t") | 58 file_content = csv.reader(mq, delimiter="\t") |
52 file_content = list(file_content) #csv object to list | 59 file_content = list(file_content) # csv object to list |
53 ncol = identifiers[3] | 60 ncol = identifiers[3] |
54 if isnumber("int", ncol.replace("c", "")): | 61 if isnumber("int", ncol.replace("c", "")): |
55 if header == "true": | 62 if header == "true": |
56 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] | 63 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # noqa 501 |
57 else: | 64 else: |
58 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] | 65 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] # noqa 501 |
59 | 66 # flat list of list of lists, remove empty items |
60 idens = [item.strip() for sublist in idens for item in sublist if item != ''] #flat list of list of lists, remove empty items | 67 idens = [item.strip() for sublist in idens for item in sublist if item != ''] # noqa 501 |
61 ids = "\n".join(id_valid(idens)[0]) | 68 ids = "\n".join(id_valid(idens)[0]) |
62 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1 2> stderr" % ids).read() | 69 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1 2> stderr" % ids).read() # noqa 501 |
63 if len(id_valid(idens)[1]) > 0: | 70 if len(id_valid(idens)[1]) > 0: |
64 trash = id_valid(idens)[1] | 71 trash = id_valid(idens)[1] |
65 #print(json_string) | 72 # print(json_string) |
66 j = json.loads(json_string) | 73 j = json.loads(json_string) |
67 print ("Identifiers not found: " + str(j["identifiersNotFound"])) | 74 print("Identifiers not found: " + str(j["identifiersNotFound"])) |
68 print ("Pathways found: " + str(j["pathwaysFound"])) | 75 print("Pathways found: " + str(j["pathwaysFound"])) |
69 return json_string, trash | 76 return json_string, trash |
77 | |
70 | 78 |
71 def write_output(filename, json_string, species, trash_file, trash): | 79 def write_output(filename, json_string, species, trash_file, trash): |
72 """ | 80 """ |
73 Replace json result in template and print to output | 81 Replace json result in template and print to output |
74 """ | 82 """ |
75 template = open(os.path.join(CURRENT_DIR, "template.html")) | 83 template = open(os.path.join(CURRENT_DIR, "template.html")) |
76 output = open(filename, "w") | 84 output = open(filename, "w") |
77 try: | 85 try: |
78 for line in template: | 86 for line in template: |
79 if "{token}" in line: | 87 if "{token}" in line: |
80 line = line.replace("{species}", species) | 88 line = line.replace("{species}", species) |
81 line = line.replace("{token}", json.loads(json_string)["summary"]["token"]) | 89 line = line.replace("{token}", json.loads(json_string)["summary"]["token"]) # noqa 501 |
82 output.write(line) | 90 output.write(line) |
83 except ValueError: | 91 except ValueError: |
84 output.write("An error occurred due to unavailability of Reactome web service. Please return later.") | 92 output.write("An error occurred due to unavailability of Reactome web service. Please return later.") # noqa 501 |
85 template.close() | 93 template.close() |
86 output.close() | 94 output.close() |
87 | 95 |
88 if trash: | 96 if trash: |
89 #print(trash) | 97 # print(trash) |
90 trash_out = open(trash_file, "w") | 98 trash_out = open(trash_file, "w") |
91 trash_out.write("\n".join(trash)) | 99 trash_out.write("\n".join(trash)) |
92 trash_out.close() | 100 trash_out.close() |
101 | |
93 | 102 |
94 def options(): | 103 def options(): |
95 parser = argparse.ArgumentParser() | 104 parser = argparse.ArgumentParser() |
96 argument = parser.add_argument("--json", nargs="+", required=True) | 105 argument = parser.add_argument("--json", nargs="+", required=True) |
97 argument = parser.add_argument("--output", default="output.html") | 106 argument = parser.add_argument("--output", default="output.html") |
98 argument = parser.add_argument("--trash", default="trash.txt") | 107 argument = parser.add_argument("--trash", default="trash.txt") |
99 argument = parser.add_argument("--species", default="48887") | 108 argument = parser.add_argument("--species", default="48887") # noqa 841 |
100 args = parser.parse_args() | 109 args = parser.parse_args() |
101 filename = args.output | 110 filename = args.output |
102 json_string, trash = data_json(args.json) | 111 json_string, trash = data_json(args.json) |
103 write_output(filename, json_string, args.species, args.trash, trash) | 112 write_output(filename, json_string, args.species, args.trash, trash) |
104 | 113 |
114 | |
105 if __name__ == "__main__": | 115 if __name__ == "__main__": |
106 options() | 116 options() |