comparison query.py @ 12:039e8e1e8b1f draft

Uploaded 20180201
author fabio
date Thu, 01 Feb 2018 16:23:17 -0500
parents 027f2e9d4a25
children e780b47013df
comparison
equal deleted inserted replaced
11:0d0f7080b55c 12:039e8e1e8b1f
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 # https://github.com/ross/requests-futures 3 # https://github.com/ross/requests-futures
4 # http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests 4 # http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
5 5
6 import os, uuid, optparse, requests, json, time 6 import sys, os, uuid, optparse, requests, json, time
7 #from requests_futures.sessions import FuturesSession 7 #from requests_futures.sessions import FuturesSession
8 8
9 #### NN14 #### 9 #### NN14 ####
10 SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; 10 SERVICE_URL = "http://nn14.galaxyproject.org:8080/";
11 #service_url = "http://127.0.0.1:8082/"; 11 #service_url = "http://127.0.0.1:8082/";
14 ############## 14 ##############
15 # query delay in seconds 15 # query delay in seconds
16 QUERY_DELAY = 30; 16 QUERY_DELAY = 30;
17 ############## 17 ##############
18 18
19 __version__ = "1.0.0";
19 VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' 20 VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
20 21
# in the case of collections, exitcodes equal to 0 and 1 are not considered errors
def raiseException( exitcode, message, errorfilepath ):
    """Report *message* through the error file and terminate the process.

    Despite its name, this helper does not raise an ordinary exception: it
    writes ``message`` to ``errorfilepath`` (truncating any previous content)
    and then calls ``sys.exit`` with ``exitcode``.

    Args:
        exitcode: status handed to ``sys.exit``.
        message: text to store in the error file.
        errorfilepath: path of the file that receives ``message``.

    Raises:
        SystemExit: carrying ``exitcode``, once the message has been reported.
    """
    try:
        with open(errorfilepath, 'w') as out:
            out.write(message)
    except (IOError, OSError) as err:
        # The error file is only a reporting channel. If it cannot be written,
        # fall back to stderr so the message is not lost and the caller's exit
        # code is still used instead of an unrelated I/O traceback.
        sys.stderr.write("%s\n(could not write error file '%s': %s)\n"
                         % (message, errorfilepath, err))
    sys.exit(exitcode)
27
21 def query_request( options, args, payload ): 28 def query_request( options, args, payload ):
29 output_dir_path = options.outputdir;
22 # add additional parameters to the payload 30 # add additional parameters to the payload
23 #payload["tree_id"] = str(options.treeid); 31 #payload["tree_id"] = str(options.treeid);
24 payload["search_mode"] = str(options.search); 32 payload["search_mode"] = str(options.search);
25 payload["exact_algorithm"] = int(options.exact); 33 payload["exact_algorithm"] = int(options.exact);
26 payload["search_threshold"] = float(options.sthreshold); 34 payload["search_threshold"] = float(options.sthreshold);
30 # create a session 38 # create a session
31 session = requests.Session(); 39 session = requests.Session();
32 # make a synchronous post request to the query route 40 # make a synchronous post request to the query route
33 req = session.post(QUERY_URL, headers=headers, json=payload); 41 req = session.post(QUERY_URL, headers=headers, json=payload);
34 resp_code = req.status_code; 42 resp_code = req.status_code;
35 print(str(req.content)+"\n\n"); 43 #print(str(req.content)+"\n\n");
36 if resp_code == requests.codes.ok: 44 if resp_code == requests.codes.ok:
37 resp_content = str(req.content); 45 resp_content = str(req.content);
38 # convert out to json 46 # convert out to json
39 json_content = json.loads(resp_content); 47 json_content = json.loads(resp_content);
40 # retrieve task id 48 # retrieve task id
41 task_id = json_content['task_id']; 49 task_id = json_content['task_id'];
42 task_processed = False; 50 task_processed = False;
43 # results json content 51 # results json content
44 json_status_content = None; 52 json_status_content = None;
45 task_status = None;
46 while task_processed is False: 53 while task_processed is False:
47 # create a new session 54 # create a new session
48 session = requests.Session(); 55 session = requests.Session();
49 # make a synchronous get request to the status route 56 # make a synchronous get request to the status route
50 status_query_url = STATUS_URL.replace("<task_id>", task_id); 57 status_query_url = STATUS_URL.replace("<task_id>", task_id);
51 status_req = session.get(status_query_url); 58 status_req = session.get(status_query_url);
52 status_resp_content = str(status_req.content); 59 status_resp_content = str(status_req.content);
53 print(status_resp_content+"\n\n"); 60 #print(status_resp_content+"\n\n");
54 # convert out to json 61 # convert out to json
55 json_status_content = json.loads(status_resp_content); 62 json_status_content = json.loads(status_resp_content);
56 # take a look at the state 63 # take a look at the state
57 # state attribute is always available 64 # state attribute is always available
58 if json_status_content['state'] == 'SUCCESS': 65 if json_status_content['state'] == 'SUCCESS':
59 task_processed = True; 66 task_processed = True;
60 break; 67 break;
61 elif json_status_content['state'] in ['FAILURE', 'REVOKED']: 68 elif json_status_content['state'] in ['FAILURE', 'REVOKED']:
62 return "Task status: "+str(json_status_content['state']); 69 return raiseException( 1, "Task ID: "+str(task_id)+"\nTask status: "+str(json_status_content['state']), str(options.errorfile) );
63 else: 70 else:
64 time.sleep(QUERY_DELAY); # in seconds 71 time.sleep(QUERY_DELAY); # in seconds
65 72
66 # get output dir (collection) path
67 output_dir_path = options.outputdir;
68 if not os.path.exists(output_dir_path):
69 os.makedirs(output_dir_path);
70 out_file_format = "tabular"; 73 out_file_format = "tabular";
71
72 for block in json_status_content['results']: 74 for block in json_status_content['results']:
73 seq_id = block['sequence_id']; 75 seq_id = block['sequence_id'];
74 accessions = block['accession_numbers']; 76 accessions = block['accession_numbers'];
75 # put response block in the output collection 77 # put response block in the output collection
76 output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format); 78 output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format);
77 accessions_list = ""; 79 accessions_list = "";
78 for accession_number in accessions: 80 for accession_number in accessions:
79 accessions_list = accessions_list + accession_number + "\n"; 81 accessions_list = accessions_list + accession_number + "\n";
80 with open(output_file_path, 'w') as out: 82 with open(output_file_path, 'w') as out:
81 out.write(accessions_list.strip()); 83 out.write(accessions_list.strip());
84 return sys.exit(0);
82 else: 85 else:
83 return "Unable to query the remote server. Please try again in a while."; 86 return raiseException( 1, "Unable to query the remote server. Please try again in a while.", str(options.errorfile) );
84 87
85 def query( options, args ): 88 def query( options, args ):
89 output_dir_path = options.outputdir;
86 multiple_data = {}; 90 multiple_data = {};
87 comma_sep_file_paths = options.files; 91 comma_sep_file_paths = options.files;
88 #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths))); 92 #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
89 # check if options.files contains at least one file path 93 # check if options.files contains at least one file path
90 if comma_sep_file_paths is not None: 94 if comma_sep_file_paths is not None:
104 seq_id = line_split[0]; 108 seq_id = line_split[0];
105 # fix seq_id using valid chars only 109 # fix seq_id using valid chars only
106 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) 110 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
107 seq_text = line_split[1]; 111 seq_text = line_split[1];
108 if seq_id in multiple_data: 112 if seq_id in multiple_data:
109 return "Error: the id '"+seq_id+"' is duplicated"; 113 return raiseException( 1, "Error: the id '"+seq_id+"' is duplicated", str(options.errorfile) );
110 multiple_data[seq_id] = seq_text; 114 multiple_data[seq_id] = seq_text;
111 if len(multiple_data) > 0: 115 if len(multiple_data) > 0:
112 return query_request( options, args, multiple_data ); 116 return query_request( options, args, multiple_data );
113 #return echo( options, args ); 117 #return echo( options, args );
114 else: 118 else:
115 return "An error has occurred. Please be sure that your input files are valid."; 119 return raiseException( 1, "An error has occurred. Please be sure that your input files are valid.", str(options.errorfile) );
116 else: 120 else:
117 # try with the sequence in --sequence 121 # try with the sequence in --sequence
118 text_content = options.sequences; 122 text_content = options.sequences;
119 #print("sequences: "+text_content); 123 #print("sequences: "+text_content);
120 # check if options.sequences contains a list of sequences (one for each row) 124 # check if options.sequences contains a list of sequences (one for each row)
130 seq_id = line_split[0]; 134 seq_id = line_split[0];
131 # fix seq_id using valid chars only 135 # fix seq_id using valid chars only
132 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) 136 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
133 seq_text = line_split[1]; 137 seq_text = line_split[1];
134 if seq_id in multiple_data: 138 if seq_id in multiple_data:
135 return "Error: the id '"+seq_id+"' is duplicated"; 139 return raiseException( 1, "Error: the id '"+seq_id+"' is duplicated", str(options.errorfile) );
136 multiple_data[seq_id] = seq_text; 140 multiple_data[seq_id] = seq_text;
137 if len(multiple_data) > 0: 141 if len(multiple_data) > 0:
138 return query_request( options, args, multiple_data ); 142 return query_request( options, args, multiple_data );
139 #return echo( options, args ); 143 #return echo( options, args );
140 else: 144 else:
141 return "An error has occurred. Please be sure that your input files are valid."; 145 return raiseException( 1, "An error has occurred. Please be sure that your input files are valid.", str(options.errorfile) );
142 else: 146 else:
143 return "You have to insert at least one row formatted as a tab delimited <id, sequence> touple"; 147 return raiseException( 1, "You have to insert at least one row formatted as a tab delimited (ID, SEQUENCE) couple", str(options.errorfile) );
144 return -1; 148 return 1;
145 149
146 def __main__(): 150 def __main__():
147 # Parse the command line options 151 # Parse the command line options
148 usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path"; 152 usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path";
149 parser = optparse.OptionParser(usage = usage); 153 parser = optparse.OptionParser(usage = usage);
154 parser.add_option("-v", "--version", action="store_true", dest="version",
155 default=False, help="display version and exit")
150 parser.add_option("-f", "--files", type="string", 156 parser.add_option("-f", "--files", type="string",
151 action="store", dest="files", help="comma separated files path"); 157 action="store", dest="files", help="comma separated files path");
152 parser.add_option("-n", "--names", type="string", 158 parser.add_option("-n", "--names", type="string",
153 action="store", dest="names", help="comma separated names associated to the files specified in --files"); 159 action="store", dest="names", help="comma separated names associated to the files specified in --files");
154 parser.add_option("-s", "--sequences", type="string", 160 parser.add_option("-s", "--sequences", type="string",
159 action="store", dest="search", help="search mode"); 165 action="store", dest="search", help="search mode");
160 parser.add_option("-e", "--exact", type="int", default=0, 166 parser.add_option("-e", "--exact", type="int", default=0,
161 action="store", dest="exact", help="exact algorithm (required if search is 1 only)"); 167 action="store", dest="exact", help="exact algorithm (required if search is 1 only)");
162 parser.add_option("-t", "--sthreshold", type="float", 168 parser.add_option("-t", "--sthreshold", type="float",
163 action="store", dest="sthreshold", help="threshold applied to the search algrithm"); 169 action="store", dest="sthreshold", help="threshold applied to the search algrithm");
164 parser.add_option("-o", "--outputdir", type="string", 170 parser.add_option("-o", "--outputdir", type="string", default="output",
165 action="store", dest="outputdir", help="output directory (collection) path"); 171 action="store", dest="outputdir", help="output directory (collection) path");
172 parser.add_option("-r", "--errorfile", type="string", default="error.log",
173 action="store", dest="errorfile", help="error file name containing error messages");
166 174
167 #parser.add_option("-k", "--outfile", type="string",
168 #action="store", dest="outfile", help="output file");
169
170 # TEST 175 # TEST
171 #--search 'rrr'
172 #--sthreshold 0.5
173 #--exact 0
174 #--sequences 'id0__tc__CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC'
175 #--outputdir 'collection_content'
176 #sequences = 'NM_001169378.2__tc__atttcggatgctttggagggaggaactctagtgctgcattgattggggcgtgtgttaatgatattcccagttcgcatggcgagcatcgattcctggtacgtatgtgggccccttgactcccacttatcgcacttgtcgttcgcaatttgcatgaattccgcttcgtctgaaacgcacttgcgccagacttctccggctggtctgatctggtctgtgatccggtctggtggggcgccagttgcgtttcgagctcatcaccagtcactccgcagtcgcattctgccagaggtctccgatcaagagcgcttctccattcgagattcaaacgcagcgcggtctgacgccgccacatcgagtgaaatccatatcgatggccacattcacacaggacgagatcgacttcctgcgcagccatggcaacgagctgtgtgccaagacctggctgggattgtgggatccgaagcgggctgtgcaccagcaggagcagcgcgaactgatgatggacaagtatgagcggaagcgatactacctggagccggccagtcctcttaagtcgctggccaatgcggtcaacctgaagtcgtctgctccggcgacgaaccacactcagaatggccaccaaaatgggtatgccagcatccatttgacgcctcctgctgcccagcggacctcggccaatggattgcagaaggtggccaactcgtcgagtaactcttctggaaagacctcatcctcgatcagtaggccacactataatcaccagaacaacagccaaaacaacaatcacgatgcctttggcctgggtggcggattgagcagcctgaacagcgccggttccacatccactggagctctttccgacaccagcagttgtgctagcaatggcttcggtgcggactgcgactttgtggctgactttggctcggccaacattttcgacgccacatcggcgcgttccacaggatcgccggcggtgtcgtccgtgtcctcagtgggttccagcaatggctacgccaaggtgcagcccatccgggcagctcatctccagcagcaacagcagttgcagcagcagctgcatcagcagcagctcctcaatggcaatggtcatcagggcactgagaactttgccgacttcgatcacgctcccatctacaatgcagtggctccaccgacttttaacgattggatcagcgactggagcaggcggggcttccacgatcccttcgacgattgcgatgactcgccaccaggtgcccgccctccagcacctgcgccagctcctgctcaagttcccgcagtatcatcaccattgccaaccgtccgagaagaaccagagcttgcgtggaatttttgggaggacgagatgcgaatagaggcgcaggaaaaggagtcccaaactaaacagccggagttgggctactccttttcgattagtactactacgcccctttccccttcgaatcccttcctgccctaccttgtcagtgaggagcagcatcgaaatcatccagagaagccctccttttcgtattcgttgttcagctccatatcaaatagttcgcaagaagatcaggcggatgatcatgagatgaatgttttaaatgccaatttccatgatttctttacgtggagtgctcccttgcagaacggccatacgaccagtccgcccaagggcggaaatgcagcgatggcgcccagtgaggatcgatatgccgctcttaaggatctcgacgagcagctgcgagaactgaaggccagcgaaagcgccacagagacgcccacgcccaccagtggcaatgttcaggccacagatgcctttggtggagccctcaacaacaatccaaatcccttcaagggccagcaacagcagcagctcagcagccatgtggtgaatccattccagcagcagcaacagcagcagcaccagcagaatctctatggccagttgacgctcataccaaa
tgcctacggcagcagttcccagcagcagatggggcaccatctcctccagcagcagcagcagcaacagcagagcttcttcaacttcaacaacaacgggttcgccatctcgcagggtctgcccaacggctgcggcttcggcagcatgcaacccgctcctgtgatggccaacaatccctttgcagccagcggcgccatgaacaccaacaatccattcttatgagactcaacccgggagaatccgcctcgcgccacctggcagaggcgctgagccagcgaacaaagagcagacgcggaggaaccgaaccgaaattagtccattttactaacaatagcgttaatctatgtatacataatgcacgccggagagcactctttgtgtacatagcccaaatatgtacacccgaaaggctccacgctgacgctagtcctcgcggatggcggaggcggactggggcgttgatatattcttttacatggtaactctactctaacgtttacggatacggatatttgtatttgccgtttgccctagaactctatacttgtactaagcgcccatgaacacttcatccactaacatagctactaatcctcatcctagtggaggatgcagttggtccagacactctgttatttgttttatccatcctcgtacttgtctttgtcccatttagcactttcgttgcggataagaactttgtcagttattgattgtgtggccttaataagattataaaactaaatattataacgtacgactatacatatacggatacagatacagattcagacacagttagtacagatacagatatacatatacgcttttgtacctaatgaattgcttcttgtttccattgctaatcatctgcttttcgtgtgctaattttatacactagtacgtgcgatatcggccgtgcagatagattgctcagctcgcgagtcaagcctcttttggttgcacccacggcagacatttgtacatatactgtctgattgtaagcctcgtgtaatacctccattaacaccactcccccaccacccatccatcgaaccccgaatccatgactcaattcactgctcacatgtccatgcccatgccttaacgtgtcaaacattatcgaagccttaaagttatttaaaactacgaaatttcaataaaaacaaataagaacgctatc'; 176 #sequences = 
'NM_001169378.2__tc__atttcggatgctttggagggaggaactctagtgctgcattgattggggcgtgtgttaatgatattcccagttcgcatggcgagcatcgattcctggtacgtatgtgggccccttgactcccacttatcgcacttgtcgttcgcaatttgcatgaattccgcttcgtctgaaacgcacttgcgccagacttctccggctggtctgatctggtctgtgatccggtctggtggggcgccagttgcgtttcgagctcatcaccagtcactccgcagtcgcattctgccagaggtctccgatcaagagcgcttctccattcgagattcaaacgcagcgcggtctgacgccgccacatcgagtgaaatccatatcgatggccacattcacacaggacgagatcgacttcctgcgcagccatggcaacgagctgtgtgccaagacctggctgggattgtgggatccgaagcgggctgtgcaccagcaggagcagcgcgaactgatgatggacaagtatgagcggaagcgatactacctggagccggccagtcctcttaagtcgctggccaatgcggtcaacctgaagtcgtctgctccggcgacgaaccacactcagaatggccaccaaaatgggtatgccagcatccatttgacgcctcctgctgcccagcggacctcggccaatggattgcagaaggtggccaactcgtcgagtaactcttctggaaagacctcatcctcgatcagtaggccacactataatcaccagaacaacagccaaaacaacaatcacgatgcctttggcctgggtggcggattgagcagcctgaacagcgccggttccacatccactggagctctttccgacaccagcagttgtgctagcaatggcttcggtgcggactgcgactttgtggctgactttggctcggccaacattttcgacgccacatcggcgcgttccacaggatcgccggcggtgtcgtccgtgtcctcagtgggttccagcaatggctacgccaaggtgcagcccatccgggcagctcatctccagcagcaacagcagttgcagcagcagctgcatcagcagcagctcctcaatggcaatggtcatcagggcactgagaactttgccgacttcgatcacgctcccatctacaatgcagtggctccaccgacttttaacgattggatcagcgactggagcaggcggggcttccacgatcccttcgacgattgcgatgactcgccaccaggtgcccgccctccagcacctgcgccagctcctgctcaagttcccgcagtatcatcaccattgccaaccgtccgagaagaaccagagcttgcgtggaatttttgggaggacgagatgcgaatagaggcgcaggaaaaggagtcccaaactaaacagccggagttgggctactccttttcgattagtactactacgcccctttccccttcgaatcccttcctgccctaccttgtcagtgaggagcagcatcgaaatcatccagagaagccctccttttcgtattcgttgttcagctccatatcaaatagttcgcaagaagatcaggcggatgatcatgagatgaatgttttaaatgccaatttccatgatttctttacgtggagtgctcccttgcagaacggccatacgaccagtccgcccaagggcggaaatgcagcgatggcgcccagtgaggatcgatatgccgctcttaaggatctcgacgagcagctgcgagaactgaaggccagcgaaagcgccacagagacgcccacgcccaccagtggcaatgttcaggccacagatgcctttggtggagccctcaacaacaatccaaatcccttcaagggccagcaacagcagcagctcagcagccatgtggtgaatccattccagcagcagcaacagcagcagcaccagcagaatctctatggccagttgacgctcataccaaatgcctacggcagcagtt
cccagcagcagatggggcaccatctcctccagcagcagcagcagcaacagcagagcttcttcaacttcaacaacaacgggttcgccatctcgcagggtctgcccaacggctgcggcttcggcagcatgcaacccgctcctgtgatggccaacaatccctttgcagccagcggcgccatgaacaccaacaatccattcttatgagactcaacccgggagaatccgcctcgcgccacctggcagaggcgctgagccagcgaacaaagagcagacgcggaggaaccgaaccgaaattagtccattttactaacaatagcgttaatctatgtatacataatgcacgccggagagcactctttgtgtacatagcccaaatatgtacacccgaaaggctccacgctgacgctagtcctcgcggatggcggaggcggactggggcgttgatatattcttttacatggtaactctactctaacgtttacggatacggatatttgtatttgccgtttgccctagaactctatacttgtactaagcgcccatgaacacttcatccactaacatagctactaatcctcatcctagtggaggatgcagttggtccagacactctgttatttgttttatccatcctcgtacttgtctttgtcccatttagcactttcgttgcggataagaactttgtcagttattgattgtgtggccttaataagattataaaactaaatattataacgtacgactatacatatacggatacagatacagattcagacacagttagtacagatacagatatacatatacgcttttgtacctaatgaattgcttcttgtttccattgctaatcatctgcttttcgtgtgctaattttatacactagtacgtgcgatatcggccgtgcagatagattgctcagctcgcgagtcaagcctcttttggttgcacccacggcagacatttgtacatatactgtctgattgtaagcctcgtgtaatacctccattaacaccactcccccaccacccatccatcgaaccccgaatccatgactcaattcactgctcacatgtccatgcccatgccttaacgtgtcaaacattatcgaagccttaaagttatttaaaactacgaaatttcaataaaaacaaataagaacgctatc';
177 #print(sequences);
178 #(options, args) = parser.parse_args(['-x', 'rrr', '-t', 0.5, '-s', sequences, '-o', 'collection_content']); 177 #(options, args) = parser.parse_args(['-x', 'rrr', '-t', 0.5, '-s', sequences, '-o', 'collection_content']);
179 178
180 (options, args) = parser.parse_args(); 179 (options, args) = parser.parse_args();
181 return query( options, args ); 180 if options.version:
181 print __version__;
182 else:
183 # create output dir (collection)
184 output_dir_path = options.outputdir;
185 if not os.path.exists(output_dir_path):
186 os.makedirs(output_dir_path);
187
188 return query( options, args );
182 189
183 if __name__ == "__main__": __main__() 190 if __name__ == "__main__": __main__()