comparison query.py @ 14:e780b47013df draft

Uploaded 20180201
author fabio
date Thu, 01 Feb 2018 17:13:30 -0500
parents 039e8e1e8b1f
children dd3c4fd64402
comparison
equal deleted inserted replaced
13:b5f070767ed4 14:e780b47013df
16 QUERY_DELAY = 30; 16 QUERY_DELAY = 30;
17 ############## 17 ##############
18 18
19 __version__ = "1.0.0"; 19 __version__ = "1.0.0";
20 VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' 20 VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
21 # in the case of collections, exitcodes equal to 0 and 1 are not considered errors
22 ERR_EXIT_CODE = 2;
23 OK_EXIT_CODE = 0;
21 24
22 # in the case of collections, exitcodes equal to 0 and 1 are not considered errors 25 def raiseException( exitcode, message, output_dir_path, errorfilename ):
23 def raiseException( exitcode, message, errorfilepath ): 26 errorfilepath = os.path.join(output_dir_path, errorfilename+"_txt");
24 with open(errorfilepath, 'w') as out: 27 with open(errorfilepath, 'w') as out:
25 out.write(message); 28 out.write(message);
26 sys.exit(exitcode); 29 sys.exit(exitcode);
27 30
28 def query_request( options, args, payload ): 31 def query_request( options, args, payload ):
64 # state attribute is always available 67 # state attribute is always available
65 if json_status_content['state'] == 'SUCCESS': 68 if json_status_content['state'] == 'SUCCESS':
66 task_processed = True; 69 task_processed = True;
67 break; 70 break;
68 elif json_status_content['state'] in ['FAILURE', 'REVOKED']: 71 elif json_status_content['state'] in ['FAILURE', 'REVOKED']:
69 return raiseException( 1, "Task ID: "+str(task_id)+"\nTask status: "+str(json_status_content['state']), str(options.errorfile) ); 72 return raiseException( ERR_EXIT_CODE, "Task ID: "+str(task_id)+"\nTask status: "+str(json_status_content['state']), output_dir_path, str(options.errorfile) );
70 else: 73 else:
71 time.sleep(QUERY_DELAY); # in seconds 74 time.sleep(QUERY_DELAY); # in seconds
72 75
73 out_file_format = "tabular"; 76 out_file_format = "tabular";
74 for block in json_status_content['results']: 77 for block in json_status_content['results']:
79 accessions_list = ""; 82 accessions_list = "";
80 for accession_number in accessions: 83 for accession_number in accessions:
81 accessions_list = accessions_list + accession_number + "\n"; 84 accessions_list = accessions_list + accession_number + "\n";
82 with open(output_file_path, 'w') as out: 85 with open(output_file_path, 'w') as out:
83 out.write(accessions_list.strip()); 86 out.write(accessions_list.strip());
84 return sys.exit(0); 87 return sys.exit(OK_EXIT_CODE);
85 else: 88 else:
86 return raiseException( 1, "Unable to query the remote server. Please try again in a while.", str(options.errorfile) ); 89 return raiseException( ERR_EXIT_CODE, "Unable to query the remote server. Please try again in a while.", output_dir_path, str(options.errorfile) );
87 90
88 def query( options, args ): 91 def query( options, args ):
89 output_dir_path = options.outputdir; 92 output_dir_path = options.outputdir;
90 multiple_data = {}; 93 multiple_data = {};
91 comma_sep_file_paths = options.files; 94 comma_sep_file_paths = options.files;
108 seq_id = line_split[0]; 111 seq_id = line_split[0];
109 # fix seq_id using valid chars only 112 # fix seq_id using valid chars only
110 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) 113 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
111 seq_text = line_split[1]; 114 seq_text = line_split[1];
112 if seq_id in multiple_data: 115 if seq_id in multiple_data:
113 return raiseException( 1, "Error: the id '"+seq_id+"' is duplicated", str(options.errorfile) ); 116 return raiseException( ERR_EXIT_CODE, "Error: the id '"+seq_id+"' is duplicated", output_dir_path, str(options.errorfile) );
114 multiple_data[seq_id] = seq_text; 117 multiple_data[seq_id] = seq_text;
115 if len(multiple_data) > 0: 118 if len(multiple_data) > 0:
116 return query_request( options, args, multiple_data ); 119 return query_request( options, args, multiple_data );
117 #return echo( options, args ); 120 #return echo( options, args );
118 else: 121 else:
119 return raiseException( 1, "An error has occurred. Please be sure that your input files are valid.", str(options.errorfile) ); 122 return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input files are valid.", output_dir_path, str(options.errorfile) );
120 else: 123 else:
121 # try with the sequence in --sequence 124 # try with the sequence in --sequence
122 text_content = options.sequences; 125 text_content = options.sequences;
123 #print("sequences: "+text_content); 126 #print("sequences: "+text_content);
124 # check if options.sequences contains a list of sequences (one for each row) 127 # check if options.sequences contains a list of sequences (one for each row)
134 seq_id = line_split[0]; 137 seq_id = line_split[0];
135 # fix seq_id using valid chars only 138 # fix seq_id using valid chars only
136 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) 139 seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
137 seq_text = line_split[1]; 140 seq_text = line_split[1];
138 if seq_id in multiple_data: 141 if seq_id in multiple_data:
139 return raiseException( 1, "Error: the id '"+seq_id+"' is duplicated", str(options.errorfile) ); 142 return raiseException( ERR_EXIT_CODE, "Error: the id '"+seq_id+"' is duplicated", output_dir_path, str(options.errorfile) );
140 multiple_data[seq_id] = seq_text; 143 multiple_data[seq_id] = seq_text;
141 if len(multiple_data) > 0: 144 if len(multiple_data) > 0:
142 return query_request( options, args, multiple_data ); 145 return query_request( options, args, multiple_data );
143 #return echo( options, args ); 146 #return echo( options, args );
144 else: 147 else:
145 return raiseException( 1, "An error has occurred. Please be sure that your input files are valid.", str(options.errorfile) ); 148 return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input files are valid.", output_dir_path, str(options.errorfile) );
146 else: 149 else:
147 return raiseException( 1, "You have to insert at least one row formatted as a tab delimited (ID, SEQUENCE) couple", str(options.errorfile) ); 150 return raiseException( ERR_EXIT_CODE, "You have to insert at least one row formatted as a tab delimited (ID, SEQUENCE) couple", output_dir_path, str(options.errorfile) );
148 return 1; 151 return ERR_EXIT_CODE;
149 152
150 def __main__(): 153 def __main__():
151 # Parse the command line options 154 # Parse the command line options
152 usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path"; 155 usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path";
153 parser = optparse.OptionParser(usage = usage); 156 parser = optparse.OptionParser(usage = usage);