Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/boto/services/result.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # Copyright (c) 2006,2007 Mitch Garnaat http://garnaat.org/ | |
| 3 # | |
| 4 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 5 # copy of this software and associated documentation files (the | |
| 6 # "Software"), to deal in the Software without restriction, including | |
| 7 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
| 8 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
| 9 # persons to whom the Software is furnished to do so, subject to the fol- | |
| 10 # lowing conditions: | |
| 11 # | |
| 12 # The above copyright notice and this permission notice shall be included | |
| 13 # in all copies or substantial portions of the Software. | |
| 14 # | |
| 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
| 16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
| 17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
| 18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 21 # IN THE SOFTWARE. | |
| 22 import os | |
| 23 from datetime import datetime, timedelta | |
| 24 from boto.utils import parse_ts | |
| 25 import boto | |
| 26 | |
class ResultProcessor(object):
    """Retrieve the results of one batch and report timing statistics.

    Results are read from the service definition's ``output_queue`` if it
    has one, otherwise from its ``output_domain``, otherwise every key of
    its ``output_bucket`` is downloaded.  Each queue/domain record is
    appended to a CSV log (``LogFileName``) in the destination directory.
    """

    # Name of the CSV log file written into the results directory.
    LogFileName = 'log.csv'

    def __init__(self, batch_name, sd, mimetype_files=None):
        """Set up counters and look up the output queue and domain.

        :param batch_name: Batch identifier; only records whose ``Batch``
            value equals it are processed.
        :param sd: Service definition object; must provide ``get_obj(name)``.
        :param mimetype_files: Accepted for interface compatibility; unused.
        """
        self.sd = sd
        self.batch = batch_name
        self.log_fp = None
        self.num_files = 0
        # Number of records that contributed timing data; the summary in
        # get_results() is only meaningful when this is non-zero.
        self.num_records = 0
        self.total_time = 0
        # Sentinels chosen so that the first record always replaces them.
        self.min_time = timedelta.max
        self.max_time = timedelta.min
        self.earliest_time = datetime.max
        self.latest_time = datetime.min
        self.queue = self.sd.get_obj('output_queue')
        self.domain = self.sd.get_obj('output_domain')

    def calculate_stats(self, msg):
        """Fold one record's ``Service-Read``/``Service-Write`` times into
        the running minimum, maximum, total and overall time window.

        Both values are parsed with ``parse_ts``; the results are assumed to
        be ``datetime`` objects (they are subtracted and compared with
        ``datetime`` sentinels below).
        """
        start_time = parse_ts(msg['Service-Read'])
        end_time = parse_ts(msg['Service-Write'])
        elapsed_time = end_time - start_time
        self.max_time = max(self.max_time, elapsed_time)
        self.min_time = min(self.min_time, elapsed_time)
        # total_seconds() rather than .seconds: .seconds drops whole days
        # and is misleading for negative durations.
        self.total_time += elapsed_time.total_seconds()
        self.earliest_time = min(self.earliest_time, start_time)
        self.latest_time = max(self.latest_time, end_time)
        self.num_records += 1

    @staticmethod
    def _csv_field(value):
        """Return ``value`` quoted for CSV if it contains a comma or quote."""
        if ',' in value or '"' in value:
            # Embedded quotes are doubled, as is conventional for CSV.
            return '"%s"' % value.replace('"', '""')
        return value

    def log_message(self, msg, path):
        """Append ``msg`` as one CSV row to the log file under ``path``.

        The log is opened lazily in append mode on first use, at which point
        a header row with the sorted key names is written.  Values are
        expected to be strings.
        """
        keys = sorted(msg.keys())
        if not self.log_fp:
            self.log_fp = open(os.path.join(path, self.LogFileName), 'a')
            self.log_fp.write(','.join(keys) + '\n')
        values = [self._csv_field(msg[key]) for key in keys]
        self.log_fp.write(','.join(values) + '\n')

    def process_record(self, record, path, get_file=True):
        """Log one record, update statistics and optionally fetch its files.

        ``record['OutputKey']`` is a comma-separated list of outputs; the
        part of each entry before the first ``;`` is used as the S3 key
        name.  The bucket is ``record['OutputBucket']`` when present,
        otherwise ``record['Bucket']``.

        :param record: Mapping of string keys to string values.
        :param path: Directory that receives the log and downloaded files.
        :param get_file: When false, outputs are counted but not downloaded.
        """
        self.log_message(record, path)
        self.calculate_stats(record)
        outputs = record['OutputKey'].split(',')
        if 'OutputBucket' in record:
            bucket = boto.lookup('s3', record['OutputBucket'])
        else:
            bucket = boto.lookup('s3', record['Bucket'])
        for output in outputs:
            if get_file:
                key_name = output.split(';')[0]
                key = bucket.lookup(key_name)
                file_name = os.path.join(path, key_name)
                print('retrieving file: %s to %s' % (key_name, file_name))
                key.get_contents_to_filename(file_name)
            self.num_files += 1

    def get_results_from_queue(self, path, get_file=True, delete_msg=True):
        """Drain the output queue, processing messages of this batch.

        Reading stops at the first falsy result from ``queue.read()``.
        Messages belonging to other batches are read but neither processed
        nor deleted.

        :param delete_msg: Delete each processed message from the queue.
        """
        m = self.queue.read()
        while m:
            if 'Batch' in m and m['Batch'] == self.batch:
                self.process_record(m, path, get_file)
                if delete_msg:
                    self.queue.delete_message(m)
            m = self.queue.read()

    def get_results_from_domain(self, path, get_file=True):
        """Process every item of the output domain tagged with this batch."""
        rs = self.domain.query("['Batch'='%s']" % self.batch)
        for item in rs:
            self.process_record(item, path, get_file)

    def get_results_from_bucket(self, path):
        """Download every key of ``output_bucket`` into ``path``.

        Used when there is neither an output queue nor an output domain; no
        timing information is available on this path.  Does nothing when the
        service definition has no output bucket.
        """
        bucket = self.sd.get_obj('output_bucket')
        if bucket:
            print('No output queue or domain, just retrieving files from output_bucket')
            for key in bucket:
                # Join with the key's name; the key object itself is not a
                # path component.
                file_name = os.path.join(path, key.name)
                print('retrieving file: %s to %s' % (key.name, file_name))
                key.get_contents_to_filename(file_name)
                self.num_files += 1

    def get_results(self, path, get_file=True, delete_msg=True):
        """Retrieve all results into ``path`` and print a summary.

        The directory is created if missing.  Timing statistics are printed
        only when at least one record supplied timing data; the throughput
        line is additionally skipped when the elapsed time is not positive.

        :param path: Destination directory.
        :param get_file: Download result files (queue/domain sources only).
        :param delete_msg: Delete processed queue messages (queue only).
        """
        if not os.path.isdir(path):
            os.mkdir(path)
        try:
            if self.queue:
                self.get_results_from_queue(path, get_file, delete_msg)
            elif self.domain:
                self.get_results_from_domain(path, get_file)
            else:
                self.get_results_from_bucket(path)
        finally:
            # Always release the log, and forget the handle so a later call
            # reopens it instead of writing to a closed file.
            if self.log_fp:
                self.log_fp.close()
                self.log_fp = None
        print('%d results successfully retrieved.' % self.num_files)
        if self.num_files > 0 and self.num_records > 0:
            self.avg_time = float(self.total_time) / self.num_files
            print('Minimum Processing Time: %d' % self.min_time.total_seconds())
            print('Maximum Processing Time: %d' % self.max_time.total_seconds())
            print('Average Processing Time: %f' % self.avg_time)
            self.elapsed_time = self.latest_time - self.earliest_time
            elapsed_seconds = self.elapsed_time.total_seconds()
            print('Elapsed Time: %d' % elapsed_seconds)
            if elapsed_seconds > 0:
                tput = self.num_files / (elapsed_seconds / 60.0)
                print('Throughput: %f transactions / minute' % tput)
| 135 |
