Mercurial > repos > yating-l > jbrowsearchivecreator
comparison util/subtools.py @ 6:237707a6b74d draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit a500f7ab2119cc5faaf80393bd87428389d06880-dirty
| author | yating-l | 
|---|---|
| date | Thu, 15 Feb 2018 17:05:05 -0500 | 
| parents | |
| children | 5d5fdcb798da | 
   comparison
  equal
  deleted
  inserted
  replaced
| 5:e762f4b9e4bd | 6:237707a6b74d | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 This file includes commonly used functions for converting file formats to gff3 | |
| 5 """ | |
| 6 from collections import OrderedDict | |
| 7 import json | |
| 8 import subprocess | |
| 9 import os | |
| 10 import sys | |
| 11 import tempfile | |
| 12 import string | |
| 13 import logging | |
| 14 | |
class PopenError(Exception):
    """
    Error describing a subprocess that finished with a non-zero exit status.

    cmd is the command that was run, error is the error output that was
    collected for it and return_code is its exit status.
    """

    def __init__(self, cmd, error, return_code):
        self.return_code = return_code
        self.error = error
        self.cmd = cmd

    def __str__(self):
        headline = "The subprocess {0} has returned the error: {1}.".format(
            self.cmd, self.return_code)
        detail = "Its error message is: {0}".format(self.error)
        # Both sentences are glued with a bare comma and the result is
        # handed back in its repr() form, i.e. surrounded by quotes.
        return repr(headline + ',' + detail)
| 27 | |
| 28 | |
def _handleExceptionAndCheckCall(array_call, **kwargs):
    """
    Run an external command, log what happens and stop the program on failure.

    Only the keyword arguments stdout, stderr, shell and stdin are honoured;
    they are forwarded to subprocess.Popen. stdout and stderr default to
    subprocess.PIPE, shell to False and stdin to None.
    See https://docs.python.org/2/library/subprocess.html#subprocess.check_call

    :param array_call: the command followed by its arguments; array_call[0] is
        used as the command name in log and error messages
    :return: the finished subprocess.Popen object

    The function does not return when the command cannot be started or exits
    with a non-zero status: the problem is logged and sys.exit() is called
    (with the exit status of the command when stderr was captured through a
    pipe, with -1 otherwise).
    """
    stdout = kwargs.get('stdout', subprocess.PIPE)
    stderr = kwargs.get('stderr', subprocess.PIPE)
    shell = kwargs.get('shell', False)
    stdin = kwargs.get('stdin', None)

    cmd = array_call[0]

    # TODO: Check the value of array_call and <=[0]
    logging.debug("Calling {0}:".format(cmd))
    logging.debug("%s", array_call)
    logging.debug("---------")

    # TODO: Use universal_newlines option from Popen?
    try:
        p = subprocess.Popen(array_call, stdout=stdout,
                             stderr=stderr, shell=shell, stdin=stdin)

        # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate
        output, error = p.communicate()

        if stdout == subprocess.PIPE:
            logging.debug("\t{0}".format(output))
        else:
            # stdout may be None or a raw file descriptor, neither of which
            # has a .name; fall back to the value itself so that a successful
            # call is not turned into a failure by the logging.
            logging.debug("\tOutput in file {0}".format(
                getattr(stdout, 'name', stdout)))

        # If we detect an error from the subprocess, then we raise an exception
        # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process
        # TODO: The responsability of returning a sys.exit() should not be there, but up in the app.
        if p.returncode:
            if stderr == subprocess.PIPE:
                raise PopenError(cmd, error, p.returncode)
            else:
                # TODO: To Handle properly with a design behind, if we received a option as a file for the error
                raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"
                                .format(cmd, getattr(stderr, 'name', stderr), p.returncode))

    except OSError as e:
        # Typically raised by Popen itself, e.g. when the executable is missing.
        message = "The subprocess {0} has encountered an OSError: {1}".format(
            cmd, e.strerror)
        if e.filename:
            message = '\n'.join(
                (message, ", against this file: {0}".format(e.filename)))
        logging.error(message)
        sys.exit(-1)
    except PopenError as popen_error:
        # Bound to its own name so the Popen object `p` is not shadowed.
        message = "The subprocess {0} has returned the error: {1}.".format(
            popen_error.cmd, popen_error.return_code)
        message = '\n'.join(
            (message, "Its error message is: {0}".format(popen_error.error)))

        logging.exception(message)

        sys.exit(popen_error.return_code)
    except Exception as e:
        message = "The subprocess {0} has encountered an unknown error: {1}".format(
            cmd, e)
        logging.exception(message)

        sys.exit(-1)
    return p
| 98 | |
| 99 | |
def write_features(field, attribute, gff3):
    """
    Write one feature line in gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)

    :param field: ordered dictionary; each value is written followed by a tab
    :param attribute: ordered dictionary; written as key=value pairs joined by ';'
    :param gff3: the file handler the line is written to
    """
    # Every column, including the last one, is terminated by a tab.
    for column in field.values():
        gff3.write(str(column) + '\t')
    pairs = ['='.join((str(key), str(value))) for key, value in attribute.items()]
    gff3.write(';'.join(pairs))
    gff3.write('\n')
| 114 | |
def twoBitInfo(two_bit_file_name, two_bit_info_file):
    """
    Run the twoBitInfo executable on two_bit_file_name, with two_bit_info_file
    as the destination argument.

    :param two_bit_file_name: first argument given to twoBitInfo
    :param two_bit_info_file: second argument given to twoBitInfo
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    return _handleExceptionAndCheckCall(
        ['twoBitInfo', two_bit_file_name, two_bit_info_file])
| 125 | |
| 126 | |
def faToTwoBit(fasta_file_name, twoBitFile):
    """
    Run the faToTwoBit UCSC tool on fasta_file_name with twoBitFile as the
    destination argument.

    :param fasta_file_name: first argument given to faToTwoBit
    :param twoBitFile: second argument given to faToTwoBit
    :return: twoBitFile, exactly as it was passed in
    """
    command = ['faToTwoBit', fasta_file_name, twoBitFile]
    _handleExceptionAndCheckCall(command)
    return twoBitFile
| 139 | |
def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
    """
    Run `sort -k2rn` on two_bit_info_file_name, with `-o chrom_sizes_file_name`
    to receive the result.

    :param two_bit_info_file_name: the file handed to sort
    :param chrom_sizes_file_name: the value given to sort's -o option
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    sort_command = ['sort', '-k2rn', two_bit_info_file_name]
    sort_command.extend(['-o', chrom_sizes_file_name])
    return _handleExceptionAndCheckCall(sort_command)
| 151 | |
def getChromSizes(reference, tool_dir):
    """
    Produce a chrom.sizes temporary file for a reference by running
    faToTwoBit and then twoBitInfo.

    :param reference: the file handed to faToTwoBit
    :param tool_dir: currently unused (both executables are invoked by bare
        name); kept so that existing callers keep working
    :return: the still-open NamedTemporaryFile object (suffix '.chrom.sizes',
        created with delete=False) whose name was given to twoBitInfo
    :raises IOError: when a temporary file cannot be created

    An OSError raised while launching either tool is reported on stdout and
    otherwise ignored; the exit status of the tools is not checked.
    """
    # TODO: find a better way instead of shipping the two exec files with the tool
    try:
        # Mode and buffering are passed positionally because the second
        # parameter is named `bufsize` in Python 2 and `buffering` in Python 3.
        twoBitFile = tempfile.NamedTemporaryFile('w+b', 0)
        try:
            chrom_sizes = tempfile.NamedTemporaryFile(
                'w+b', 0, suffix='.chrom.sizes', delete=False)
        except IOError:
            twoBitFile.close()
            raise
    except IOError as err:
        print("Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror))
        # Nothing below can work without the temporary files.
        raise

    try:
        try:
            subprocess.call(['faToTwoBit', reference, twoBitFile.name])
        except OSError as err:
            print("Cannot generate twoBitFile from faToTwoBit err({0}): {1}".format(err.errno, err.strerror))
        try:
            subprocess.call(['twoBitInfo', twoBitFile.name, chrom_sizes.name])
        except OSError as err:
            print("Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror))
    finally:
        # The intermediate .2bit file is only needed while the tools run;
        # closing it also removes it from disk.
        twoBitFile.close()
    return chrom_sizes
| 170 | |
def sequence_region(chrom_sizes):
    """
    Read a tab-separated chromosome size file (as generated by twoBitInfo)
    into a dict.

    :param chrom_sizes: path of the file to read
    :return: a dict mapping the first column of each line to its second
        column; the sizes are kept as strings

    Lines with fewer than two tab-separated columns (e.g. blank lines) are
    skipped.
    """
    sizes_dict = {}
    # The context manager guarantees the file is closed, even on error.
    with open(chrom_sizes, 'r') as f:
        for line in f:
            chrom_info = line.rstrip().split('\t')
            if len(chrom_info) < 2:
                continue
            sizes_dict[chrom_info[0]] = chrom_info[1]
    return sizes_dict
| 183 | |
def child_blocks(parent_field, parent_attr, gff3, child_type):
    """
    Write one child feature per block of a parent feature.

    :param parent_field: dictionary of the parent's columns; 'start' is read
        and the other entries are copied into every child
    :param parent_attr: dictionary providing 'ID', 'blockcount' and the
        comma-separated lists 'chromstarts' and 'blocksizes'
    :param gff3: the file handler passed on to write_features
    :param child_type: value written in the 'type' column of every child

    Child number n (counting from 1) gets ID '<parent ID>_part_<n>' and
    Parent '<parent ID>'. Its start is the parent start plus the n-th entry
    of chromstarts, and its end is start + the n-th block size - 1.
    parent_field itself is left unmodified.
    """
    blockcount = int(parent_attr['blockcount'])
    chromstart = parent_attr['chromstarts'].split(',')
    blocksize = parent_attr['blocksizes'].split(',')
    parent_start = parent_field['start']
    for num in range(blockcount):
        # Work on a copy: assigning the dict directly would alias it and
        # overwrite the caller's type/start/end.
        child_field = OrderedDict(parent_field)
        child_field['type'] = child_type
        child_field['start'] = int(chromstart[num]) + int(parent_start)
        child_field['end'] = int(child_field['start']) + int(blocksize[num]) - 1
        child_attr = OrderedDict()
        child_attr['ID'] = parent_attr['ID'] + '_part_' + str(num + 1)
        child_attr['Parent'] = parent_attr['ID']
        write_features(child_field, child_attr, gff3)
| 200 | |
def add_tracks_to_json(trackList_json, new_tracks, modify_type):
    """
    Add to track configuration (trackList.json)

    :param trackList_json: path of the JSON file, rewritten in place
    :param new_tracks: depends on modify_type, see below
    :param modify_type:
        'add_tracks': new_tracks is a dict describing a new track like bam or
        bigwig; it is appended to the 'tracks' list.
        'add_attr': new_tracks is dict(track_name: dict()); the inner dict is
        merged into every existing track whose 'urlTemplate' contains
        track_name (case-insensitive). Tracks without a 'urlTemplate' are
        left alone.
        Any other value rewrites the file without changing its data.
    """
    with open(trackList_json, 'r+') as f:
        data = json.load(f)
        if modify_type == 'add_tracks':
            data['tracks'].append(new_tracks)
        elif modify_type == 'add_attr':
            # Distinct loop names matter: reusing the outer key name for the
            # attribute loop would change what later tracks are matched on.
            for track_name, attr in new_tracks.items():
                wanted = track_name.lower()
                for track in data['tracks']:
                    if wanted in track.get('urlTemplate', '').lower():
                        track.update(attr)
        # Overwrite from the start and cut whatever is left of the old,
        # possibly longer, content.
        f.seek(0, 0)
        f.write(json.dumps(data, separators=(',', ':'), indent=4))
        f.truncate()
| 222 | |
| 223 | |
def createBamIndex(bamfile):
    """
    Run `samtools index` on bamfile and return the path of the index.

    :param bamfile: path given to samtools
    :return: bamfile + '.bai'
    :raises ValueError: when that path does not exist after the call
    """
    subprocess.call(['samtools', 'index', bamfile])
    index_path = bamfile + '.bai'
    # The exit status of samtools is not looked at; only the presence of the
    # index file decides between success and failure.
    if not os.path.exists(index_path):
        raise ValueError('Did not find bai file')
    return index_path
| 231 | |
def flatfile_to_json(inputFile, dataType, trackType, trackLabel, outputFolder, options=None, compress=True):
    """
    Build the flatfile-to-json.pl command line and run it.

    :param inputFile: file given after the --bed / --gff flag
    :param dataType: must contain "bed" or "gff" ("bed" is checked first)
    :param trackType: value for --trackType
    :param trackLabel: value for --trackLabel
    :param outputFolder: value for --out
    :param options: optional dict; the truthy entries among "clientConfig",
        "config", "type", "renderClassName" and "subfeatureClasses" are added
        as the options of the same name ("subfeatureClasses" is serialized
        with json.dumps)
    :param compress: add --compress when true
    :return: whatever _handleExceptionAndCheckCall returns for the call
    :raises ValueError: when dataType contains neither "bed" nor "gff"
    """
    for marker, flag in (("bed", "--bed"), ("gff", "--gff")):
        if marker in dataType:
            fileType = flag
            break
    else:
        raise ValueError("%s is not a valid filetype for flatfile_to_json" % dataType)

    array_call = ['flatfile-to-json.pl', fileType, inputFile]
    array_call.extend(['--trackType', trackType])
    array_call.extend(['--trackLabel', trackLabel])
    array_call.extend(['--out', outputFolder])
    if compress:
        array_call.append('--compress')

    if options:
        # Options passed through unchanged, in the order they are emitted.
        passthrough = (
            ('--clientConfig', "clientConfig"),
            ('--config', "config"),
            ('--type', "type"),
            ('--renderClassName', 'renderClassName'),
        )
        for flag, key in passthrough:
            value = options.get(key)
            if value:
                array_call.extend([flag, value])
        subfeature_classes = options.get('subfeatureClasses')
        if subfeature_classes:
            array_call.extend(['--subfeatureClasses', json.dumps(subfeature_classes)])

    return _handleExceptionAndCheckCall(array_call)
| 272 | |
def bam_to_json(inputFile, trackLabel, outputFolder, options=None, compress=False):
    """
    Build the bam-to-json.pl command line and run it.

    :param inputFile: value for --bam
    :param trackLabel: value for --trackLabel
    :param outputFolder: value for --out
    :param options: optional dict; truthy 'clientConfig' and 'config' entries
        are added as --clientConfig and --config, in that order
    :param compress: add --compress when true
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    array_call = ['bam-to-json.pl']
    array_call += ['--bam', inputFile]
    array_call += ['--trackLabel', trackLabel]
    array_call += ['--out', outputFolder]
    if compress:
        array_call.append('--compress')
    if options:
        for flag, key in (('--clientConfig', 'clientConfig'), ('--config', 'config')):
            value = options.get(key)
            if value:
                array_call += [flag, value]

    return _handleExceptionAndCheckCall(array_call)
| 293 | |
def add_track_json(trackList, track_json):
    """
    Feed a track description to add-track-json.pl on its standard input.

    :param trackList: argument given to add-track-json.pl
    :param track_json: object serialized with json.dumps and sent, followed
        by a newline, to the script's stdin
    :return: the exit status of add-track-json.pl
    """
    # Write the payload straight to the script's stdin. No helper `echo`
    # process is needed, so nothing is left un-waited or with an open pipe.
    payload = (json.dumps(track_json) + '\n').encode('utf-8')
    process = subprocess.Popen(['add-track-json.pl', trackList], stdin=subprocess.PIPE)
    process.communicate(payload)
    return process.returncode
| 299 | |
def prepare_refseqs(fasta_file_name, outputFolder):
    """
    Run prepare-refseqs.pl with `--fasta fasta_file_name --out outputFolder`.

    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    command = ['prepare-refseqs.pl']
    command += ['--fasta', fasta_file_name]
    command += ['--out', outputFolder]
    return _handleExceptionAndCheckCall(command)
| 304 | |
def generate_names(outputFolder):
    """
    Run generate-names.pl with `-v --out outputFolder`.

    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    return _handleExceptionAndCheckCall(
        ['generate-names.pl', '-v', '--out', outputFolder])
| 309 | |
def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None):
    """
    Run validateFiles on input_file.

    :param input_file: last positional argument of the command
    :param chrom_sizes_file_name: value for -chromInfo=
    :param file_type: value for -type=
    :param options: optional dict; a truthy "autoSql" entry is added as
        -as=<value> and a truthy "tab" entry adds -tab
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    array_call = [
        'validateFiles',
        ''.join(('-chromInfo=', chrom_sizes_file_name)),
        ''.join(('-type=', file_type)),
        input_file,
    ]
    if options:
        use_tab = options.get("tab")
        auto_sql = options.get("autoSql")
        logging.debug("tab: {0}".format(use_tab))
        logging.debug("autoSql: {0}".format(auto_sql))
        if auto_sql:
            array_call.append('-as=' + auto_sql)
        if use_tab:
            array_call.append('-tab')
    return _handleExceptionAndCheckCall(array_call)
| 332 | 
