# HG changeset patch # User iuc # Date 1465561506 14400 # Node ID 3d4cd0e3891fa5288716c4a6a029695da4b24388 # Parent d57ebdd39f0feb7bb946684688eaa100eade8323 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8 diff -r d57ebdd39f0f -r 3d4cd0e3891f data_manager/.data_manager_snpsift_dbnsfp.py.swp Binary file data_manager/.data_manager_snpsift_dbnsfp.py.swp has changed diff -r d57ebdd39f0f -r 3d4cd0e3891f data_manager/data_manager_snpsift_dbnsfp.py --- a/data_manager/data_manager_snpsift_dbnsfp.py Wed Jun 08 17:10:36 2016 -0400 +++ b/data_manager/data_manager_snpsift_dbnsfp.py Fri Jun 10 08:25:06 2016 -0400 @@ -103,11 +103,45 @@ files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)] files = sorted(files, key=natural_sortkey) for j, file in enumerate(files): + tempfiles = [] + tempfiles.append(file + "_%d" % len(tempfiles)) + tfh = open(tempfiles[-1], 'w') + lastpos = None fh = my_zip.open(file, 'rU') for i, line in enumerate(fh): - if j > 0 and i == 0: + if i == 0: + if j == 0: + wtr.write(line) continue - wtr.write(line) + else: + pos = int(line.split('\t')[1]) + if lastpos and pos < lastpos: + tfh.close() + tempfiles.append(file + "_%d" % len(tempfiles)) + tfh = open(tempfiles[-1], 'w') + print >> sys.stderr, "%s [%d] pos: %d < %d" % (file, i, pos, lastpos) + lastpos = pos + tfh.write(line) + tfh.close() + if len(tempfiles) == 1: + with open(tempfiles[0], 'r') as tfh: + wtr.writelines(tfh.readlines()) + else: + tfha = [open(temp, 'r') for temp in tempfiles] + lines = [tfh.readline() for tfh in tfha] + curpos = [int(line.split('\t')[1]) for line in lines] + while len(tfha) > 0: + k = curpos.index(min(curpos)) + wtr.write(lines[k]) + line = tfha[k].readline() + if line: + lines[k] = line + curpos[k] = int(line.split('\t')[1]) + else: + tfha[k].close() + del tfha[k] + del lines[k] + del curpos[k] return dbnsfp_tsv