Mercurial > repos > thondeboer > neat_genreads
annotate py/OutputFileWriter.py @ 10:7d10b55965c9 draft default tip
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
| author | thondeboer | 
|---|---|
| date | Wed, 16 May 2018 17:02:51 -0400 | 
| parents | 6e75a84e9338 | 
| children | 
| rev | line source | 
|---|---|
| 0 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 1 import sys | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 2 import os | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 3 import re | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 4 import gzip | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 5 from struct import pack | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 6 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 7 from biopython_modified_bgzf import BgzfWriter | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 8 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 9 BAM_COMPRESSION_LEVEL = 6 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 10 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 11 # return the reverse complement of a string | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 12 RC_DICT = {'A':'T','C':'G','G':'C','T':'A','N':'N'} | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 13 def RC(s): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 14 return ''.join(RC_DICT[n] for n in s[::-1]) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 15 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 16 # SAMtools reg2bin function | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 17 def reg2bin(a,b): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 18 b -= 1 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 19 if (a>>14 == b>>14): return ((1<<15)-1)/7 + (a>>14) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 20 if (a>>17 == b>>17): return ((1<<12)-1)/7 + (a>>17) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 21 if (a>>20 == b>>20): return ((1<<9)-1)/7 + (a>>20) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 22 if (a>>23 == b>>23): return ((1<<6)-1)/7 + (a>>23) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 23 if (a>>26 == b>>26): return ((1<<3)-1)/7 + (a>>26) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 24 return 0 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 25 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 26 CIGAR_PACKED = {'M':0, 'I':1, 'D':2, 'N':3, 'S':4, 'H':5, 'P':6, '=':7, 'X':8} | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 27 SEQ_PACKED = {'=':0, 'A':1, 'C':2, 'M':3, 'G':4, 'R':5, 'S':6, 'V':7, | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 28 'T':8, 'W':9, 'Y':10,'H':11,'K':12,'D':13,'B':14,'N':15} | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 29 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 30 BUFFER_BATCH_SIZE = 1000 # write out to file after this many reads | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 31 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 32 # | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 33 # outFQ = path to output FASTQ prefix | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 34 # paired = True for PE reads, False for SE | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 35 # BAM_header = [refIndex] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 36 # VCF_header = [path_to_ref] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 37 # gzipped = True for compressed FASTQ/VCF, False for uncompressed | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 38 # | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 39 class OutputFileWriter: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 40 def __init__(self, outPrefix, paired=False, BAM_header=None, VCF_header=None, gzipped=False, jobTuple=(1,1), noFASTQ=False): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 41 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 42 jobSuffix = '' | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 43 if jobTuple[1] > 1: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 44 jsl = len(str(jobTuple[1])) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 45 jsb = '0'*(jsl-len(str(jobTuple[0]))) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 46 jobSuffix = '.job'+jsb+str(jobTuple[0])+'of'+str(jobTuple[1]) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 47 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 48 fq1 = outPrefix+'_read1.fq'+jobSuffix | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 49 fq2 = outPrefix+'_read2.fq'+jobSuffix | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 50 bam = outPrefix+'_golden.bam'+jobSuffix | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 51 vcf = outPrefix+'_golden.vcf'+jobSuffix | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 52 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 53 self.noFASTQ = noFASTQ | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 54 if not self.noFASTQ: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 55 if gzipped: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 56 self.fq1_file = gzip.open(fq1+'.gz', 'wb') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 57 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 58 self.fq1_file = open(fq1,'w') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 59 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 60 self.fq2_file = None | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 61 if paired: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 62 if gzipped: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 63 self.fq2_file = gzip.open(fq2+'.gz', 'wb') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 64 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 65 self.fq2_file = open(fq2,'w') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 66 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 67 # | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 68 # VCF OUTPUT | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 69 # | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 70 self.vcf_file = None | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 71 if VCF_header != None: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 72 if gzipped: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 73 self.vcf_file = gzip.open(vcf+'.gz', 'wb') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 74 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 75 self.vcf_file = open(vcf, 'wb') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 76 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 77 # WRITE VCF HEADER (if parallel: only for first job) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 78 if jobTuple[0] == 1: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 79 self.vcf_file.write('##fileformat=VCFv4.1\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 80 self.vcf_file.write('##reference='+VCF_header[0]+'\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 81 self.vcf_file.write('##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 82 self.vcf_file.write('##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 83 #self.vcf_file.write('##INFO=<ID=READS,Number=1,Type=String,Description="Names of Reads Covering this Variant">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 84 self.vcf_file.write('##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 85 self.vcf_file.write('##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 86 self.vcf_file.write('##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 87 self.vcf_file.write('##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 88 self.vcf_file.write('##ALT=<ID=DEL,Description="Deletion">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 89 self.vcf_file.write('##ALT=<ID=DUP,Description="Duplication">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 90 self.vcf_file.write('##ALT=<ID=INS,Description="Insertion of novel sequence">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 91 self.vcf_file.write('##ALT=<ID=INV,Description="Inversion">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 92 self.vcf_file.write('##ALT=<ID=CNV,Description="Copy number variable region">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 93 self.vcf_file.write('##ALT=<ID=TRANS,Description="Translocation">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 94 self.vcf_file.write('##ALT=<ID=INV-TRANS,Description="Inverted translocation">\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 95 self.vcf_file.write('#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 96 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 97 # | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 98 # BAM OUTPUT | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 99 # | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 100 self.bam_file = None | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 101 if BAM_header != None: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 102 self.bam_file = BgzfWriter(bam, 'w', compresslevel=BAM_COMPRESSION_LEVEL) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 103 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 104 # WRITE BAM HEADER (if parallel: only for first job) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 105 if True or jobTuple[0] == 1: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 106 self.bam_file.write("BAM\1") | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 107 header = '@HD\tVN:1.5\tSO:coordinate\n' | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 108 for n in BAM_header[0]: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 109 header += '@SQ\tSN:'+n[0]+'\tLN:'+str(n[3])+'\n' | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 110 header += '@RG\tID:NEAT\tSM:NEAT\tLB:NEAT\tPL:NEAT\n' | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 111 headerBytes = len(header) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 112 numRefs = len(BAM_header[0]) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 113 self.bam_file.write(pack('<i',headerBytes)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 114 self.bam_file.write(header) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 115 self.bam_file.write(pack('<i',numRefs)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 116 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 117 for n in BAM_header[0]: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 118 l_name = len(n[0])+1 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 119 self.bam_file.write(pack('<i',l_name)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 120 self.bam_file.write(n[0]+'\0') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 121 self.bam_file.write(pack('<i',n[3])) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 122 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 123 # buffers for more efficient writing | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 124 self.fq1_buffer = [] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 125 self.fq2_buffer = [] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 126 self.bam_buffer = [] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 127 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 128 def writeFASTQRecord(self,readName,read1,qual1,read2=None,qual2=None): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 129 ###self.fq1_file.write('@'+readName+'/1\n'+read1+'\n+\n'+qual1+'\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 130 self.fq1_buffer.append('@'+readName+'/1\n'+read1+'\n+\n'+qual1+'\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 131 if read2 != None: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 132 ####self.fq2_file.write('@'+readName+'/2\n'+RC(read2)+'\n+\n'+qual2[::-1]+'\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 133 self.fq2_buffer.append('@'+readName+'/2\n'+RC(read2)+'\n+\n'+qual2[::-1]+'\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 134 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 135 def writeVCFRecord(self, chrom, pos, idStr, ref, alt, qual, filt, info): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 136 self.vcf_file.write(str(chrom)+'\t'+str(pos)+'\t'+str(idStr)+'\t'+str(ref)+'\t'+str(alt)+'\t'+str(qual)+'\t'+str(filt)+'\t'+str(info)+'\n') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 137 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 138 def writeBAMRecord(self, refID, readName, pos_0, cigar, seq, qual, samFlag, matePos=None, alnMapQual=70): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 139 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 140 myBin = reg2bin(pos_0,pos_0+len(seq)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 141 #myBin = 0 # or just use a dummy value, does this actually matter? | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 142 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 143 myMapQual = alnMapQual | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 144 cig_letters = re.split(r"\d+",cigar)[1:] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 145 cig_numbers = [int(n) for n in re.findall(r"\d+",cigar)] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 146 cig_ops = len(cig_letters) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 147 next_refID = refID | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 148 if matePos == None: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 149 next_pos = 0 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 150 my_tlen = 0 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 151 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 152 next_pos = matePos | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 153 if pos_0 < next_pos: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 154 my_tlen = next_pos + len(seq) - pos_0 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 155 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 156 my_tlen = -pos_0 - len(seq) + next_pos | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 157 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 158 encodedCig = '' | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 159 for i in xrange(cig_ops): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 160 encodedCig += pack('<I',(cig_numbers[i]<<4) + CIGAR_PACKED[cig_letters[i]]) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 161 encodedSeq = '' | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 162 encodedLen = (len(seq)+1)/2 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 163 seqLen = len(seq) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 164 if seqLen&1: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 165 seq += '=' | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 166 for i in xrange(encodedLen): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 167 encodedSeq += pack('<B',(SEQ_PACKED[seq[2*i]]<<4) + SEQ_PACKED[seq[2*i+1]]) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 168 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 169 # apparently samtools automatically adds 33 to the quality score string... | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 170 encodedQual = ''.join([chr(ord(n)-33) for n in qual]) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 171 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 172 #blockSize = 4 + # refID int32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 173 # 4 + # pos int32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 174 # 4 + # bin_mq_nl uint32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 175 # 4 + # flag_nc uint32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 176 # 4 + # l_seq int32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 177 # 4 + # next_refID int32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 178 # 4 + # next_pos int32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 179 # 4 + # tlen int32 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 180 # len(readName)+1 + | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 181 # 4*cig_ops + | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 182 # encodedLen + | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 183 # len(seq) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 184 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 185 #blockSize = 32 + len(readName)+1 + 4*cig_ops + encodedLen + len(seq) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 186 blockSize = 32 + len(readName)+1 + len(encodedCig) + len(encodedSeq) + len(encodedQual) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 187 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 188 ####self.bam_file.write(pack('<i',blockSize)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 189 ####self.bam_file.write(pack('<i',refID)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 190 ####self.bam_file.write(pack('<i',pos_0)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 191 ####self.bam_file.write(pack('<I',(myBin<<16) + (myMapQual<<8) + len(readName)+1)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 192 ####self.bam_file.write(pack('<I',(samFlag<<16) + cig_ops)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 193 ####self.bam_file.write(pack('<i',seqLen)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 194 ####self.bam_file.write(pack('<i',next_refID)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 195 ####self.bam_file.write(pack('<i',next_pos)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 196 ####self.bam_file.write(pack('<i',my_tlen)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 197 ####self.bam_file.write(readName+'\0') | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 198 ####self.bam_file.write(encodedCig) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 199 ####self.bam_file.write(encodedSeq) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 200 ####self.bam_file.write(encodedQual) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 201 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 202 # a horribly compressed line, I'm sorry. | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 203 # (ref_index, position, data) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 204 self.bam_buffer.append((refID, pos_0, pack('<i',blockSize) + pack('<i',refID) + pack('<i',pos_0) + pack('<I',(myBin<<16) + (myMapQual<<8) + len(readName)+1) + pack('<I',(samFlag<<16) + cig_ops) + pack('<i',seqLen) + pack('<i',next_refID) + pack('<i',next_pos) + pack('<i',my_tlen) + readName+'\0' + encodedCig + encodedSeq + encodedQual)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 205 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 206 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 207 def flushBuffers(self,bamMax=None,lastTime=False): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 208 if (len(self.fq1_buffer) >= BUFFER_BATCH_SIZE or len(self.bam_buffer) >= BUFFER_BATCH_SIZE) or (len(self.fq1_buffer) and lastTime) or (len(self.bam_buffer) and lastTime): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 209 # fq | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 210 if not self.noFASTQ: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 211 self.fq1_file.write(''.join(self.fq1_buffer)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 212 if len(self.fq2_buffer): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 213 self.fq2_file.write(''.join(self.fq2_buffer)) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 214 # bam | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 215 if len(self.bam_buffer): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 216 bam_data = sorted(self.bam_buffer) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 217 if lastTime: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 218 self.bam_file.write(''.join([n[2] for n in bam_data])) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 219 self.bam_buffer = [] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 220 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 221 ind_to_stop_at = 0 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 222 for i in xrange(0,len(bam_data)): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 223 # if we are from previous reference, or have coordinates lower than next window position, it's safe to write out to file | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 224 if bam_data[i][0] != bam_data[-1][0] or bam_data[i][1] < bamMax: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 225 ind_to_stop_at = i+1 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 226 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 227 break | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 228 self.bam_file.write(''.join([n[2] for n in bam_data[:ind_to_stop_at]])) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 229 ####print 'BAM WRITING:',ind_to_stop_at,'/',len(bam_data) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 230 if ind_to_stop_at >= len(bam_data): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 231 self.bam_buffer = [] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 232 else: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 233 self.bam_buffer = bam_data[ind_to_stop_at:] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 234 self.fq1_buffer = [] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 235 self.fq2_buffer = [] | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 236 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 237 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 238 def closeFiles(self): | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 239 self.flushBuffers(lastTime=True) | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 240 if not self.noFASTQ: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 241 self.fq1_file.close() | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 242 if self.fq2_file != None: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 243 self.fq2_file.close() | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 244 if self.vcf_file != None: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 245 self.vcf_file.close() | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 246 if self.bam_file != None: | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 247 self.bam_file.close() | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 248 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 249 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 250 | 
| 
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
 thondeboer parents: diff
changeset | 251 | 
