RepEnrich.py @ 0:1435d142041b (draft)
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
| author | drosofff |
|---|---|
| date | Tue, 23 May 2017 18:37:22 -0400 |
| parents | |
| children | |
```python
#!/usr/bin/env python
import argparse
import csv
import numpy
import os
import shlex
import shutil
import subprocess
import sys

parser = argparse.ArgumentParser(description='Part II: Conducting the alignments to the pseudogenomes. Before doing this step you will require 1) a bamfile of the unique alignments with index 2) a fastq file of the reads mapping to more than one location. These files can be obtained using the following bowtie options [EXAMPLE: bowtie -S -m 1 --max multimap.fastq mm9 mate1_reads.fastq] Once you have the unique alignment bamfile and the reads mapping to more than one location in a fastq file you can run this step. EXAMPLE: python master_output.py /users/nneretti/data/annotation/hg19/hg19_repeatmasker.txt /users/nneretti/datasets/repeatmapping/POL3/Pol3_human/HeLa_InputChIPseq_Rep1 HeLa_InputChIPseq_Rep1 /users/nneretti/data/annotation/hg19/setup_folder HeLa_InputChIPseq_Rep1_multimap.fastq HeLa_InputChIPseq_Rep1.bam')
parser.add_argument('--version', action='version', version='%(prog)s 0.1')
parser.add_argument('annotation_file', action='store', metavar='annotation_file', help='List RepeatMasker.org annotation file for your organism. The file may be downloaded from the RepeatMasker.org website. Example: /data/annotation/hg19/hg19_repeatmasker.txt')
parser.add_argument('outputfolder', action='store', metavar='outputfolder', help='List folder to contain results. Example: /outputfolder')
parser.add_argument('outputprefix', action='store', metavar='outputprefix', help='Enter prefix name for data. Example: HeLa_InputChIPseq_Rep1')
parser.add_argument('setup_folder', action='store', metavar='setup_folder', help='List folder that contains the repeat element pseudogenomes. Example: /data/annotation/hg19/setup_folder')
parser.add_argument('fastqfile', action='store', metavar='fastqfile', help='Enter file for the fastq reads that map to multiple locations. Example: /data/multimap.fastq')
parser.add_argument('alignment_bam', action='store', metavar='alignment_bam', help='Enter bamfile output for reads that map uniquely. Example: /bamfiles/old.bam')
parser.add_argument('--pairedend', action='store', dest='pairedend', default='FALSE', help='Designate this option for paired-end sequencing. Default FALSE, change to TRUE')
parser.add_argument('--collapserepeat', action='store', dest='collapserepeat', metavar='collapserepeat', default='Simple_repeat', help='Designate this option to generate a collapsed repeat type. Uncollapsed output is generated in addition to the collapsed repeat type. Simple_repeat is the default, to simplify downstream analysis. You can change the default to collapse a different specific repeat instead, or if the name of Simple_repeat is different for your organism. Default: Simple_repeat')
parser.add_argument('--fastqfile2', action='store', dest='fastqfile2', metavar='fastqfile2', default='none', help='Enter fastqfile2 when using the paired-end option. Default: none')
parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus', default="1", type=int, help='Enter available cpus per node. The more cpus, the faster RepEnrich performs. RepEnrich is designed to only work on one node. Default: "1"')
parser.add_argument('--allcountmethod', action='store', dest='allcountmethod', metavar='allcountmethod', default="FALSE", help='By default the pipeline only outputs the fraction count method, considered to be the best way to count multimapped reads. Changing this option will also include the unique count method (a conservative count) and the total count method (a liberal counting strategy). Our evaluation of simulated data indicated fraction counting is best. Default: FALSE, change to TRUE')
parser.add_argument('--is_bed', action='store', dest='is_bed', metavar='is_bed', default='FALSE', help='Is the annotation file a bed file. This is also a compatible format. The file needs to be a tab-separated bed with optional fields. Ex. format: chr\tstart\tend\tName_element\tclass\tfamily. The class and family should be identical to name_element if not applicable. Default FALSE, change to TRUE')
args = parser.parse_args()

# parameters
annotation_file = args.annotation_file
outputfolder = args.outputfolder
outputfile_prefix = args.outputprefix
setup_folder = args.setup_folder
repeat_bed = setup_folder + os.path.sep + 'repnames.bed'
unique_mapper_bam = args.alignment_bam
fastqfile_1 = args.fastqfile
fastqfile_2 = args.fastqfile2
cpus = args.cpus
b_opt = "-k1 -p 1 --quiet"
simple_repeat = args.collapserepeat
paired_end = args.pairedend
allcountmethod = args.allcountmethod
is_bed = args.is_bed
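
# Bowtie options used for every pseudogenome alignment below (a summary of
# the flags, not part of the original script): -k1 reports a single valid
# alignment per read, -p 1 runs each bowtie process on one thread, and
# --quiet suppresses progress output. Parallelism comes instead from keeping
# up to `cpus` bowtie processes running at a time in the alignment loops.
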
################################################################################
# check that the programs we need are available
try:
    subprocess.call(shlex.split("coverageBed -h"), stdout=open(os.devnull, 'wb'), stderr=open(os.devnull, 'wb'))
    subprocess.call(shlex.split("bowtie --version"), stdout=open(os.devnull, 'wb'), stderr=open(os.devnull, 'wb'))
except OSError:
    print("Error: Bowtie or BEDTools not loaded")
    raise

################################################################################
# define a csv reader that reads space-delimited files
print('Preparing for analysis using RepEnrich...')
csv.field_size_limit(sys.maxsize)


def import_text(filename, separator):
    for line in csv.reader(open(filename), delimiter=separator,
                           skipinitialspace=True):
        if line:
            yield line
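

# import_text yields each non-empty line of a file as a list of fields.
# With separator=' ' and skipinitialspace=True, runs of spaces collapse to a
# single delimiter, which suits the space-padded columns of RepeatMasker
# .txt annotation files parsed below.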

################################################################################
# build dictionaries to convert repclass and rep families
if is_bed == "FALSE":
    repeatclass = {}
    repeatfamily = {}
    fin = import_text(annotation_file, ' ')
    x = 0
    for line in fin:
        if x > 2:
            classfamily = []
            classfamily = line[10].split(os.path.sep)
            line9 = line[9].replace("(", "_").replace(")", "_").replace("/", "_")
            repeatclass[line9] = classfamily[0]
            if len(classfamily) == 2:
                repeatfamily[line9] = classfamily[1]
            else:
                repeatfamily[line9] = classfamily[0]
        x += 1
if is_bed == "TRUE":
    repeatclass = {}
    repeatfamily = {}
    fin = open(annotation_file, 'r')
    for line in fin:
        line = line.strip('\n')
        line = line.split('\t')
        theclass = line[4]
        thefamily = line[5]
        line3 = line[3].replace("(", "_").replace(")", "_").replace("/", "_")
        repeatclass[line3] = theclass
        repeatfamily[line3] = thefamily
    fin.close()
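
# Note on the two annotation formats handled above: for a RepeatMasker .txt
# file the first three lines are headers (hence the x > 2 test), field 10
# (0-based) holds the class/family string and field 9 the repeat name; for a
# BED annotation, columns 4 and 5 hold class and family and column 3 the
# name. In both cases '(', ')' and '/' in repeat names are replaced with '_'
# so the names can be used directly in file paths.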

################################################################################
# build list of repeats initializing dictionaries for downstream analysis
fin = import_text(setup_folder + os.path.sep + 'repgenomes_key.txt', '\t')
repeat_key = {}
rev_repeat_key = {}
repeat_list = []
reptotalcounts = {}
classfractionalcounts = {}
familyfractionalcounts = {}
classtotalcounts = {}
familytotalcounts = {}
reptotalcounts_simple = {}
fractionalcounts = {}
i = 0
for line in fin:
    reptotalcounts[line[0]] = 0
    fractionalcounts[line[0]] = 0
    if line[0] in repeatclass:
        classtotalcounts[repeatclass[line[0]]] = 0
        classfractionalcounts[repeatclass[line[0]]] = 0
    if line[0] in repeatfamily:
        familytotalcounts[repeatfamily[line[0]]] = 0
        familyfractionalcounts[repeatfamily[line[0]]] = 0
    if line[0] in repeatfamily:
        if repeatfamily[line[0]] == simple_repeat:
            reptotalcounts_simple[simple_repeat] = 0
    else:
        reptotalcounts_simple[line[0]] = 0
    repeat_list.append(line[0])
    repeat_key[line[0]] = int(line[1])
    rev_repeat_key[int(line[1])] = line[0]
fin.close()
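
# repgenomes_key.txt, read from the setup folder, maps each repeat name to an
# integer index. repeat_key and rev_repeat_key give the forward and reverse
# lookups; the integer indices are used below to encode, per read, the set of
# repeat pseudogenomes it aligned to.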
################################################################################
# map the repeats to the pseudogenomes:
if not os.path.exists(outputfolder):
    os.mkdir(outputfolder)
################################################################################
# Conduct the regions sorting
print('Conducting region sorting on unique mapping reads....')
fileout = outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt'
with open(fileout, 'w') as stdout:
    command = shlex.split("coverageBed -abam " + unique_mapper_bam + " -b " + setup_folder + os.path.sep + 'repnames.bed')
    p = subprocess.Popen(command, stdout=stdout)
    p.communicate()
stdout.close()
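# Each line of the coverageBed output is expected to be a repnames.bed
# interval followed by overlap statistics, with the repeat name in field 4
# and the number of overlapping unique-mapper reads in field 5 (0-based
# indices 3 and 4 below, the classic coverageBed -abam/-b layout). The loop
# below sums those per-interval read counts per repeat, keyed by the repeat's
# integer index.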
filein = open(outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt', 'r')
counts = {}
sumofrepeatreads = 0
for line in filein:
    line = line.split('\t')
    if str(repeat_key[line[3]]) not in counts:
        counts[str(repeat_key[line[3]])] = 0
    counts[str(repeat_key[line[3]])] += int(line[4])
    sumofrepeatreads += int(line[4])
print('Identified ' + str(sumofrepeatreads) + ' unique reads that mapped to repeats.')
################################################################################
if paired_end == 'TRUE':
    if not os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'pair1_bowtie')
    if not os.path.exists(outputfolder + os.path.sep + 'pair2_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'pair2_bowtie')
    folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie'
    folder_pair2 = outputfolder + os.path.sep + 'pair2_bowtie'
    ############################################################################
    print("Processing repeat pseudogenomes...")
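    # Each repeat has its own bowtie index (pseudogenome) in the setup
    # folder. Both mate fastq files are aligned against every pseudogenome,
    # writing one .bowtie output file per repeat and per mate; bowtie jobs
    # are launched in batches and waited on once roughly `cpus` processes
    # are in flight.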
    ps = []
    psb = []
    ticker = 0
    for metagenome in repeat_list:
        metagenomepath = setup_folder + os.path.sep + metagenome
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        with open(file1, 'w') as stdout:
            command = shlex.split("bowtie " + b_opt + " " + metagenomepath + " " + fastqfile_1)
            p = subprocess.Popen(command, stdout=stdout)
        with open(file2, 'w') as stdout:
            command = shlex.split("bowtie " + b_opt + " " + metagenomepath + " " + fastqfile_2)
            pp = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        ticker += 1
        psb.append(pp)
        ticker += 1
        # wait for the current batch once at least `cpus` processes are running
        # (>= rather than == so that an odd cpus value cannot skip the check)
        if ticker >= cpus:
            for p in ps:
                p.communicate()
            for p in psb:
                p.communicate()
            ticker = 0
            psb = []
            ps = []
    if len(ps) > 0:
        for p in ps:
            p.communicate()
        for p in psb:
            p.communicate()
    stdout.close()

    ############################################################################
    # combine the output from both read pairs:
    print('Sorting and combining the output for both read pairs...')
    if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie')
    sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
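    # For every pseudogenome, concatenate the mate1 and mate2 hits and reduce
    # them to a sorted, de-duplicated list of read IDs: the first cut drops
    # everything after the first space in the read name, the second drops the
    # /1 or /2 mate suffix, and sort | uniq leaves each read counted at most
    # once per pseudogenome.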
    for metagenome in repeat_list:
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie'
        with open(fileout, 'w') as stdout:
            p1 = subprocess.Popen(['cat', file1, file2], stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1', '-d '], stdin=p1.stdout, stdout=subprocess.PIPE)
            p3 = subprocess.Popen(['cut', '-f1', '-d/'], stdin=p2.stdout, stdout=subprocess.PIPE)
            p4 = subprocess.Popen(['sort'], stdin=p3.stdout, stdout=subprocess.PIPE)
            p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
            p5.communicate()
        stdout.close()
    print('completed ...')
################################################################################
if paired_end == 'FALSE':
    if not os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'pair1_bowtie')
    folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie'
    ############################################################################
    ps = []
    ticker = 0
    print("Processing repeat pseudogenomes...")
    for metagenome in repeat_list:
        metagenomepath = setup_folder + os.path.sep + metagenome
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        with open(file1, 'w') as stdout:
            command = shlex.split("bowtie " + b_opt + " " + metagenomepath + " " + fastqfile_1)
            p = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        ticker += 1
        if ticker == cpus:
            for p in ps:
                p.communicate()
            ticker = 0
            ps = []
    if len(ps) > 0:
        for p in ps:
            p.communicate()
    stdout.close()

    ############################################################################
    # sort and de-duplicate the bowtie output:
    print('Sorting the bowtie output....')
    if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie')
    sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
    for metagenome in repeat_list:
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie'
        with open(fileout, 'w') as stdout:
            p1 = subprocess.Popen(['cat', file1], stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1'], stdin=p1.stdout, stdout=subprocess.PIPE)
            p3 = subprocess.Popen(['cut', '-f1', '-d/'], stdin=p2.stdout, stdout=subprocess.PIPE)
            p4 = subprocess.Popen(['sort'], stdin=p3.stdout, stdout=subprocess.PIPE)
            p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
            p5.communicate()
        stdout.close()
    print('completed ...')

################################################################################
# build a file of repeat keys for all reads
print('Writing and processing intermediate files...')
sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
readid = {}
sumofrepeatreads = 0
for rep in repeat_list:
    for data in import_text(sorted_bowtie + os.path.sep + rep + '.bowtie', '\t'):
        readid[data[0]] = ''
for rep in repeat_list:
    for data in import_text(sorted_bowtie + os.path.sep + rep + '.bowtie', '\t'):
        readid[data[0]] += str(repeat_key[rep]) + ','
for subfamilies in readid.values():
    if subfamilies not in counts:
        counts[subfamilies] = 0
    counts[subfamilies] += 1
    sumofrepeatreads += 1
del readid
print('Identified ' + str(sumofrepeatreads) + ' reads that mapped to repeats for unique and multimappers.')

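# At this point counts maps a comma-separated string of repeat indices, e.g.
# '3,17,', to the number of multi-mapping reads that aligned to exactly that
# combination of pseudogenomes, alongside the unique-mapper totals keyed by a
# single index from the coverageBed step above.
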
################################################################################
print("Conducting final calculations...")


# build a converter to numeric label for repeat and yield a combined list of
# repnames separated by os.path.sep ('/' on Linux)
def convert(x):
    x = x.strip(',')
    x = x.split(',')
    global repname
    repname = ""
    for i in x:
        repname = repname + os.path.sep + rev_repeat_key[int(i)]


# building the total counts for repeat element enrichment...
for x in counts.keys():
    count = counts[x]
    x = x.strip(',')
    x = x.split(',')
    for i in x:
        reptotalcounts[rev_repeat_key[int(i)]] += int(count)
# building the fractional counts for repeat element enrichment...
for x in counts.keys():
    count = counts[x]
    x = x.strip(',')
    x = x.split(',')
    splits = len(x)
    for i in x:
        fractionalcounts[rev_repeat_key[int(i)]] += float(numpy.divide(float(count), float(splits)))
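# Fractional counting illustrated: if, say, 10 reads share the signature
# '3,17,' (they aligned to the two pseudogenomes 3 and 17), each of those two
# repeats receives 10 / 2 = 5 fractional counts, whereas the total-count
# method above credits each repeat with all 10 reads.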
# building categorized table of repeat element enrichment...
repcounts = {}
repcounts['other'] = 0
for key in counts.keys():
    convert(key)
    repcounts[repname] = counts[key]
# building the total counts for class enrichment...
for key in reptotalcounts.keys():
    classtotalcounts[repeatclass[key]] += reptotalcounts[key]
# building total counts for family enrichment...
for key in reptotalcounts.keys():
    familytotalcounts[repeatfamily[key]] += reptotalcounts[key]
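# repcounts keys are the os.path.sep-joined repeat-name combinations built by
# convert(); a key of exactly '/<repname>' therefore means every one of those
# reads aligned to a single pseudogenome, which is what the unique-counts
# table below looks up.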
# building unique counts table
repcounts2 = {}
for rep in repeat_list:
    if "/" + rep in repcounts:
        repcounts2[rep] = repcounts["/" + rep]
    else:
        repcounts2[rep] = 0
# building the fractional counts for class enrichment...
for key in fractionalcounts.keys():
    classfractionalcounts[repeatclass[key]] += fractionalcounts[key]
# building fractional counts for family enrichment...
for key in fractionalcounts.keys():
    familyfractionalcounts[repeatfamily[key]] += fractionalcounts[key]

################################################################################
print('Writing final output and removing intermediate files...')
# print output to file of the categorized counts and total overlapping counts:
if allcountmethod == "TRUE":
    fout1 = open(outputfolder + os.path.sep + outputfile_prefix + '_total_counts.txt', 'w')
    for key in reptotalcounts.keys():
        fout1.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(reptotalcounts[key]) + '\n')
    fout2 = open(outputfolder + os.path.sep + outputfile_prefix + '_class_total_counts.txt', 'w')
    for key in classtotalcounts.keys():
        fout2.write(str(key) + '\t' + str(classtotalcounts[key]) + '\n')
    fout3 = open(outputfolder + os.path.sep + outputfile_prefix + '_family_total_counts.txt', 'w')
    for key in familytotalcounts.keys():
        fout3.write(str(key) + '\t' + str(familytotalcounts[key]) + '\n')
    fout4 = open(outputfolder + os.path.sep + outputfile_prefix + '_unique_counts.txt', 'w')
    for key in repcounts2.keys():
        fout4.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(repcounts2[key]) + '\n')
    fout5 = open(outputfolder + os.path.sep + outputfile_prefix + '_class_fraction_counts.txt', 'w')
    for key in classfractionalcounts.keys():
        fout5.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n')
    fout6 = open(outputfolder + os.path.sep + outputfile_prefix + '_family_fraction_counts.txt', 'w')
    for key in familyfractionalcounts.keys():
        fout6.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n')
    fout7 = open(outputfolder + os.path.sep + outputfile_prefix + '_fraction_counts.txt', 'w')
    for key in fractionalcounts.keys():
        fout7.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(int(fractionalcounts[key])) + '\n')
    fout1.close()
    fout2.close()
    fout3.close()
    fout4.close()
    fout5.close()
    fout6.close()
    fout7.close()
else:
    fout1 = open(outputfolder + os.path.sep + outputfile_prefix + '_class_fraction_counts.txt', 'w')
    for key in classfractionalcounts.keys():
        fout1.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n')
    fout2 = open(outputfolder + os.path.sep + outputfile_prefix + '_family_fraction_counts.txt', 'w')
    for key in familyfractionalcounts.keys():
        fout2.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n')
    fout3 = open(outputfolder + os.path.sep + outputfile_prefix + '_fraction_counts.txt', 'w')
    for key in fractionalcounts.keys():
        fout3.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(int(fractionalcounts[key])) + '\n')
    fout1.close()
    fout2.close()
    fout3.close()
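
# Note that in both branches the per-repeat values written to
# *_fraction_counts.txt are truncated to integers by int(), while the class
# and family fraction counts are written as floats.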

################################################################################
# Remove large intermediate files
if os.path.exists(outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt'):
    os.remove(outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt')
if os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
    shutil.rmtree(outputfolder + os.path.sep + 'pair1_bowtie')
if os.path.exists(outputfolder + os.path.sep + 'pair2_bowtie'):
    shutil.rmtree(outputfolder + os.path.sep + 'pair2_bowtie')
if os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
    shutil.rmtree(outputfolder + os.path.sep + 'sorted_bowtie')

print("... Done")
```
