7
|
1 # A translation of calc_fitness.pl into python! For analysis of Tn-Seq.
|
|
2 # This script requires BioPython, which in turn has a good number of dependencies (some optional but very helpful).
|
|
3 # How to install BioPython and a list of its dependencies can be found here: http://biopython.org/DIST/docs/install/Installation.html
|
|
4 # K. McCoy
|
|
5
|
|
6
|
|
7
|
|
8
|
|
9
|
|
10
|
|
11
|
|
12
|
|
13
|
|
14 ##### ARGUMENTS #####
|
|
15
|
|
16 def print_usage():
|
|
17 print "\n" + "You are missing one or more required flags. A complete list of flags accepted by calc_fitness is as follows:" + "\n\n"
|
|
18 print "\033[1m" + "Required" + "\033[0m" + "\n"
|
|
19 print "-ref" + "\t\t" + "The name of the reference genome file, in GenBank format." + "\n"
|
|
20 print "-t1" + "\t\t" + "The name of the bowtie mapfile from time 1." + "\n"
|
|
21 print "-t2" + "\t\t" + "The name of the bowtie mapfile from time 2." + "\n"
|
|
22 print "-out" + "\t\t" + "Name of a file to enter the .csv output." + "\n"
|
|
23 print "\n"
|
|
24 print "\033[1m" + "Optional" + "\033[0m" + "\n"
|
|
25 print "-expansion" + "\t\t" + "Expansion factor (default: 250)" + "\n"
|
|
26 print "-d" + "\t\t" + "All reads being analyzed are downstream of the transposon" + "\n"
|
|
27 print "-reads1" + "\t\t" + "The number of reads to be used to calculate the correction factor for time 0." + "\n\t\t" + "(default counted from bowtie output)" + "\n"
|
|
28 print "-reads2" + "\t\t" + "The number of reads to be used to calculate the correction factor for time 6." + "\n\t\t" + "(default counted from bowtie output)" + "\n"
|
|
29 print "-cutoff" + "\t\t" + "Discard any positions where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\n"
|
|
30 print "-cutoff2" + "\t\t" + "Discard any positions within the normalization genes where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\n"
|
|
31 print "-strand" + "\t\t" + "Use only the specified strand (+ or -) when counting transcripts (default: both)" + "\n"
|
|
32 print "-normalize" + "\t" + "A file that contains a list of genes that should have a fitness of 1" + "\n"
|
|
33 print "-maxweight" + "\t" + "The maximum weight a transposon gene can have in normalization calculations" + "\n"
|
|
34 print "-multiply" + "\t" + "Multiply all fitness scores by a certain value (e.g., the fitness of a knockout). You should normalize the data." + "\n"
|
|
35 print "-ef" + "\t\t" + "Exclude insertions that occur in the first N amount (%) of gene--becuase may not affect gene function." + "\n"
|
|
36 print "-el" + "\t\t" + "Exclude insertions in the last N amount (%) of the gene--considering truncation may not affect gene function." + "\n"
|
|
37 print "-wig" + "\t\t" + "Create a wiggle file for viewing in a genome browser. Provide a filename." + "\n"
|
|
38 print "-uncol" + "\t\t" + "Use if reads were uncollapsed when mapped." + "\n"
|
|
39 print "\n"
|
|
40
|
|
41 import argparse
|
|
42 parser = argparse.ArgumentParser()
|
|
43 parser.add_argument("-ref", action="store", dest="ref_genome")
|
|
44 parser.add_argument("-t1", action="store", dest="mapfile1")
|
|
45 parser.add_argument("-t2", action="store", dest="mapfile2")
|
|
46 parser.add_argument("-out", action="store", dest="outfile")
|
|
47 parser.add_argument("-out2", action="store", dest="outfile2")
|
|
48 parser.add_argument("-expansion", action="store", dest="expansion_factor")
|
|
49 parser.add_argument("-d", action="store", dest="downstream")
|
|
50 parser.add_argument("-reads1", action="store", dest="reads1")
|
|
51 parser.add_argument("-reads2", action="store", dest="reads2")
|
|
52 parser.add_argument("-cutoff", action="store", dest="cutoff")
|
|
53 parser.add_argument("-cutoff2", action="store", dest="cutoff2")
|
|
54 parser.add_argument("-strand", action="store", dest="usestrand")
|
|
55 parser.add_argument("-normalize", action="store", dest="normalize")
|
|
56 parser.add_argument("-maxweight", action="store", dest="max_weight")
|
|
57 parser.add_argument("-multiply", action="store", dest="multiply")
|
|
58 parser.add_argument("-ef", action="store", dest="exclude_first")
|
|
59 parser.add_argument("-el", action="store", dest="exclude_last")
|
|
60 parser.add_argument("-wig", action="store", dest="wig")
|
|
61 parser.add_argument("-uncol", action="store", dest="uncol")
|
|
62 arguments = parser.parse_args()
|
|
63
|
|
64 if (not arguments.ref_genome or not arguments.mapfile1 or not arguments.mapfile2 or not arguments.outfile):
|
|
65 print_usage()
|
|
66 quit()
|
|
67
|
|
68 # Sets the default value of the expansion factor to 250, which is a trivial placeholder number.
|
|
69
|
|
70 if (not arguments.expansion_factor):
|
|
71 arguments.expansion_factor = 250
|
|
72
|
|
73 # 75 is similarly trivial
|
|
74
|
|
75 if (not arguments.max_weight):
|
|
76 arguments.max_weight = 75
|
|
77
|
|
78 # Sets the default value of cutoff to 0; cutoff exists to discard positions with a low number of counted transcripts, because fitnesses calculated from them may not be very accurate, by the same reasoning that studies with low sample sizes are innacurate.
|
|
79
|
|
80 if (not arguments.cutoff):
|
|
81 arguments.cutoff = 0
|
|
82
|
|
83 # Sets the default value of cutoff2 to 10; cutoff2 exists to discard positions within normalization genes with a low number of counted transcripts, because fitnesses calculated from them similarly may not be very accurate.
|
|
84 # This only has an effect if it's larger than cutoff, since the normalization step references a list of insertions already affected by cutoff.
|
|
85
|
|
86 if (not arguments.cutoff2):
|
|
87 arguments.cutoff2 = 10
|
|
88
|
|
89 if (not arguments.usestrand):
|
|
90 arguments.usestrand = "both"
|
|
91
|
|
92
|
|
93
|
|
94
|
|
95
|
|
96
|
|
97 ##### PARSING THE REFERENCE GENOME #####
|
|
98
|
|
99 def get_time():
|
|
100 import datetime
|
|
101 return datetime.datetime.now().time()
|
|
102 print "\n" + "Starting: " + str(get_time()) + "\n"
|
|
103
|
|
104 from Bio import SeqIO
|
|
105 import os.path
|
|
106 handle = open(arguments.ref_genome, "rU")
|
|
107 for record in SeqIO.parse(handle, "genbank"):
|
|
108 refname = record.id
|
|
109 features = record.features
|
|
110 handle.close()
|
|
111
|
|
112 # Makes a dictionary out of each feature that's a gene - with its gene name, start location, end location, and strand as keys to their values. Then makes a list out of all those dictionaries for ease of accessing later on.
|
|
113
|
|
114 feature_list = []
|
|
115 for feature in features:
|
|
116 if feature.type == "gene":
|
|
117 gene = feature.qualifiers["locus_tag"]
|
|
118 strand = feature.location.strand
|
|
119 start = float(feature.location.start)
|
|
120 end = float(feature.location.end)
|
|
121
|
|
122 # Exclude_first and exclude_last are used here to exclude whatever percentage of the genes you like from calculations; e.g. a value of 0.1 for exclude_last would exclude the last 10% of all genes!
|
|
123 # This can be useful because insertions at the very start or end of genes often don't actually break its function.
|
|
124
|
|
125 if (arguments.exclude_first):
|
|
126 start += (end - start) * float(arguments.exclude_first)
|
|
127 if (arguments.exclude_last):
|
|
128 end -= (end - start) * float(arguments.exclude_last)
|
|
129 feature_dictionary = {"gene": gene, "start": start, "end": end, "strand": strand}
|
|
130 feature_list.append(feature_dictionary)
|
|
131
|
|
132 print "Done generating feature lookup: " + str(get_time()) + "\n"
|
|
133
|
|
134
|
|
135
|
|
136
|
|
137
|
|
138
|
|
139
|
|
140
|
|
141
|
|
142
|
|
143 ##### PARSING THE MAPFILES #####
|
|
144
|
|
145 with open(arguments.mapfile1) as file:
|
|
146 r1 = file.readlines()
|
|
147 with open(arguments.mapfile2) as file:
|
|
148 r2 = file.readlines()
|
|
149
|
|
150 # When called, goes through each line of the mapfile to find the strand (+/Watson or -/Crick), count, and position of the read. It may be helpful to look at how the mapfiles are formatted to understand how this code finds them.
|
|
151
|
|
152 def read_mapfile(reads):
|
|
153 plus_total = 0
|
|
154 minus_total = 0
|
|
155 plus_counts = {"total": 0, "sites": 0}
|
|
156 minus_counts = {"total": 0, "sites": 0}
|
|
157 for read in reads:
|
|
158 if (arguments.uncol):
|
|
159 strand = read.split()[2]
|
|
160 count = 1
|
|
161 position = float(read.split()[4])
|
|
162 if arguments.usestrand != "both" and strand != arguments.usestrand:
|
|
163 continue
|
|
164 if (strand == "+"):
|
|
165 sequence_length = len(read.split()[5])
|
|
166 if arguments.downstream:
|
|
167 position += 0
|
|
168 else:
|
|
169 position += (sequence_length - 2)
|
|
170 plus_counts["total"] += count
|
|
171 plus_counts["sites"] += 1
|
|
172 if position in plus_counts:
|
|
173 plus_counts[position] += count
|
|
174 else:
|
|
175 plus_counts[position] = count
|
|
176 else:
|
|
177 minus_counts["total"] += count
|
|
178 minus_counts["sites"] += 1
|
|
179 if position in minus_counts:
|
|
180 minus_counts[position] += count
|
|
181 else:
|
|
182 minus_counts[position] = count
|
|
183 else:
|
|
184 if "-" in read.split()[0]:
|
|
185 strand = read.split()[1]
|
|
186 count = float(read.split()[0].split("-")[1])
|
|
187 position = float(read.split()[3])
|
|
188 else:
|
|
189 continue
|
|
190
|
|
191 # If for some reason you want to skip all reads from one of the strands - for example, if you wanted to compare the two strands - that's done here.
|
|
192
|
|
193 if arguments.usestrand != "both" and strand != arguments.usestrand:
|
|
194 continue
|
|
195
|
|
196 # Makes dictionaries for the + & - strands, with each insert position as a key and the number of insertions there as its corresponding value.
|
|
197
|
|
198 if (strand == "+"):
|
|
199 sequence_length = len(read.split()[4])
|
|
200 if arguments.downstream:
|
|
201 position += 0
|
|
202
|
|
203 # The -2 in "(sequence_length -2)" comes from a fake "TA" in the read; see how the libraries are constructed for further on this
|
|
204
|
|
205 else:
|
|
206 position += (sequence_length - 2)
|
|
207 plus_counts["total"] += count
|
|
208 plus_counts["sites"] += 1
|
|
209 if position in plus_counts:
|
|
210 plus_counts[position] += count
|
|
211 else:
|
|
212 plus_counts[position] = count
|
|
213 else:
|
|
214 minus_counts["total"] += count
|
|
215 minus_counts["sites"] += 1
|
|
216 if position in minus_counts:
|
|
217 minus_counts[position] += count
|
|
218 else:
|
|
219 minus_counts[position] = count
|
|
220 return (plus_counts, minus_counts)
|
|
221
|
|
222 # Calls read_mapfile(reads) to parse arguments.reads1 and arguments.reads2 (your reads from t1 and t2).
|
|
223
|
|
224
|
|
225
|
|
226
|
|
227
|
|
228 (plus_ref_1, minus_ref_1) = read_mapfile(r1)
|
|
229 print "Read first file: " + str(get_time()) + "\n"
|
|
230 (plus_ref_2, minus_ref_2) = read_mapfile(r2)
|
|
231 print "Read second file: " + str(get_time()) + "\n"
|
|
232
|
|
233 # The lines below are just printed for reference. The number of sites is the length of a given dictionary of sites - 1 because its last key, "total", isn't actually a site.
|
|
234
|
|
235 print "Reads:" + "\n"
|
|
236 print "1: + " + str(plus_ref_1["total"]) + " - " + str(minus_ref_1["total"]) + "\n"
|
|
237 print "2: + " + str(plus_ref_2["total"]) + " - " + str(minus_ref_2["total"]) + "\n"
|
|
238 print "Sites:" + "\n"
|
|
239 print "1: + " + str(plus_ref_1["sites"]) + " - " + str(minus_ref_1["sites"]) + "\n"
|
|
240 print "2: + " + str(plus_ref_2["sites"]) + " - " + str(minus_ref_2["sites"]) + "\n"
|
|
241
|
|
242
|
|
243
|
|
244
|
|
245
|
|
246
|
|
247
|
|
248
|
|
249
|
|
250
|
|
251 ##### FITNESS CALCULATIONS #####
|
|
252
|
|
253 # If reads1 and reads2 weren't specified in the command line, sets them as the total number of reads (found in read_mapfile())
|
|
254
|
|
255 if not arguments.reads1:
|
|
256 arguments.reads1 = plus_ref_1["total"] + minus_ref_1["total"]
|
|
257 if not arguments.reads2:
|
|
258 arguments.reads2 = plus_ref_2["total"] + minus_ref_2["total"]
|
|
259
|
|
260 # Calculates the correction factors for reads from t1 and t2; cfactor1 and cfactor2 are the number of reads from t1 and t2 respectively divided by total, which is the average number of reads between the two.
|
|
261 # This is used later on to correct for pipetting errors, or any other error that would cause unequal amounts of DNA from t1 and t2 to be sequenced so that an unequal amount of reads is produced
|
|
262
|
|
263 total = (float(arguments.reads1) + float(arguments.reads2))/2
|
|
264 cfactor1 = float(arguments.reads1)/total
|
|
265 cfactor2 = float(arguments.reads2)/total
|
|
266 print "Cfactor 1: " + str(cfactor1) + "\n"
|
|
267 print "Cfactor 2: " + str(cfactor2) + "\n"
|
|
268 import math
|
|
269 import csv
|
|
270 results = [["position", "strand", "count_1", "count_2", "ratio", "mt_freq_t1", "mt_freq_t2", "pop_freq_t1", "pop_freq_t2", "gene", "D", "W", "nW"]]
|
|
271 genic = 0
|
|
272 total_inserts = 0
|
|
273 with open(arguments.ref_genome, "r") as file:
|
|
274 firstline = file.readline()
|
|
275 genomelength = firstline.split()[2]
|
|
276 i = 0
|
|
277 while i < float(genomelength):
|
|
278
|
|
279 # At each possible location for an insertion in the genome, counts the number of actual insertions at t1 and which strand(s) the corresponding reads came from.
|
|
280
|
|
281 c1 = 0
|
|
282 if i in plus_ref_1:
|
|
283 c1 = float(plus_ref_1[i])
|
|
284 strand = "+/"
|
|
285 if i in minus_ref_1:
|
|
286 c1 += float(minus_ref_1[i])
|
|
287 strand = "b/"
|
|
288 elif i in minus_ref_1:
|
|
289 c1 = float(minus_ref_1[i])
|
|
290 strand = "-/"
|
|
291
|
|
292 # If there were no insertions at a certain location at t1 just continues to the next location; there can't be any comparison to make between t1 and t2 if there are no t1 insertions!
|
|
293
|
|
294 else:
|
|
295 i += 1
|
|
296 continue
|
|
297
|
|
298 # At each location where there was an insertion at t1, counts the number of insertions at t2 and which strand(s) the corresponding reads came from.
|
|
299
|
|
300 c2 = 0
|
|
301 if i in plus_ref_2:
|
|
302 c2 = float(plus_ref_2[i])
|
|
303 if i in minus_ref_2:
|
|
304 c2 += float(minus_ref_2[i])
|
|
305 strand += "b"
|
|
306 else:
|
|
307 strand += "+"
|
|
308 elif i in minus_ref_2:
|
|
309 c2 = float(minus_ref_2[i])
|
|
310 strand += "-"
|
|
311
|
|
312 # Corrects with cfactor1 and cfactor2
|
|
313
|
|
314 c1 /= cfactor1
|
|
315 if c2 != 0:
|
|
316 c2 /= cfactor2
|
|
317 ratio = c2/c1
|
|
318 else:
|
|
319 c2 = 0
|
|
320 ratio = 0
|
|
321
|
|
322 # Passes by all insertions with a number of reads smaller than the cutoff, as they may lead to inaccurate fitness calculations.
|
|
323
|
|
324 if (c1 + c2)/2 < float(arguments.cutoff):
|
|
325 i+= 1
|
|
326 continue
|
|
327
|
|
328 # Calculates each insertion's frequency within the populations at t1 and t2.
|
|
329
|
|
330 mt_freq_t1 = c1/total
|
|
331 mt_freq_t2 = c2/total
|
|
332 pop_freq_t1 = 1 - mt_freq_t1
|
|
333 pop_freq_t2 = 1 - mt_freq_t2
|
|
334
|
|
335 # Calculates each insertion's fitness! This is from the fitness equation log((frequency of mutation @ time 2 / frequency of mutation @ time 1)*expansion factor)/log((frequency of population without the mutation @ time 2 / frequency of population without the mutation @ time 1)*expansion factor)
|
|
336
|
|
337 w = 0
|
|
338 if mt_freq_t2 != 0:
|
|
339 top_w = math.log(mt_freq_t2*(float(arguments.expansion_factor)/mt_freq_t1))
|
|
340 bot_w = math.log(pop_freq_t2*(float(arguments.expansion_factor)/pop_freq_t1))
|
|
341 w = top_w/bot_w
|
|
342
|
|
343 # Checks which gene locus the insertion falls within, and records that.
|
|
344
|
|
345 gene = ''
|
|
346 for feature_dictionary in feature_list:
|
|
347 if feature_dictionary["start"] <= i and feature_dictionary["end"] >= i:
|
|
348 gene = "".join(feature_dictionary["gene"])
|
|
349 genic += 1
|
|
350 break
|
|
351 total_inserts += 1
|
|
352
|
|
353 # Writes all relevant information on each insertion and its fitness to a cvs file: the location of the insertion, its strand, c1, c2, etc. (the variable names are self-explanatiory)
|
|
354 # w is written twice, because the second w will be normalized if normalization is called for, thus becoming nW.
|
|
355
|
|
356 row = [i, strand, c1, c2, ratio, mt_freq_t1, mt_freq_t2, pop_freq_t1, pop_freq_t2, gene, arguments.expansion_factor, w, w]
|
|
357 results.append(row)
|
|
358 i += 1
|
|
359 with open(arguments.outfile, "wb") as csvfile:
|
|
360 writer = csv.writer(csvfile)
|
|
361 writer.writerows(results)
|
|
362
|
|
363 print "Done comparing mapfiles " + str(get_time()) + "\n"
|
|
364 print "Genic: " + str(genic) + "\n"
|
|
365 print "Total: " + str(total_inserts) + "\n"
|
|
366
|
|
367
|
|
368
|
|
369
|
|
370
|
|
371
|
|
372
|
|
373
|
|
374
|
|
375
|
|
376 ##### NORMALIZATION #####
|
|
377
|
|
378 # If making a WIG file is requested in the arguments, starts a string to be added to and then written to the WIG file with a typical WIG file header.
|
|
379 # The header is just in a typical WIG file format; if you'd like to look into this more UCSC has notes on formatting WIG files on their site.
|
|
380
|
|
381 if (arguments.wig):
|
|
382 wigstring = "track type=wiggle_0 name=" + arguments.wig + "\n" + "variableStep chrom=" + refname + "\n"
|
|
383
|
|
384 # Takes normalization genes (which should all be predicted or known to have fitness values of exactly 1.0, like transposons for example) and uses them to normalize the fitnesses of all insertion locations
|
|
385
|
|
386 if (arguments.normalize):
|
|
387 with open(arguments.normalize) as file:
|
|
388 transposon_genes = file.read().splitlines()
|
|
389 print "Normalize genes loaded" + "\n"
|
|
390 blank_ws = 0
|
|
391 sum = 0
|
|
392 count = 0
|
|
393 weights = []
|
|
394 scores = []
|
|
395 for list in results:
|
|
396 if list[9] != '' and list[9] in transposon_genes: # and list[11]:
|
|
397 c1 = list[2]
|
|
398 c2 = list[3]
|
|
399 score = list[11]
|
|
400 avg = (c1 + c2)/2
|
|
401
|
|
402 # Skips over those insertion locations with too few insertions - their fitness values are less accurate because they're based on such small insertion numbers.
|
|
403
|
|
404 if float(c1) >= float(arguments.cutoff2):
|
|
405
|
|
406 # Sets a max weight, to prevent insertion location scores with huge weights from unbalancing the normalization.
|
|
407
|
|
408 if (avg >= float(arguments.max_weight)):
|
|
409 avg = float(arguments.max_weight)
|
|
410
|
|
411 # Tallies how many w values are 0 within the blank_ws value; you might get many transposon genes with a w value of 0 if a bottleneck occurs, for example, which is especially common with in vivo experiments. This is used later by aggregate.py
|
|
412 # For example, when studying a nasal infection in a mouse model, what bacteria "sticks" and is able to survive and what bacteria is swallowed and killed or otherwise flushed out tends to be a matter of chance not fitness; all mutants with an insertion in a specific transposon gene could be flushed out by chance!
|
|
413
|
|
414 if score == 0:
|
|
415 blank_ws += 1
|
|
416
|
|
417 sum += score
|
|
418 count += 1
|
|
419 weights.append(avg)
|
|
420 scores.append(score)
|
|
421
|
|
422 print str(list[9]) + " " + str(score) + " " + str(c1)
|
|
423
|
|
424 # Counts and removes all "blank" fitness values of normalization genes - those that = 0 - because they most likely don't really have a fitness value of 0, and you just happened to not get any reads from that location at t2.
|
|
425
|
|
426 blank_count = 0
|
|
427 original_count = len(scores)
|
|
428 curr_count = original_count
|
|
429 i = 0
|
|
430 while i < curr_count:
|
|
431 w_value = scores[i]
|
|
432 if w_value == 0:
|
|
433 blank_count += 1
|
|
434 weights.pop(i)
|
|
435 scores.pop(i)
|
|
436 i -= 1
|
|
437 curr_count = len(scores)
|
|
438 i += 1
|
|
439
|
|
440 # If no normalization genes can pass the cutoff, normalization cannot occur, so this ends the script advises the user to try again and lower cutoff and/or cutoff2.
|
|
441
|
|
442 if len(scores) == 0:
|
|
443 print 'ERROR: The normalization genes do not have enough reads to pass cutoff and/or cutoff2; please lower one or both of those arguments.' + "\n"
|
|
444 quit()
|
|
445
|
|
446 pc_blank_normals = float(blank_count) / float(original_count)
|
|
447 print "# blank out of " + str(original_count) + ": " + str(pc_blank_normals) + "\n"
|
|
448 with open(arguments.outfile2, "w") as f:
|
|
449 f.write("blanks: " + str(pc_blank_normals) + "\n" + "total: " + str(total) + "\n" + "refname: " + refname)
|
|
450
|
|
451 average = sum / count
|
|
452 i = 0
|
|
453 weighted_sum = 0
|
|
454 weight_sum = 0
|
|
455 while i < len(weights):
|
|
456 weighted_sum += weights[i]*scores[i]
|
|
457 weight_sum += weights[i]
|
|
458 i += 1
|
|
459 weighted_average = weighted_sum/weight_sum
|
|
460
|
|
461 print "Normalization step:" + "\n"
|
|
462 print "Regular average: " + str(average) + "\n"
|
|
463 print "Weighted Average: " + str(weighted_average) + "\n"
|
|
464 print "Total Insertions: " + str(count) + "\n"
|
|
465
|
|
466 old_ws = 0
|
|
467 new_ws = 0
|
|
468 wcount = 0
|
|
469
|
|
470 for list in results:
|
|
471 if list[11] == 'W':
|
|
472 continue
|
|
473 new_w = float(list[11])/weighted_average
|
|
474
|
|
475 # Sometimes you want to multiply all the fitness values by a constant; this does that.
|
|
476 # For example you might multiply all the values by a constant for a genetic interaction screen - where Tn-Seq is performed as usual except there's one background knockout all the mutants share.
|
|
477
|
|
478 if arguments.multiply:
|
|
479 new_w *= float(arguments.multiply)
|
|
480
|
|
481 if float(list[11]) > 0:
|
|
482 old_ws += float(list[11])
|
|
483 new_ws += new_w
|
|
484 wcount += 1
|
|
485
|
|
486 list[12] = new_w
|
|
487
|
|
488 if (arguments.wig):
|
|
489 wigstring += str(list[0]) + " " + str(new_w) + "\n"
|
|
490
|
|
491 old_w_mean = old_ws / wcount
|
|
492 new_w_mean = new_ws / wcount
|
|
493 print "Old W Average: " + str(old_w_mean) + "\n"
|
|
494 print "New W Average: " + str(new_w_mean) + "\n"
|
|
495
|
|
496 with open(arguments.outfile, "wb") as csvfile:
|
|
497 writer = csv.writer(csvfile)
|
|
498 writer.writerows(results)
|
|
499
|
|
500 if (arguments.wig):
|
|
501 if (arguments.normalize):
|
|
502 with open(arguments.wig, "wb") as wigfile:
|
|
503 wigfile.write(wigstring)
|
|
504 else:
|
|
505 for list in results:
|
|
506 wigstring += str(list[0]) + " " + str(list[11]) + "\n"
|
|
507 with open(arguments.wig, "wb") as wigfile:
|
|
508 wigfile.write(wigstring)
|
|
509
|
|
510
|
|
511
|
|
512
|
|
513
|
|
514
|
|
515
|
|
516
|
|
517
|
|
518
|
|
519
|
|
520
|
|
521
|
|
522
|
|
523
|
|
524
|
|
525
|
|
526
|
|
527
|
|
528
|
|
529
|
|
530
|
|
531
|
|
532
|
|
533
|
|
534
|
|
535
|
|
536
|
|
537
|
|
538
|
|
539
|
|
540
|
|
541
|
|
542
|
|
543
|
|
544
|
|
545
|
|
546
|
|
547
|
|
548
|
|
549
|
|
550
|
|
551
|
|
552
|
|
553
|
|
554
|
|
555
|
|
556
|
|
557
|
|
558
|
|
559
|
|
560
|
|
561
|
|
562
|
|
563
|
|
564
|
|
565
|
|
566
|
|
567
|
|
568
|
|
569
|
|
570
|
|
571
|
|
572
|
|
573
|
|
574
|
|
575
|
|
576
|
|
577
|
|
578
|
|
579
|
|
580
|
|
581
|
|
582
|
|
583
|
|
584
|
|
585
|
|
586
|
|
587
|
|
588
|
|
589
|
|
590
|
|
591
|
|
592
|
|
593
|
|
594
|
|
595
|
|
596
|
|
597
|
|
598
|
|
599
|
|
600
|
|
601
|
|
602
|
|
603
|
|
604
|
|
605
|
|
606
|
|
607
|
|
608
|
|
609
|
|
610 # `````````````
|
|
611 # `````````````
|
|
612 # ``@@@@@@@@@``
|
|
613 # ``@@@@@@@@@```
|
|
614 # ``@@@@@@@@@``
|
|
615 # ``@@@@@@@@@``
|
|
616 # ``@@@@@@@@@``
|
|
617 # ``@@@@@@@@@``
|
|
618 # ```@@@@@@@@#``
|
|
619 # ```@@@@@@@@#``
|
|
620 # ```@@@@@@@@+``
|
|
621 # ```@@@@@@@@'``
|
|
622 # ```@@@@@@@@;``
|
|
623 # ```@@@@@@@@;``
|
|
624 # ```@@@@@@@@:``
|
|
625 # ```@@@@@@@@,``
|
|
626 # ``.@@@@@@@@.``
|
|
627 # ``.@@@@@@@@```
|
|
628 # ``.@@@@@@@@```
|
|
629 # ``.@@@@@@@@```
|
|
630 # ``.@@@@@@@@``
|
|
631 # ``,@@@@@@@@``
|
|
632 # ``,@@@@@@@@``
|
|
633 # ``.@@@@@@@@``
|
|
634 # ```@@@@@@@@``
|
|
635 # ``:@@@@@@@@``
|
|
636 # ``:@@@@@@@@``
|
|
637 # ``:@@@@@@@@``
|
|
638 # ``:@@@@@@@@``
|
|
639 # ``'@@@@@@@@``
|
|
640 # ``;@@@@@@@@``
|
|
641 # ``:@@@@@@@@``
|
|
642 # ``:@@@@@@@@``
|
|
643 # ``:@@@@@@@@``
|
|
644 # ``;@@@@@@@#``
|
|
645 # ````+@@@@@@@#`````
|
|
646 # ```````#@@@@@@@#``````
|
|
647 # `````.,@@@@@@@@@...````
|
|
648 # ``@@@@@@@@@@@@@@@@@@;``
|
|
649 # ``@@@@@@@@@@@@@@@@@@;``
|
|
650 # ```````````````````````
|
|
651 # `````````````````````
|
|
652 # ``````.```````
|
|
653 # ````@.''```
|
|
654 # ```# `;```
|
|
655 # ``.+ @```
|
|
656 # ```@ ````,+```
|
|
657 # ```;;````` @```
|
|
658 # ```@ ``````,@```
|
|
659 # ```,+```..```@```
|
|
660 # ```@ ``....```@```
|
|
661 # ```+' ``....```#'``
|
|
662 # ```@```......`` @```
|
|
663 # ```'+```......```'@```
|
|
664 # ```@ ``........```@```
|
|
665 # ```'#```........````@```
|
|
666 # ```@ ``..........```#,``
|
|
667 # ```'#```...........`` @```
|
|
668 # ```@``.............```.+```
|
|
669 # ```:#```.............`` #```
|
|
670 # ``````` ```@ ```.......#......``.@```
|
|
671 # `````````` ```:@```#`......@......```@```
|
|
672 # ``````#@@@`` ```@ `.`:.......@.......`` @```
|
|
673 # ```.#@###@`` ```:@``..`+`....`@.......```@,``
|
|
674 # ```'@####@``` ```@````..@@@@@@@@#,`..#```` @```
|
|
675 # ```#####@@``` ``;@ ,`.,@@. `@@..#..```''``
|
|
676 # ``:####@#```` ```@``@`@@ @@:...`` @```
|
|
677 # ```@#####```` ``,@``.@, ,@`...``:@```
|
|
678 # ``.####@``` ```@.` @` @....``@```
|
|
679 # ``####@``` ``,@ @.` @`.````@```
|
|
680 # ``@##@```` ```@, @: ;# `@..```@.``
|
|
681 # ```@##```` ``.@`,@ @@, #...`` @```
|
|
682 # ```@#@``` ```@, # `@@@ @`.```;'``
|
|
683 # ```##:`` ``.@ +, .@@@ ,'..`` @```
|
|
684 # ``.##``` ```@, @ `@@@ @`.```,+```
|
|
685 # `````@##``` ```@`'. @@@ :...```@``` ``````````
|
|
686 # ````````````````````````````````````````##@``` ```@:`@ @@@ #...`` #``` `````````````````
|
|
687 # ```````````````````````````````````````.###@``` ```@ `, .@@@++'++#@@'` #`..```#``` ````````````'@@@@@.````
|
|
688 # `````+@####################@@@@@@@@@@@@#####@``` ```#;`,. `@#...,.,,,,,,..;@, @....`` @``````````````+@@@########@.``
|
|
689 # `+@##########################################,```` ```````````````@```@ +@,.,,,,,,,,,,,,,,,,,@ @....```#`````````'@@@##############@```
|
|
690 # `@###########################################@``````````````````````````````````````+'``.,'.#.,,,,,,,,,,,,,,,,,,,,.++@......`` @````+@@@#######@+``````'###'``
|
|
691 # ``:@@########@@@@@@@@@@@@@@@@@@@@@@#@@@@@@@##@``````````````````````````......,`,,.,@ ```.##.,.,,,,,,,,,,,,,,,,,,,,.##......`` :.+@@#######@@:``````````###@```
|
|
692 # ````````````````````````````````,#########@###@@@@#################################@'```...@.,,,,,,,,,,,,,,,,,,,,,,,#.........`'@######@@+```````````````@##```
|
|
693 # ```````````````````````````````@#########@#########################################```.....@:,,,,,,,,,,,,,,,,,,..;@..........`@####@@:```````` ```@##@``
|
|
694 # `````@@####@@@##########@@@@@@@@@@@@@@@@@@@@@@@@@@@@#+@+```......@#.,,,,,,,,,,,,,,,,.##..........`` #@#````````` ```##@```
|
|
695 # ``.#@######@####:```````````````````````````````````@ ``.......@:#,,,,,,,,,,,,,,,;@@`............`` @`````` ```@##:``
|
|
696 # ``:########@###@```````````````````````````````````#;```......+..`##,.,,,,,,,,.#@#..'............`` @```` ``;##@```
|
|
697 # ```@@####@@##@'```` ````@ ``.......'.....@@#+;:;'#@@;`...#`............`` @``` ```@##```
|
|
698 # ```````````````` ```@,```.............'..:''':.@`......:............```@.`` ``@###```
|
|
699 # `````````````` ``.@```..............#........'`....................```@``` ``.##@``````
|
|
700 # ```@.``..............`#........,.....................```@.`` ```@#+,``````
|
|
701 # ``.@``.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,.................```@``` ````+#####@````
|
|
702 # ```@````......,........................,....,,.,.,,,,,..` @,`` ```;@@######````
|
|
703 # ``.@```.......,......................`......,...........```@``` ```+#########@````
|
|
704 # ```@```...........`@@+...............+@@`....,...........```@:`` ``;@#########@'```
|
|
705 # ``.@ ``............@@@@@`.........,@@@@@.....,............```@``` ``@#########@#@@```
|
|
706 # ```@.```............@@@@@@@`.....'@@@@@@@.....,............```#'`` ``@###@###@#@#@@@``
|
|
707 # ``.@```.............@@@@@@@@@..+#@@@@@@@@.....,.............`` #``` ``@#@@@@##@#@#@@@``
|
|
708 # ```@````.............@@@@@@@@@@@@@@@@@@@@@.....,.............```'#``` ``'#@@@@##@#@@@@@,`
|
|
709 # ``.@ ``.........,....@@@@@@@@@',##@@@@@@@@`....,..............```@``` ```@@@@@##@@'#@@@@`
|
|
710 # ```@.```.........,....@@@@@@@#`...`#@@@@@@@`....,..............```.@``` ``#@@@@##;```@@@@`
|
|
711 # ``.@ ``.....,,,,,,,.,.@@@@@#.,,,,,,,.#@@@@@,,,,,,,,,,,,:,,,,,,,,.``@``` ``#@@@@#.````@@@@`
|
|
712 # ```@. ``...............@@@;......,......#@@@`...........,.........```@``` ``#@@@;``````@@@@`
|
|
713 # ```@```................@,........,........+#`...........,.........```@.`` ``#@@@;``````@@@@`
|
|
714 # ```@.``...........................,.........`............,..........```@``` ``#@@@'`` ``#@@;`
|
|
715 # ``.@ ``................,..........,......................,..........```#:`` ``#@@@'`` ```#@``
|
|
716 # ```@,``............................,......................,...........`` @``` ``+@@@'`` `````
|
|
717 # ``.@```............................,......................,...........```#+`` ``;@@@+`` ``
|
|
718 # ```@,``.............................,......................,............```@``` ``'@@@+``
|
|
719 # ``.@```.........,...................,......................,............```'#``` ``;@@@+``
|
|
720 # ```@:```.........,...................,......................,.............`` @``` ``:@@@+``
|
|
721 # ```@`..,,,,,,,,,,,,,,,,,,..,.........,......................,.............```'@``` ``;@@@#``
|
|
722 # ``+'```...,.................,....,,,,,,,,,,,,,,,,,,,,,,,,,,,,,..........,...``@``` ``;@@@#``
|
|
723 # ```@ ``....,.................,.......................................,.......``;@``` ``:@@@#``
|
|
724 # ``'#```....,.................,.......................................,.......```@``` ``:@@@@``
|
|
725 # ```@```.....,.................,.......................................,........``;#`` ``:@@@@``
|
|
726 # ```@ ``.....,.................,.......................................,........`` @``` ``:@@@@``
|
|
727 # ``@````...............................................................,........`` @.`` ``;@@@@``
|
|
728 # ``@ ```..............................................................,........`` .#`` ``'@@@@``
|
|
729 # ``# ``````.`.```````````````..````````````````````..`````````````````.`````````` @`` ``'@@@@``
|
|
730 # ``. `````````````````````````````````````````````````````````````````````````` .;`` ``'@@@@``
|
|
731 # ``@;` `` ` ` ` ` ```` ` ````` ` ` `,+@``` ``+@@@@``
|
|
732 # `````:;'++##@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@+.```` ``+@@@@``
|
|
733 # ```````````````````````````+##@``````````````````@#@``````````````````````````````` ``+@@@@``
|
|
734 # `````````````````````````@##@``````````````````@##;```````````````````````````` ``+@@@@``
|
|
735 # ````###,```` ````+##@``` ``+@@@@``
|
|
736 # ``,###``` ``.@##``` ``'@@@@``
|
|
737 # ``###@`` ```@##``` ``'@@@@``
|
|
738 # ```@##@`` ``@##+`` ``'@@@@``
|
|
739 # ```###.`` ``:##@`` ``'@@@@``
|
|
740 # ``:###``` ```##@``` ``'@@@@``
|
|
741 # ``@##@`` ```@##``` ``'@@@@``
|
|
742 # ```@##'`` ``@###`` ``'@@@@``
|
|
743 # ```@##``` ```##@`` ``'@@@@``
|
|
744 # ``,###``` ```@#@``` ``'@@@@``
|
|
745 # ``####`` ``@##.`` ``'@@@@``
|
|
746 # ``@##@`` ``;##@`` ``'@@@@``
|
|
747 # `````````@##@`` ```##@```` ``;@@@@``
|
|
748 # ``````````````@##;`` ```###`````````````` ``;@@@@``
|
|
749 # `````````.,;.```###``` ``@##:`````````````` ``;@@@@``
|
|
750 # `````#@#########@@##``` ``###@@@@@@###@#@'``` ``;@@@@``
|
|
751 # ```@@###############@`` ``,################`` ``;@@@@``
|
|
752 # ``'@################+`` ```###############+`` ``;@@@@``
|
|
753 # `````````````````````` ``###########@#,```` ``.@@@@``
|
|
754 # ````````````````````` ``````````````````` ```@@@.`
|
|
755 # ```````````````` ```````
|
|
756 #
|
|
757 # |