10
|
1 # A translation of calc_fitness.pl into python! For analysis of Tn-Seq.
|
|
2 # This script requires BioPython, which in turn has a good number of dependencies (some optional but very helpful).
|
|
3 # How to install BioPython and a list of its dependencies can be found here: http://biopython.org/DIST/docs/install/Installation.html
|
|
4 # K. McCoy
|
|
5
|
|
6
|
|
7
|
|
8
|
|
9
|
|
10
|
|
11
|
|
12
|
|
13
|
|
14 ##### ARGUMENTS #####
|
|
15
|
|
16 def print_usage():
|
|
17 print "\n" + "You are missing one or more required flags. A complete list of flags accepted by calc_fitness is as follows:" + "\n\n"
|
|
18 print "\033[1m" + "Required" + "\033[0m" + "\n"
|
|
19 print "-ref" + "\t\t" + "The name of the reference genome file, in GenBank format." + "\n"
|
|
20 print "-t1" + "\t\t" + "The name of the bowtie mapfile from time 1." + "\n"
|
|
21 print "-t2" + "\t\t" + "The name of the bowtie mapfile from time 2." + "\n"
|
|
22 print "-out" + "\t\t" + "Name of a file to enter the .csv output." + "\n"
|
|
23 print "\n"
|
|
24 print "\033[1m" + "Optional" + "\033[0m" + "\n"
|
|
25 print "-expansion" + "\t\t" + "Expansion factor (default: 250)" + "\n"
|
|
26 print "-reads1" + "\t\t" + "The number of reads to be used to calculate the correction factor for time 0." + "\n\t\t" + "(default counted from bowtie output)" + "\n"
|
|
27 print "-reads2" + "\t\t" + "The number of reads to be used to calculate the correction factor for time 6." + "\n\t\t" + "(default counted from bowtie output)" + "\n"
|
|
28 print "-cutoff" + "\t\t" + "Discard any positions where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\n"
|
|
29 print "-cutoff2" + "\t\t" + "Discard any positions within the normalization genes where the average of counted transcripts at time 0 and time 1 is below this number (default 10)" + "\n"
|
|
30 print "-strand" + "\t\t" + "Use only the specified strand (+ or -) when counting transcripts (default: both)" + "\n"
|
|
31 print "-normalize" + "\t" + "A file that contains a list of genes that should have a fitness of 1 - used for normalization and bottleneck calculations." + "\n"
|
|
32 print "-b" + "\t" + "Calculate bottleneck value from all genes (rather than only normalization genes)" + "\n"
|
|
33 print "-maxweight" + "\t" + "The maximum weight a transposon gene can have in normalization calculations" + "\n"
|
|
34 print "-multiply" + "\t" + "Multiply all fitness scores by a certain value (e.g., the fitness of a knockout). You should normalize the data." + "\n"
|
|
35 print "-ef" + "\t\t" + "Exclude insertions that occur in the first N amount (%) of gene--becuase may not affect gene function." + "\n"
|
|
36 print "-el" + "\t\t" + "Exclude insertions in the last N amount (%) of the gene--considering truncation may not affect gene function." + "\n"
|
|
37 print "-wig" + "\t\t" + "Create a wiggle file for viewing in a genome browser. Provide a filename." + "\n"
|
|
38 print "-uncol" + "\t\t" + "Use if reads were uncollapsed when mapped." + "\n"
|
|
39 print "\n"
|
|
40
|
|
41 import argparse
|
|
42 parser = argparse.ArgumentParser()
|
|
43 parser.add_argument("-ref", action="store", dest="ref_genome")
|
|
44 parser.add_argument("-t1", action="store", dest="mapfile1")
|
|
45 parser.add_argument("-t2", action="store", dest="mapfile2")
|
|
46 parser.add_argument("-out", action="store", dest="outfile")
|
|
47 parser.add_argument("-out2", action="store", dest="outfile2")
|
|
48 parser.add_argument("-expansion", action="store", dest="expansion_factor")
|
|
49 parser.add_argument("-reads1", action="store", dest="reads1")
|
|
50 parser.add_argument("-reads2", action="store", dest="reads2")
|
|
51 parser.add_argument("-cutoff", action="store", dest="cutoff")
|
|
52 parser.add_argument("-cutoff2", action="store", dest="cutoff2")
|
|
53 parser.add_argument("-strand", action="store", dest="usestrand")
|
|
54 parser.add_argument("-normalize", action="store", dest="normalize")
|
|
55 parser.add_argument("-b", action="store", dest="bottleall")
|
|
56 parser.add_argument("-maxweight", action="store", dest="max_weight")
|
|
57 parser.add_argument("-multiply", action="store", dest="multiply")
|
|
58 parser.add_argument("-ef", action="store", dest="exclude_first")
|
|
59 parser.add_argument("-el", action="store", dest="exclude_last")
|
|
60 parser.add_argument("-wig", action="store", dest="wig")
|
|
61 parser.add_argument("-uncol", action="store", dest="uncol")
|
|
62 arguments = parser.parse_args()
|
|
63
|
|
64 if (not arguments.ref_genome or not arguments.mapfile1 or not arguments.mapfile2 or not arguments.outfile):
|
|
65 print_usage()
|
|
66 quit()
|
|
67
|
|
68 # Sets the default value of the expansion factor to 250, which is a trivial placeholder number.
|
|
69
|
|
70 if (not arguments.expansion_factor):
|
|
71 arguments.expansion_factor = 250
|
|
72
|
|
73 # 75 is similarly trivial
|
|
74
|
|
75 if (not arguments.max_weight):
|
|
76 arguments.max_weight = 75
|
|
77
|
|
78 # Sets the default value of cutoff to 0; cutoff exists to discard positions with a low number of counted transcripts, because fitnesses calculated from them may not be very accurate, by the same reasoning that studies with low sample sizes are innacurate.
|
|
79
|
|
80 if (not arguments.cutoff):
|
|
81 arguments.cutoff = 0
|
|
82
|
|
83 # Sets the default value of cutoff2 to 10; cutoff2 exists to discard positions within normalization genes with a low number of counted transcripts, because fitnesses calculated from them similarly may not be very accurate.
|
|
84 # This only has an effect if it's larger than cutoff, since the normalization step references a list of insertions already affected by cutoff.
|
|
85
|
|
86 if (not arguments.cutoff2):
|
|
87 arguments.cutoff2 = 10
|
|
88
|
|
89 if (not arguments.usestrand):
|
|
90 arguments.usestrand = "both"
|
|
91
|
|
92
|
|
93
|
|
94
|
|
95
|
|
96
|
|
97 ##### PARSING THE REFERENCE GENOME #####
|
|
98
|
|
99 def get_time():
|
|
100 import datetime
|
|
101 return datetime.datetime.now().time()
|
|
102 print "\n" + "Starting: " + str(get_time()) + "\n"
|
|
103
|
|
104 from Bio import SeqIO
|
|
105 import os.path
|
|
106 handle = open(arguments.ref_genome, "rU")
|
|
107 for record in SeqIO.parse(handle, "genbank"):
|
|
108 refname = record.id
|
|
109 features = record.features
|
|
110 handle.close()
|
|
111
|
|
112 # Makes a dictionary out of each feature that's a gene - with its gene name, start location, end location, and strand as keys to their values. Then makes a list out of all those dictionaries for ease of accessing later on.
|
|
113
|
|
114 feature_list = []
|
|
115 for feature in features:
|
|
116 if feature.type == "gene":
|
|
117 gene = feature.qualifiers["locus_tag"]
|
|
118 strand = feature.location.strand
|
|
119 start = float(feature.location.start)
|
|
120 end = float(feature.location.end)
|
|
121
|
|
122 # Exclude_first and exclude_last are used here to exclude whatever percentage of the genes you like from calculations; e.g. a value of 0.1 for exclude_last would exclude the last 10% of all genes!
|
|
123 # This can be useful because insertions at the very start or end of genes often don't actually break its function.
|
|
124
|
|
125 if (arguments.exclude_first):
|
|
126 start += (end - start) * float(arguments.exclude_first)
|
|
127 if (arguments.exclude_last):
|
|
128 end -= (end - start) * float(arguments.exclude_last)
|
|
129 feature_dictionary = {"gene": gene, "start": start, "end": end, "strand": strand}
|
|
130 feature_list.append(feature_dictionary)
|
|
131
|
|
132 print "Done generating feature lookup: " + str(get_time()) + "\n"
|
|
133
|
|
134
|
|
135
|
|
136
|
|
137
|
|
138
|
|
139
|
|
140
|
|
141
|
|
142
|
|
143 ##### PARSING THE MAPFILES #####
|
|
144
|
|
145 with open(arguments.mapfile1) as file:
|
|
146 r1 = file.readlines()
|
|
147 with open(arguments.mapfile2) as file:
|
|
148 r2 = file.readlines()
|
|
149
|
|
150 # When called, goes through each line of the mapfile to find the strand (+/Watson or -/Crick), count, and position of the read. It may be helpful to look at how the mapfiles are formatted to understand how this code finds them.
|
|
151
|
|
152 def read_mapfile(reads):
|
|
153 plus_total = 0
|
|
154 minus_total = 0
|
|
155 plus_counts = {"total": 0, "sites": 0}
|
|
156 minus_counts = {"total": 0, "sites": 0}
|
|
157 for read in reads:
|
|
158 if (arguments.uncol):
|
|
159 strand = read.split()[2]
|
|
160 count = 1
|
|
161 position = float(read.split()[4])
|
|
162 if arguments.usestrand != "both" and strand != arguments.usestrand:
|
|
163 continue
|
|
164 if (strand == "+"):
|
|
165 sequence_length = len(read.split()[5])
|
|
166 position += (sequence_length - 2)
|
|
167 plus_counts["total"] += count
|
|
168 plus_counts["sites"] += 1
|
|
169 if position in plus_counts:
|
|
170 plus_counts[position] += count
|
|
171 else:
|
|
172 plus_counts[position] = count
|
|
173 else:
|
|
174 minus_counts["total"] += count
|
|
175 minus_counts["sites"] += 1
|
|
176 if position in minus_counts:
|
|
177 minus_counts[position] += count
|
|
178 else:
|
|
179 minus_counts[position] = count
|
|
180 else:
|
|
181 if "-" in read.split()[0]:
|
|
182 strand = read.split()[1]
|
|
183 count = float(read.split()[0].split("-")[1])
|
|
184 position = float(read.split()[3])
|
|
185 else:
|
|
186 continue
|
|
187
|
|
188 # If for some reason you want to skip all reads from one of the strands - for example, if you wanted to compare the two strands - that's done here.
|
|
189
|
|
190 if arguments.usestrand != "both" and strand != arguments.usestrand:
|
|
191 continue
|
|
192
|
|
193 # Makes dictionaries for the + & - strands, with each insert position as a key and the number of insertions there as its corresponding value.
|
|
194
|
|
195 if (strand == "+"):
|
|
196 sequence_length = len(read.split()[4])
|
|
197
|
|
198 # The -2 in "(sequence_length -2)" comes from a fake "TA" in the read; see how the libraries are constructed for further on this
|
|
199
|
|
200 position += (sequence_length - 2)
|
|
201 plus_counts["total"] += count
|
|
202 plus_counts["sites"] += 1
|
|
203 if position in plus_counts:
|
|
204 plus_counts[position] += count
|
|
205 else:
|
|
206 plus_counts[position] = count
|
|
207 else:
|
|
208 minus_counts["total"] += count
|
|
209 minus_counts["sites"] += 1
|
|
210 if position in minus_counts:
|
|
211 minus_counts[position] += count
|
|
212 else:
|
|
213 minus_counts[position] = count
|
|
214 return (plus_counts, minus_counts)
|
|
215
|
|
216 # Calls read_mapfile(reads) to parse arguments.reads1 and arguments.reads2 (your reads from t1 and t2).
|
|
217
|
|
218
|
|
219
|
|
220
|
|
221
|
|
222 (plus_ref_1, minus_ref_1) = read_mapfile(r1)
|
|
223 print "Read first file: " + str(get_time()) + "\n"
|
|
224 (plus_ref_2, minus_ref_2) = read_mapfile(r2)
|
|
225 print "Read second file: " + str(get_time()) + "\n"
|
|
226
|
|
227 # The lines below are just printed for reference. The number of sites is the length of a given dictionary of sites - 1 because its last key, "total", isn't actually a site.
|
|
228
|
|
229 print "Reads:" + "\n"
|
|
230 print "1: + " + str(plus_ref_1["total"]) + " - " + str(minus_ref_1["total"]) + "\n"
|
|
231 print "2: + " + str(plus_ref_2["total"]) + " - " + str(minus_ref_2["total"]) + "\n"
|
|
232 print "Sites:" + "\n"
|
|
233 print "1: + " + str(plus_ref_1["sites"]) + " - " + str(minus_ref_1["sites"]) + "\n"
|
|
234 print "2: + " + str(plus_ref_2["sites"]) + " - " + str(minus_ref_2["sites"]) + "\n"
|
|
235
|
|
236
|
|
237
|
|
238
|
|
239
|
|
240
|
|
241
|
|
242
|
|
243
|
|
244
|
|
245 ##### FITNESS CALCULATIONS #####
|
|
246
|
|
247 # If reads1 and reads2 weren't specified in the command line, sets them as the total number of reads (found in read_mapfile())
|
|
248
|
|
249 if not arguments.reads1:
|
|
250 arguments.reads1 = plus_ref_1["total"] + minus_ref_1["total"]
|
|
251 if not arguments.reads2:
|
|
252 arguments.reads2 = plus_ref_2["total"] + minus_ref_2["total"]
|
|
253
|
|
254 # Calculates the correction factors for reads from t1 and t2; cfactor1 and cfactor2 are the number of reads from t1 and t2 respectively divided by total, which is the average number of reads between the two.
|
|
255 # This is used later on to correct for pipetting errors, or any other error that would cause unequal amounts of DNA from t1 and t2 to be sequenced so that an unequal amount of reads is produced
|
|
256
|
|
257 total = (float(arguments.reads1) + float(arguments.reads2))/2
|
|
258 cfactor1 = float(arguments.reads1)/total
|
|
259 cfactor2 = float(arguments.reads2)/total
|
|
260 print "Cfactor 1: " + str(cfactor1) + "\n"
|
|
261 print "Cfactor 2: " + str(cfactor2) + "\n"
|
|
262 import math
|
|
263 import csv
|
|
264 results = [["position", "strand", "count_1", "count_2", "ratio", "mt_freq_t1", "mt_freq_t2", "pop_freq_t1", "pop_freq_t2", "gene", "D", "W", "nW"]]
|
|
265 genic = 0
|
|
266 total_inserts = 0
|
|
267 with open(arguments.ref_genome, "r") as file:
|
|
268 firstline = file.readline()
|
|
269 genomelength = firstline.split()[2]
|
|
270 i = 0
|
|
271 while i < float(genomelength):
|
|
272
|
|
273 # At each possible location for an insertion in the genome, counts the number of actual insertions at t1 and which strand(s) the corresponding reads came from.
|
|
274
|
|
275 c1 = 0
|
|
276 if i in plus_ref_1:
|
|
277 c1 = float(plus_ref_1[i])
|
|
278 strand = "+/"
|
|
279 if i in minus_ref_1:
|
|
280 c1 += float(minus_ref_1[i])
|
|
281 strand = "b/"
|
|
282 elif i in minus_ref_1:
|
|
283 c1 = float(minus_ref_1[i])
|
|
284 strand = "-/"
|
|
285
|
|
286 # If there were no insertions at a certain location at t1 just continues to the next location; there can't be any comparison to make between t1 and t2 if there are no t1 insertions!
|
|
287
|
|
288 else:
|
|
289 i += 1
|
|
290 continue
|
|
291
|
|
292 # At each location where there was an insertion at t1, counts the number of insertions at t2 and which strand(s) the corresponding reads came from.
|
|
293
|
|
294 c2 = 0
|
|
295 if i in plus_ref_2:
|
|
296 c2 = float(plus_ref_2[i])
|
|
297 if i in minus_ref_2:
|
|
298 c2 += float(minus_ref_2[i])
|
|
299 strand += "b"
|
|
300 else:
|
|
301 strand += "+"
|
|
302 elif i in minus_ref_2:
|
|
303 c2 = float(minus_ref_2[i])
|
|
304 strand += "-"
|
|
305
|
|
306 # Corrects with cfactor1 and cfactor2
|
|
307
|
|
308 c1 /= cfactor1
|
|
309 if c2 != 0:
|
|
310 c2 /= cfactor2
|
|
311 ratio = c2/c1
|
|
312 else:
|
|
313 c2 = 0
|
|
314 ratio = 0
|
|
315
|
|
316 # Passes by all insertions with a number of reads smaller than the cutoff, as they may lead to inaccurate fitness calculations.
|
|
317
|
|
318 if (c1 + c2)/2 < float(arguments.cutoff):
|
|
319 i+= 1
|
|
320 continue
|
|
321
|
|
322 # Calculates each insertion's frequency within the populations at t1 and t2.
|
|
323
|
|
324 mt_freq_t1 = c1/total
|
|
325 mt_freq_t2 = c2/total
|
|
326 pop_freq_t1 = 1 - mt_freq_t1
|
|
327 pop_freq_t2 = 1 - mt_freq_t2
|
|
328
|
|
329 # Calculates each insertion's fitness! This is from the fitness equation log((frequency of mutation @ time 2 / frequency of mutation @ time 1)*expansion factor)/log((frequency of population without the mutation @ time 2 / frequency of population without the mutation @ time 1)*expansion factor)
|
|
330
|
|
331 w = 0
|
|
332 if mt_freq_t2 != 0:
|
|
333 top_w = math.log(mt_freq_t2*(float(arguments.expansion_factor)/mt_freq_t1))
|
|
334 bot_w = math.log(pop_freq_t2*(float(arguments.expansion_factor)/pop_freq_t1))
|
|
335 w = top_w/bot_w
|
|
336
|
|
337 # Checks which gene locus the insertion falls within, and records that.
|
|
338
|
|
339 gene = ''
|
|
340 for feature_dictionary in feature_list:
|
|
341 if feature_dictionary["start"] <= i and feature_dictionary["end"] >= i:
|
|
342 gene = "".join(feature_dictionary["gene"])
|
|
343 genic += 1
|
|
344 break
|
|
345 total_inserts += 1
|
|
346
|
|
347 # Writes all relevant information on each insertion and its fitness to a cvs file: the location of the insertion, its strand, c1, c2, etc. (the variable names are self-explanatiory)
|
|
348 # w is written twice, because the second w will be normalized if normalization is called for, thus becoming nW.
|
|
349
|
|
350 row = [i, strand, c1, c2, ratio, mt_freq_t1, mt_freq_t2, pop_freq_t1, pop_freq_t2, gene, arguments.expansion_factor, w, w]
|
|
351 results.append(row)
|
|
352 i += 1
|
|
353 with open(arguments.outfile, "wb") as csvfile:
|
|
354 writer = csv.writer(csvfile)
|
|
355 writer.writerows(results)
|
|
356
|
|
357 print "Done comparing mapfiles " + str(get_time()) + "\n"
|
|
358 print "Genic: " + str(genic) + "\n"
|
|
359 print "Total: " + str(total_inserts) + "\n"
|
|
360
|
|
361
|
|
362
|
|
363
|
|
364
|
|
365
|
|
366
|
|
367
|
|
368
|
|
369
|
|
370 ##### BOTTLENECK VALUE CALCULATION #####
|
|
371
|
|
372 #the bottleneck value is calculated here if done from all genes - otherwise it's done in the normalization section if only taken from normalization genes
|
|
373
|
|
374 if (arguments.bottleall):
|
|
375 for list in results:
|
|
376 if list[11] == 0:
|
|
377 overall_blank_count += 1
|
|
378 overall_original_count = len(results)
|
|
379
|
|
380 pc_blank_normals = float(overall_blank_count) / float(overall_original_count)
|
|
381
|
|
382 with open(arguments.outfile2, "w") as f:
|
|
383 f.write("bottleneck_value: " + str(pc_blank_normals) + "\n" + "total: " + str(total) + "\n" + "refname: " + refname)
|
|
384
|
|
385
|
|
386
|
|
387
|
|
388
|
|
389
|
|
390
|
|
391
|
|
392
|
|
393
|
|
394 ##### NORMALIZATION #####
|
|
395
|
|
396 # If making a WIG file is requested in the arguments, starts a string to be added to and then written to the WIG file with a typical WIG file header.
|
|
397 # The header is just in a typical WIG file format; if you'd like to look into this more UCSC has notes on formatting WIG files on their site.
|
|
398
|
|
399 if (arguments.wig):
|
|
400 wigstring = "track type=wiggle_0 name=" + arguments.wig + "\n" + "variableStep chrom=" + refname + "\n"
|
|
401
|
|
402 # Takes normalization genes (which should all be predicted or known to have fitness values of exactly 1.0, like transposons for example) and uses them to normalize the fitnesses of all insertion locations
|
|
403
|
|
404 if (arguments.normalize):
|
|
405 with open(arguments.normalize) as file:
|
|
406 transposon_genes = file.read().splitlines()
|
|
407 print "Normalize genes loaded" + "\n"
|
|
408 blank_ws = 0
|
|
409 sum = 0
|
|
410 count = 0
|
|
411 weights = []
|
|
412 scores = []
|
|
413 for list in results:
|
|
414 if list[9] != '' and list[9] in transposon_genes:
|
|
415 c1 = list[2]
|
|
416 c2 = list[3]
|
|
417 score = list[11]
|
|
418 avg = (c1 + c2)/2
|
|
419
|
|
420 # Skips over those insertion locations with too few insertions - their fitness values are less accurate because they're based on such small insertion numbers.
|
|
421
|
|
422 if float(c1) >= float(arguments.cutoff2):
|
|
423
|
|
424 # Sets a max weight, to prevent insertion location scores with huge weights from unbalancing the normalization.
|
|
425
|
|
426 if (avg >= float(arguments.max_weight)):
|
|
427 avg = float(arguments.max_weight)
|
|
428
|
|
429 # Tallies how many w values are 0 within the blank_ws value; you might get many transposon genes with a w value of 0 if a bottleneck occurs, for example, which is especially common with in vivo experiments. This is used later by aggregate.py
|
|
430 # For example, when studying a nasal infection in a mouse model, what bacteria "sticks" and is able to survive and what bacteria is swallowed and killed or otherwise flushed out tends to be a matter of chance not fitness; all mutants with an insertion in a specific transposon gene could be flushed out by chance!
|
|
431
|
|
432 if score == 0:
|
|
433 blank_ws += 1
|
|
434
|
|
435 sum += score
|
|
436 count += 1
|
|
437 weights.append(avg)
|
|
438 scores.append(score)
|
|
439
|
|
440 print str(list[9]) + " " + str(score) + " " + str(c1)
|
|
441
|
|
442 # Counts and removes all "blank" fitness values of normalization genes - those that = 0 - because they most likely don't really have a fitness value of 0, and you just happened to not get any reads from that location at t2.
|
|
443
|
|
444 blank_count = 0
|
|
445 original_count = len(scores)
|
|
446 curr_count = original_count
|
|
447 i = 0
|
|
448 while i < curr_count:
|
|
449 w_value = scores[i]
|
|
450 if w_value == 0:
|
|
451 blank_count += 1
|
|
452 weights.pop(i)
|
|
453 scores.pop(i)
|
|
454 i -= 1
|
|
455 curr_count = len(scores)
|
|
456 i += 1
|
|
457
|
|
458 # If no normalization genes can pass the cutoff, normalization cannot occur, so this ends the script advises the user to try again and lower cutoff and/or cutoff2.
|
|
459
|
|
460 if len(scores) == 0:
|
|
461 print 'ERROR: The normalization genes do not have enough reads to pass cutoff and/or cutoff2; please lower one or both of those arguments.' + "\n"
|
|
462 quit()
|
|
463
|
|
464 pc_blank_normals = float(blank_countv) / float(original_count)
|
|
465 print "# blank out of " + str(original_count) + ": " + str(pc_blank_normals) + "\n"
|
|
466 if (!arguments.bottleall):
|
|
467 with open(arguments.outfile2, "w") as f:
|
|
468 f.write("bottleneck_value: " + str(pc_blank_normals) + "\n" + "total: " + str(total) + "\n" + "refname: " + refname)
|
|
469
|
|
470 average = sum / count
|
|
471 i = 0
|
|
472 weighted_sum = 0
|
|
473 weight_sum = 0
|
|
474 while i < len(weights):
|
|
475 weighted_sum += weights[i]*scores[i]
|
|
476 weight_sum += weights[i]
|
|
477 i += 1
|
|
478 weighted_average = weighted_sum/weight_sum
|
|
479
|
|
480 print "Normalization step:" + "\n"
|
|
481 print "Regular average: " + str(average) + "\n"
|
|
482 print "Weighted Average: " + str(weighted_average) + "\n"
|
|
483 print "Total Insertions: " + str(count) + "\n"
|
|
484
|
|
485 old_ws = 0
|
|
486 new_ws = 0
|
|
487 wcount = 0
|
|
488
|
|
489 for list in results:
|
|
490 if list[11] == 'W':
|
|
491 continue
|
|
492 new_w = float(list[11])/weighted_average
|
|
493
|
|
494 # Sometimes you want to multiply all the fitness values by a constant; this does that.
|
|
495 # For example you might multiply all the values by a constant for a genetic interaction screen - where Tn-Seq is performed as usual except there's one background knockout all the mutants share.
|
|
496
|
|
497 if arguments.multiply:
|
|
498 new_w *= float(arguments.multiply)
|
|
499
|
|
500 if float(list[11]) > 0:
|
|
501 old_ws += float(list[11])
|
|
502 new_ws += new_w
|
|
503 wcount += 1
|
|
504
|
|
505 list[12] = new_w
|
|
506
|
|
507 if (arguments.wig):
|
|
508 wigstring += str(list[0]) + " " + str(new_w) + "\n"
|
|
509
|
|
510 old_w_mean = old_ws / wcount
|
|
511 new_w_mean = new_ws / wcount
|
|
512 print "Old W Average: " + str(old_w_mean) + "\n"
|
|
513 print "New W Average: " + str(new_w_mean) + "\n"
|
|
514
|
|
515 with open(arguments.outfile, "wb") as csvfile:
|
|
516 writer = csv.writer(csvfile)
|
|
517 writer.writerows(results)
|
|
518
|
|
519 if (arguments.wig):
|
|
520 if (arguments.normalize):
|
|
521 with open(arguments.wig, "wb") as wigfile:
|
|
522 wigfile.write(wigstring)
|
|
523 else:
|
|
524 for list in results:
|
|
525 wigstring += str(list[0]) + " " + str(list[11]) + "\n"
|
|
526 with open(arguments.wig, "wb") as wigfile:
|
|
527 wigfile.write(wigstring)
|
|
528
|
|
529
|
|
530
|
|
531
|
|
532
|
|
533
|
|
534
|
|
535
|
|
536
|
|
537
|
|
538
|
|
539
|
|
540
|
|
541
|
|
542
|
|
543
|
|
544
|
|
545
|
|
546
|
|
547
|
|
548
|
|
549
|
|
550
|
|
551
|
|
552
|
|
553
|
|
554
|
|
555
|
|
556
|
|
557
|
|
558
|
|
559
|
|
560
|
|
561
|
|
562
|
|
563
|
|
564
|
|
565
|
|
566
|
|
567
|
|
568
|
|
569
|
|
570
|
|
571
|
|
572
|
|
573
|
|
574
|
|
575
|
|
576
|
|
577
|
|
578
|
|
579
|
|
580
|
|
581
|
|
582
|
|
583
|
|
584
|
|
585
|
|
586
|
|
587
|
|
588
|
|
589
|
|
590
|
|
591
|
|
592
|
|
593
|
|
594
|
|
595
|
|
596
|
|
597
|
|
598
|
|
599
|
|
600
|
|
601
|
|
602
|
|
603
|
|
604
|
|
605
|
|
606
|
|
607
|
|
608
|
|
609
|
|
610
|
|
611
|
|
612
|
|
613
|
|
614
|
|
615
|
|
616
|
|
617
|
|
618
|
|
619
|
|
620
|
|
621
|
|
622
|
|
623
|
|
624
|
|
625
|
|
626
|
|
627
|
|
628
|
|
629 # `````````````
|
|
630 # `````````````
|
|
631 # ``@@@@@@@@@``
|
|
632 # ``@@@@@@@@@```
|
|
633 # ``@@@@@@@@@``
|
|
634 # ``@@@@@@@@@``
|
|
635 # ``@@@@@@@@@``
|
|
636 # ``@@@@@@@@@``
|
|
637 # ```@@@@@@@@#``
|
|
638 # ```@@@@@@@@#``
|
|
639 # ```@@@@@@@@+``
|
|
640 # ```@@@@@@@@'``
|
|
641 # ```@@@@@@@@;``
|
|
642 # ```@@@@@@@@;``
|
|
643 # ```@@@@@@@@:``
|
|
644 # ```@@@@@@@@,``
|
|
645 # ``.@@@@@@@@.``
|
|
646 # ``.@@@@@@@@```
|
|
647 # ``.@@@@@@@@```
|
|
648 # ``.@@@@@@@@```
|
|
649 # ``.@@@@@@@@``
|
|
650 # ``,@@@@@@@@``
|
|
651 # ``,@@@@@@@@``
|
|
652 # ``.@@@@@@@@``
|
|
653 # ```@@@@@@@@``
|
|
654 # ``:@@@@@@@@``
|
|
655 # ``:@@@@@@@@``
|
|
656 # ``:@@@@@@@@``
|
|
657 # ``:@@@@@@@@``
|
|
658 # ``'@@@@@@@@``
|
|
659 # ``;@@@@@@@@``
|
|
660 # ``:@@@@@@@@``
|
|
661 # ``:@@@@@@@@``
|
|
662 # ``:@@@@@@@@``
|
|
663 # ``;@@@@@@@#``
|
|
664 # ````+@@@@@@@#`````
|
|
665 # ```````#@@@@@@@#``````
|
|
666 # `````.,@@@@@@@@@...````
|
|
667 # ``@@@@@@@@@@@@@@@@@@;``
|
|
668 # ``@@@@@@@@@@@@@@@@@@;``
|
|
669 # ```````````````````````
|
|
670 # `````````````````````
|
|
671 # ``````.```````
|
|
672 # ````@.''```
|
|
673 # ```# `;```
|
|
674 # ``.+ @```
|
|
675 # ```@ ````,+```
|
|
676 # ```;;````` @```
|
|
677 # ```@ ``````,@```
|
|
678 # ```,+```..```@```
|
|
679 # ```@ ``....```@```
|
|
680 # ```+' ``....```#'``
|
|
681 # ```@```......`` @```
|
|
682 # ```'+```......```'@```
|
|
683 # ```@ ``........```@```
|
|
684 # ```'#```........````@```
|
|
685 # ```@ ``..........```#,``
|
|
686 # ```'#```...........`` @```
|
|
687 # ```@``.............```.+```
|
|
688 # ```:#```.............`` #```
|
|
689 # ``````` ```@ ```.......#......``.@```
|
|
690 # `````````` ```:@```#`......@......```@```
|
|
691 # ``````#@@@`` ```@ `.`:.......@.......`` @```
|
|
692 # ```.#@###@`` ```:@``..`+`....`@.......```@,``
|
|
693 # ```'@####@``` ```@````..@@@@@@@@#,`..#```` @```
|
|
694 # ```#####@@``` ``;@ ,`.,@@. `@@..#..```''``
|
|
695 # ``:####@#```` ```@``@`@@ @@:...`` @```
|
|
696 # ```@#####```` ``,@``.@, ,@`...``:@```
|
|
697 # ``.####@``` ```@.` @` @....``@```
|
|
698 # ``####@``` ``,@ @.` @`.````@```
|
|
699 # ``@##@```` ```@, @: ;# `@..```@.``
|
|
700 # ```@##```` ``.@`,@ @@, #...`` @```
|
|
701 # ```@#@``` ```@, # `@@@ @`.```;'``
|
|
702 # ```##:`` ``.@ +, .@@@ ,'..`` @```
|
|
703 # ``.##``` ```@, @ `@@@ @`.```,+```
|
|
704 # `````@##``` ```@`'. @@@ :...```@``` ``````````
|
|
705 # ````````````````````````````````````````##@``` ```@:`@ @@@ #...`` #``` `````````````````
|
|
706 # ```````````````````````````````````````.###@``` ```@ `, .@@@++'++#@@'` #`..```#``` ````````````'@@@@@.````
|
|
707 # `````+@####################@@@@@@@@@@@@#####@``` ```#;`,. `@#...,.,,,,,,..;@, @....`` @``````````````+@@@########@.``
|
|
708 # `+@##########################################,```` ```````````````@```@ +@,.,,,,,,,,,,,,,,,,,@ @....```#`````````'@@@##############@```
|
|
709 # `@###########################################@``````````````````````````````````````+'``.,'.#.,,,,,,,,,,,,,,,,,,,,.++@......`` @````+@@@#######@+``````'###'``
|
|
710 # ``:@@########@@@@@@@@@@@@@@@@@@@@@@#@@@@@@@##@``````````````````````````......,`,,.,@ ```.##.,.,,,,,,,,,,,,,,,,,,,,.##......`` :.+@@#######@@:``````````###@```
|
|
711 # ````````````````````````````````,#########@###@@@@#################################@'```...@.,,,,,,,,,,,,,,,,,,,,,,,#.........`'@######@@+```````````````@##```
|
|
712 # ```````````````````````````````@#########@#########################################```.....@:,,,,,,,,,,,,,,,,,,..;@..........`@####@@:```````` ```@##@``
|
|
713 # `````@@####@@@##########@@@@@@@@@@@@@@@@@@@@@@@@@@@@#+@+```......@#.,,,,,,,,,,,,,,,,.##..........`` #@#````````` ```##@```
|
|
714 # ``.#@######@####:```````````````````````````````````@ ``.......@:#,,,,,,,,,,,,,,,;@@`............`` @`````` ```@##:``
|
|
715 # ``:########@###@```````````````````````````````````#;```......+..`##,.,,,,,,,,.#@#..'............`` @```` ``;##@```
|
|
716 # ```@@####@@##@'```` ````@ ``.......'.....@@#+;:;'#@@;`...#`............`` @``` ```@##```
|
|
717 # ```````````````` ```@,```.............'..:''':.@`......:............```@.`` ``@###```
|
|
718 # `````````````` ``.@```..............#........'`....................```@``` ``.##@``````
|
|
719 # ```@.``..............`#........,.....................```@.`` ```@#+,``````
|
|
720 # ``.@``.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,.................```@``` ````+#####@````
|
|
721 # ```@````......,........................,....,,.,.,,,,,..` @,`` ```;@@######````
|
|
722 # ``.@```.......,......................`......,...........```@``` ```+#########@````
|
|
723 # ```@```...........`@@+...............+@@`....,...........```@:`` ``;@#########@'```
|
|
724 # ``.@ ``............@@@@@`.........,@@@@@.....,............```@``` ``@#########@#@@```
|
|
725 # ```@.```............@@@@@@@`.....'@@@@@@@.....,............```#'`` ``@###@###@#@#@@@``
|
|
726 # ``.@```.............@@@@@@@@@..+#@@@@@@@@.....,.............`` #``` ``@#@@@@##@#@#@@@``
|
|
727 # ```@````.............@@@@@@@@@@@@@@@@@@@@@.....,.............```'#``` ``'#@@@@##@#@@@@@,`
|
|
728 # ``.@ ``.........,....@@@@@@@@@',##@@@@@@@@`....,..............```@``` ```@@@@@##@@'#@@@@`
|
|
729 # ```@.```.........,....@@@@@@@#`...`#@@@@@@@`....,..............```.@``` ``#@@@@##;```@@@@`
|
|
730 # ``.@ ``.....,,,,,,,.,.@@@@@#.,,,,,,,.#@@@@@,,,,,,,,,,,,:,,,,,,,,.``@``` ``#@@@@#.````@@@@`
|
|
731 # ```@. ``...............@@@;......,......#@@@`...........,.........```@``` ``#@@@;``````@@@@`
|
|
732 # ```@```................@,........,........+#`...........,.........```@.`` ``#@@@;``````@@@@`
|
|
733 # ```@.``...........................,.........`............,..........```@``` ``#@@@'`` ``#@@;`
|
|
734 # ``.@ ``................,..........,......................,..........```#:`` ``#@@@'`` ```#@``
|
|
735 # ```@,``............................,......................,...........`` @``` ``+@@@'`` `````
|
|
736 # ``.@```............................,......................,...........```#+`` ``;@@@+`` ``
|
|
737 # ```@,``.............................,......................,............```@``` ``'@@@+``
|
|
738 # ``.@```.........,...................,......................,............```'#``` ``;@@@+``
|
|
739 # ```@:```.........,...................,......................,.............`` @``` ``:@@@+``
|
|
740 # ```@`..,,,,,,,,,,,,,,,,,,..,.........,......................,.............```'@``` ``;@@@#``
|
|
741 # ``+'```...,.................,....,,,,,,,,,,,,,,,,,,,,,,,,,,,,,..........,...``@``` ``;@@@#``
|
|
742 # ```@ ``....,.................,.......................................,.......``;@``` ``:@@@#``
|
|
743 # ``'#```....,.................,.......................................,.......```@``` ``:@@@@``
|
|
744 # ```@```.....,.................,.......................................,........``;#`` ``:@@@@``
|
|
745 # ```@ ``.....,.................,.......................................,........`` @``` ``:@@@@``
|
|
746 # ``@````...............................................................,........`` @.`` ``;@@@@``
|
|
747 # ``@ ```..............................................................,........`` .#`` ``'@@@@``
|
|
748 # ``# ``````.`.```````````````..````````````````````..`````````````````.`````````` @`` ``'@@@@``
|
|
749 # ``. `````````````````````````````````````````````````````````````````````````` .;`` ``'@@@@``
|
|
750 # ``@;` `` ` ` ` ` ```` ` ````` ` ` `,+@``` ``+@@@@``
|
|
751 # `````:;'++##@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@+.```` ``+@@@@``
|
|
752 # ```````````````````````````+##@``````````````````@#@``````````````````````````````` ``+@@@@``
|
|
753 # `````````````````````````@##@``````````````````@##;```````````````````````````` ``+@@@@``
|
|
754 # ````###,```` ````+##@``` ``+@@@@``
|
|
755 # ``,###``` ``.@##``` ``'@@@@``
|
|
756 # ``###@`` ```@##``` ``'@@@@``
|
|
757 # ```@##@`` ``@##+`` ``'@@@@``
|
|
758 # ```###.`` ``:##@`` ``'@@@@``
|
|
759 # ``:###``` ```##@``` ``'@@@@``
|
|
760 # ``@##@`` ```@##``` ``'@@@@``
|
|
761 # ```@##'`` ``@###`` ``'@@@@``
|
|
762 # ```@##``` ```##@`` ``'@@@@``
|
|
763 # ``,###``` ```@#@``` ``'@@@@``
|
|
764 # ``####`` ``@##.`` ``'@@@@``
|
|
765 # ``@##@`` ``;##@`` ``'@@@@``
|
|
766 # `````````@##@`` ```##@```` ``;@@@@``
|
|
767 # ``````````````@##;`` ```###`````````````` ``;@@@@``
|
|
768 # `````````.,;.```###``` ``@##:`````````````` ``;@@@@``
|
|
769 # `````#@#########@@##``` ``###@@@@@@###@#@'``` ``;@@@@``
|
|
770 # ```@@###############@`` ``,################`` ``;@@@@``
|
|
771 # ``'@################+`` ```###############+`` ``;@@@@``
|
|
772 # `````````````````````` ``###########@#,```` ``.@@@@``
|
|
773 # ````````````````````` ``````````````````` ```@@@.`
|
|
774 # ```````````````` ```````
|
|
775 #
|
|
776 # |