annotate consol_fit.py @ 0:da1c63d00c1b draft

Uploaded
author kaymccoy
date Thu, 11 Aug 2016 18:07:29 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
1 # Consol_fit! It's a script & it'll consolidate your fitness values if you got them from a looping trimming pipeline instead of the standard split-by-transposon pipeline. That's all.
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
2
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
3 import math
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
4 import csv
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
5
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
6
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
7
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
8
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
9
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
10
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
11
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
12
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
13
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
14
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
15 ##### ARGUMENTS #####
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
16
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
17 def print_usage():
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
18 print "\n" + "You are missing one or more required flags. A complete list of flags accepted by calc_fitness is as follows:" + "\n\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
19 print "\033[1m" + "Required" + "\033[0m" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
20 print "-i" + "\t\t" + "The calc_fit file to be consolidated" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
21 print "-out" + "\t\t" + "Name of a file to enter the .csv output." + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
22 print "-out2" + "\t\t" + "Name of a file to put the percent blank score in (used in aggregate)." + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
23 print "-calctxt" + "\t\t" + "The txt file output from calc_fit" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
24 print "-normalize" + "\t" + "A file that contains a list of genes that should have a fitness of 1" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
25 print "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
26 print "\033[1m" + "Optional" + "\033[0m" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
27 print "-cutoff" + "\t\t" + "Discard any positions where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
28 print "-cutoff2" + "\t\t" + "Discard any positions within the normalization genes where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
29 print "-wig" + "\t\t" + "Create a wiggle file for viewing in a genome browser. Provide a filename." + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
30 print "-maxweight" + "\t" + "The maximum weight a transposon gene can have in normalization calculations" + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
31 print "-multiply" + "\t" + "Multiply all fitness scores by a certain value (e.g., the fitness of a knockout). You should normalize the data." + "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
32 print "\n"
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
33
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
import argparse

# Every flag is stored as the raw string given on the command line; the
# numeric ones are converted with float() at the point where they are used.
parser = argparse.ArgumentParser()
parser.add_argument("-calctxt", action="store", dest="calctxt")
parser.add_argument("-normalize", action="store", dest="normalize")
parser.add_argument("-i", action="store", dest="input")
parser.add_argument("-out", action="store", dest="outfile")
parser.add_argument("-out2", action="store", dest="outfile2")
parser.add_argument("-cutoff", action="store", dest="cutoff")
parser.add_argument("-cutoff2", action="store", dest="cutoff2")
parser.add_argument("-wig", action="store", dest="wig")
parser.add_argument("-maxweight", action="store", dest="max_weight")
parser.add_argument("-multiply", action="store", dest="multiply")
arguments = parser.parse_args()

# The input file, the CSV output and the calc_fit text summary are always needed.
if not (arguments.input and arguments.outfile and arguments.calctxt):
    print_usage()
    quit()

# The normalization step writes its blank percentage to the -out2 file.
# Without this check a missing -out2 is only discovered when that file is
# opened, i.e. after consolidation and normalization have already been done.
if arguments.normalize and not arguments.outfile2:
    print_usage()
    quit()

# Defaults for the optional numeric flags. A flag that was not given is None
# (and an empty string is falsy too), so both fall back to the default.
if not arguments.max_weight:
    arguments.max_weight = 75

if not arguments.cutoff:
    arguments.cutoff = 0

# Cutoff2 only has an effect if it's larger than cutoff, since the normalization step references a list of insertions already affected by cutoff.

if not arguments.cutoff2:
    arguments.cutoff2 = 10
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
62
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
# Read the calc_fit text summary: the second whitespace-separated token of
# its second line is the total, and the second token of its third line is
# the reference name.

with open(arguments.calctxt) as summary_file:
    calctxt = summary_file.readlines()

total_line = calctxt[1]
total = float(total_line.split()[1])

refname_line = calctxt[2]
refname = refname_line.split()[1]
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
69
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
70
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
71
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
72
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
73
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
74
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
75
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
76
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
77
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
78
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
##### CONSOLIDATING THE CALC_FIT FILE #####

# Rows whose position lies within this many bases of the first row of a
# cluster are folded into that cluster.
MAX_POSITION_GAP = 4


def _merge_strand(current, incoming):
    """Combine two one-character strand codes ('+', '-' or 'b').

    Matching codes are kept; any disagreement collapses to 'b'.
    """
    if current == incoming:
        return current
    return 'b'


def _merge_strands(current, incoming):
    """Merge the strand fields of two rows that are being consolidated.

    A strand field is either "X/" or "X/Y". The first characters are always
    merged with _merge_strand. For the third character: if both fields have
    one they are merged the same way, otherwise whichever field has one
    supplies it unchanged.
    """
    merged = _merge_strand(current[0], incoming[0]) + '/'
    if len(current) == 3:
        if len(incoming) < 3:
            merged += current[2]
        else:
            merged += _merge_strand(current[2], incoming[2])
    elif len(incoming) == 3:
        merged += incoming[2]
    return merged


# Split every line once up front instead of re-splitting it for each column.
# Assumes the calc_fit CSV uses the same column order as the header written
# below (0 position, 1 strand, 2 count_1, 3 count_2, 9 gene, 10 D) - TODO confirm.
with open(arguments.input) as calc_fit_file:
    rows = [line.split(",") for line in calc_fit_file]

results = [["position", "strand", "count_1", "count_2", "ratio", "mt_freq_t1", "mt_freq_t2", "pop_freq_t1", "pop_freq_t2", "gene", "D", "W", "nW"]]

# rows[0] is the header. D is taken from the first data row and reused for
# every consolidated row; a header-only file simply produces no data rows.
if len(rows) > 1:
    d = float(rows[1][10])

i = 1
while i < len(rows):
    first = rows[i]
    position = float(first[0])
    strands = first[1]
    c1 = float(first[2])
    c2 = float(first[3])
    gene = first[9]

    # Fold in every following row that sits close enough to this cluster's
    # first position: counts are summed and the strand fields are merged.
    # The accumulated strand value must be compared against the incoming
    # row's value (not overwritten by it), otherwise a '+' row followed by a
    # '-' row could never be reported as 'b'.
    while i + 1 < len(rows) and float(rows[i + 1][0]) - position <= MAX_POSITION_GAP:
        i += 1
        neighbour = rows[i]
        c1 += float(neighbour[2])
        c2 += float(neighbour[3])
        strands = _merge_strands(strands, neighbour[1])
    i += 1

    # NOTE(review): a cluster whose summed count_1 is 0 while count_2 is not
    # still raises ZeroDivisionError here, exactly as before.
    if c2 != 0:
        ratio = c2 / c1
    else:
        ratio = 0
    mt_freq_t1 = c1 / total
    mt_freq_t2 = c2 / total
    pop_freq_t1 = 1 - mt_freq_t1
    pop_freq_t2 = 1 - mt_freq_t2

    # W stays 0 ("blank") when nothing was counted at the second time point.
    w = 0
    if mt_freq_t2 != 0:
        top_w = math.log(mt_freq_t2 * (d / mt_freq_t1))
        bot_w = math.log(pop_freq_t2 * (d / pop_freq_t1))
        w = top_w / bot_w

    # nW starts out equal to W; the normalization step overwrites it.
    results.append([position, strands, c1, c2, ratio, mt_freq_t1, mt_freq_t2, pop_freq_t1, pop_freq_t2, gene, d, w, w])

# "wb" is the mode Python 2's csv module expects for output files.
with open(arguments.outfile, "wb") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(results)
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
156
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
157
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
158
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
159
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
160
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
161
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
162
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
163
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
164
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
165
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
##### REDOING NORMALIZATION #####

# results[0] is the header row; every later entry is one consolidated
# insertion site (column 0 position, 2 count_1, 3 count_2, 9 gene, 11 W, 12 nW).
data_rows = results[1:]

# The header below is just in a typical WIG file format; if you'd like to look into this more UCSC has notes on formatting WIG files on their site.

wig_lines = []
if arguments.wig:
    wig_lines.append("track type=wiggle_0 name=" + arguments.wig)
    wig_lines.append("variableStep chrom=" + refname)

if arguments.normalize:
    with open(arguments.normalize) as normalize_file:
        # A set makes the per-row membership test below constant time.
        transposon_genes = set(normalize_file.read().splitlines())
    print("Normalize genes loaded" + "\n")

    score_sum = 0
    count = 0
    weights = []
    scores = []
    for row in data_rows:
        gene = row[9]
        score = row[11]

        # NOTE(review): the truthiness test on the score is kept from the
        # original condition. Because a W of 0 is falsy, zero scores never
        # get past this point, so the blank tally further down is always 0.
        # Confirm whether blanks were meant to be counted here.
        if gene == '' or gene not in transposon_genes or not score:
            continue

        c1 = row[2]
        c2 = row[3]

        # Skips over those insertion locations with too few insertions - their fitness values are less accurate because they're based on such small insertion numbers.
        if float(c1) < float(arguments.cutoff2):
            continue

        # Sets a max weight, to prevent insertion location scores with huge weights from unbalancing the normalization.
        avg = min((c1 + c2) / 2, float(arguments.max_weight))

        score_sum += score
        count += 1
        weights.append(avg)
        scores.append(score)

        print(str(gene) + " " + str(score) + " " + str(c1))

    # Counts and removes all "blank" fitness values of normalization genes - those that = 0 - because they most likely don't really have a fitness value of 0, and you just happened to not get any reads from that location at t2.
    # You might get many transposon genes with a w value of 0 if a bottleneck occurs, which is especially common with in vivo experiments.
    # The filtered lists are rebuilt in one pass; popping entries while
    # indexing up to the original length would run off the end of the list.
    original_count = len(scores)
    kept = [(weight, score) for weight, score in zip(weights, scores) if score != 0]
    blank_count = original_count - len(kept)
    weights = [weight for weight, _ in kept]
    scores = [score for _, score in kept]

    # If no normalization genes can pass the cutoff, normalization cannot occur, so this ends the script and advises the user to try again with a lower cutoff and/or cutoff2.
    if not scores:
        print('ERROR: The normalization genes do not have enough reads to pass cutoff and/or cutoff2; please lower one or both of those arguments.' + "\n")
        quit()

    pc_blank_normals = float(blank_count) / float(original_count)
    print("# blank out of " + str(original_count) + ": " + str(pc_blank_normals) + "\n")
    with open(arguments.outfile2, "w") as f:
        f.write("blanks: " + str(pc_blank_normals) + "\n" + "total: " + str(total) + "\n" + "refname: " + refname)

    # Plain mean over every normalization site that passed the cutoff, and a
    # mean weighted by each site's (capped) average read count.
    average = score_sum / count
    weighted_sum = sum(weight * score for weight, score in zip(weights, scores))
    weight_sum = sum(weights)
    weighted_average = weighted_sum / weight_sum

    print("Normalization step:" + "\n")
    print("Regular average: " + str(average) + "\n")
    print("Weighted Average: " + str(weighted_average) + "\n")
    print("Total Insertions: " + str(count) + "\n")

    old_ws = 0
    new_ws = 0
    wcount = 0
    for row in data_rows:
        old_w = float(row[11])
        new_w = old_w / weighted_average

        # Sometimes you want to multiply all the fitness values by a constant; this does that.
        # For example you might multiply all the values by a constant for a genetic interaction screen - where Tn-Seq is performed as usual except there's one background knockout all the mutants share.
        if arguments.multiply:
            new_w *= float(arguments.multiply)

        # Only positive fitness values contribute to the before/after means.
        if old_w > 0:
            old_ws += old_w
            new_ws += new_w
            wcount += 1

        row[12] = new_w

        if arguments.wig:
            # Positions are held as floats; write them as integers for the wig file.
            wig_lines.append(str(int(row[0])) + " " + str(new_w))

    # With no positive fitness value at all there is no mean to report.
    if wcount:
        old_w_mean = old_ws / wcount
        new_w_mean = new_ws / wcount
        print("Old W Average: " + str(old_w_mean) + "\n")
        print("New W Average: " + str(new_w_mean) + "\n")

    # Rewrite the CSV so the nW column carries the normalized values.
    # "wb" is the mode Python 2's csv module expects for output files.
    with open(arguments.outfile, "wb") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(results)

if arguments.wig:
    if not arguments.normalize:
        # Without normalization the raw W values are plotted. The header row
        # is excluded so that "position W" does not end up in the wig data.
        for row in data_rows:
            wig_lines.append(str(int(row[0])) + " " + str(row[11]))
    with open(arguments.wig, "wb") as wigfile:
        wigfile.write("\n".join(wig_lines) + "\n")
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
292
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
293
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
294 # ___ ___ ___ ___ ___ ___ ___ ___
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
295 # /\__\ /\ \ /\__\ /\__\ /\ \ /\ \ /\ \ /\__\
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
296 # /:/ _/_ /::\ \ |::L__L /::L_L_ /::\ \ /::\ \ /::\ \ |::L__L
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
297 # /::-"\__\ /::\:\__\ |:::\__\ /:/L:\__\ /:/\:\__\ /:/\:\__\ /:/\:\__\ |:::\__\
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
298 # \;:;-",-" \/\::/ / /:;;/__/ \/_/:/ / \:\ \/__/ \:\ \/__/ \:\/:/ / /:;;/__/
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
299 # |:| | /:/ / \/__/ /:/ / \:\__\ \:\__\ \::/ / \/__/
da1c63d00c1b Uploaded
kaymccoy
parents:
diff changeset
300 # \|__| \/__/ \/__/ \/__/ \/__/ \/__/