Mercurial > repos > cafletezbrant > kmersvm
comparison kmersvm/scripts/nullseq_generate.py @ 7:fd740d515502 draft default tip
Uploaded revised kmer-SVM to include modules from kmer-visual.
author | cafletezbrant |
---|---|
date | Sun, 16 Jun 2013 18:06:14 -0400 |
parents | 7fe1103032f7 |
children |
comparison
equal
deleted
inserted
replaced
6:1aea7c1a9ab1 | 7:fd740d515502 |
---|---|
69 | 69 |
70 | 70 |
71 def sample_sequences(positions, buildname, basedir, options): | 71 def sample_sequences(positions, buildname, basedir, options): |
72 """ | 72 """ |
73 """ | 73 """ |
74 rpt_err = options.rpt_err | 74 max_fails = 20 |
75 gc_err = options.gc_err | |
76 max_trys = options.max_trys | 75 max_trys = options.max_trys |
77 norpt = options.norpt | 76 norpt = options.norpt |
78 nogc = options.nogc | 77 nogc = options.nogc |
79 | 78 |
80 chrnames = sorted(set(map(lambda p: p[0], positions))) | 79 chrnames = sorted(set(map(lambda p: p[0], positions))) |
119 if options.count == 0: | 118 if options.count == 0: |
120 count = options.fold*npos | 119 count = options.fold*npos |
121 else: | 120 else: |
122 count = options.count | 121 count = options.count |
123 | 122 |
123 #initialize paramter | |
124 #added by dlee 2/17/13 | |
125 ncfails = 0 | |
126 rpt_err = options.rpt_err | |
127 gc_err = options.gc_err | |
128 | |
124 sampled_positions = [] | 129 sampled_positions = [] |
125 while len(sampled_positions) < count: | 130 while len(sampled_positions) < count: |
126 sampled_prof = random.choice(profiles) | 131 sampled_prof = random.choice(profiles) |
127 sampled_len = sampled_prof[1] | 132 sampled_len = sampled_prof[1] |
128 sampled_gc = sampled_prof[2] | 133 sampled_gc = sampled_prof[2] |
129 sampled_rpt = sampled_prof[3] | 134 sampled_rpt = sampled_prof[3] |
130 | 135 |
136 #relax rpt_err and gc_err if it keep fail to sample a region | |
137 #added by dlee 2/17/13 | |
138 if ncfails >= max_fails: | |
139 if options.quiet == False: | |
140 sys.stderr.write("reached max_fail. relax gc and rpt err criteria\n") | |
141 ncfails = 0 | |
142 rpt_err += 0.01 | |
143 gc_err += 0.01 | |
144 | |
131 rpt_err_allowed = int(rpt_err*sampled_len) | 145 rpt_err_allowed = int(rpt_err*sampled_len) |
132 gc_err_allowed = int(gc_err*sampled_len) | 146 gc_err_allowed = int(gc_err*sampled_len) |
133 trys = 0 | 147 trys = 0 |
134 while trys < max_trys: | 148 while trys < max_trys: |
135 trys += 1 | 149 trys += 1 |
154 #mark the sampled regions | 168 #mark the sampled regions |
155 bits_na[pos:pos_e] = True | 169 bits_na[pos:pos_e] = True |
156 | 170 |
157 sampled_positions.append((chrom, pos, pos_e)) | 171 sampled_positions.append((chrom, pos, pos_e)) |
158 | 172 |
173 #reset the counter of consecutive fails | |
174 #added by dlee 2/17/13 | |
175 ncfails = 0 | |
176 | |
159 #print trys, chrom, pos, pos_e, sampled_len, pos_rpt, sampled_rpt, pos_gc, sampled_gc | 177 #print trys, chrom, pos, pos_e, sampled_len, pos_rpt, sampled_rpt, pos_gc, sampled_gc |
160 break | 178 break |
161 else: | 179 else: |
180 #increase the counter of consecutive fails | |
181 #added by dlee 2/17/13 | |
182 ncfails += 1 | |
183 | |
162 if options.quiet == False: | 184 if options.quiet == False: |
163 sys.stderr.write(' '.join(["fail to sample from", \ | 185 sys.stderr.write(' '.join(["fail to sample from", \ |
164 "len=", str(sampled_len), \ | 186 "len=", str(sampled_len), \ |
165 "rpt=", str(sampled_rpt), \ | 187 "rpt=", str(sampled_rpt), \ |
166 "gc=", str(sampled_gc)]) + '\n') | 188 "gc=", str(sampled_gc)]) + '\n') |