comparison kmersvm/scripts/nullseq_generate.py @ 7:fd740d515502 draft default tip

Uploaded revised kmer-SVM to include modules from kmer-visual.
author cafletezbrant
date Sun, 16 Jun 2013 18:06:14 -0400
parents 7fe1103032f7
children
comparison
equal deleted inserted replaced
6:1aea7c1a9ab1 7:fd740d515502
69 69
70 70
71 def sample_sequences(positions, buildname, basedir, options): 71 def sample_sequences(positions, buildname, basedir, options):
72 """ 72 """
73 """ 73 """
74 rpt_err = options.rpt_err 74 max_fails = 20
75 gc_err = options.gc_err
76 max_trys = options.max_trys 75 max_trys = options.max_trys
77 norpt = options.norpt 76 norpt = options.norpt
78 nogc = options.nogc 77 nogc = options.nogc
79 78
80 chrnames = sorted(set(map(lambda p: p[0], positions))) 79 chrnames = sorted(set(map(lambda p: p[0], positions)))
119 if options.count == 0: 118 if options.count == 0:
120 count = options.fold*npos 119 count = options.fold*npos
121 else: 120 else:
122 count = options.count 121 count = options.count
123 122
123 #initialize paramter
124 #added by dlee 2/17/13
125 ncfails = 0
126 rpt_err = options.rpt_err
127 gc_err = options.gc_err
128
124 sampled_positions = [] 129 sampled_positions = []
125 while len(sampled_positions) < count: 130 while len(sampled_positions) < count:
126 sampled_prof = random.choice(profiles) 131 sampled_prof = random.choice(profiles)
127 sampled_len = sampled_prof[1] 132 sampled_len = sampled_prof[1]
128 sampled_gc = sampled_prof[2] 133 sampled_gc = sampled_prof[2]
129 sampled_rpt = sampled_prof[3] 134 sampled_rpt = sampled_prof[3]
130 135
136 #relax rpt_err and gc_err if it keep fail to sample a region
137 #added by dlee 2/17/13
138 if ncfails >= max_fails:
139 if options.quiet == False:
140 sys.stderr.write("reached max_fail. relax gc and rpt err criteria\n")
141 ncfails = 0
142 rpt_err += 0.01
143 gc_err += 0.01
144
131 rpt_err_allowed = int(rpt_err*sampled_len) 145 rpt_err_allowed = int(rpt_err*sampled_len)
132 gc_err_allowed = int(gc_err*sampled_len) 146 gc_err_allowed = int(gc_err*sampled_len)
133 trys = 0 147 trys = 0
134 while trys < max_trys: 148 while trys < max_trys:
135 trys += 1 149 trys += 1
154 #mark the sampled regions 168 #mark the sampled regions
155 bits_na[pos:pos_e] = True 169 bits_na[pos:pos_e] = True
156 170
157 sampled_positions.append((chrom, pos, pos_e)) 171 sampled_positions.append((chrom, pos, pos_e))
158 172
173 #reset the counter of consecutive fails
174 #added by dlee 2/17/13
175 ncfails = 0
176
159 #print trys, chrom, pos, pos_e, sampled_len, pos_rpt, sampled_rpt, pos_gc, sampled_gc 177 #print trys, chrom, pos, pos_e, sampled_len, pos_rpt, sampled_rpt, pos_gc, sampled_gc
160 break 178 break
161 else: 179 else:
180 #increase the counter of consecutive fails
181 #added by dlee 2/17/13
182 ncfails += 1
183
162 if options.quiet == False: 184 if options.quiet == False:
163 sys.stderr.write(' '.join(["fail to sample from", \ 185 sys.stderr.write(' '.join(["fail to sample from", \
164 "len=", str(sampled_len), \ 186 "len=", str(sampled_len), \
165 "rpt=", str(sampled_rpt), \ 187 "rpt=", str(sampled_rpt), \
166 "gc=", str(sampled_gc)]) + '\n') 188 "gc=", str(sampled_gc)]) + '\n')