kmersvm: kmersvm/scripts/nullseq_generate.py comparison

comparison kmersvm/scripts/nullseq_generate.py @ 7:fd740d515502 draft default tip

Uploaded revised kmer-SVM to include modules from kmer-visual.

author	cafletezbrant
date	Sun, 16 Jun 2013 18:06:14 -0400
parents	7fe1103032f7
children

comparison

equal deleted inserted replaced

-:1aea7c1a9ab1
+:fd740d515502
 def sample_sequences(positions, buildname, basedir, options):
 	"""
 	"""
-	rpt_err = options.rpt_err
+	max_fails = 20
-	gc_err = options.gc_err
 	max_trys = options.max_trys
 	norpt = options.norpt
 	nogc = options.nogc
 	chrnames = sorted(set(map(lambda p: p[0], positions)))
 		if options.count == 0:
 			count = options.fold*npos
 		else:
 			count = options.count
+		#initialize parameters
+		#added by dlee 2/17/13
+		ncfails = 0
+		rpt_err = options.rpt_err
+		gc_err = options.gc_err
 		sampled_positions = []
 		while len(sampled_positions) < count:
 			sampled_prof = random.choice(profiles)
 			sampled_len = sampled_prof[1]
 			sampled_gc = sampled_prof[2]
 			sampled_rpt = sampled_prof[3]
+			#relax rpt_err and gc_err if it keeps failing to sample a region
+			#added by dlee 2/17/13
+			if ncfails >= max_fails:
+				if options.quiet == False:
+					sys.stderr.write("reached max_fail. relax gc and rpt err criteria\n")
+				ncfails = 0
+				rpt_err += 0.01
+				gc_err += 0.01
 			rpt_err_allowed = int(rpt_err*sampled_len)
 			gc_err_allowed = int(gc_err*sampled_len)
 			trys = 0
 			while trys < max_trys:
 				trys += 1
 				#mark the sampled regions
 				bits_na[pos:pos_e] = True
 				sampled_positions.append((chrom, pos, pos_e))
+				#reset the counter of consecutive fails
+				#added by dlee 2/17/13
+				ncfails = 0
 				#print trys, chrom, pos, pos_e, sampled_len, pos_rpt, sampled_rpt, pos_gc, sampled_gc
 				break
 			else:
+				#increase the counter of consecutive fails
+				#added by dlee 2/17/13
+				ncfails += 1
 				if options.quiet == False:
 					sys.stderr.write(' '.join(["fail to sample from", \
 							"len=", str(sampled_len), \
 							"rpt=", str(sampled_rpt), \
 							"gc=", str(sampled_gc)]) + '\n')

Mercurial > repos > cafletezbrant > kmersvm

comparison kmersvm/scripts/nullseq_generate.py @ 7:fd740d515502 draft default tip