changeset 7:fd740d515502 draft default tip

Uploaded revised kmer-SVM to include modules from kmer-visual.
author cafletezbrant
date Sun, 16 Jun 2013 18:06:14 -0400
parents 1aea7c1a9ab1
children
files kmersvm/README.txt kmersvm/install.sh kmersvm/kmer2meme.pl kmersvm/kmertopwm.xml kmersvm/nullseq.xml kmersvm/scripts/kmersvm_output_weights.out kmersvm/scripts/kmersvm_train.py kmersvm/scripts/kmersvm_train_kfb_copy.py kmersvm/scripts/libkmersvm.pyc kmersvm/scripts/nullseq_generate.py kmersvm/tomtom.xml kmersvm/train.xml
diffstat 12 files changed, 3218 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/kmersvm/README.txt	Mon Aug 20 21:42:29 2012 -0400
+++ b/kmersvm/README.txt	Sun Jun 16 18:06:14 2013 -0400
@@ -68,6 +68,8 @@
     <tool file="kmersvm/train.xml"/>
     <tool file="kmersvm/split_genome.xml"/>
     <tool file="kmersvm/seqprofile.xml" />
+    <tool file="kmersvm/kmertopwm.xml" />
+    <tool file="kmersvm/tomtom.xml" />
   </section>
 
 Tool Tests:
--- a/kmersvm/install.sh	Mon Aug 20 21:42:29 2012 -0400
+++ b/kmersvm/install.sh	Sun Jun 16 18:06:14 2013 -0400
@@ -1,12 +1,11 @@
 #!/bin/bash
-cd "$1"
-cp tool-data/nullseq_indices.loc.sample ../../tool-data/nullseq_indices.loc
-cp tool-data/sample_roc_chen.png ../../tool-data
-cp tool-data/classify_output.out ../../test-data
-cp tool-data/classify_test.fa ../../test-data
-cp tool-data/kmersvm_output_weights.out ../../test-data
-cp tool-data/test_positive.fa ../../test-data
-cp tool-data/test_negative.fa ../../test-data
-cp tool-data/test_weights.out ../../test-data
-cp tool-data/train_predictions.out ../../test-data
+cp tool-data/nullseq_indices.loc.sample ~/galaxy-dist/tool-data/nullseq_indices.loc
+cp tool-data/sample_roc_chen.png ~/galaxy-dist/tool-data
+cp tool-data/classify_output.out ~/galaxy-dist/test-data
+cp tool-data/classify_test.fa ~/galaxy-dist/test-data
+cp tool-data/kmersvm_output_weights.out ~/galaxy-dist/test-data
+cp tool-data/test_positive.fa ~/galaxy-dist/test-data
+cp tool-data/test_negative.fa ~/galaxy-dist/test-data
+cp tool-data/test_weights.out ~/galaxy-dist/test-data
+cp tool-data/train_predictions.out ~/galaxy-dist/test-data
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/kmer2meme.pl	Sun Jun 16 18:06:14 2013 -0400
@@ -0,0 +1,49 @@
+# Usage: kmer2meme.pl WEIGHTS N -- writes the first N and last N k-mers of the
+# weight file (8 header lines, then tab-separated rows) to kmer2meme.meme.
+use strict;
+use warnings;
+
+my ($w_file, $num_kmers) = @ARGV;
+die "usage: $0 WEIGHTS N\n" unless defined $num_kmers && $num_kmers =~ /^\d+$/;
+open(my $w_fh, "<", $w_file) or die "cannot open $w_file: $!\n";
+my @weights = <$w_fh>;
+close($w_fh);
+
+# Skip the header, then clamp both slices to the rows that exist so a short
+# file yields neither undefined entries nor the same k-mer twice.
+my @rows = @weights > 8 ? @weights[8..$#weights] : ();
+my $top = $num_kmers < @rows ? $num_kmers : scalar(@rows);
+my $bottom = $num_kmers < @rows - $top ? $num_kmers : @rows - $top;
+# only the first column (the k-mer itself) is needed
+my @kmers = map { (split('\t', $_))[0] } @rows[0..($top-1), (@rows-$bottom)..$#rows];
+
+open(my $o_fh, ">", "kmer2meme.meme") or die "cannot open kmer2meme.meme: $!\n";
+print $o_fh
+"MEME version 4
+
+ALPHABET= ACGT
+
+strands: + -
+
+Background letter frequencies (from no specific genome):
+A 0.25 C 0.25 G 0.25 T 0.25\n\n";
+
+# Matrix rows in ACGT column order: one-hot for a known base, uniform otherwise.
+my %row_for = (
+	A => " 1.00  0.00  0.00  0.00\n",
+	C => " 0.00  1.00  0.00  0.00\n",
+	G => " 0.00  0.00  1.00  0.00\n",
+	T => " 0.00  0.00  0.00  1.00\n",
+);
+my $uniform_row = " 0.25  0.25  0.25  0.25\n";
+
+foreach my $kmer (@kmers) {
+	my $width = length($kmer);
+	print $o_fh "MOTIF $kmer\n";
+	print $o_fh "letter-probability matrix: alength= 4 w= $width nsites= 1 E= 0\n";
+	# every row, including the uniform one, goes to the file rather than STDOUT
+	print {$o_fh} $row_for{$_} || $uniform_row for split(//, $kmer);
+	print $o_fh "\n";
+}
+
+close($o_fh) or die "cannot write kmer2meme.meme: $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/kmertopwm.xml	Sun Jun 16 18:06:14 2013 -0400
@@ -0,0 +1,25 @@
+<tool id="kmer2meme" name="Kmer To MEME">
+	<description>Convert kmers to MEME format for motif finding by Tomtom</description>
+	<command interpreter="perl">kmer2meme.pl
+		$weights $N
+	</command>
+	
+	<inputs>
+		<param format="tabular" name="weights" type="data" label="Kmer Weights"/>
+		<param type="integer" name="N" value="10" label="Kmer Number">
+			<validator type="in_range" message="Kmer number must be in range 1 - 50" min="1" max="50"/>
+		</param>
+	</inputs>
+	
+	<outputs>
+		<data format="txt" from_work_dir="kmer2meme.meme" name="MEME for Kmers" label="${tool.name} on ${on_string}: MEME"/>
+	</outputs>
+	
+	<help>
+This is a utility function that creates PWMs in MEME format for use with Tomtom.
+		
+'Kmer Weights' is the weight file generated by 'Train SVM'.	
+
+'Kmer Number' (N) is how many kmers to convert from each end of the weight file: the first N and the last N rows, giving up to 2N motifs.
+	</help>
+</tool>
--- a/kmersvm/nullseq.xml	Mon Aug 20 21:42:29 2012 -0400
+++ b/kmersvm/nullseq.xml	Sun Jun 16 18:06:14 2013 -0400
@@ -7,9 +7,9 @@
   	-x $fold -r $rseed -g $gc_err -t $rpt_err $input $dbkey ${indices_path.fields.path}
   </command>
   <inputs>
-    <param name="fold" type="integer" value="1" label="# of Fold-Increase" />
-    <param name="gc_err" type="float" value="0.02" label="Allowable GC Error" />
-    <param name="rpt_err" type="float" value="0.02" label="Allowable Repeat Error" />
+    <param name="fold" type="integer" value="10" label="# of Fold-Increase" min="1" max="50" />
+    <param name="gc_err" type="float" value="0.02" label="Allowable GC Error" min="0.01" max="0.1"/>
+    <param name="rpt_err" type="float" value="0.02" label="Allowable Repeat Error" min="0.01" max="0.1"/>
     <param name="rseed" type="integer" value="1" label="Random Number Seed" />    
     <param format="interval" name="input" type="data" label="BED File of Positive Regions" />
       <validator type="unspecified_build" />
@@ -44,6 +44,16 @@
 **What it does**
   
 Takes an input BED file and generates a set of sequences for use as negative data (null sequences) in Train SVM similar in length, GC content and repeat fraction.  Uses random sampling for efficiency.
+
+----
+
+**Recommended Settings**
+
+Fold-Increase: The default (10) is recommended; values from 1 up to 50 times the size of the positive set are accepted.
+
+GC Error, Repeat Error: Default is recommended.
+
+----
   
 **Parameters**
   
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/scripts/kmersvm_output_weights.out	Sun Jun 16 18:06:14 2013 -0400
@@ -0,0 +1,2088 @@
+#parameters:
+#kernel=1
+#kmerlen=6
+#bias=0.930368454935
+#A=0
+#B=0
+#NOTE: k-mers with large negative weights are also important. They can be found at the bottom of the list.
+#k-mer	revcomp	SVM-weight
+AAAAAA	TTTTTT	0.553324469582
+AAAAAC	GTTTTT	1.0689111563
+AAAAAG	CTTTTT	0.386997519222
+AAAAAT	ATTTTT	0.371506923691
+AAAACA	TGTTTT	0.582941243013
+AAAACC	GGTTTT	-0.00322550380692
+AAAACG	CGTTTT	0.115121834279
+AAAACT	AGTTTT	0.64234562623
+AAAAGA	TCTTTT	0.180098364822
+AAAAGC	GCTTTT	-0.370020965708
+AAAAGG	CCTTTT	-0.148530185678
+AAAAGT	ACTTTT	1.19477154105
+AAAATA	TATTTT	1.23378644064
+AAAATC	GATTTT	-0.980691936551
+AAAATG	CATTTT	0.221932570601
+AAAATT	AATTTT	0.449293989111
+AAACAA	TTGTTT	-1.57507857322
+AAACAC	GTGTTT	-2.1383477652
+AAACAG	CTGTTT	-0.720402198466
+AAACAT	ATGTTT	-0.915754056705
+AAACCA	TGGTTT	0.959609519802
+AAACCC	GGGTTT	0.150812627734
+AAACCG	CGGTTT	-0.204853254781
+AAACCT	AGGTTT	0.486872195933
+AAACGA	TCGTTT	-0.404172254228
+AAACGC	GCGTTT	0.471891306908
+AAACGG	CCGTTT	-0.732914484007
+AAACGT	ACGTTT	-0.79028442459
+AAACTA	TAGTTT	-0.200848111441
+AAACTC	GAGTTT	-0.00260431934722
+AAACTG	CAGTTT	0.456381173353
+AAACTT	AAGTTT	0.639062115506
+AAAGAA	TTCTTT	0.257495713463
+AAAGAC	GTCTTT	-0.228023730318
+AAAGAG	CTCTTT	0.247579662852
+AAAGAT	ATCTTT	-0.304817901111
+AAAGCA	TGCTTT	-0.155658358179
+AAAGCC	GGCTTT	0.416290507318
+AAAGCG	CGCTTT	-0.319122803172
+AAAGCT	AGCTTT	-0.10365386651
+AAAGGA	TCCTTT	0.465546368844
+AAAGGC	GCCTTT	0.293788204177
+AAAGGG	CCCTTT	-0.738483493496
+AAAGGT	ACCTTT	-1.46557110152
+AAAGTA	TACTTT	-0.487013201424
+AAAGTC	GACTTT	-0.815561145197
+AAAGTG	CACTTT	0.523242409873
+AAAGTT	AACTTT	1.49610361616
+AAATAA	TTATTT	-0.50775903415
+AAATAC	GTATTT	-0.925034113885
+AAATAG	CTATTT	-1.3099174763
+AAATAT	ATATTT	-1.8047214372
+AAATCA	TGATTT	-0.899342838259
+AAATCC	GGATTT	-0.146519411262
+AAATCG	CGATTT	-0.267007765303
+AAATCT	AGATTT	0.291560176957
+AAATGA	TCATTT	-0.514145682209
+AAATGC	GCATTT	0.954279511728
+AAATGG	CCATTT	-0.711449233898
+AAATGT	ACATTT	-0.752526282583
+AAATTA	TAATTT	0.00593646027611
+AAATTC	GAATTT	1.26182226428
+AAATTG	CAATTT	-0.0953103902516
+AAATTT	AAATTT	0.189613989631
+AACAAA	TTTGTT	-1.37122264124
+AACAAC	GTTGTT	0.00146931165158
+AACAAG	CTTGTT	-0.803037783522
+AACAAT	ATTGTT	0.00385062783094
+AACACA	TGTGTT	-0.363356864114
+AACACC	GGTGTT	-0.779849447985
+AACACG	CGTGTT	-0.97471290289
+AACACT	AGTGTT	-0.335935444604
+AACAGA	TCTGTT	-1.28171369495
+AACAGC	GCTGTT	-0.411448258216
+AACAGG	CCTGTT	-0.469780016788
+AACAGT	ACTGTT	-0.453227635948
+AACATA	TATGTT	-0.945101613087
+AACATC	GATGTT	-0.0283361724906
+AACATG	CATGTT	-0.575985749697
+AACATT	AATGTT	-0.0429091030472
+AACCAA	TTGGTT	-0.0823228706445
+AACCAC	GTGGTT	2.58639657949
+AACCAG	CTGGTT	-0.276555339554
+AACCAT	ATGGTT	-0.11357479766
+AACCCA	TGGGTT	0.192569792654
+AACCCC	GGGGTT	-0.0425603516266
+AACCCG	CGGGTT	-0.404973603501
+AACCCT	AGGGTT	0.0764485451656
+AACCGA	TCGGTT	-0.137853811078
+AACCGC	GCGGTT	0.710876928983
+AACCGG	CCGGTT	0.272143672682
+AACCGT	ACGGTT	-1.42589113548
+AACCTA	TAGGTT	-0.611888789113
+AACCTC	GAGGTT	0.837839227815
+AACCTG	CAGGTT	-0.422972816872
+AACCTT	AAGGTT	0.0794552714245
+AACGAA	TTCGTT	0.662384258058
+AACGAC	GTCGTT	-0.711145623237
+AACGAG	CTCGTT	0.198654543303
+AACGAT	ATCGTT	-1.14468704666
+AACGCA	TGCGTT	-0.143027192823
+AACGCC	GGCGTT	-0.0833645930753
+AACGCG	CGCGTT	0.0613946992336
+AACGCT	AGCGTT	0.379426684798
+AACGGA	TCCGTT	-0.902189680896
+AACGGC	GCCGTT	0.725518300654
+AACGGG	CCCGTT	0.487999502266
+AACGGT	ACCGTT	-0.323411669378
+AACGTA	TACGTT	0.429654445762
+AACGTC	GACGTT	-0.392752266586
+AACGTG	CACGTT	-1.04792887194
+AACGTT	AACGTT	0.616207780774
+AACTAA	TTAGTT	-0.843322479317
+AACTAC	GTAGTT	0.184493095017
+AACTAG	CTAGTT	0.0179086348231
+AACTAT	ATAGTT	0.994586833037
+AACTCA	TGAGTT	-0.12838936418
+AACTCC	GGAGTT	0.726028047244
+AACTCG	CGAGTT	0.205501965615
+AACTCT	AGAGTT	0.78739364499
+AACTGA	TCAGTT	0.168022862889
+AACTGC	GCAGTT	0.216791948549
+AACTGG	CCAGTT	-0.314557426071
+AACTGT	ACAGTT	-0.0111281613254
+AACTTA	TAAGTT	-0.183787054209
+AACTTC	GAAGTT	0.84215541061
+AACTTG	CAAGTT	0.376469022105
+AAGAAA	TTTCTT	0.832667586229
+AAGAAC	GTTCTT	0.93622383333
+AAGAAG	CTTCTT	0.271875957941
+AAGAAT	ATTCTT	1.43057617592
+AAGACA	TGTCTT	-0.132932072786
+AAGACC	GGTCTT	0.0903286121328
+AAGACG	CGTCTT	0.184576127381
+AAGACT	AGTCTT	0.222042406341
+AAGAGA	TCTCTT	-0.0130328461327
+AAGAGC	GCTCTT	-0.37404789079
+AAGAGG	CCTCTT	-0.163448118904
+AAGAGT	ACTCTT	0.769472446615
+AAGATA	TATCTT	-0.790403171158
+AAGATC	GATCTT	-0.120008098951
+AAGATG	CATCTT	0.598644235302
+AAGATT	AATCTT	1.39556497538
+AAGCAA	TTGCTT	-0.984888908248
+AAGCAC	GTGCTT	0.783311673894
+AAGCAG	CTGCTT	-0.551197739368
+AAGCAT	ATGCTT	0.368035643478
+AAGCCA	TGGCTT	0.281990338241
+AAGCCC	GGGCTT	-0.699900156956
+AAGCCG	CGGCTT	0.986454860217
+AAGCCT	AGGCTT	0.446570897308
+AAGCGA	TCGCTT	-0.717502459474
+AAGCGC	GCGCTT	0.292841378565
+AAGCGG	CCGCTT	0.726632808198
+AAGCGT	ACGCTT	-0.441284795806
+AAGCTA	TAGCTT	-1.65918664431
+AAGCTC	GAGCTT	0.0882183240244
+AAGCTG	CAGCTT	-0.134531324525
+AAGCTT	AAGCTT	0.499772590447
+AAGGAA	TTCCTT	0.608701292821
+AAGGAC	GTCCTT	0.174988238866
+AAGGAG	CTCCTT	0.56638313976
+AAGGAT	ATCCTT	0.86759780737
+AAGGCA	TGCCTT	-0.0660388079911
+AAGGCC	GGCCTT	0.353849453837
+AAGGCG	CGCCTT	-0.103035156648
+AAGGCT	AGCCTT	0.226115108478
+AAGGGA	TCCCTT	-0.0242048325334
+AAGGGC	GCCCTT	-0.991808457742
+AAGGGG	CCCCTT	-0.108713197864
+AAGGGT	ACCCTT	0.426314095539
+AAGGTA	TACCTT	0.0063082317847
+AAGGTC	GACCTT	-2.22148605405
+AAGGTG	CACCTT	-0.171279446553
+AAGTAA	TTACTT	-0.660359179691
+AAGTAC	GTACTT	1.37990716767
+AAGTAG	CTACTT	0.00364551059326
+AAGTAT	ATACTT	0.0627944758868
+AAGTCA	TGACTT	-0.0836841889637
+AAGTCC	GGACTT	-0.0783950838806
+AAGTCG	CGACTT	-0.331907177283
+AAGTCT	AGACTT	0.922862248198
+AAGTGA	TCACTT	0.301713638482
+AAGTGC	GCACTT	0.372118346492
+AAGTGG	CCACTT	-0.29435234237
+AAGTGT	ACACTT	0.453500782049
+AAGTTA	TAACTT	-0.0413529710922
+AAGTTC	GAACTT	0.490694081954
+AAGTTG	CAACTT	1.43527540302
+AATAAA	TTTATT	-0.464685282825
+AATAAC	GTTATT	0.473126192871
+AATAAG	CTTATT	-0.361537503398
+AATAAT	ATTATT	-1.14135793996
+AATACA	TGTATT	-0.0868676244573
+AATACC	GGTATT	-0.432019199972
+AATACG	CGTATT	-0.812104843229
+AATACT	AGTATT	-0.160253986465
+AATAGA	TCTATT	-0.819357586187
+AATAGC	GCTATT	-0.582278240034
+AATAGG	CCTATT	-0.102936475866
+AATAGT	ACTATT	0.64849424254
+AATATA	TATATT	-0.875327209013
+AATATC	GATATT	-0.538482532464
+AATATG	CATATT	0.442497512442
+AATATT	AATATT	-1.60012723551
+AATCAA	TTGATT	-1.37621833951
+AATCAC	GTGATT	0.91628767144
+AATCAG	CTGATT	0.070484765244
+AATCAT	ATGATT	0.650606183815
+AATCCA	TGGATT	-1.02720580521
+AATCCC	GGGATT	-0.352811994914
+AATCCG	CGGATT	0.0165933980204
+AATCCT	AGGATT	0.712428149182
+AATCGA	TCGATT	-0.592300021647
+AATCGC	GCGATT	0.812676084435
+AATCGG	CCGATT	0.39632534305
+AATCGT	ACGATT	-0.342808208442
+AATCTA	TAGATT	-1.480756961
+AATCTC	GAGATT	-0.751509737277
+AATCTG	CAGATT	-0.0237559933613
+AATGAA	TTCATT	-0.771072829647
+AATGAC	GTCATT	-0.14602458728
+AATGAG	CTCATT	0.492363745269
+AATGAT	ATCATT	-0.609265638394
+AATGCA	TGCATT	0.167952139321
+AATGCC	GGCATT	0.965994735545
+AATGCG	CGCATT	-0.166276358058
+AATGCT	AGCATT	1.03827471911
+AATGGA	TCCATT	-0.187500612316
+AATGGC	GCCATT	0.216365462216
+AATGGG	CCCATT	-0.0888492445946
+AATGGT	ACCATT	0.14433579757
+AATGTA	TACATT	0.283672586491
+AATGTC	GACATT	-0.913297517025
+AATGTG	CACATT	1.12759664753
+AATTAA	TTAATT	-2.21736658818
+AATTAC	GTAATT	0.126090373031
+AATTAG	CTAATT	-0.499643372776
+AATTAT	ATAATT	0.0590969699364
+AATTCA	TGAATT	-0.535790423504
+AATTCC	GGAATT	1.30916473709
+AATTCG	CGAATT	0.0530089957774
+AATTCT	AGAATT	1.3354098108
+AATTGA	TCAATT	-1.66820825185
+AATTGC	GCAATT	0.00310194879804
+AATTGG	CCAATT	0.419449404673
+AATTGT	ACAATT	0.464182538132
+AATTTA	TAAATT	-0.570808133223
+AATTTC	GAAATT	0.929450761295
+AATTTG	CAAATT	0.406154967173
+ACAAAA	TTTTGT	0.534003859773
+ACAAAC	GTTTGT	-0.750597270967
+ACAAAG	CTTTGT	-0.174225381133
+ACAAAT	ATTTGT	-0.967477603914
+ACAACA	TGTTGT	0.684531901144
+ACAACC	GGTTGT	0.306111794846
+ACAACG	CGTTGT	-0.492170779986
+ACAACT	AGTTGT	0.12647703187
+ACAAGA	TCTTGT	0.435693866629
+ACAAGC	GCTTGT	0.162579020622
+ACAAGG	CCTTGT	-0.482270829511
+ACAAGT	ACTTGT	0.368700538071
+ACAATA	TATTGT	-0.556960796215
+ACAATC	GATTGT	0.447210789307
+ACAATG	CATTGT	-0.652007172748
+ACACAA	TTGTGT	-0.58426177344
+ACACAC	GTGTGT	0.0976403710637
+ACACAG	CTGTGT	-0.67562334546
+ACACAT	ATGTGT	-0.783431510249
+ACACCA	TGGTGT	-0.0559002312137
+ACACCC	GGGTGT	-0.279278917913
+ACACCG	CGGTGT	0.927647825457
+ACACCT	AGGTGT	-2.31444811782
+ACACGA	TCGTGT	-0.290620517011
+ACACGC	GCGTGT	-0.433731209379
+ACACGG	CCGTGT	0.133955133112
+ACACGT	ACGTGT	-0.0840820691034
+ACACTA	TAGTGT	-1.53601873195
+ACACTC	GAGTGT	-0.0411823725391
+ACACTG	CAGTGT	-0.133117765869
+ACAGAA	TTCTGT	0.192830326341
+ACAGAC	GTCTGT	-0.344297166277
+ACAGAG	CTCTGT	0.0995945155779
+ACAGAT	ATCTGT	-1.71253677969
+ACAGCA	TGCTGT	0.103705732884
+ACAGCC	GGCTGT	-0.141361720091
+ACAGCG	CGCTGT	0.0923052988622
+ACAGCT	AGCTGT	-2.06591471431
+ACAGGA	TCCTGT	0.106606820089
+ACAGGC	GCCTGT	0.243978095226
+ACAGGG	CCCTGT	-0.163198751642
+ACAGGT	ACCTGT	-1.35902898114
+ACAGTA	TACTGT	0.330450384923
+ACAGTC	GACTGT	-0.0441089075653
+ACAGTG	CACTGT	0.410210459073
+ACATAA	TTATGT	-0.124637932465
+ACATAC	GTATGT	0.0408944886861
+ACATAG	CTATGT	-0.281098621777
+ACATAT	ATATGT	-1.5461561949
+ACATCA	TGATGT	0.404823860207
+ACATCC	GGATGT	-0.0771250376801
+ACATCG	CGATGT	0.348036576745
+ACATCT	AGATGT	-1.0135367165
+ACATGA	TCATGT	-0.0533364791011
+ACATGC	GCATGT	-0.230663552067
+ACATGG	CCATGT	0.354870946287
+ACATGT	ACATGT	-0.255821119156
+ACATTA	TAATGT	-0.305695214437
+ACATTC	GAATGT	2.10633976985
+ACATTG	CAATGT	-0.969761944969
+ACCAAA	TTTGGT	-0.0894837549998
+ACCAAC	GTTGGT	-0.310977709975
+ACCAAG	CTTGGT	0.126792582447
+ACCAAT	ATTGGT	-0.208620509384
+ACCACA	TGTGGT	3.95883323455
+ACCACC	GGTGGT	-0.550768309866
+ACCACG	CGTGGT	1.64662237122
+ACCACT	AGTGGT	-0.252204442565
+ACCAGA	TCTGGT	-0.182102497222
+ACCAGC	GCTGGT	-0.666347426374
+ACCAGG	CCTGGT	-0.302144138217
+ACCAGT	ACTGGT	0.0178732652384
+ACCATA	TATGGT	-0.128894926297
+ACCATC	GATGGT	-0.113161940262
+ACCATG	CATGGT	0.0567971909973
+ACCCAA	TTGGGT	-0.112170340264
+ACCCAC	GTGGGT	-0.0932909430755
+ACCCAG	CTGGGT	0.38534995457
+ACCCAT	ATGGGT	0.86383897393
+ACCCCA	TGGGGT	0.69577964714
+ACCCCC	GGGGGT	0.336687664266
+ACCCCG	CGGGGT	-0.0474965784183
+ACCCCT	AGGGGT	0.0583362287737
+ACCCGA	TCGGGT	-0.651964147142
+ACCCGC	GCGGGT	0.430185118239
+ACCCGG	CCGGGT	0.0136510502891
+ACCCGT	ACGGGT	-0.66740789625
+ACCCTA	TAGGGT	-0.0140774209654
+ACCCTC	GAGGGT	-0.0866227575362
+ACCCTG	CAGGGT	0.831880338582
+ACCGAA	TTCGGT	0.162092610395
+ACCGAC	GTCGGT	-0.213537840127
+ACCGAG	CTCGGT	-0.798483849782
+ACCGAT	ATCGGT	-0.171301259624
+ACCGCA	TGCGGT	1.12176529563
+ACCGCC	GGCGGT	-0.0399431587546
+ACCGCG	CGCGGT	-0.00200779866329
+ACCGCT	AGCGGT	0.807679694982
+ACCGGA	TCCGGT	0.475032143564
+ACCGGC	GCCGGT	0.644168012631
+ACCGGG	CCCGGT	0.0514057931436
+ACCGGT	ACCGGT	-0.201116834029
+ACCGTA	TACGGT	-0.0510651546867
+ACCGTC	GACGGT	-1.28871756987
+ACCGTG	CACGGT	0.480206522572
+ACCTAA	TTAGGT	-0.0164396390877
+ACCTAC	GTAGGT	-0.948871399654
+ACCTAG	CTAGGT	0.902143796699
+ACCTAT	ATAGGT	-0.805157821049
+ACCTCA	TGAGGT	0.0288013729329
+ACCTCC	GGAGGT	-0.123113465161
+ACCTCG	CGAGGT	0.262603098349
+ACCTCT	AGAGGT	-0.105633795468
+ACCTGA	TCAGGT	-1.25693360781
+ACCTGC	GCAGGT	-2.19188428198
+ACCTGG	CCAGGT	-1.22596740891
+ACCTTA	TAAGGT	-0.397764436469
+ACCTTC	GAAGGT	-0.365879915113
+ACCTTG	CAAGGT	-0.468430599887
+ACGAAA	TTTCGT	0.264150600855
+ACGAAC	GTTCGT	-0.264756483319
+ACGAAG	CTTCGT	0.204030639912
+ACGAAT	ATTCGT	-0.0361589742531
+ACGACA	TGTCGT	-0.610965249538
+ACGACC	GGTCGT	-0.241773839788
+ACGACG	CGTCGT	0.0286911417218
+ACGACT	AGTCGT	-0.446205245539
+ACGAGA	TCTCGT	0.605153633971
+ACGAGC	GCTCGT	-0.265340587538
+ACGAGG	CCTCGT	-0.268759016858
+ACGAGT	ACTCGT	0.640180324145
+ACGATA	TATCGT	0.155583247136
+ACGATC	GATCGT	0.0616053169407
+ACGATG	CATCGT	-0.458209843991
+ACGCAA	TTGCGT	0.525980393513
+ACGCAC	GTGCGT	-1.07211159219
+ACGCAG	CTGCGT	0.297215899525
+ACGCAT	ATGCGT	-0.62466515887
+ACGCCA	TGGCGT	-0.802770461001
+ACGCCC	GGGCGT	0.447952405036
+ACGCCG	CGGCGT	-0.115846359904
+ACGCCT	AGGCGT	-0.238430995845
+ACGCGA	TCGCGT	0.095946201326
+ACGCGC	GCGCGT	-0.0433332223788
+ACGCGG	CCGCGT	0.266229064785
+ACGCGT	ACGCGT	-0.163327119327
+ACGCTA	TAGCGT	0.237235200727
+ACGCTC	GAGCGT	-0.0921458803262
+ACGCTG	CAGCGT	0.199715587503
+ACGGAA	TTCCGT	-0.329353600663
+ACGGAC	GTCCGT	0.344570189474
+ACGGAG	CTCCGT	-0.0548389021114
+ACGGAT	ATCCGT	-0.545997471972
+ACGGCA	TGCCGT	0.280450942962
+ACGGCC	GGCCGT	1.41582328399
+ACGGCG	CGCCGT	-0.196303455354
+ACGGCT	AGCCGT	0.227947803931
+ACGGGA	TCCCGT	0.280829778818
+ACGGGC	GCCCGT	0.00018403437262
+ACGGGG	CCCCGT	0.580486853705
+ACGGTA	TACCGT	0.075495718471
+ACGGTC	GACCGT	0.348584555757
+ACGGTG	CACCGT	-0.182120731866
+ACGTAA	TTACGT	-0.0621196361007
+ACGTAC	GTACGT	0.466909984366
+ACGTAG	CTACGT	-0.663033009337
+ACGTAT	ATACGT	-0.308630159174
+ACGTCA	TGACGT	-1.99820059064
+ACGTCC	GGACGT	0.420415612207
+ACGTCG	CGACGT	-0.0602876602791
+ACGTCT	AGACGT	-0.634849462137
+ACGTGA	TCACGT	-0.198905336876
+ACGTGC	GCACGT	0.414998502382
+ACGTGG	CCACGT	0.200447884465
+ACGTTA	TAACGT	0.185859329067
+ACGTTC	GAACGT	0.52815687831
+ACGTTG	CAACGT	0.211039795795
+ACTAAA	TTTAGT	-0.50736915368
+ACTAAC	GTTAGT	0.475747236187
+ACTAAG	CTTAGT	0.138308785668
+ACTAAT	ATTAGT	1.06019516177
+ACTACA	TGTAGT	0.319192883857
+ACTACC	GGTAGT	0.487897945196
+ACTACG	CGTAGT	-0.74407054614
+ACTACT	AGTAGT	0.0639271446503
+ACTAGA	TCTAGT	-0.0298376878721
+ACTAGC	GCTAGT	-0.0429928576347
+ACTAGG	CCTAGT	0.333780155394
+ACTAGT	ACTAGT	-0.16686552786
+ACTATA	TATAGT	-0.405746674892
+ACTATC	GATAGT	-0.111727361497
+ACTATG	CATAGT	-0.162666443308
+ACTCAA	TTGAGT	0.0769394839718
+ACTCAC	GTGAGT	1.11965808913
+ACTCAG	CTGAGT	0.702350819167
+ACTCAT	ATGAGT	1.21992890886
+ACTCCA	TGGAGT	0.159797702837
+ACTCCC	GGGAGT	-0.25369524982
+ACTCCG	CGGAGT	-0.22211947957
+ACTCCT	AGGAGT	0.573765565902
+ACTCGA	TCGAGT	-0.52369931313
+ACTCGC	GCGAGT	0.0889533091085
+ACTCGG	CCGAGT	0.228330956989
+ACTCTA	TAGAGT	0.143486764445
+ACTCTC	GAGAGT	0.112394817019
+ACTCTG	CAGAGT	-0.15818031926
+ACTGAA	TTCAGT	-0.255344152434
+ACTGAC	GTCAGT	-0.494035697197
+ACTGAG	CTCAGT	0.356908231789
+ACTGAT	ATCAGT	-0.39586503844
+ACTGCA	TGCAGT	0.67076450454
+ACTGCC	GGCAGT	-0.33621057783
+ACTGCG	CGCAGT	0.397171550083
+ACTGCT	AGCAGT	-0.0353519946569
+ACTGGA	TCCAGT	0.221117097972
+ACTGGC	GCCAGT	-0.148482643928
+ACTGGG	CCCAGT	0.36938530952
+ACTGTA	TACAGT	0.387293260858
+ACTGTC	GACAGT	-0.0178021629868
+ACTGTG	CACAGT	0.618305777696
+ACTTAA	TTAAGT	0.437644834694
+ACTTAC	GTAAGT	0.033919287324
+ACTTAG	CTAAGT	-0.516377419414
+ACTTAT	ATAAGT	0.698841633408
+ACTTCA	TGAAGT	0.611347347435
+ACTTCC	GGAAGT	0.0973285263686
+ACTTCG	CGAAGT	-0.0915669240628
+ACTTCT	AGAAGT	0.518303185233
+ACTTGA	TCAAGT	-0.216079683422
+ACTTGC	GCAAGT	0.575477942051
+ACTTGG	CCAAGT	0.00543146924231
+ACTTTA	TAAAGT	-0.300213848597
+ACTTTC	GAAAGT	-0.148863314977
+ACTTTG	CAAAGT	-0.245595583167
+AGAAAA	TTTTCT	-0.137268535318
+AGAAAC	GTTTCT	0.572093479149
+AGAAAG	CTTTCT	0.098472865858
+AGAAAT	ATTTCT	0.410453396261
+AGAACA	TGTTCT	0.793454650212
+AGAACC	GGTTCT	0.0115494156458
+AGAACG	CGTTCT	0.82393226583
+AGAACT	AGTTCT	-0.119807464497
+AGAAGA	TCTTCT	-0.333758403042
+AGAAGC	GCTTCT	0.0844487814689
+AGAAGG	CCTTCT	-0.205219218112
+AGAATA	TATTCT	0.0633993564776
+AGAATC	GATTCT	-0.538136371548
+AGAATG	CATTCT	0.931621596147
+AGACAA	TTGTCT	-1.50574942362
+AGACAC	GTGTCT	-0.0167669078258
+AGACAG	CTGTCT	-0.12906421364
+AGACAT	ATGTCT	1.35404000457
+AGACCA	TGGTCT	0.328218872799
+AGACCC	GGGTCT	-0.0200725289044
+AGACCG	CGGTCT	0.765626742963
+AGACCT	AGGTCT	-0.456848517462
+AGACGA	TCGTCT	-0.802053904091
+AGACGC	GCGTCT	0.0253871768361
+AGACGG	CCGTCT	-0.139266727141
+AGACTA	TAGTCT	-0.320057583318
+AGACTC	GAGTCT	-0.834249291646
+AGACTG	CAGTCT	0.193508572354
+AGAGAA	TTCTCT	0.0931165815382
+AGAGAC	GTCTCT	-0.5884698684
+AGAGAG	CTCTCT	0.615476951972
+AGAGAT	ATCTCT	-0.299853214526
+AGAGCA	TGCTCT	-0.335642528646
+AGAGCC	GGCTCT	-0.883469618392
+AGAGCG	CGCTCT	0.251578584118
+AGAGCT	AGCTCT	0.487429375142
+AGAGGA	TCCTCT	0.463823218153
+AGAGGC	GCCTCT	0.916276432149
+AGAGGG	CCCTCT	-0.18684096125
+AGAGTA	TACTCT	-0.495138709385
+AGAGTC	GACTCT	-0.484655774219
+AGAGTG	CACTCT	0.115993902393
+AGATAA	TTATCT	-1.58907716419
+AGATAC	GTATCT	0.339921215435
+AGATAG	CTATCT	-0.367780034415
+AGATAT	ATATCT	0.287484551209
+AGATCA	TGATCT	-0.123844683894
+AGATCC	GGATCT	0.00247282057627
+AGATCG	CGATCT	-0.0307897262914
+AGATCT	AGATCT	0.227651621052
+AGATGA	TCATCT	0.386848181305
+AGATGC	GCATCT	-0.0213670882284
+AGATGG	CCATCT	-1.81021353589
+AGATTA	TAATCT	-0.8443233316
+AGATTC	GAATCT	-0.532888247722
+AGATTG	CAATCT	-0.107371277313
+AGCAAA	TTTGCT	-1.94599552918
+AGCAAC	GTTGCT	-0.0162424474542
+AGCAAG	CTTGCT	-0.342005889721
+AGCAAT	ATTGCT	1.13914459658
+AGCACA	TGTGCT	-0.00361898399215
+AGCACC	GGTGCT	-0.315148789185
+AGCACG	CGTGCT	0.303979187648
+AGCACT	AGTGCT	1.06273224797
+AGCAGA	TCTGCT	-0.762081808432
+AGCAGC	GCTGCT	-0.718835795316
+AGCAGG	CCTGCT	-0.805919785711
+AGCATA	TATGCT	-1.23962747197
+AGCATC	GATGCT	-0.72646586189
+AGCATG	CATGCT	0.464440738258
+AGCCAA	TTGGCT	-0.328449603835
+AGCCAC	GTGGCT	1.94437802586
+AGCCAG	CTGGCT	-0.220210493763
+AGCCAT	ATGGCT	-0.106927471131
+AGCCCA	TGGGCT	-0.206762784221
+AGCCCC	GGGGCT	0.51046992709
+AGCCCG	CGGGCT	1.1594447744
+AGCCCT	AGGGCT	-0.414622923074
+AGCCGA	TCGGCT	-0.0926922352106
+AGCCGC	GCGGCT	0.262876228393
+AGCCGG	CCGGCT	0.055499757051
+AGCCTA	TAGGCT	-0.197316160999
+AGCCTC	GAGGCT	0.21702983432
+AGCCTG	CAGGCT	0.435309995039
+AGCGAA	TTCGCT	-1.1066505883e-05
+AGCGAC	GTCGCT	-0.47360327321
+AGCGAG	CTCGCT	0.164778505221
+AGCGAT	ATCGCT	0.356498907503
+AGCGCA	TGCGCT	0.0505610378921
+AGCGCC	GGCGCT	-0.0890290036684
+AGCGCG	CGCGCT	0.178915191469
+AGCGCT	AGCGCT	0.11567687362
+AGCGGA	TCCGCT	-0.227235460911
+AGCGGC	GCCGCT	0.0584119341573
+AGCGGG	CCCGCT	0.144626430325
+AGCGTA	TACGCT	-0.573602800156
+AGCGTC	GACGCT	0.0226782464247
+AGCGTG	CACGCT	0.229831864487
+AGCTAA	TTAGCT	-0.845230674233
+AGCTAC	GTAGCT	0.164831388872
+AGCTAG	CTAGCT	0.500762258636
+AGCTAT	ATAGCT	-0.488234536435
+AGCTCA	TGAGCT	0.187034773314
+AGCTCC	GGAGCT	-0.401339822528
+AGCTCG	CGAGCT	0.156048007048
+AGCTGA	TCAGCT	-0.516709892962
+AGCTGC	GCAGCT	-1.11014090329
+AGCTGG	CCAGCT	-0.932348608248
+AGCTTA	TAAGCT	-0.176665598631
+AGCTTC	GAAGCT	0.0711278180534
+AGCTTG	CAAGCT	-0.434225228906
+AGGAAA	TTTCCT	-0.704291527622
+AGGAAC	GTTCCT	0.303469917629
+AGGAAG	CTTCCT	0.00524866323846
+AGGAAT	ATTCCT	1.58780848354
+AGGACA	TGTCCT	-0.15505321324
+AGGACC	GGTCCT	0.289757945024
+AGGACG	CGTCCT	0.481778993922
+AGGACT	AGTCCT	0.640823779903
+AGGAGA	TCTCCT	-0.235490942376
+AGGAGC	GCTCCT	0.235768365124
+AGGAGG	CCTCCT	-0.414415365132
+AGGATA	TATCCT	-0.0664042613035
+AGGATC	GATCCT	-0.434947351654
+AGGATG	CATCCT	0.366955472562
+AGGCAA	TTGCCT	-1.01850518506
+AGGCAC	GTGCCT	0.736932368634
+AGGCAG	CTGCCT	0.256955994847
+AGGCAT	ATGCCT	0.369133304411
+AGGCCA	TGGCCT	-0.561290343016
+AGGCCC	GGGCCT	1.03465518609
+AGGCCG	CGGCCT	0.522868342351
+AGGCCT	AGGCCT	0.564743685355
+AGGCGA	TCGCCT	0.424607293004
+AGGCGC	GCGCCT	-0.236182049346
+AGGCGG	CCGCCT	0.0445043432832
+AGGCTA	TAGCCT	0.0242969706881
+AGGCTC	GAGCCT	-0.0471097341017
+AGGCTG	CAGCCT	0.456595572887
+AGGGAA	TTCCCT	0.498834635405
+AGGGAC	GTCCCT	-0.260200051734
+AGGGAG	CTCCCT	0.240503537084
+AGGGAT	ATCCCT	-0.535599280412
+AGGGCA	TGCCCT	-0.00133954725493
+AGGGCC	GGCCCT	0.265601558657
+AGGGCG	CGCCCT	-0.545673685243
+AGGGGA	TCCCCT	0.513483423778
+AGGGGC	GCCCCT	-0.265469144844
+AGGGGG	CCCCCT	0.36693029363
+AGGGTA	TACCCT	0.104498083465
+AGGGTC	GACCCT	0.278328586346
+AGGGTG	CACCCT	0.428836989395
+AGGTAA	TTACCT	-0.725162219135
+AGGTAC	GTACCT	-1.09752982535
+AGGTAG	CTACCT	-0.341120431869
+AGGTAT	ATACCT	0.314857124255
+AGGTCA	TGACCT	-1.91741654129
+AGGTCC	GGACCT	0.810994316372
+AGGTCG	CGACCT	0.394159438614
+AGGTGA	TCACCT	-1.07617897961
+AGGTGC	GCACCT	-1.39197112403
+AGGTGG	CCACCT	-1.18178211992
+AGGTTA	TAACCT	-0.510476009759
+AGGTTC	GAACCT	-0.625187182346
+AGGTTG	CAACCT	0.143869769481
+AGTAAA	TTTACT	-1.50793279028
+AGTAAC	GTTACT	0.427455316037
+AGTAAG	CTTACT	0.279690202349
+AGTAAT	ATTACT	0.291866593552
+AGTACA	TGTACT	0.880452697751
+AGTACC	GGTACT	-0.445269994312
+AGTACG	CGTACT	0.579750490517
+AGTACT	AGTACT	0.829122008787
+AGTAGA	TCTACT	-0.191593190325
+AGTAGC	GCTACT	-0.0893420501576
+AGTAGG	CCTACT	0.14556137321
+AGTATA	TATACT	-0.849140839165
+AGTATC	GATACT	-0.61282065974
+AGTATG	CATACT	0.912596605147
+AGTCAA	TTGACT	-1.70769354035
+AGTCAC	GTGACT	1.34966048832
+AGTCAG	CTGACT	1.37319346673
+AGTCAT	ATGACT	2.72186586472
+AGTCCA	TGGACT	-0.0192132003426
+AGTCCC	GGGACT	0.578612760429
+AGTCCG	CGGACT	-0.212053165155
+AGTCGA	TCGACT	-0.0786424670568
+AGTCGC	GCGACT	-0.458716046909
+AGTCGG	CCGACT	0.248365258652
+AGTCTA	TAGACT	-0.320914441937
+AGTCTC	GAGACT	0.0850550293497
+AGTCTG	CAGACT	0.488675888517
+AGTGAA	TTCACT	0.0266570650857
+AGTGAC	GTCACT	-0.387049263014
+AGTGAG	CTCACT	0.430045767393
+AGTGAT	ATCACT	0.827141172737
+AGTGCA	TGCACT	0.070601211825
+AGTGCC	GGCACT	-0.966563356734
+AGTGCG	CGCACT	-0.281729870678
+AGTGGA	TCCACT	-0.381263157411
+AGTGGC	GCCACT	0.206378092816
+AGTGGG	CCCACT	0.389632179038
+AGTGTA	TACACT	-0.820978049153
+AGTGTC	GACACT	-0.600524497894
+AGTGTG	CACACT	0.713037712467
+AGTTAA	TTAACT	-0.0106066013167
+AGTTAC	GTAACT	0.0455408515653
+AGTTAG	CTAACT	0.466941093618
+AGTTAT	ATAACT	0.768829672421
+AGTTCA	TGAACT	-0.125866517956
+AGTTCC	GGAACT	0.668936562441
+AGTTCG	CGAACT	0.387968939941
+AGTTGA	TCAACT	-0.0474971487734
+AGTTGC	GCAACT	1.1127899844
+AGTTGG	CCAACT	-0.465573167102
+AGTTTA	TAAACT	0.284055967528
+AGTTTC	GAAACT	0.18619349173
+AGTTTG	CAAACT	0.12731264248
+ATAAAA	TTTTAT	1.02648675816
+ATAAAC	GTTTAT	0.732880225054
+ATAAAG	CTTTAT	-1.87548823673
+ATAAAT	ATTTAT	-0.873007961387
+ATAACA	TGTTAT	-1.36018600643
+ATAACC	GGTTAT	-0.202228583494
+ATAACG	CGTTAT	0.589468702095
+ATAAGA	TCTTAT	-0.257841865173
+ATAAGC	GCTTAT	-0.610042148574
+ATAAGG	CCTTAT	-1.22287623937
+ATAATA	TATTAT	-0.164184507604
+ATAATC	GATTAT	-0.722314659784
+ATAATG	CATTAT	-0.0511255174539
+ATACAA	TTGTAT	-0.950800433133
+ATACAC	GTGTAT	-0.119925598684
+ATACAG	CTGTAT	-0.0262094856091
+ATACAT	ATGTAT	0.249201529788
+ATACCA	TGGTAT	0.574806016908
+ATACCC	GGGTAT	-0.101360243386
+ATACCG	CGGTAT	-0.611029854896
+ATACGA	TCGTAT	-0.920389648385
+ATACGC	GCGTAT	-0.174514324887
+ATACGG	CCGTAT	-0.51787444727
+ATACTA	TAGTAT	0.229563124064
+ATACTC	GAGTAT	-0.595979909399
+ATACTG	CAGTAT	0.236738075475
+ATAGAA	TTCTAT	-1.07321125666
+ATAGAC	GTCTAT	-0.918035212801
+ATAGAG	CTCTAT	0.338560548219
+ATAGAT	ATCTAT	0.330811757112
+ATAGCA	TGCTAT	-0.923663860458
+ATAGCC	GGCTAT	0.16558344397
+ATAGCG	CGCTAT	0.462369612811
+ATAGGA	TCCTAT	0.425973895672
+ATAGGC	GCCTAT	0.314093043047
+ATAGGG	CCCTAT	0.219751962566
+ATAGTA	TACTAT	-0.900554379694
+ATAGTC	GACTAT	-0.0847074884239
+ATAGTG	CACTAT	-0.211791001641
+ATATAA	TTATAT	-0.276221858749
+ATATAC	GTATAT	-1.19823462791
+ATATAG	CTATAT	0.0633032493853
+ATATAT	ATATAT	0.429750726604
+ATATCA	TGATAT	-1.31621436594
+ATATCC	GGATAT	-0.127518411175
+ATATCG	CGATAT	-0.1582300913
+ATATGA	TCATAT	-1.22039654692
+ATATGC	GCATAT	-0.733718702885
+ATATGG	CCATAT	-0.410024811352
+ATATTA	TAATAT	-0.556385064056
+ATATTC	GAATAT	-0.193606749951
+ATATTG	CAATAT	-1.84656827598
+ATCAAA	TTTGAT	-1.48651833301
+ATCAAC	GTTGAT	-0.177019712196
+ATCAAG	CTTGAT	0.993564451911
+ATCAAT	ATTGAT	-1.73845783884
+ATCACA	TGTGAT	-0.7187838349
+ATCACC	GGTGAT	-0.522216302483
+ATCACG	CGTGAT	-0.717922773896
+ATCAGA	TCTGAT	-0.432098796815
+ATCAGC	GCTGAT	-0.177781089389
+ATCAGG	CCTGAT	-0.185638177915
+ATCATA	TATGAT	0.532155893918
+ATCATC	GATGAT	-0.124663780341
+ATCATG	CATGAT	-0.00689408482313
+ATCCAA	TTGGAT	0.860993420444
+ATCCAC	GTGGAT	-0.0725321924413
+ATCCAG	CTGGAT	-0.27103003808
+ATCCAT	ATGGAT	-1.06148966182
+ATCCCA	TGGGAT	0.61836822508
+ATCCCC	GGGGAT	0.953992982003
+ATCCCG	CGGGAT	-0.399268842253
+ATCCGA	TCGGAT	0.30018596428
+ATCCGC	GCGGAT	0.267936181036
+ATCCGG	CCGGAT	-0.621204560642
+ATCCTA	TAGGAT	-0.243979632628
+ATCCTC	GAGGAT	0.744817578624
+ATCCTG	CAGGAT	-0.504079419831
+ATCGAA	TTCGAT	-0.127059008672
+ATCGAC	GTCGAT	-0.0155499225919
+ATCGAG	CTCGAT	-0.547153990807
+ATCGAT	ATCGAT	-0.243041889295
+ATCGCA	TGCGAT	0.617626698462
+ATCGCC	GGCGAT	-0.549200964359
+ATCGCG	CGCGAT	-0.319928310366
+ATCGGA	TCCGAT	-0.0213478576162
+ATCGGC	GCCGAT	0.01012445378
+ATCGGG	CCCGAT	-0.16502415031
+ATCGTA	TACGAT	0.209770307092
+ATCGTC	GACGAT	0.446879950629
+ATCGTG	CACGAT	-0.0107015895665
+ATCTAA	TTAGAT	0.275281219402
+ATCTAC	GTAGAT	-0.505485858388
+ATCTAG	CTAGAT	-1.40045656324
+ATCTCA	TGAGAT	-0.796214522768
+ATCTCC	GGAGAT	0.357919780998
+ATCTCG	CGAGAT	0.330326321378
+ATCTGA	TCAGAT	0.54409789464
+ATCTGC	GCAGAT	-1.11848953262
+ATCTGG	CCAGAT	-1.74540660353
+ATCTTA	TAAGAT	0.653446604954
+ATCTTC	GAAGAT	-0.402855143504
+ATCTTG	CAAGAT	1.12868592482
+ATGAAA	TTTCAT	-0.931491848926
+ATGAAC	GTTCAT	-0.668998480935
+ATGAAG	CTTCAT	-0.234340414147
+ATGAAT	ATTCAT	1.32890513335
+ATGACA	TGTCAT	-0.407101486566
+ATGACC	GGTCAT	-0.10517111194
+ATGACG	CGTCAT	-1.81349916863
+ATGAGA	TCTCAT	-0.143474073688
+ATGAGC	GCTCAT	-0.422453674778
+ATGAGG	CCTCAT	0.24497065201
+ATGATA	TATCAT	-0.457971919127
+ATGATC	GATCAT	-0.956283043587
+ATGATG	CATCAT	0.280192974098
+ATGCAA	TTGCAT	0.0243701389727
+ATGCAC	GTGCAT	-0.19959196082
+ATGCAG	CTGCAT	0.349978081089
+ATGCAT	ATGCAT	0.13057511889
+ATGCCA	TGGCAT	0.381740306749
+ATGCCC	GGGCAT	-0.0217736885435
+ATGCCG	CGGCAT	0.62665478233
+ATGCGA	TCGCAT	0.335703166837
+ATGCGC	GCGCAT	0.783616840941
+ATGCGG	CCGCAT	0.0472938578306
+ATGCTA	TAGCAT	-0.0735655576493
+ATGCTC	GAGCAT	-0.840560139265
+ATGCTG	CAGCAT	0.15741810487
+ATGGAA	TTCCAT	-0.775848717153
+ATGGAC	GTCCAT	0.41782311792
+ATGGAG	CTCCAT	-0.163619300573
+ATGGCA	TGCCAT	-0.129694663435
+ATGGCC	GGCCAT	-0.416279326696
+ATGGCG	CGCCAT	-0.445757591838
+ATGGGA	TCCCAT	-0.128926465653
+ATGGGC	GCCCAT	0.771709467651
+ATGGGG	CCCCAT	-0.0745872493387
+ATGGTA	TACCAT	0.246690854305
+ATGGTC	GACCAT	0.0673503554008
+ATGGTG	CACCAT	-0.253032937261
+ATGTAA	TTACAT	-0.995516665438
+ATGTAC	GTACAT	-0.525286748875
+ATGTAG	CTACAT	0.68550253572
+ATGTCA	TGACAT	-2.11241919747
+ATGTCC	GGACAT	0.845134601462
+ATGTCG	CGACAT	0.516595304184
+ATGTGA	TCACAT	0.141325506081
+ATGTGC	GCACAT	0.0828091804383
+ATGTGG	CCACAT	0.947400656873
+ATGTTA	TAACAT	-0.947214214102
+ATGTTC	GAACAT	0.431810543746
+ATGTTG	CAACAT	-0.125258264151
+ATTAAA	TTTAAT	-1.31975538764
+ATTAAC	GTTAAT	-1.41512566175
+ATTAAG	CTTAAT	-0.199537767266
+ATTAAT	ATTAAT	-0.436068858224
+ATTACA	TGTAAT	-0.201133348119
+ATTACC	GGTAAT	0.172300094885
+ATTACG	CGTAAT	-0.417719262734
+ATTAGA	TCTAAT	0.93500523245
+ATTAGC	GCTAAT	-2.10811581856
+ATTAGG	CCTAAT	-0.467480191357
+ATTATA	TATAAT	0.363629728768
+ATTATC	GATAAT	-0.861960495475
+ATTATG	CATAAT	0.562723819118
+ATTCAA	TTGAAT	0.0200498460273
+ATTCAC	GTGAAT	0.418891749324
+ATTCAG	CTGAAT	-0.230767178642
+ATTCCA	TGGAAT	1.05154227319
+ATTCCC	GGGAAT	-0.13155797816
+ATTCCG	CGGAAT	0.0812893553365
+ATTCGA	TCGAAT	-0.0838650208544
+ATTCGC	GCGAAT	0.222296881979
+ATTCGG	CCGAAT	-0.139283061422
+ATTCTA	TAGAAT	-0.224824743806
+ATTCTC	GAGAAT	-0.557117996579
+ATTCTG	CAGAAT	1.0695468056
+ATTGAA	TTCAAT	-0.463986709186
+ATTGAC	GTCAAT	-1.69772334912
+ATTGAG	CTCAAT	-0.117012580083
+ATTGCA	TGCAAT	0.257329206735
+ATTGCC	GGCAAT	0.0584571835771
+ATTGCG	CGCAAT	0.406025923639
+ATTGGA	TCCAAT	-0.62851834273
+ATTGGC	GCCAAT	0.079438863957
+ATTGGG	CCCAAT	0.913438482245
+ATTGTA	TACAAT	0.450423686846
+ATTGTC	GACAAT	-1.01726290552
+ATTGTG	CACAAT	0.60669560725
+ATTTAA	TTAAAT	0.253196011315
+ATTTAC	GTAAAT	-2.97235568716
+ATTTAG	CTAAAT	-0.292154315684
+ATTTCA	TGAAAT	0.636662944131
+ATTTCC	GGAAAT	-0.6231521332
+ATTTCG	CGAAAT	0.503689389655
+ATTTGA	TCAAAT	-0.338609456935
+ATTTGC	GCAAAT	-2.07758131186
+ATTTGG	CCAAAT	0.333462478492
+ATTTTA	TAAAAT	-0.556525692846
+ATTTTC	GAAAAT	-0.840026641213
+ATTTTG	CAAAAT	1.35270004621
+CAAAAA	TTTTTG	1.47166885108
+CAAAAC	GTTTTG	0.109633599033
+CAAAAG	CTTTTG	-0.10115908148
+CAAACA	TGTTTG	-3.89334182039
+CAAACC	GGTTTG	0.963184089308
+CAAACG	CGTTTG	-0.0395814581096
+CAAAGA	TCTTTG	-0.0118964430112
+CAAAGC	GCTTTG	0.224207599526
+CAAAGG	CCTTTG	-0.175494095276
+CAAATA	TATTTG	-3.52207271503
+CAAATC	GATTTG	0.660053835664
+CAAATG	CATTTG	-0.727424259718
+CAACAA	TTGTTG	-1.19656276713
+CAACAC	GTGTTG	0.660747434725
+CAACAG	CTGTTG	-0.857485267387
+CAACCA	TGGTTG	0.404850702022
+CAACCC	GGGTTG	0.503951138249
+CAACCG	CGGTTG	-0.644416232942
+CAACGA	TCGTTG	-0.224419054868
+CAACGC	GCGTTG	-0.508914854709
+CAACGG	CCGTTG	-0.200076485177
+CAACTA	TAGTTG	0.0981933677308
+CAACTC	GAGTTG	0.150972486588
+CAACTG	CAGTTG	-0.921035707419
+CAAGAA	TTCTTG	0.922374683971
+CAAGAC	GTCTTG	0.423676757958
+CAAGAG	CTCTTG	-0.187883399321
+CAAGCA	TGCTTG	0.636543530499
+CAAGCC	GGCTTG	0.00071745286602
+CAAGCG	CGCTTG	-0.254180305152
+CAAGGA	TCCTTG	0.821021319418
+CAAGGC	GCCTTG	0.0604176049684
+CAAGGG	CCCTTG	0.226570746088
+CAAGTA	TACTTG	0.216374948595
+CAAGTC	GACTTG	0.729235603486
+CAAGTG	CACTTG	-0.56315324029
+CAATAA	TTATTG	-0.549020618619
+CAATAC	GTATTG	-0.208803364217
+CAATAG	CTATTG	-0.0777355282063
+CAATCA	TGATTG	-0.263891707036
+CAATCC	GGATTG	0.700155593708
+CAATCG	CGATTG	0.357897326676
+CAATGA	TCATTG	0.638987959753
+CAATGC	GCATTG	-0.191968495388
+CAATGG	CCATTG	0.665776651287
+CAATTA	TAATTG	-0.271883139449
+CAATTC	GAATTG	0.0983322987509
+CAATTG	CAATTG	-0.158614672809
+CACAAA	TTTGTG	0.598792191689
+CACAAC	GTTGTG	0.340518077962
+CACAAG	CTTGTG	0.510003745313
+CACACA	TGTGTG	-0.400928902962
+CACACC	GGTGTG	-1.11996656805
+CACACG	CGTGTG	-0.718947444703
+CACAGA	TCTGTG	-0.100922777882
+CACAGC	GCTGTG	-0.50134148629
+CACAGG	CCTGTG	0.252298561921
+CACATA	TATGTG	-0.110647463006
+CACATC	GATGTG	-0.651447413877
+CACATG	CATGTG	0.00381644641394
+CACCAA	TTGGTG	0.70840160186
+CACCAC	GTGGTG	0.582686488096
+CACCAG	CTGGTG	-0.793184758924
+CACCCA	TGGGTG	-0.0284926248858
+CACCCC	GGGGTG	0.893138419231
+CACCCG	CGGGTG	0.417490759867
+CACCGA	TCGGTG	-0.0699188856569
+CACCGC	GCGGTG	0.522058993046
+CACCGG	CCGGTG	0.0180860340686
+CACCTA	TAGGTG	-0.592893592202
+CACCTC	GAGGTG	0.676528270972
+CACCTG	CAGGTG	-5.27584299853
+CACGAA	TTCGTG	-0.382612993115
+CACGAC	GTCGTG	0.032371814243
+CACGAG	CTCGTG	0.254629628132
+CACGCA	TGCGTG	0.117247261513
+CACGCC	GGCGTG	0.00637478726345
+CACGCG	CGCGTG	-0.0917099552667
+CACGGA	TCCGTG	0.368340586739
+CACGGC	GCCGTG	1.30441576203
+CACGGG	CCCGTG	-0.264381491117
+CACGTA	TACGTG	-1.02682296878
+CACGTC	GACGTG	-0.219968085956
+CACGTG	CACGTG	1.22865752463
+CACTAA	TTAGTG	0.253977430376
+CACTAC	GTAGTG	-0.301750384238
+CACTAG	CTAGTG	-0.669946880366
+CACTCA	TGAGTG	-0.734281923173
+CACTCC	GGAGTG	-0.204493843897
+CACTCG	CGAGTG	-0.172095955086
+CACTGA	TCAGTG	-0.318554612147
+CACTGC	GCAGTG	0.452355062655
+CACTGG	CCAGTG	0.0113022350057
+CACTTA	TAAGTG	0.159408091311
+CACTTC	GAAGTG	0.111535665099
+CAGAAA	TTTCTG	0.013661920057
+CAGAAC	GTTCTG	-0.34634558884
+CAGAAG	CTTCTG	-0.268726628204
+CAGACA	TGTCTG	-0.0884146723771
+CAGACC	GGTCTG	0.848596588006
+CAGACG	CGTCTG	-0.176588448217
+CAGAGA	TCTCTG	0.213650101773
+CAGAGC	GCTCTG	0.72706788894
+CAGAGG	CCTCTG	0.222483758838
+CAGATA	TATCTG	0.134724484734
+CAGATC	GATCTG	0.135060768551
+CAGATG	CATCTG	-3.82365153576
+CAGCAA	TTGCTG	0.216246738966
+CAGCAC	GTGCTG	0.134612451799
+CAGCAG	CTGCTG	-0.782809242533
+CAGCCA	TGGCTG	0.25352023069
+CAGCCC	GGGCTG	0.892449121017
+CAGCCG	CGGCTG	-1.34095163643
+CAGCGA	TCGCTG	0.368898676461
+CAGCGC	GCGCTG	0.40336227877
+CAGCGG	CCGCTG	-0.041543776559
+CAGCTA	TAGCTG	0.347266756135
+CAGCTC	GAGCTG	1.48884499806
+CAGCTG	CAGCTG	-3.03276014117
+CAGGAA	TTCCTG	0.0661704451321
+CAGGAC	GTCCTG	0.86939329333
+CAGGAG	CTCCTG	-0.480221178798
+CAGGCA	TGCCTG	0.506805708618
+CAGGCC	GGCCTG	0.756166029763
+CAGGCG	CGCCTG	0.233566907124
+CAGGGA	TCCCTG	0.970287665608
+CAGGGC	GCCCTG	-0.32000823828
+CAGGGG	CCCCTG	-0.0409673970414
+CAGGTA	TACCTG	-1.16422982988
+CAGGTC	GACCTG	0.991958843425
+CAGTAA	TTACTG	-0.456065885023
+CAGTAC	GTACTG	0.866479551705
+CAGTAG	CTACTG	0.0492891641913
+CAGTCA	TGACTG	-0.864906645492
+CAGTCC	GGACTG	0.436595236134
+CAGTCG	CGACTG	-0.0778031521909
+CAGTGA	TCACTG	0.095401665363
+CAGTGC	GCACTG	0.809574408241
+CAGTGG	CCACTG	0.130805602072
+CAGTTA	TAACTG	0.102237048367
+CAGTTC	GAACTG	-0.329116232229
+CATAAA	TTTATG	1.09056149453
+CATAAC	GTTATG	-0.240307894957
+CATAAG	CTTATG	-0.274744434381
+CATACA	TGTATG	-0.316826193863
+CATACC	GGTATG	0.907504112042
+CATACG	CGTATG	-0.646410594912
+CATAGA	TCTATG	0.0743941919121
+CATAGC	GCTATG	0.604402059297
+CATAGG	CCTATG	0.132614581735
+CATATA	TATATG	0.102595570806
+CATATC	GATATG	-0.685577322551
+CATATG	CATATG	-1.85636550314
+CATCAA	TTGATG	0.528225242508
+CATCAC	GTGATG	-0.779076990822
+CATCAG	CTGATG	0.0792779772782
+CATCCA	TGGATG	0.380324367299
+CATCCC	GGGATG	0.476823595932
+CATCCG	CGGATG	-0.735460908783
+CATCGA	TCGATG	-0.364712952887
+CATCGC	GCGATG	-0.445769472916
+CATCGG	CCGATG	-0.213744964599
+CATCTA	TAGATG	0.591942642834
+CATCTC	GAGATG	0.175954704824
+CATGAA	TTCATG	-0.0345496547785
+CATGAC	GTCATG	0.128585442856
+CATGAG	CTCATG	0.093844395721
+CATGCA	TGCATG	0.500579541195
+CATGCC	GGCATG	0.534363670334
+CATGCG	CGCATG	-0.07767446656
+CATGGA	TCCATG	-0.915298913786
+CATGGC	GCCATG	0.496643001128
+CATGGG	CCCATG	0.412095419236
+CATGTA	TACATG	0.170530452886
+CATGTC	GACATG	0.409305171927
+CATTAA	TTAATG	-1.21674231256
+CATTAC	GTAATG	0.0363120647791
+CATTAG	CTAATG	0.113980713568
+CATTCA	TGAATG	0.211633133094
+CATTCC	GGAATG	1.3667834662
+CATTCG	CGAATG	-0.306612061174
+CATTGA	TCAATG	0.261275457012
+CATTGC	GCAATG	0.422488072429
+CATTGG	CCAATG	-0.0683147570932
+CATTTA	TAAATG	-0.674511635662
+CATTTC	GAAATG	0.149816454289
+CCAAAA	TTTTGG	0.722553040783
+CCAAAC	GTTTGG	0.136597481362
+CCAAAG	CTTTGG	0.335940148998
+CCAACA	TGTTGG	0.242613946152
+CCAACC	GGTTGG	0.193037046526
+CCAACG	CGTTGG	-0.464564126497
+CCAAGA	TCTTGG	0.449792823487
+CCAAGC	GCTTGG	0.0105858416551
+CCAAGG	CCTTGG	0.750008716703
+CCAATA	TATTGG	0.380191080614
+CCAATC	GATTGG	-0.291650702172
+CCACAA	TTGTGG	2.48928712228
+CCACAC	GTGTGG	0.042494328422
+CCACAG	CTGTGG	1.98559943258
+CCACCA	TGGTGG	0.139131185109
+CCACCC	GGGTGG	0.411865066012
+CCACCG	CGGTGG	0.214345177527
+CCACGA	TCGTGG	0.329121460372
+CCACGC	GCGTGG	0.853862453394
+CCACGG	CCGTGG	0.958047076313
+CCACTA	TAGTGG	0.990324695059
+CCACTC	GAGTGG	-0.865871154042
+CCAGAA	TTCTGG	1.35087980364
+CCAGAC	GTCTGG	0.330013500547
+CCAGAG	CTCTGG	0.850843625887
+CCAGCA	TGCTGG	-0.105656155248
+CCAGCC	GGCTGG	0.10376516441
+CCAGCG	CGCTGG	-0.151192099436
+CCAGGA	TCCTGG	-0.00188020552734
+CCAGGC	GCCTGG	0.477368129975
+CCAGGG	CCCTGG	0.330067567163
+CCAGTA	TACTGG	0.201689404211
+CCAGTC	GACTGG	0.172972219351
+CCATAA	TTATGG	0.864471006772
+CCATAC	GTATGG	-0.053674529194
+CCATAG	CTATGG	-0.628472779515
+CCATCA	TGATGG	0.316290664784
+CCATCC	GGATGG	0.541183344469
+CCATCG	CGATGG	-0.585435010409
+CCATGA	TCATGG	0.842999014507
+CCATGC	GCATGG	0.885922909323
+CCATGG	CCATGG	-1.19148404887
+CCATTA	TAATGG	0.461535283786
+CCATTC	GAATGG	-0.448015532577
+CCCAAA	TTTGGG	1.00513456604
+CCCAAC	GTTGGG	0.0589630047868
+CCCAAG	CTTGGG	0.612217890411
+CCCACA	TGTGGG	1.49280537935
+CCCACC	GGTGGG	-0.540327896372
+CCCACG	CGTGGG	-0.189765626178
+CCCAGA	TCTGGG	0.906005293679
+CCCAGC	GCTGGG	0.00258121804065
+CCCAGG	CCTGGG	0.385280928829
+CCCATA	TATGGG	0.580077252911
+CCCATC	GATGGG	0.105886687275
+CCCCAA	TTGGGG	0.64902551958
+CCCCAC	GTGGGG	0.177279574499
+CCCCAG	CTGGGG	1.0367549673
+CCCCCA	TGGGGG	-0.220124853692
+CCCCCC	GGGGGG	-0.180081108721
+CCCCCG	CGGGGG	0.373246680376
+CCCCGA	TCGGGG	0.635830201709
+CCCCGC	GCGGGG	0.000651576611126
+CCCCGG	CCGGGG	0.854947986857
+CCCCTA	TAGGGG	0.325441244643
+CCCCTC	GAGGGG	0.483696397734
+CCCGAA	TTCGGG	0.714032030274
+CCCGAC	GTCGGG	-0.208075368152
+CCCGAG	CTCGGG	0.440636877923
+CCCGCA	TGCGGG	0.402712812803
+CCCGCC	GGCGGG	0.476459210594
+CCCGCG	CGCGGG	0.023503459093
+CCCGGA	TCCGGG	-0.538869263981
+CCCGGC	GCCGGG	0.346443397541
+CCCGGG	CCCGGG	0.548834083398
+CCCGTA	TACGGG	0.139729969322
+CCCGTC	GACGGG	-0.214648757928
+CCCTAA	TTAGGG	-0.290368330245
+CCCTAC	GTAGGG	0.144663483304
+CCCTAG	CTAGGG	0.0802948074314
+CCCTCA	TGAGGG	0.408278749762
+CCCTCC	GGAGGG	-0.517676283937
+CCCTCG	CGAGGG	0.0815315059399
+CCCTGA	TCAGGG	0.0384525153021
+CCCTGC	GCAGGG	0.376976503211
+CCCTTA	TAAGGG	-0.343987739741
+CCCTTC	GAAGGG	0.152780345376
+CCGAAA	TTTCGG	0.701489171099
+CCGAAC	GTTCGG	0.371848478306
+CCGAAG	CTTCGG	0.108859525197
+CCGACA	TGTCGG	-0.0475200223019
+CCGACC	GGTCGG	0.190388885966
+CCGACG	CGTCGG	-0.43295891756
+CCGAGA	TCTCGG	-0.851488238354
+CCGAGC	GCTCGG	0.402946122489
+CCGAGG	CCTCGG	0.447649638689
+CCGATA	TATCGG	-0.583698466004
+CCGATC	GATCGG	0.214495804491
+CCGCAA	TTGCGG	0.704274885421
+CCGCAC	GTGCGG	-0.124930120604
+CCGCAG	CTGCGG	0.796608954246
+CCGCCA	TGGCGG	0.0933037773855
+CCGCCC	GGGCGG	-0.761788986379
+CCGCCG	CGGCGG	0.118893616093
+CCGCGA	TCGCGG	-0.214050849808
+CCGCGC	GCGCGG	0.427088586284
+CCGCGG	CCGCGG	-0.163354064297
+CCGCTA	TAGCGG	0.075663139979
+CCGCTC	GAGCGG	-0.0840465139463
+CCGGAA	TTCCGG	-0.815717995392
+CCGGAC	GTCCGG	-0.0199681503018
+CCGGAG	CTCCGG	-0.142607842875
+CCGGCA	TGCCGG	-0.211088707568
+CCGGCC	GGCCGG	0.20978106433
+CCGGCG	CGCCGG	0.186483274301
+CCGGGA	TCCCGG	-0.120892474243
+CCGGGC	GCCCGG	0.286207705144
+CCGGTA	TACCGG	-0.364061582654
+CCGGTC	GACCGG	0.537401184601
+CCGTAA	TTACGG	-0.368122547637
+CCGTAC	GTACGG	0.388891995455
+CCGTAG	CTACGG	-0.14921907099
+CCGTCA	TGACGG	-0.731374154141
+CCGTCC	GGACGG	0.142640525033
+CCGTCG	CGACGG	-0.219815741986
+CCGTGA	TCACGG	0.351505600383
+CCGTGC	GCACGG	0.0466167726132
+CCGTTA	TAACGG	-0.177534573202
+CCGTTC	GAACGG	0.930924136276
+CCTAAA	TTTAGG	-0.437162767665
+CCTAAC	GTTAGG	0.715494549297
+CCTAAG	CTTAGG	0.834398157528
+CCTACA	TGTAGG	0.352513677162
+CCTACC	GGTAGG	-0.188597324766
+CCTACG	CGTAGG	-0.527855146408
+CCTAGA	TCTAGG	-0.358834707149
+CCTAGC	GCTAGG	-0.48327631422
+CCTAGG	CCTAGG	0.712974479815
+CCTATA	TATAGG	-0.00662770590644
+CCTATC	GATAGG	0.321915138696
+CCTCAA	TTGAGG	0.478167716035
+CCTCAC	GTGAGG	-0.603310279315
+CCTCAG	CTGAGG	1.18451331899
+CCTCCA	TGGAGG	-0.156185760342
+CCTCCC	GGGAGG	0.196588926242
+CCTCCG	CGGAGG	0.34975369542
+CCTCGA	TCGAGG	-0.12704382375
+CCTCGC	GCGAGG	-0.690558364248
+CCTCTA	TAGAGG	0.666120847354
+CCTCTC	GAGAGG	0.326083338601
+CCTGAA	TTCAGG	0.0108115210728
+CCTGAC	GTCAGG	0.0764004186754
+CCTGAG	CTCAGG	0.140494023385
+CCTGCA	TGCAGG	-0.350864542641
+CCTGCC	GGCAGG	-0.156177205702
+CCTGCG	CGCAGG	0.202361500118
+CCTGGA	TCCAGG	-0.165955838374
+CCTGGC	GCCAGG	0.0156098804816
+CCTGTA	TACAGG	-0.631427205006
+CCTGTC	GACAGG	-0.29284206294
+CCTTAA	TTAAGG	0.313884227175
+CCTTAC	GTAAGG	0.666799661556
+CCTTAG	CTAAGG	0.577269508525
+CCTTCA	TGAAGG	-0.228064410555
+CCTTCC	GGAAGG	0.127618019043
+CCTTCG	CGAAGG	0.233061732027
+CCTTGA	TCAAGG	0.697814875901
+CCTTGC	GCAAGG	0.163330658666
+CCTTTA	TAAAGG	-1.10202858977
+CCTTTC	GAAAGG	-0.15641139741
+CGAAAA	TTTTCG	0.570946981635
+CGAAAC	GTTTCG	0.71891048699
+CGAAAG	CTTTCG	0.00936335629174
+CGAACA	TGTTCG	-0.544911197699
+CGAACC	GGTTCG	0.236185790077
+CGAACG	CGTTCG	0.0439294539998
+CGAAGA	TCTTCG	0.0635388313512
+CGAAGC	GCTTCG	-0.312490378281
+CGAATA	TATTCG	0.205254003141
+CGAATC	GATTCG	0.0901499283283
+CGACAA	TTGTCG	0.238050713336
+CGACAC	GTGTCG	-0.109116574917
+CGACAG	CTGTCG	-0.769009564377
+CGACCA	TGGTCG	0.0360443347963
+CGACCC	GGGTCG	0.216209586146
+CGACCG	CGGTCG	-0.00608417190027
+CGACGA	TCGTCG	0.109094760764
+CGACGC	GCGTCG	-0.0821660131027
+CGACTA	TAGTCG	-0.154626936131
+CGACTC	GAGTCG	-0.246273419905
+CGAGAA	TTCTCG	0.612418964386
+CGAGAC	GTCTCG	-0.0448240636466
+CGAGAG	CTCTCG	-0.751517796087
+CGAGCA	TGCTCG	-0.330608606967
+CGAGCC	GGCTCG	0.693245611149
+CGAGCG	CGCTCG	0.278683035903
+CGAGGA	TCCTCG	-0.187835599873
+CGAGGC	GCCTCG	-0.952734572307
+CGAGTA	TACTCG	0.184943150492
+CGAGTC	GACTCG	0.375230854406
+CGATAA	TTATCG	-0.393190761045
+CGATAC	GTATCG	0.0742054445512
+CGATAG	CTATCG	-0.519678403883
+CGATCA	TGATCG	0.0526959654511
+CGATCC	GGATCG	0.0664840380862
+CGATCG	CGATCG	0.15660945983
+CGATGA	TCATCG	-0.774933636073
+CGATGC	GCATCG	-0.391779119657
+CGATTA	TAATCG	-0.0474517133734
+CGATTC	GAATCG	0.235116843424
+CGCAAA	TTTGCG	-0.0809989053442
+CGCAAC	GTTGCG	0.914321615152
+CGCAAG	CTTGCG	0.902729943974
+CGCACA	TGTGCG	-0.643522000175
+CGCACC	GGTGCG	-0.127073746796
+CGCACG	CGTGCG	-0.771541292094
+CGCAGA	TCTGCG	0.165580560798
+CGCAGC	GCTGCG	0.0469455940157
+CGCATA	TATGCG	0.335290623274
+CGCATC	GATGCG	0.437930163851
+CGCCAA	TTGGCG	-0.109846650695
+CGCCAC	GTGGCG	-0.994485238576
+CGCCAG	CTGGCG	-0.142749553444
+CGCCCA	TGGGCG	1.05328824356
+CGCCCC	GGGGCG	0.0137809042937
+CGCCCG	CGGGCG	-0.583692912482
+CGCCGA	TCGGCG	0.403802926462
+CGCCGC	GCGGCG	0.0867379096548
+CGCCTA	TAGGCG	-0.0424714321252
+CGCCTC	GAGGCG	-0.196079793222
+CGCGAA	TTCGCG	0.198826174488
+CGCGAC	GTCGCG	0.0519079932143
+CGCGAG	CTCGCG	-0.0095714045129
+CGCGCA	TGCGCG	0.29733863628
+CGCGCC	GGCGCG	0.00534076408975
+CGCGCG	CGCGCG	0.133525541683
+CGCGGA	TCCGCG	0.0295590736422
+CGCGGC	GCCGCG	0.0211820057043
+CGCGTA	TACGCG	-0.00215667699398
+CGCGTC	GACGCG	0.177954856123
+CGCTAA	TTAGCG	-0.241135973823
+CGCTAC	GTAGCG	0.239902173491
+CGCTAG	CTAGCG	0.0596735417366
+CGCTCA	TGAGCG	-0.0533438512221
+CGCTCC	GGAGCG	0.0208450744523
+CGCTGA	TCAGCG	0.68121260517
+CGCTGC	GCAGCG	0.319030638925
+CGCTTA	TAAGCG	0.331742647103
+CGCTTC	GAAGCG	-0.220266241715
+CGGAAA	TTTCCG	-1.28580550145
+CGGAAC	GTTCCG	0.926154437465
+CGGAAG	CTTCCG	-0.532165941151
+CGGACA	TGTCCG	0.26801379322
+CGGACC	GGTCCG	0.208564671836
+CGGACG	CGTCCG	0.185658917881
+CGGAGA	TCTCCG	-0.0783602636003
+CGGAGC	GCTCCG	0.213447050966
+CGGATA	TATCCG	0.180933559441
+CGGATC	GATCCG	0.0115933607686
+CGGCAA	TTGCCG	-0.141333522478
+CGGCAC	GTGCCG	-0.0989810302207
+CGGCAG	CTGCCG	-0.315504532197
+CGGCCA	TGGCCG	0.520189196688
+CGGCCC	GGGCCG	0.0770477004768
+CGGCCG	CGGCCG	-0.0100293295169
+CGGCGA	TCGCCG	0.335335269266
+CGGCGC	GCGCCG	0.142338129608
+CGGCTA	TAGCCG	0.483570773894
+CGGCTC	GAGCCG	0.386036576617
+CGGGAA	TTCCCG	-0.464940358426
+CGGGAC	GTCCCG	0.447833530318
+CGGGAG	CTCCCG	0.0731420049918
+CGGGCA	TGCCCG	0.282329634235
+CGGGCC	GGCCCG	0.812549782764
+CGGGGA	TCCCCG	0.869420112553
+CGGGGC	GCCCCG	0.86056528281
+CGGGTA	TACCCG	-0.203275385127
+CGGGTC	GACCCG	-0.444672989957
+CGGTAA	TTACCG	-0.649508330752
+CGGTAC	GTACCG	-0.117712116536
+CGGTAG	CTACCG	0.281181384004
+CGGTCA	TGACCG	1.13661174882
+CGGTCC	GGACCG	0.0903119623658
+CGGTGA	TCACCG	-0.00769608361013
+CGGTGC	GCACCG	-0.800050077623
+CGGTTA	TAACCG	-0.240948411238
+CGGTTC	GAACCG	0.347248429131
+CGTAAA	TTTACG	0.413018459532
+CGTAAC	GTTACG	0.133628748349
+CGTAAG	CTTACG	-0.181667545798
+CGTACA	TGTACG	0.487388635308
+CGTACC	GGTACG	-0.0604415421058
+CGTACG	CGTACG	-0.23751294459
+CGTAGA	TCTACG	-0.125539098097
+CGTAGC	GCTACG	0.386530915361
+CGTATA	TATACG	-0.194131124774
+CGTATC	GATACG	-0.267160757641
+CGTCAA	TTGACG	-1.00863955903
+CGTCAC	GTGACG	-0.521674552853
+CGTCAG	CTGACG	-1.76038907438
+CGTCCA	TGGACG	0.0777766757824
+CGTCCC	GGGACG	0.803357191448
+CGTCGA	TCGACG	0.0477564437913
+CGTCGC	GCGACG	0.103336677444
+CGTCTA	TAGACG	-0.60967168838
+CGTCTC	GAGACG	-0.791340367
+CGTGAA	TTCACG	0.634022023138
+CGTGAC	GTCACG	0.443531335362
+CGTGAG	CTCACG	-0.289769453913
+CGTGCA	TGCACG	0.109747972914
+CGTGCC	GGCACG	0.464253873914
+CGTGGA	TCCACG	0.275127549406
+CGTGGC	GCCACG	0.531223592152
+CGTGTA	TACACG	0.642083015658
+CGTGTC	GACACG	0.136541839783
+CGTTAA	TTAACG	-0.497209435042
+CGTTAC	GTAACG	-0.0943435759699
+CGTTAG	CTAACG	-0.242661499343
+CGTTCA	TGAACG	0.504828978427
+CGTTCC	GGAACG	0.656549347858
+CGTTGA	TCAACG	0.482938477348
+CGTTGC	GCAACG	-0.32248741119
+CGTTTA	TAAACG	-0.413518821035
+CGTTTC	GAAACG	-0.819946564926
+CTAAAA	TTTTAG	-0.474998981835
+CTAAAC	GTTTAG	-0.0266242942178
+CTAAAG	CTTTAG	0.297713951577
+CTAACA	TGTTAG	-0.621545633382
+CTAACC	GGTTAG	0.63074726461
+CTAAGA	TCTTAG	0.597123137729
+CTAAGC	GCTTAG	0.459779530522
+CTAATA	TATTAG	0.051814051491
+CTAATC	GATTAG	-0.192871916975
+CTACAA	TTGTAG	0.415390381047
+CTACAC	GTGTAG	0.314508278002
+CTACAG	CTGTAG	-0.112671340464
+CTACCA	TGGTAG	-0.264705309307
+CTACCC	GGGTAG	0.552961906046
+CTACGA	TCGTAG	0.489104112382
+CTACGC	GCGTAG	-0.684139469516
+CTACTA	TAGTAG	-0.106050671268
+CTACTC	GAGTAG	-0.218538947551
+CTAGAA	TTCTAG	0.76196340125
+CTAGAC	GTCTAG	-0.314054292133
+CTAGAG	CTCTAG	0.170762667208
+CTAGCA	TGCTAG	-0.201084106705
+CTAGCC	GGCTAG	0.657253973787
+CTAGGA	TCCTAG	-0.343732437303
+CTAGGC	GCCTAG	-0.399983888794
+CTAGTA	TACTAG	0.279272209924
+CTAGTC	GACTAG	0.63773352209
+CTATAA	TTATAG	-0.540591918194
+CTATAC	GTATAG	0.0316033486072
+CTATAG	CTATAG	-0.110441744258
+CTATCA	TGATAG	-0.986583008494
+CTATCC	GGATAG	0.0938549592711
+CTATGA	TCATAG	1.19060286606
+CTATGC	GCATAG	0.77976599654
+CTATTA	TAATAG	0.494683632528
+CTATTC	GAATAG	-0.0659811786152
+CTCAAA	TTTGAG	-0.435334739428
+CTCAAC	GTTGAG	0.673295862992
+CTCAAG	CTTGAG	0.814663961939
+CTCACA	TGTGAG	-0.125711909834
+CTCACC	GGTGAG	-0.0607596963595
+CTCAGA	TCTGAG	0.550689436444
+CTCAGC	GCTGAG	-0.272692249315
+CTCATA	TATGAG	0.669202456635
+CTCATC	GATGAG	-0.214653088571
+CTCCAA	TTGGAG	-0.404875066778
+CTCCAC	GTGGAG	0.544704103667
+CTCCAG	CTGGAG	0.300968023705
+CTCCCA	TGGGAG	-0.166515521703
+CTCCCC	GGGGAG	-0.0107883337468
+CTCCGA	TCGGAG	0.475523145763
+CTCCGC	GCGGAG	-0.234881416584
+CTCCTA	TAGGAG	-0.440390954167
+CTCCTC	GAGGAG	0.252207292196
+CTCGAA	TTCGAG	0.458096440119
+CTCGAC	GTCGAG	-0.648461079866
+CTCGAG	CTCGAG	0.238421232408
+CTCGCA	TGCGAG	-0.770225157474
+CTCGCC	GGCGAG	0.367453385031
+CTCGGA	TCCGAG	0.339650543736
+CTCGGC	GCCGAG	0.421157785256
+CTCGTA	TACGAG	0.362284718227
+CTCGTC	GACGAG	-0.209973906057
+CTCTAA	TTAGAG	-0.585120608238
+CTCTAC	GTAGAG	-0.0381226169544
+CTCTCA	TGAGAG	0.412400836814
+CTCTCC	GGAGAG	-0.500495460815
+CTCTGA	TCAGAG	-0.0509816398658
+CTCTGC	GCAGAG	-0.137150029459
+CTCTTA	TAAGAG	-0.0995119601555
+CTCTTC	GAAGAG	0.0293315576627
+CTGAAA	TTTCAG	0.637905924395
+CTGAAC	GTTCAG	0.0801852652274
+CTGAAG	CTTCAG	1.29884632524
+CTGACA	TGTCAG	-1.45442440761
+CTGACC	GGTCAG	1.08710062922
+CTGAGA	TCTCAG	-0.182920626709
+CTGAGC	GCTCAG	-0.261916169948
+CTGATA	TATCAG	-1.5286762778
+CTGATC	GATCAG	-0.261819591815
+CTGCAA	TTGCAG	-0.367828379023
+CTGCAC	GTGCAG	-0.0851053710219
+CTGCAG	CTGCAG	0.017373979076
+CTGCCA	TGGCAG	-0.328262173072
+CTGCCC	GGGCAG	0.396136640158
+CTGCGA	TCGCAG	-0.236577233975
+CTGCGC	GCGCAG	-0.0340134961712
+CTGCTA	TAGCAG	-0.90401510954
+CTGCTC	GAGCAG	-0.482167659855
+CTGGAA	TTCCAG	0.480592453838
+CTGGAC	GTCCAG	0.124783447514
+CTGGCA	TGCCAG	-0.744896236415
+CTGGCC	GGCCAG	0.508550304958
+CTGGGA	TCCCAG	0.5595983837
+CTGGGC	GCCCAG	-0.420503142996
+CTGGTA	TACCAG	0.351352305298
+CTGGTC	GACCAG	-0.333524602904
+CTGTAA	TTACAG	-0.344930581819
+CTGTAC	GTACAG	0.422072688223
+CTGTCA	TGACAG	-1.32137018932
+CTGTCC	GGACAG	-0.0204497524117
+CTGTGA	TCACAG	-0.318233094975
+CTGTGC	GCACAG	-0.452689661808
+CTGTTA	TAACAG	-0.682203584522
+CTGTTC	GAACAG	-0.032586371998
+CTTAAA	TTTAAG	0.522514091698
+CTTAAC	GTTAAG	-1.0306648626
+CTTAAG	CTTAAG	1.38874859741
+CTTACA	TGTAAG	0.301805562881
+CTTACC	GGTAAG	0.0844070155044
+CTTAGA	TCTAAG	0.170474445016
+CTTAGC	GCTAAG	0.138185199667
+CTTATA	TATAAG	0.454300274475
+CTTATC	GATAAG	-1.35801301952
+CTTCAA	TTGAAG	0.19993761629
+CTTCAC	GTGAAG	0.108019941224
+CTTCCA	TGGAAG	0.362648019349
+CTTCCC	GGGAAG	0.131998098522
+CTTCGA	TCGAAG	-0.105193206331
+CTTCGC	GCGAAG	-0.390704103225
+CTTCTA	TAGAAG	-0.155899121808
+CTTCTC	GAGAAG	-0.0739598398458
+CTTGAA	TTCAAG	-0.0719916676968
+CTTGAC	GTCAAG	0.068351491377
+CTTGCA	TGCAAG	0.463268254042
+CTTGCC	GGCAAG	-0.766831412922
+CTTGGA	TCCAAG	0.314061693873
+CTTGGC	GCCAAG	0.405573462214
+CTTGTA	TACAAG	0.65851026313
+CTTGTC	GACAAG	0.421432544956
+CTTTAA	TTAAAG	0.0423904357395
+CTTTAC	GTAAAG	0.655866345313
+CTTTCA	TGAAAG	-0.707971281847
+CTTTCC	GGAAAG	-0.305665628725
+CTTTGA	TCAAAG	-0.390594122457
+CTTTGC	GCAAAG	-0.253785395266
+CTTTTA	TAAAAG	0.520772205443
+CTTTTC	GAAAAG	-0.0834230798663
+GAAAAA	TTTTTC	-0.394594492275
+GAAAAC	GTTTTC	-0.889752597605
+GAAACA	TGTTTC	0.981733612947
+GAAACC	GGTTTC	-0.0721495314535
+GAAAGA	TCTTTC	-0.475164148563
+GAAAGC	GCTTTC	-0.0120812417699
+GAAATA	TATTTC	0.128630511111
+GAAATC	GATTTC	-0.258506502151
+GAACAA	TTGTTC	0.072282920944
+GAACAC	GTGTTC	0.701467646999
+GAACCA	TGGTTC	-0.542881494194
+GAACCC	GGGTTC	0.340658710073
+GAACGA	TCGTTC	-0.199593582018
+GAACGC	GCGTTC	0.454300760466
+GAACTA	TAGTTC	-0.295720723936
+GAACTC	GAGTTC	1.20394131743
+GAAGAA	TTCTTC	0.506500337578
+GAAGAC	GTCTTC	0.0907880643499
+GAAGCA	TGCTTC	-1.19156928449
+GAAGCC	GGCTTC	0.397646726543
+GAAGGA	TCCTTC	0.253377264863
+GAAGGC	GCCTTC	-0.525939145222
+GAAGTA	TACTTC	0.662195710792
+GAAGTC	GACTTC	-0.419934443362
+GAATAA	TTATTC	0.123387568539
+GAATAC	GTATTC	-0.383087785228
+GAATCA	TGATTC	1.27939625155
+GAATCC	GGATTC	-0.0498963377435
+GAATGA	TCATTC	-0.620744593633
+GAATGC	GCATTC	1.04194079095
+GAATTA	TAATTC	-0.149020713486
+GAATTC	GAATTC	0.644297830158
+GACAAA	TTTGTC	-0.134936102515
+GACAAC	GTTGTC	-0.309021420318
+GACACA	TGTGTC	1.29887852736
+GACACC	GGTGTC	-0.787922948829
+GACAGA	TCTGTC	-0.863814253112
+GACAGC	GCTGTC	-0.676822231763
+GACATA	TATGTC	0.827736434537
+GACATC	GATGTC	-0.130237675939
+GACCAA	TTGGTC	-0.475509904338
+GACCAC	GTGGTC	2.00597812344
+GACCCA	TGGGTC	0.594605764824
+GACCCC	GGGGTC	-0.25038724278
+GACCGA	TCGGTC	0.077158904808
+GACCGC	GCGGTC	0.996665204047
+GACCTA	TAGGTC	-0.0140856102395
+GACCTC	GAGGTC	-0.0141400877685
+GACGAA	TTCGTC	-0.24855256907
+GACGAC	GTCGTC	-0.787091955547
+GACGCA	TGCGTC	-0.79119100016
+GACGCC	GGCGTC	-0.064943799736
+GACGGA	TCCGTC	0.507946533831
+GACGGC	GCCGTC	0.0996726291695
+GACGTA	TACGTC	-1.129683455
+GACGTC	GACGTC	-0.325314874102
+GACTAA	TTAGTC	1.9212514778
+GACTAC	GTAGTC	-0.489310082414
+GACTCA	TGAGTC	3.18513477299
+GACTCC	GGAGTC	0.00318724773961
+GACTGA	TCAGTC	-0.284654331124
+GACTGC	GCAGTC	0.253234563706
+GACTTA	TAAGTC	0.53783328072
+GAGAAA	TTTCTC	-0.107943987489
+GAGAAC	GTTCTC	0.763065903804
+GAGACA	TGTCTC	0.0238226221677
+GAGACC	GGTCTC	0.000346769354435
+GAGAGA	TCTCTC	-0.366063998677
+GAGAGC	GCTCTC	-0.0716698934044
+GAGATA	TATCTC	-0.479728321524
+GAGATC	GATCTC	0.647198822351
+GAGCAA	TTGCTC	0.278077669471
+GAGCAC	GTGCTC	0.442986942774
+GAGCCA	TGGCTC	-0.0889483545925
+GAGCCC	GGGCTC	-0.310590547764
+GAGCGA	TCGCTC	0.294194487089
+GAGCGC	GCGCTC	0.0827346373539
+GAGCTA	TAGCTC	-0.730332859895
+GAGCTC	GAGCTC	-0.199506840782
+GAGGAA	TTCCTC	0.543305692157
+GAGGAC	GTCCTC	-0.349616541062
+GAGGCA	TGCCTC	-0.105697066717
+GAGGCC	GGCCTC	0.858282680338
+GAGGGA	TCCCTC	-0.124510451688
+GAGGGC	GCCCTC	0.323767770759
+GAGGTA	TACCTC	-0.844332688078
+GAGTAA	TTACTC	0.758120437701
+GAGTAC	GTACTC	0.104306241785
+GAGTCA	TGACTC	3.58563459353
+GAGTCC	GGACTC	0.339341967911
+GAGTGA	TCACTC	0.066165066026
+GAGTGC	GCACTC	-0.514233890221
+GAGTTA	TAACTC	0.671276468041
+GATAAA	TTTATC	-1.16448423447
+GATAAC	GTTATC	-0.476107186466
+GATACA	TGTATC	-0.172248071576
+GATACC	GGTATC	0.922018577146
+GATAGA	TCTATC	-0.678310902513
+GATAGC	GCTATC	-1.0739725457
+GATATA	TATATC	0.323693340333
+GATATC	GATATC	-0.13829875248
+GATCAA	TTGATC	-0.263370217811
+GATCAC	GTGATC	-0.383180034094
+GATCCA	TGGATC	-0.418998551659
+GATCCC	GGGATC	0.459522351525
+GATCGA	TCGATC	0.174830497643
+GATCGC	GCGATC	-0.0493224221696
+GATCTA	TAGATC	-0.114539458553
+GATGAA	TTCATC	0.113123798729
+GATGAC	GTCATC	0.420960424606
+GATGCA	TGCATC	-0.740595126486
+GATGCC	GGCATC	-0.0146831740442
+GATGGA	TCCATC	-0.44789540412
+GATGGC	GCCATC	-0.976518189565
+GATGTA	TACATC	0.315735783162
+GATTAA	TTAATC	-0.768397991554
+GATTAC	GTAATC	-0.159295927856
+GATTCA	TGAATC	1.91569273635
+GATTCC	GGAATC	0.028441642914
+GATTGA	TCAATC	0.586533848838
+GATTGC	GCAATC	0.0359466523255
+GATTTA	TAAATC	-0.43347973413
+GCAAAA	TTTTGC	1.57245669278
+GCAAAC	GTTTGC	-2.41567256424
+GCAACA	TGTTGC	-0.0212126198682
+GCAACC	GGTTGC	0.167308203586
+GCAAGA	TCTTGC	0.230496794516
+GCAAGC	GCTTGC	-0.279906494617
+GCAATA	TATTGC	1.03289974237
+GCACAA	TTGTGC	0.168080718089
+GCACAC	GTGTGC	-0.126747337538
+GCACCA	TGGTGC	-0.370948557776
+GCACCC	GGGTGC	0.486126157128
+GCACGA	TCGTGC	0.115382004439
+GCACGC	GCGTGC	-0.384704484018
+GCACTA	TAGTGC	-0.732242903933
+GCAGAA	TTCTGC	-0.441638279756
+GCAGAC	GTCTGC	0.577048501416
+GCAGCA	TGCTGC	0.0643315561072
+GCAGCC	GGCTGC	0.0719820075452
+GCAGGA	TCCTGC	0.034078237827
+GCAGGC	GCCTGC	0.0751087587256
+GCAGTA	TACTGC	-0.140907462619
+GCATAA	TTATGC	-0.216901949377
+GCATAC	GTATGC	-0.417062652314
+GCATCA	TGATGC	-0.509813438412
+GCATCC	GGATGC	0.157724061262
+GCATGA	TCATGC	0.311792502655
+GCATGC	GCATGC	0.0273955313285
+GCATTA	TAATGC	-0.038383608378
+GCCAAA	TTTGGC	-0.0363591138599
+GCCAAC	GTTGGC	-0.48002846426
+GCCACA	TGTGGC	0.478891420952
+GCCACC	GGTGGC	0.046888332347
+GCCAGA	TCTGGC	-0.325597697717
+GCCAGC	GCTGGC	-0.199926154665
+GCCATA	TATGGC	-0.951578017522
+GCCCAA	TTGGGC	1.10276199076
+GCCCAC	GTGGGC	0.676887327364
+GCCCCA	TGGGGC	0.35936393068
+GCCCCC	GGGGGC	-0.398755700607
+GCCCGA	TCGGGC	1.06470764147
+GCCCGC	GCGGGC	0.47965310734
+GCCCTA	TAGGGC	0.435710100752
+GCCGAA	TTCGGC	0.0377072993252
+GCCGAC	GTCGGC	0.22768512467
+GCCGCA	TGCGGC	0.115534020858
+GCCGCC	GGCGGC	-0.732735770282
+GCCGGA	TCCGGC	-0.38520088123
+GCCGGC	GCCGGC	-0.185586613226
+GCCGTA	TACGGC	-0.332516669945
+GCCTAA	TTAGGC	0.0973747347692
+GCCTAC	GTAGGC	0.0804171248438
+GCCTCA	TGAGGC	0.64675754844
+GCCTCC	GGAGGC	-0.235791468531
+GCCTGA	TCAGGC	0.871312603586
+GCCTTA	TAAGGC	0.66398535763
+GCGAAA	TTTCGC	0.216500601374
+GCGAAC	GTTCGC	0.176665221767
+GCGACA	TGTCGC	-0.206525750848
+GCGACC	GGTCGC	0.867020226992
+GCGAGA	TCTCGC	0.39834591882
+GCGAGC	GCTCGC	-0.0047486305465
+GCGATA	TATCGC	-0.212587858109
+GCGCAA	TTGCGC	0.249470449698
+GCGCAC	GTGCGC	-0.576649184299
+GCGCCA	TGGCGC	-0.370226281141
+GCGCCC	GGGCGC	0.381404303337
+GCGCGA	TCGCGC	0.244534402725
+GCGCGC	GCGCGC	0.0601779542866
+GCGCTA	TAGCGC	-0.184211933615
+GCGGAA	TTCCGC	-0.14917985125
+GCGGAC	GTCCGC	-0.0437387199239
+GCGGCA	TGCCGC	0.124099157921
+GCGGCC	GGCCGC	-0.868770297365
+GCGGGA	TCCCGC	-0.330802769301
+GCGGTA	TACCGC	-0.293792191657
+GCGTAA	TTACGC	-0.308730784191
+GCGTAC	GTACGC	-0.493705045035
+GCGTCA	TGACGC	-1.40894601949
+GCGTCC	GGACGC	0.650092537073
+GCGTGA	TCACGC	0.485604781064
+GCGTTA	TAACGC	-0.16348782665
+GCTAAA	TTTAGC	0.0633281870082
+GCTAAC	GTTAGC	0.0446987046769
+GCTACA	TGTAGC	0.139951985661
+GCTACC	GGTAGC	-0.582191094114
+GCTAGA	TCTAGC	0.425355245083
+GCTAGC	GCTAGC	0.544395057547
+GCTATA	TATAGC	-0.122878741617
+GCTCAA	TTGAGC	0.357260216643
+GCTCAC	GTGAGC	-0.362077448999
+GCTCCA	TGGAGC	0.234865357242
+GCTCCC	GGGAGC	0.410356067134
+GCTCGA	TCGAGC	0.523443780445
+GCTCTA	TAGAGC	-0.503747504809
+GCTGAA	TTCAGC	0.292869298061
+GCTGAC	GTCAGC	-0.149757550449
+GCTGCA	TGCAGC	0.0619331271517
+GCTGCC	GGCAGC	0.116465057744
+GCTGGA	TCCAGC	0.00864608267586
+GCTGTA	TACAGC	-0.0817336518524
+GCTTAA	TTAAGC	0.664269635099
+GCTTAC	GTAAGC	-0.573448259095
+GCTTCA	TGAAGC	-0.527498202661
+GCTTCC	GGAAGC	0.0466074141633
+GCTTGA	TCAAGC	-0.223589414042
+GCTTTA	TAAAGC	0.213691377209
+GGAAAA	TTTTCC	-2.05613895077
+GGAAAC	GTTTCC	-0.31843375495
+GGAACA	TGTTCC	0.878948290896
+GGAACC	GGTTCC	-0.41733947832
+GGAAGA	TCTTCC	0.292117518108
+GGAATA	TATTCC	-0.310943681072
+GGACAA	TTGTCC	0.390414984809
+GGACAC	GTGTCC	0.174292489707
+GGACCA	TGGTCC	-0.523110637041
+GGACCC	GGGTCC	0.5472877955
+GGACGA	TCGTCC	0.53114236268
+GGACTA	TAGTCC	0.793132421151
+GGAGAA	TTCTCC	-0.163792630984
+GGAGAC	GTCTCC	0.206727520829
+GGAGCA	TGCTCC	0.456407748447
+GGAGCC	GGCTCC	0.295421561451
+GGAGGA	TCCTCC	0.328364640098
+GGAGTA	TACTCC	-0.151742588951
+GGATAA	TTATCC	-0.0585638459289
+GGATAC	GTATCC	-0.309412420714
+GGATCA	TGATCC	-0.480562203251
+GGATCC	GGATCC	0.335487965753
+GGATGA	TCATCC	0.177410986413
+GGATTA	TAATCC	-1.1939070998
+GGCAAA	TTTGCC	0.000624112947654
+GGCAAC	GTTGCC	-1.15978921165
+GGCACA	TGTGCC	0.693216860903
+GGCACC	GGTGCC	-0.881907424195
+GGCAGA	TCTGCC	0.202002559673
+GGCATA	TATGCC	0.0739074460809
+GGCCAA	TTGGCC	0.233613871256
+GGCCAC	GTGGCC	0.349420517541
+GGCCCA	TGGGCC	0.76801497851
+GGCCCC	GGGGCC	0.27541536492
+GGCCGA	TCGGCC	0.663089975748
+GGCCTA	TAGGCC	0.219005607787
+GGCGAA	TTCGCC	0.226797469513
+GGCGAC	GTCGCC	0.127570597429
+GGCGCA	TGCGCC	-0.0446136938549
+GGCGCC	GGCGCC	-0.0745645953993
+GGCGGA	TCCGCC	-0.226505856389
+GGCGTA	TACGCC	-0.566154746416
+GGCTAA	TTAGCC	0.410090602753
+GGCTAC	GTAGCC	0.167521133316
+GGCTCA	TGAGCC	-0.182448764472
+GGCTGA	TCAGCC	0.305315803898
+GGCTTA	TAAGCC	0.0549165179339
+GGGAAA	TTTCCC	0.211185115876
+GGGAAC	GTTCCC	0.0891726153306
+GGGACA	TGTCCC	0.798353329738
+GGGACC	GGTCCC	0.327460829345
+GGGAGA	TCTCCC	-0.00772602123998
+GGGATA	TATCCC	0.042485553485
+GGGCAA	TTGCCC	-0.398865388366
+GGGCAC	GTGCCC	0.531369020502
+GGGCCA	TGGCCC	0.421850727294
+GGGCCC	GGGCCC	0.13715245724
+GGGCGA	TCGCCC	-0.207341953811
+GGGCTA	TAGCCC	0.860581648468
+GGGGAA	TTCCCC	0.414828826681
+GGGGAC	GTCCCC	1.09337858056
+GGGGCA	TGCCCC	0.188790419324
+GGGGGA	TCCCCC	0.564505418018
+GGGGTA	TACCCC	0.337102528271
+GGGTAA	TTACCC	0.152772959878
+GGGTAC	GTACCC	0.0850292801245
+GGGTCA	TGACCC	-0.277523149632
+GGGTGA	TCACCC	0.389877602576
+GGGTTA	TAACCC	-1.1488463254
+GGTAAA	TTTACC	-1.93485319848
+GGTAAC	GTTACC	-0.444349645812
+GGTACA	TGTACC	0.533161182312
+GGTACC	GGTACC	-0.307030306627
+GGTAGA	TCTACC	0.0409651099541
+GGTATA	TATACC	-1.16022327914
+GGTCAA	TTGACC	0.139153520691
+GGTCAC	GTGACC	-0.450515748733
+GGTCCA	TGGACC	0.169460723222
+GGTCGA	TCGACC	-0.122888767331
+GGTCTA	TAGACC	-0.0850652207286
+GGTGAA	TTCACC	0.524001085704
+GGTGAC	GTCACC	-0.17925252457
+GGTGCA	TGCACC	-0.7324301946
+GGTGGA	TCCACC	0.377800190353
+GGTGTA	TACACC	0.617772381551
+GGTTAA	TTAACC	0.156217996285
+GGTTAC	GTAACC	-0.890319968576
+GGTTCA	TGAACC	-0.551493468635
+GGTTGA	TCAACC	0.213366284121
+GGTTTA	TAAACC	-0.015482073629
+GTAAAA	TTTTAC	1.18200993615
+GTAAAC	GTTTAC	-3.59764423275
+GTAACA	TGTTAC	0.863881272261
+GTAAGA	TCTTAC	0.696118619435
+GTAATA	TATTAC	-0.0790481100302
+GTACAA	TTGTAC	0.959255249776
+GTACAC	GTGTAC	-0.381103239165
+GTACCA	TGGTAC	0.673058575835
+GTACGA	TCGTAC	0.303418617214
+GTACTA	TAGTAC	0.368286056561
+GTAGAA	TTCTAC	0.777466438187
+GTAGAC	GTCTAC	-0.260392489063
+GTAGCA	TGCTAC	-0.557171886797
+GTAGGA	TCCTAC	0.249764914617
+GTAGTA	TACTAC	1.10613154186
+GTATAA	TTATAC	-0.225205794143
+GTATAC	GTATAC	-0.709411981795
+GTATCA	TGATAC	-0.102084079336
+GTATGA	TCATAC	0.883480966804
+GTATTA	TAATAC	0.112117340424
+GTCAAA	TTTGAC	0.143115014765
+GTCAAC	GTTGAC	-2.14211821008
+GTCACA	TGTGAC	0.438253073163
+GTCAGA	TCTGAC	-0.426249226609
+GTCATA	TATGAC	0.201153560659
+GTCCAA	TTGGAC	0.0470205002071
+GTCCAC	GTGGAC	-0.21284193494
+GTCCCA	TGGGAC	1.6176606758
+GTCCGA	TCGGAC	0.114722967227
+GTCCTA	TAGGAC	0.771952812089
+GTCGAA	TTCGAC	0.724088020994
+GTCGAC	GTCGAC	0.251614152656
+GTCGCA	TGCGAC	0.675711700536
+GTCGGA	TCCGAC	0.193393621986
+GTCGTA	TACGAC	0.0186019337447
+GTCTAA	TTAGAC	-0.187936713171
+GTCTCA	TGAGAC	-0.491335152028
+GTCTGA	TCAGAC	0.360208311696
+GTCTTA	TAAGAC	0.0625579178154
+GTGAAA	TTTCAC	-0.563337549375
+GTGAAC	GTTCAC	-0.349524060758
+GTGACA	TGTCAC	-0.410586341492
+GTGAGA	TCTCAC	-1.0355009926
+GTGATA	TATCAC	-0.736410349306
+GTGCAA	TTGCAC	-0.260755290625
+GTGCAC	GTGCAC	-0.744407650846
+GTGCCA	TGGCAC	-1.07262578178
+GTGCGA	TCGCAC	-0.164782650071
+GTGCTA	TAGCAC	-0.662274879805
+GTGGAA	TTCCAC	0.176989759501
+GTGGCA	TGCCAC	-0.159364738365
+GTGGGA	TCCCAC	0.988068726378
+GTGGTA	TACCAC	-0.246902705808
+GTGTAA	TTACAC	-0.891262927583
+GTGTCA	TGACAC	-0.0379293872633
+GTGTGA	TCACAC	-1.95184795358
+GTGTTA	TAACAC	-1.3607239668
+GTTAAA	TTTAAC	-0.186873285101
+GTTAAC	GTTAAC	0.482940767605
+GTTACA	TGTAAC	0.329573937825
+GTTAGA	TCTAAC	-1.00236498509
+GTTATA	TATAAC	-0.163258685865
+GTTCAA	TTGAAC	0.577729784916
+GTTCCA	TGGAAC	0.42827050466
+GTTCGA	TCGAAC	-0.160584230436
+GTTCTA	TAGAAC	0.628168497995
+GTTGAA	TTCAAC	0.0122596223788
+GTTGCA	TGCAAC	1.28935831187
+GTTGGA	TCCAAC	0.501609010251
+GTTGTA	TACAAC	0.536022722898
+GTTTAA	TTAAAC	-0.00554204032662
+GTTTCA	TGAAAC	-0.106888470485
+GTTTGA	TCAAAC	-0.313019133275
+GTTTTA	TAAAAC	0.992384413805
+TAAAAA	TTTTTA	0.579463327305
+TAAACA	TGTTTA	-2.43807962232
+TAAAGA	TCTTTA	-0.0559249115822
+TAAATA	TATTTA	-2.16727500814
+TAACAA	TTGTTA	0.5362496048
+TAACCA	TGGTTA	1.25918224499
+TAACGA	TCGTTA	-0.245420018049
+TAACTA	TAGTTA	0.608128184593
+TAAGAA	TTCTTA	1.02556160172
+TAAGCA	TGCTTA	-0.00178390331913
+TAAGGA	TCCTTA	0.837198657402
+TAAGTA	TACTTA	0.0721724774768
+TAATAA	TTATTA	-0.189941492074
+TAATCA	TGATTA	0.004309724706
+TAATGA	TCATTA	-0.927698594817
+TAATTA	TAATTA	-1.12948086023
+TACAAA	TTTGTA	-0.668160668233
+TACACA	TGTGTA	-1.5497729664
+TACAGA	TCTGTA	-0.00599134182996
+TACATA	TATGTA	-1.12195560298
+TACCAA	TTGGTA	-0.281401801063
+TACCCA	TGGGTA	0.279140164003
+TACCGA	TCGGTA	-0.674861229256
+TACCTA	TAGGTA	0.232148610987
+TACGAA	TTCGTA	-0.107764774043
+TACGCA	TGCGTA	-0.519175400062
+TACGGA	TCCGTA	-0.402472215133
+TACGTA	TACGTA	0.541316396612
+TACTAA	TTAGTA	-0.135239105598
+TACTCA	TGAGTA	0.639564666607
+TACTGA	TCAGTA	-0.482886609395
+TAGAAA	TTTCTA	0.0445682798936
+TAGACA	TGTCTA	-0.45261373791
+TAGAGA	TCTCTA	-0.0954516438137
+TAGATA	TATCTA	-0.532370750539
+TAGCAA	TTGCTA	-0.652336647837
+TAGCCA	TGGCTA	0.291364926411
+TAGCGA	TCGCTA	0.149053684558
+TAGCTA	TAGCTA	0.686821117845
+TAGGAA	TTCCTA	0.597312086206
+TAGGCA	TGCCTA	-0.0392374245184
+TAGGGA	TCCCTA	-0.898776106474
+TAGTAA	TTACTA	-0.0409191001809
+TAGTCA	TGACTA	1.32002868308
+TAGTGA	TCACTA	0.306784253534
+TATAAA	TTTATA	-0.121630582814
+TATACA	TGTATA	-0.557527238557
+TATAGA	TCTATA	-0.29108877277
+TATATA	TATATA	-0.239193588677
+TATCAA	TTGATA	-1.21281286807
+TATCCA	TGGATA	-0.143758324237
+TATCGA	TCGATA	-0.316281900517
+TATGAA	TTCATA	0.567777510974
+TATGCA	TGCATA	0.382062812319
+TATGGA	TCCATA	0.18823076363
+TATTAA	TTAATA	0.370202791227
+TATTCA	TGAATA	-0.0890453547066
+TATTGA	TCAATA	-3.17710597328
+TCAAAA	TTTTGA	0.303875411059
+TCAACA	TGTTGA	-2.00254784618
+TCAAGA	TCTTGA	1.4720641259
+TCACAA	TTGTGA	0.295459860043
+TCACCA	TGGTGA	0.210574562759
+TCACGA	TCGTGA	-0.515739048241
+TCAGAA	TTCTGA	-0.292759366807
+TCAGCA	TGCTGA	-0.620868456293
+TCAGGA	TCCTGA	0.0391644847042
+TCATAA	TTATGA	0.46490835305
+TCATCA	TGATGA	0.156189576126
+TCATGA	TCATGA	-0.28992182543
+TCCAAA	TTTGGA	0.372353469926
+TCCACA	TGTGGA	-0.303324084643
+TCCAGA	TCTGGA	1.11791545491
+TCCCAA	TTGGGA	1.0120690282
+TCCCCA	TGGGGA	0.475206609554
+TCCCGA	TCGGGA	-0.218933163821
+TCCGAA	TTCGGA	0.231675785391
+TCCGCA	TGCGGA	-0.0565746658942
+TCCGGA	TCCGGA	-0.534349085125
+TCCTAA	TTAGGA	0.426713370402
+TCCTCA	TGAGGA	-0.101009274429
+TCGAAA	TTTCGA	0.385602827192
+TCGACA	TGTCGA	0.567716035117
+TCGAGA	TCTCGA	-0.0811772342038
+TCGCAA	TTGCGA	0.471042033643
+TCGCCA	TGGCGA	-0.433031956525
+TCGCGA	TCGCGA	-0.10259765071
+TCGGAA	TTCCGA	0.152092661722
+TCGGCA	TGCCGA	-0.277526003969
+TCGTAA	TTACGA	0.6872310151
+TCGTCA	TGACGA	-0.633071677798
+TCTAAA	TTTAGA	-0.119569291318
+TCTACA	TGTAGA	0.531537891496
+TCTAGA	TCTAGA	-0.582228401295
+TCTCAA	TTGAGA	0.0181823636774
+TCTCCA	TGGAGA	-0.0990443536722
+TCTGAA	TTCAGA	0.818735079074
+TCTGCA	TGCAGA	-0.708985771217
+TCTTAA	TTAAGA	0.684573742868
+TCTTCA	TGAAGA	0.231633308785
+TGAAAA	TTTTCA	-0.590753014937
+TGAACA	TGTTCA	0.143205522462
+TGACAA	TTGTCA	-0.109523687459
+TGACCA	TGGTCA	1.47045933497
+TGAGAA	TTCTCA	-0.555364387516
+TGAGCA	TGCTCA	-0.498941620454
+TGATAA	TTATCA	-1.78189703088
+TGATCA	TGATCA	-0.58090561671
+TGCAAA	TTTGCA	-0.642834024972
+TGCACA	TGTGCA	-1.10325139721
+TGCCAA	TTGGCA	-0.349619012912
+TGCCCA	TGGGCA	0.294732505972
+TGCGAA	TTCGCA	-0.0604088395824
+TGCGCA	TGCGCA	0.0208781532991
+TGCTAA	TTAGCA	-0.902952269579
+TGGAAA	TTTCCA	-1.65646848688
+TGGACA	TGTCCA	0.212014771381
+TGGCAA	TTGCCA	-0.299231706691
+TGGCCA	TGGCCA	0.0133271697043
+TGGGAA	TTCCCA	-0.0169051652559
+TGGTAA	TTACCA	-0.741148671428
+TGTAAA	TTTACA	-1.90833491575
+TGTACA	TGTACA	-0.781209085217
+TGTCAA	TTGACA	-1.54128600175
+TGTGAA	TTCACA	-2.24458672601
+TGTTAA	TTAACA	-1.12490394498
+TTAAAA	TTTTAA	-0.0119631686901
+TTACAA	TTGTAA	0.81347268468
+TTAGAA	TTCTAA	0.271221556202
+TTATAA	TTATAA	0.774386643995
+TTCAAA	TTTGAA	0.809237031692
+TTCCAA	TTGGAA	0.158724969294
+TTCGAA	TTCGAA	-0.405674192258
+TTGAAA	TTTCAA	-0.306551492839
+TTGCAA	TTGCAA	0.851414898595
+TTTAAA	TTTAAA	0.479575745295
--- a/kmersvm/scripts/kmersvm_train.py	Mon Aug 20 21:42:29 2012 -0400
+++ b/kmersvm/scripts/kmersvm_train.py	Sun Jun 16 18:06:14 2013 -0400
@@ -754,7 +754,8 @@
 	sids = sids_pos + sids_neg
 
 	if options.weight == 0:
-		options.weight = 1 + log(nneg/npos)
+		#DEBUGGED by dlee 02/17/13
+		options.weight = 1 + log(nneg/float(npos))
 
 	if options.quiet == False:
 		sys.stderr.write('SVM parameters:\n')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/scripts/kmersvm_train_kfb_copy.py	Sun Jun 16 18:06:14 2013 -0400
@@ -0,0 +1,894 @@
+#!/usr/bin/env python
+"""
+	kmersvm_train.py; train a support vector machine using shogun toolbox
+	Copyright (C) 2011 Dongwon Lee
+
+	This program is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+"""
+
+
+
+import sys
+import optparse
+import random
+import numpy
+from math import log, exp
+
+from libkmersvm import *
+try:
+	from shogun.PreProc import SortWordString, SortUlongString
+except ImportError:
+	from shogun.Preprocessor import SortWordString, SortUlongString
+from shogun.Kernel import CommWordStringKernel, CommUlongStringKernel, \
+		CombinedKernel
+		
+from shogun.Features import StringWordFeatures, StringUlongFeatures, \
+		StringCharFeatures, CombinedFeatures, DNA, Labels
+from shogun.Classifier import MSG_INFO, MSG_ERROR
+try:
+	from shogun.Classifier import SVMLight
+except ImportError:
+	from shogun.Classifier import LibSVM
+
+"""
+global variables
+"""
+g_kmers = []
+g_rcmap = []
+
+
+def kmerid2kmer(kmerid, kmerlen):
+	"""decode an integer kmerid into its k-mer string
+
+	The id is read as a base-4 number whose digits stand for
+	A=0, C=1, G=2, T=3, most significant digit first.
+
+	Arguments:
+	kmerid -- integer, id of k-mer
+	kmerlen -- integer, length of k-mer
+
+	Return:
+	kmer string of length kmerlen
+	"""
+
+	alphabet = "ACGT"
+	kmer = ''
+	remaining = kmerid
+
+	#peel off base-4 digits from the least significant end and prepend
+	#each decoded nucleotide, so the string ends up most significant first
+	for _ in xrange(kmerlen):
+		digit = remaining % 4
+		kmer = alphabet[digit] + kmer
+		remaining = int((remaining-digit)/4)
+
+	return kmer
+
+
+def kmer2kmerid(kmer, kmerlen):
+	"""encode a k-mer string as its integer kmerid
+
+	The k-mer is read as a base-4 number with A=0, C=1, G=2, T=3,
+	most significant digit first.
+
+	Arguments:
+	kmer -- string, k-mer made of the upper-case letters A, C, G, T
+	kmerlen -- integer, length of k-mer (not used by the computation)
+
+	Return:
+	id of k-mer
+	"""
+
+	digit_of = {'A':0, 'C':1, 'G':2, 'T':3}
+	digits = [digit_of[nt] for nt in kmer]
+
+	#fold the digits left to right: each step shifts the running id by
+	#one base-4 position and adds the next digit
+	return reduce(lambda acc, digit: (4*acc+digit), digits)
+
+
+def get_rcmap(kmerid, kmerlen):
+	"""map a kmerid to the representative of its reverse complement pair, on-the-fly
+
+	The k-mer and its reverse complement are both turned into ids and
+	the smaller of the two ids is used as the representative.
+
+	Arguments:
+	kmerid -- integer, id of k-mer
+	kmerlen -- integer, length of k-mer
+
+	Return:
+	integer kmerid after mapping to its reverse complement
+	"""
+
+	#id -> k-mer string -> reverse complement string -> id
+	rc_kmer = revcomp(kmerid2kmer(kmerid, kmerlen))
+	rc_id = kmer2kmerid(rc_kmer, kmerlen)
+
+	return rc_id if rc_id < kmerid else kmerid
+
+
+def non_redundant_word_features(feats, kmerlen):
+	"""convert the features from Shogun toolbox to non-redundant word features (handle reverse complements)
+
+	Every k-mer id in every feature vector is replaced by the entry that
+	the module-level g_rcmap table holds for it, then a SortWordString
+	preprocessor is attached and applied. feats is modified in place and
+	also returned.
+
+	Arguments:
+	feats -- StringWordFeatures
+	kmerlen -- integer, length of k-mer (not used here; the mapping is taken from g_rcmap)
+
+	Return:
+	StringWordFeatures after converting reverse complement k-mer ids
+	"""
+
+	rcmap = g_rcmap
+
+	for i in xrange(feats.get_num_vectors()):
+		nf = [rcmap[int(kmerid)] for kmerid in feats.get_feature_vector(i)]
+
+		#the remapped ids are stored back as unsigned 16-bit integers ('u2')
+		feats.set_feature_vector(numpy.array(nf, numpy.dtype('u2')), i)
+
+	preproc = SortWordString()
+	preproc.init(feats)
+	#try the add_preproc/apply_preproc spelling first and fall back to
+	#add_preprocessor/apply_preprocessor when the attribute is missing
+	#(presumably the names differ between Shogun releases -- TODO confirm)
+	try:
+		feats.add_preproc(preproc)
+		feats.apply_preproc()
+	except AttributeError:
+		feats.add_preprocessor(preproc)
+		feats.apply_preprocessor()	
+
+	return feats
+
+
+def non_redundant_ulong_features(feats, kmerlen):
+	"""convert the features from Shogun toolbox to non-redundant ulong features
+
+	Every k-mer id in every feature vector is replaced by the value
+	get_rcmap() computes for it (no precomputed table is used), then a
+	SortUlongString preprocessor is attached and applied. feats is
+	modified in place and also returned.
+
+	Arguments:
+	feats -- StringUlongFeatures
+	kmerlen -- integer, length of k-mer
+
+	Return:
+	StringUlongFeatures after converting reverse complement k-mer ids
+	"""
+
+	for i in xrange(feats.get_num_vectors()):
+		nf = [get_rcmap(int(kmerid), kmerlen) \
+				for kmerid in feats.get_feature_vector(i)]
+
+		#the remapped ids are stored back as unsigned 64-bit integers ('u8')
+		feats.set_feature_vector(numpy.array(nf, numpy.dtype('u8')), i)
+
+	preproc = SortUlongString()
+	preproc.init(feats)
+	#try the add_preproc/apply_preproc spelling first and fall back to
+	#add_preprocessor/apply_preprocessor when the attribute is missing
+	#(presumably the names differ between Shogun releases -- TODO confirm)
+	try:
+		feats.add_preproc(preproc)
+		feats.apply_preproc()
+	except AttributeError:
+		feats.add_preprocessor(preproc)
+		feats.apply_preprocessor()
+
+	return feats
+
+
+def svm_learn(kernel, labels, options):
+	"""train SVM using SVMLight or LibSVM
+
+	Arguments:
+	kernel -- kernel object from Shogun toolbox
+	labels -- list of labels
+	options -- object containing option data (svmC, epsilon, weight, quiet)
+
+	Return:
+	trained svm object
+	"""
+
+	#SVMLight is only bound when its import succeeded at module load;
+	#otherwise looking up the name raises NameError and LibSVM is used
+	try: 
+		svm=SVMLight(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))
+	except NameError:
+		svm=LibSVM(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))
+
+	#show training messages on stderr unless quiet mode was requested
+	if options.quiet == False:
+		svm.io.set_loglevel(MSG_INFO)
+		svm.io.set_target_to_stderr()
+
+	svm.set_epsilon(options.epsilon)
+	svm.parallel.set_num_threads(1)
+	#a weight other than 1 makes the two C values differ: svmC and svmC*weight
+	#NOTE(review): presumably the order is (C for negatives, C for positives),
+	#matching the "-w" help text -- confirm against the Shogun API
+	if options.weight != 1.0:
+		svm.set_C(options.svmC, options.svmC*options.weight)
+	svm.train()
+
+	#after training only error messages are reported
+	if options.quiet == False:
+		svm.io.set_loglevel(MSG_ERROR)
+
+	return svm
+
+
+def _get_spectrum_features(seqs, kmerlen):
+	"""build the spectrum features for one k-mer length (internal)
+
+	k-mers of length up to 8 are stored as word features, longer ones as
+	ulong features; in both cases reverse complement k-mers are merged by
+	the matching non_redundant_* function.
+
+	Arguments:
+	seqs -- list of sequences
+	kmerlen -- integer, length of k-mer
+
+	Return:
+	StringWord(Ulong)Features after treatment of redundant reverse complement k-mers
+	"""
+
+	if kmerlen > 8:
+		feature_class = StringUlongFeatures
+		merge_revcomp = non_redundant_ulong_features
+	else:
+		feature_class = StringWordFeatures
+		merge_revcomp = non_redundant_word_features
+
+	raw_feats = StringCharFeatures(seqs, DNA)
+	kmer_feats = feature_class(DNA)
+	kmer_feats.obtain_from_char(raw_feats, kmerlen-1, kmerlen, 0, False)
+
+	return merge_revcomp(kmer_feats, kmerlen)
+
+
+def get_spectrum_features(seqs, options):
+	"""generate spectrum features (wrapper)
+
+	Arguments:
+	seqs -- list of sequences
+	options -- object containing option data (only kmerlen is read)
+
+	Return:
+	whatever _get_spectrum_features returns for options.kmerlen
+	"""
+	return _get_spectrum_features(seqs, options.kmerlen)
+
+
+def get_weighted_spectrum_features(seqs, options):
+	"""generate weighted spectrum features
+
+	Builds one spectrum feature object for every k-mer length from
+	options.kmerlen to options.kmerlen2 (inclusive).
+
+	Arguments:
+	seqs -- list of sequences
+	options -- object containing option data (kmerlen, kmerlen2)
+
+	Return:
+	list of feature objects, one per k-mer length in increasing order
+
+	Side effect:
+	for every k <= 8 the module-level g_kmers and g_rcmap tables are
+	rebuilt, so after the call they describe the last k <= 8 processed
+	"""
+	global g_kmers
+	global g_rcmap
+
+	subfeats_list = []
+
+	for k in xrange(options.kmerlen, options.kmerlen2+1):
+		#the word-feature conversion reads the reverse complement table
+		#from g_rcmap, so the table is regenerated for the current k first
+		if k <= 8:
+			g_kmers = generate_kmers(k)
+			g_rcmap = generate_rcmap_table(k, g_kmers)
+
+		#_get_spectrum_features builds its own StringCharFeatures from
+		#seqs, so no character features are created here
+		subfeats_list.append(_get_spectrum_features(seqs, k))
+
+	return subfeats_list
+
+
+def get_spectrum_kernel(feats, options):
+	"""build spectrum kernel with non-redundant k-mer list (removing reverse complement)
+
+	The kernel is initialized with feats on both sides.
+
+	Arguments:
+	feats -- feature object
+	options -- object containing option data (only kmerlen is read)
+
+	Return:
+	CommWordStringKernel when kmerlen <= 8, CommUlongStringKernel otherwise
+	"""
+	kernel_class = CommWordStringKernel if options.kmerlen <= 8 else CommUlongStringKernel
+
+	return kernel_class(feats, feats)
+
+
+def get_weighted_spectrum_kernel(subfeats_list, options):
+	"""build weighted spectrum kernel with non-redundant k-mer list (removing reverse complement)
+
+	One sub-kernel is created per k-mer length between options.kmerlen
+	and options.kmerlen2 (inclusive) and all sub-kernels get the same
+	weight, 1/(number of sub-kernels).
+
+	Arguments:
+	subfeats_list -- list of sub-feature objects
+	options -- object containing option data (kmerlen, kmerlen2)
+
+	Return:
+	CombinedKernel of CommWord(Ulong)StringKernel, initialized on the
+	CombinedFeatures assembled from subfeats_list
+	"""
+	combined_kernel = CombinedKernel()
+	combined_feats = CombinedFeatures()
+
+	for part in subfeats_list:
+		combined_feats.append_feature_obj(part)
+
+	nkernels = 0
+	for k in xrange(options.kmerlen, options.kmerlen2+1):
+		#word kernel for short k-mers, ulong kernel for k > 8
+		kernel_class = CommWordStringKernel if k <= 8 else CommUlongStringKernel
+		combined_kernel.append_kernel(kernel_class(10, False))
+		nkernels += 1
+
+	combined_kernel.init(combined_feats, combined_feats)
+
+	uniform_weights = numpy.array([1/float(nkernels)]*nkernels, numpy.dtype('float64'))
+	combined_kernel.set_subkernel_weights(uniform_weights)
+
+	return combined_kernel
+		
+
+def init_spectrum_kernel(kern, feats_lhs, feats_rhs):
+	"""initialize spectrum kernel (wrapper function)
+
+	Arguments:
+	kern -- kernel object to (re)initialize
+	feats_lhs -- feature object for the left-hand side
+	feats_rhs -- feature object for the right-hand side
+	"""
+	kern.init(feats_lhs, feats_rhs)
+
+
+def init_weighted_spectrum_kernel(kern, subfeats_list_lhs, subfeats_list_rhs):
+	"""initialize weighted spectrum kernel (wrapper function)
+
+	Each list of sub-feature objects is packed into its own
+	CombinedFeatures object before the kernel is initialized with them.
+
+	Arguments:
+	kern -- kernel object to (re)initialize
+	subfeats_list_lhs -- list of sub-feature objects for the left-hand side
+	subfeats_list_rhs -- list of sub-feature objects for the right-hand side
+	"""
+	combined_lhs = CombinedFeatures()
+	combined_rhs = CombinedFeatures()
+
+	for combined, parts in ((combined_lhs, subfeats_list_lhs), (combined_rhs, subfeats_list_rhs)):
+		for part in parts:
+			combined.append_feature_obj(part)
+
+	kern.init(combined_lhs, combined_rhs)
+
+
+def get_sksvm_weights(svm, feats, options):
+	"""calculate the SVM weight vector of spectrum kernel
+
+	For every support vector the k-mer ids in its feature vector are
+	counted, the count vector is scaled to unit Euclidean length and
+	added to the result multiplied by the support vector's alpha.
+
+	Arguments:
+	svm -- trained svm object (get_alphas, get_support_vectors are used)
+	feats -- feature object the svm was trained on
+	options -- object containing option data (only kmerlen is read)
+
+	Return:
+	numpy array of doubles with one entry per possible k-mer id
+	"""
+	nkmers = 2**(2*options.kmerlen)
+	alphas = svm.get_alphas()
+	sv_ids = svm.get_support_vectors()
+
+	weights = numpy.zeros(nkmers, numpy.double)
+
+	for i, alpha in enumerate(alphas):
+		counts = numpy.zeros(nkmers, numpy.double)
+		for kmerid in feats.get_feature_vector(int(sv_ids[i])):
+			counts[int(kmerid)] += 1
+		weights += (alpha*counts/numpy.sqrt(numpy.sum(counts**2)))
+
+	return weights
+
+def get_feature_counts(svm, feats, options):
+	"""calculate feature counts for SVs
+
+	Sums, over all support vectors, how often each k-mer id occurs in the
+	support vector's feature vector and writes the totals to
+	<outputname>_counts.out, one line per k-mer: k-mer, reverse
+	complement, count (tab separated). Only ids that map to themselves
+	under the reverse complement mapping are written.
+
+	Arguments:
+	svm -- trained svm object (only get_support_vectors is used)
+	feats -- feature object the svm was trained on
+	options -- object containing option data (kmerlen, outputname, sort)
+
+	Return:
+	None
+	"""
+	kmerlen = options.kmerlen
+	support_vector_ids = svm.get_support_vectors()
+	output = options.outputname + "_counts.out"
+
+	w = numpy.zeros(2**(2*kmerlen), numpy.double)
+
+	for svid in support_vector_ids:
+		for kmerid in feats.get_feature_vector(int(svid)):
+			w[int(kmerid)] += 1
+
+	#with the sort option the k-mers are listed by decreasing count
+	if options.sort:
+		kmerids = sorted(range(len(w)), key=lambda i: w[i], reverse=True)
+	else:
+		kmerids = range(len(w))
+
+	f = open(output, 'w')
+	try:
+		if kmerlen <= 8:
+			#short k-mers: use the precomputed module-level tables
+			for i in kmerids:
+				if i == g_rcmap[i]:
+					f.write('\t'.join( [g_kmers[i], revcomp(g_kmers[i]), str(w[i])] ) + '\n')
+		else:
+			#long k-mers have no precomputed tables; map ids on the fly
+			for i in kmerids:
+				if i == get_rcmap(i, kmerlen):
+					kmer = kmerid2kmer(i, kmerlen)
+					f.write('\t'.join( [kmer, revcomp(kmer), str(w[i])] ) + '\n')
+	finally:
+		#close the file even when a write fails
+		f.close()
+	
+
+
+def get_wsksvm_weights(svm, subfeats_list, options):
+	"""calculate the SVM weight vector of weighted spectrum kernel
+
+	For each k-mer length the weight vector is computed as for the plain
+	spectrum kernel (alpha times the unit-length k-mer count vector of
+	each support vector, summed) and then divided by the number of
+	k-mer lengths.
+
+	Arguments:
+	svm -- trained svm object (get_alphas, get_support_vectors are used)
+	subfeats_list -- list of sub-feature objects, one per k-mer length
+	options -- object containing option data (kmerlen, kmerlen2)
+
+	Return:
+	list of numpy arrays, one weight vector per k-mer length
+	"""
+	alphas = svm.get_alphas()
+	sv_ids = svm.get_support_vectors()
+	kmerlens = range(options.kmerlen, options.kmerlen2+1)
+	nlens = len(kmerlens)
+
+	weights = []
+	for idx, k in enumerate(kmerlens):
+		subfeats = subfeats_list[idx]
+		nkmers = 2**(2*k)
+		w = numpy.zeros(nkmers, numpy.double)
+
+		for i, alpha in enumerate(alphas):
+			counts = numpy.zeros(nkmers, numpy.double)
+			for kmerid in subfeats.get_feature_vector(int(sv_ids[i])):
+				counts[int(kmerid)] += 1
+			w += (alpha*counts/numpy.sqrt(numpy.sum(counts**2)))
+
+		w /= nlens
+		weights.append(w)
+
+	return weights
+
+
+def save_header(f, bias, A, B, options):
+	"""write the commented parameter header of a weight file
+
+	Arguments:
+	f -- writable file object
+	bias -- value written as "#bias="
+	A -- value written as "#A="
+	B -- value written as "#B="
+	options -- object containing option data (ktype, kmerlen, and kmerlen2 when ktype is 2)
+	"""
+	def put(key, value):
+		#one "#key=value" line per parameter
+		f.write("#" + key + "=" + str(value) + "\n")
+
+	f.write("#parameters:\n")
+	put("kernel", options.ktype)
+	put("kmerlen", options.kmerlen)
+	#the upper k-mer length only applies to the weighted kernel (ktype 2)
+	if options.ktype == 2:
+		put("kmerlen2", options.kmerlen2)
+	put("bias", bias)
+	put("A", A)
+	put("B", B)
+	f.write("#NOTE: k-mers with large negative weights are also important. They can be found at the bottom of the list.\n")
+	f.write("#k-mer\trevcomp\tSVM-weight\n")
+
+
+def save_sksvm_weights(w, bias, A, B, options):
+	"""save the SVM weight vector from spectrum kernel
+
+	Writes <outputname>_weights.out: the parameter header followed by one
+	line per k-mer (k-mer, reverse complement, weight; tab separated).
+	Only ids that map to themselves under the reverse complement mapping
+	are written.
+
+	Arguments:
+	w -- sequence of weights indexed by k-mer id
+	bias -- value stored in the header
+	A -- value stored in the header
+	B -- value stored in the header
+	options -- object containing option data (outputname, kmerlen, sort, ktype)
+	"""
+	output = options.outputname + "_weights.out"
+	kmerlen = options.kmerlen
+
+	#with the sort option the k-mers are listed by decreasing weight
+	if options.sort:
+		kmerids = sorted(range(len(w)), key=lambda i: w[i], reverse=True)
+	else:
+		kmerids = range(len(w))
+
+	f = open(output, 'w')
+	try:
+		save_header(f, bias, A, B, options)
+
+		if kmerlen <= 8:
+			#short k-mers: use the precomputed module-level tables
+			for i in kmerids:
+				if i == g_rcmap[i]:
+					f.write('\t'.join( [g_kmers[i], revcomp(g_kmers[i]), str(w[i])] ) + '\n')
+		else:
+			#long k-mers have no precomputed tables; map ids on the fly
+			for i in kmerids:
+				if i == get_rcmap(i, kmerlen):
+					kmer = kmerid2kmer(i, kmerlen)
+					f.write('\t'.join( [kmer, revcomp(kmer), str(w[i])] ) + '\n')
+	finally:
+		#close the file even when a write fails
+		f.close()
+
+
+def save_wsksvm_weights(w, bias, A, B, options):
+	"""save the SVM weight vector from weighted spectrum kernel
+
+	Writes <outputname>_weights.out: the parameter header followed, for
+	each k-mer length from kmerlen to kmerlen2, by one line per k-mer
+	(k-mer, reverse complement, weight; tab separated). Only ids that map
+	to themselves under the reverse complement mapping are written.
+
+	Arguments:
+	w -- list of weight sequences, one per k-mer length, each indexed by k-mer id
+	bias -- value stored in the header
+	A -- value stored in the header
+	B -- value stored in the header
+	options -- object containing option data (outputname, kmerlen, kmerlen2, sort, ktype)
+
+	Side effect:
+	for every k <= 8 the module-level g_kmers and g_rcmap tables are rebuilt
+	"""
+	output = options.outputname + "_weights.out"
+	kmerlen = options.kmerlen
+	kmerlen2 = options.kmerlen2
+
+	global g_kmers
+	global g_rcmap
+
+	f = open(output, 'w')
+	try:
+		save_header(f, bias, A, B, options)
+
+		kmerlens = range(kmerlen, kmerlen2+1)
+		for idx in xrange(len(kmerlens)):
+			k = kmerlens[idx]
+			subw = w[idx]
+
+			#with the sort option the k-mers are listed by decreasing weight
+			if options.sort:
+				subw_sorted = sorted(zip(range(len(subw)), subw), key=lambda x: x[1], reverse=True)
+			else:
+				subw_sorted = zip(range(len(subw)), subw)
+
+			if k <= 8:
+				#short k-mers: regenerate the lookup tables for this k
+				g_kmers = generate_kmers(k)
+				g_rcmap = generate_rcmap_table(k, g_kmers)
+				for i in map(lambda x: x[0], subw_sorted):
+					if i == g_rcmap[i]:
+						f.write('\t'.join( [g_kmers[i], revcomp(g_kmers[i]), str(subw[i])] ) + "\n")
+			else:
+				#long k-mers have no precomputed tables; map ids on the fly
+				for i in map(lambda x: x[0], subw_sorted):
+					if i == get_rcmap(i, k):
+						kmer = kmerid2kmer(i, k)
+						f.write('\t'.join( [kmer, revcomp(kmer), str(subw[i])] ) + "\n")
+	finally:
+		#close the file even when a write fails
+		f.close()
+
+
+def save_predictions(output, preds, cvs):
+	"""save the cross-validation predictions as a tab separated table
+
+	Arguments:
+	output -- string, name of the file to write
+	preds -- list of records; items 1, 2 and 3 of each record are written
+	         under the columns seq_id, SVM score and label
+	cvs -- list read at the same position as preds for the NCV column
+	"""
+	header = ["#seq_id", "SVM score", "label", "NCV"]
+
+	f = open(output, 'w')
+	f.write('\t'.join(header) + "\n")
+	for i, pred in enumerate(preds):
+		row = [pred[1], str(pred[2]), str(pred[3]), str(cvs[i])]
+		f.write('\t'.join(row) + "\n")
+	f.close()
+
+
+def generate_cv_list(ncv, n1, n2):
+	"""assign every sample to one of the N cross-validation folds
+
+	Positives (indices 0..n1-1) and negatives (indices n1..n1+n2-1) are
+	shuffled separately with the random module, then fold numbers
+	0, 1, ..., ncv-1, 0, 1, ... are dealt out along the shuffled
+	positives followed by the shuffled negatives.
+
+	Arguments:
+	ncv -- integer, number of cross-validation
+	n1 -- integer, number of positives
+	n2 -- integer, number of negatives
+
+	Return:
+	a list of N-fold cross validation (fold number of each sample index)
+	"""
+
+	pos_order = range(n1)
+	neg_order = range(n1,n1+n2)
+
+	random.shuffle(pos_order)
+	random.shuffle(neg_order)
+
+	folds = [0] * (n1+n2)
+	fold = 0
+	for sample_idx in pos_order + neg_order:
+		folds[sample_idx] = fold
+
+		#advance to the next fold, wrapping around after the last one
+		fold += 1
+		if fold == ncv:
+			fold = 0
+
+	return folds
+
+
+def split_cv_list(cvlist, icv, data):
+	"""split data into training and test set for one cross-validation fold
+
+	Items whose entry in cvlist equals icv form the test set; all other
+	items form the training set. The original order is kept in both.
+
+	Arguments:
+	cvlist -- list, cross-validation list
+	icv -- integer, cross-validation set of interest
+	data -- list, data set to be split
+
+	Return:
+	a list of training set and a list of test set
+	"""
+
+	training = []
+	held_out = []
+
+	for i, item in enumerate(data):
+		target = held_out if cvlist[i] == icv else training
+		target.append(item)
+
+	return training, held_out
+
+
+def LMAI(svms, labels, prior0, prior1):
+	"""fitting svms to sigmoid function (improved version introduced by Lin 2003)
+
+	The parameters are found by minimizing the cross-entropy between the
+	smoothed targets and the sigmoid outputs with Newton steps (using a
+	Hessian regularized by sigma) and a backtracking line search.
+
+	Arguments:
+	svms -- list of svm scores
+	labels -- list of labels (1 is treated as positive, anything else as negative)
+	prior0 -- prior of negative set
+	prior1 -- prior of positive set
+
+	Return:
+	A, B parameter of 1/(1+exp(A*SVM+B))
+	"""
+
+	#parameter settings
+	maxiter = 100	#maximum number of Newton iterations
+	minstep = 1e-10	#smallest step size tried by the line search
+	sigma = 1e-3	#added to the Hessian diagonal
+
+	#smoothed targets used instead of hard 1/0 labels
+	hiTarget = (prior1+1.0)/float(prior1+2.0)
+	loTarget = 1/float(prior0+2.0)
+
+	t = [0]*len(labels)
+	for i in xrange(len(labels)):
+		if labels[i] == 1:
+			t[i] = hiTarget
+		else:
+			t[i] = loTarget
+
+	#starting point and objective value at the starting point
+	A = 0.0
+	B = log((prior0+1.0)/float(prior1+1.0))
+	fval = 0.0
+
+	for i in xrange(len(labels)):
+		fApB = svms[i]*A+B
+		#both branches compute the same quantity; the form is chosen so
+		#that exp() is only called with a non-positive argument
+		if fApB >= 0:
+			fval += (t[i]*fApB+log(1+exp(-fApB)))
+		else:
+			fval += ((t[i]-1)*fApB+log(1+exp(fApB)))
+
+
+	for it in xrange(maxiter):
+		#print "iteration:", it
+		#Update Gradient and Hessian (use H' = H + sigma I)
+		h11 = sigma
+		h22 = sigma
+		h21 = 0.0
+		g1 = 0.0
+		g2 = 0.0
+
+		for i in xrange(len(labels)):
+			fApB = svms[i]*A+B
+			#p = 1/(1+exp(fApB)) and q = 1-p, again written so that
+			#exp() only sees a non-positive argument
+			if fApB >= 0:
+				p = exp(-fApB) / float(1.0+exp(-fApB))
+				q = 1.0 / float(1.0 + exp(-fApB))
+			else:
+				p = 1.0 / float(1.0 + exp(fApB))
+				q = exp(fApB) / float(1.0+exp(fApB))
+			d2 = p*q
+			h11 += (svms[i]*svms[i]*d2)
+			h22 += d2
+			h21 += (svms[i]*d2)
+			d1 = t[i]-p
+			g1 += (svms[i]*d1)
+			g2 += d1
+
+		#Stopping criteria
+		if (abs(g1)<1e-5) and (abs(g2)<1e-5):
+			break
+
+		#Newton direction: solve the 2x2 system H' * (dA, dB) = -(g1, g2)
+		det = h11*h22-h21*h21
+		dA = -(h22*g1-h21*g2)/float(det)
+		dB = -(-h21*g1+h11*g2)/float(det)
+		gd = g1*dA+g2*dB
+		#backtracking line search: halve the step until the objective
+		#decreases sufficiently or the step gets smaller than minstep
+		stepsize=1
+		while stepsize >= minstep:
+			newA = A+stepsize*dA
+			newB = B+stepsize*dB
+			newf = 0.0
+
+			for i in xrange(len(labels)):
+				fApB = svms[i]*newA+newB
+				if fApB >= 0:
+					newf += (t[i]*fApB + log(1+exp(-fApB)))
+				else:
+					newf += ((t[i]-1)*fApB + log(1+exp(fApB)))
+
+			#sufficient-decrease test; accept the step when it passes
+			if newf < (fval+0.0001*stepsize*gd):
+				A=newA
+				B=newB
+				fval=newf
+				break
+			else:
+				stepsize=stepsize/float(2.0)
+
+		#Line search fails
+		if stepsize < minstep:
+			#print "Line search fails"
+			break
+
+	#if it >= maxiter:
+	#	print "Reaching maximum iterations"
+
+	return A, B
+
+
+def wsksvm_classify(seqs, svm, kern, feats, options):
+	"""classify the given sequences with the weighted spectrum kernel
+
+	Arguments:
+	seqs -- list of sequences to classify
+	svm -- trained svm object
+	kern -- kernel object the svm was trained with (re-initialized here)
+	feats -- list of sub-feature objects of the training set
+	options -- object containing option data
+
+	Return:
+	list obtained from svm.apply().get_labels()
+	"""
+	test_feats = get_weighted_spectrum_features(seqs, options)
+	init_weighted_spectrum_kernel(kern, feats, test_feats)
+
+	outputs = svm.apply().get_labels()
+	return outputs.tolist()
+
+
+def score_seq(s, svmw, kmerlen):
+	"""score one sequence against a single set of SVM weights
+
+	Every k-mer of the sequence is mapped through the module-level
+	g_rcmap table and counted; the score is the dot product of the
+	weights with the count vector, divided by the Euclidean length of
+	the count vector.
+
+	Arguments:
+	s -- string, DNA sequence
+	svmw -- numpy array, SVM weights
+	kmerlen -- integer, length of k-mer of SVM weight
+
+	Return:
+	SVM score
+	"""
+
+	global g_rcmap
+
+	counts = [0]*(2**(2*kmerlen))
+	nwindows = len(s)-kmerlen+1
+	for start in xrange(nwindows):
+		kmerid = kmer2kmerid(s[start:start+kmerlen], kmerlen)
+		counts[ g_rcmap[kmerid] ] += 1
+
+	vec = numpy.array(counts, numpy.double)
+
+	return numpy.dot(svmw, vec)/numpy.sqrt(numpy.sum(vec**2))
+
+
+def sksvm_classify(seqs, svm, kern, feats, options):
+	"""classify the given sequences with the spectrum kernel
+
+	Arguments:
+	seqs -- list of sequences to classify
+	svm -- trained svm object
+	kern -- kernel object the svm was trained with (re-initialized when kmerlen > 8)
+	feats -- feature object of the training set
+	options -- object containing option data
+
+	Return:
+	list of SVM scores, one per sequence
+	"""
+	if options.kmerlen > 8:
+		test_feats = get_spectrum_features(seqs, options)
+		init_spectrum_kernel(kern, feats, test_feats)
+
+		return svm.apply().get_labels().tolist()
+
+	#scoring with an explicit weight vector is much faster when the
+	#length of kmer is short, and SVs are many
+	svmw = get_sksvm_weights(svm, feats, options)
+	scores = []
+	for s in seqs:
+		scores.append(score_seq(s, svmw, options.kmerlen)+svm.get_bias())
+	return scores
+
+
+def main(argv = sys.argv):
+	"""command line entry point: train an SVM and save its k-mer weights
+
+	Steps:
+	1. parse the options and the two FASTA file names from argv
+	2. optionally run N-fold cross-validation (-v), save the predictions
+	   to <outputname>_cvpred.out and, with -p, fit the sigmoid
+	   parameters A and B on the cross-validation scores
+	3. train on all sequences and save the weights with the chosen
+	   kernel's save function
+
+	Arguments:
+	argv -- list of command line words, program name first (default sys.argv)
+
+	Invalid command lines are reported through parser.error(), which
+	prints the usage line and the message and exits with status 2.
+	"""
+	global g_kmers
+	global g_rcmap
+
+	usage = "Usage: %prog [options] POSITIVE_SEQ NEGATIVE_SEQ"
+	desc  = "1. take two files(FASTA format) as input, 2. train an SVM and store the trained SVM weights"
+	parser = optparse.OptionParser(usage=usage, description=desc)
+	parser.add_option("-t", dest="ktype", type="int", default=1, \
+			help="set the type of kernel, 1:Spectrum, 2:Weighted Spectrums (default=1.Spectrum)")
+
+	parser.add_option("-C", dest="svmC", type="float", default=1, \
+			help="set the regularization parameter svmC (default=1)")
+
+	parser.add_option("-e", dest="epsilon", type="float", default=0.00001, \
+			help="set the precision parameter epsilon (default=0.00001)")
+
+	parser.add_option("-w", dest="weight", type="float", default=0.0, \
+			help="set the weight for positive set (default=auto, 1+log(N/P))")
+
+	parser.add_option("-k", dest="kmerlen", type="int",default=6, \
+			help="set the (min) length of k-mer for (weighted) spectrum kernel (default = 6)")
+
+	parser.add_option("-K", dest="kmerlen2", type="int",default=8, \
+			help="set the max length of k-mer for weighted spectrum kernel (default = 8)")
+
+	parser.add_option("-n", dest="outputname", default="kmersvm_output", \
+			help="set the name of output files (default=kmersvm_output)")
+
+	parser.add_option("-v", dest="ncv", type="int", default=0, \
+			help="if set, it will perform N-fold cross-validation and generate a prediction file (default = 0)")
+
+	parser.add_option("-p", dest="posteriorp", default=False, action="store_true", \
+			help="estimate parameters for posterior probability with N-CV. this option requires -v option to be set (default=false)")
+
+	parser.add_option("-r", dest="rseed", type="int", default=1, \
+			help="set the random number seed for cross-validation (-p option) (default=1)")
+
+	parser.add_option("-q", dest="quiet", default=False, action="store_true", \
+			help="supress messages (default=false)")
+
+	#the weights are sorted on their signed value, largest first
+	parser.add_option("-s", dest="sort", default=False, action="store_true", \
+			help="sort the kmers by SVM weights in descending order (default=false)")
+
+	ktype_str = ["", "Spectrum", "Weighted Spectrums"]
+
+	#parse the words that were handed in rather than always re-reading sys.argv
+	(options, args) = parser.parse_args(argv[1:])
+
+	if len(args) == 0:
+		parser.print_help()
+		sys.exit(0)
+
+	#parser.error() does not return: it prints usage + message and exits
+	if len(args) != 2:
+		parser.error("incorrect number of arguments")
+
+	if options.posteriorp and options.ncv == 0:
+		parser.error("posterior probability estimation requires N-fold CV process (-v option should be set)")
+
+	#reject unknown kernels before ktype is used to index ktype_str
+	if options.ktype not in (1, 2):
+		parser.error("unknown kernel type (-t should be 1 or 2)")
+
+	random.seed(options.rseed)
+
+	#set global variable: the lookup tables for short k-mers
+	if (options.ktype == 1) and (options.kmerlen <= 8):
+		g_kmers = generate_kmers(options.kmerlen)
+		g_rcmap = generate_rcmap_table(options.kmerlen, g_kmers)
+
+	posf = args[0]
+	negf = args[1]
+
+	seqs_pos, sids_pos = read_fastafile(posf)
+	seqs_neg, sids_neg = read_fastafile(negf)
+	npos = len(seqs_pos)
+	nneg = len(seqs_neg)
+	seqs = seqs_pos + seqs_neg
+	sids = sids_pos + sids_neg
+
+	#the automatic weight 1+log(N/P) is undefined when either set is empty,
+	#and an SVM cannot be trained on a single class anyway
+	if npos == 0 or nneg == 0:
+		parser.error("both POSITIVE_SEQ and NEGATIVE_SEQ must contain at least one sequence")
+
+	if options.weight == 0:
+		#DEBUGGED by dlee 02/17/13
+		options.weight = 1 + log(nneg/float(npos))
+
+	if options.quiet == False:
+		sys.stderr.write('SVM parameters:\n')
+		sys.stderr.write('  kernel-type: ' + str(options.ktype) + "." + ktype_str[options.ktype] + '\n')
+		sys.stderr.write('  svm-C: ' + str(options.svmC) + '\n')
+		sys.stderr.write('  epsilon: ' + str(options.epsilon) + '\n')
+		sys.stderr.write('  weight: ' + str(options.weight) + '\n')
+		sys.stderr.write('\n')
+
+		sys.stderr.write('Other options:\n')
+		sys.stderr.write('  kmerlen: ' + str(options.kmerlen) + '\n')
+		if options.ktype == 2:
+			sys.stderr.write('  kmerlen2: ' + str(options.kmerlen2) + '\n')
+		sys.stderr.write('  outputname: ' + options.outputname + '\n')
+		sys.stderr.write('  posteriorp: ' + str(options.posteriorp) + '\n')
+		if options.ncv > 0:
+			sys.stderr.write('  ncv: ' + str(options.ncv) + '\n')
+			sys.stderr.write('  rseed: ' + str(options.rseed) + '\n')
+		sys.stderr.write('  sorted-weight: ' + str(options.sort) + '\n')
+
+		sys.stderr.write('\n')
+
+		sys.stderr.write('Input args:\n')
+		sys.stderr.write('  positive sequence file: ' + posf + '\n')
+		sys.stderr.write('  negative sequence file: ' + negf + '\n')
+		sys.stderr.write('\n')
+
+		sys.stderr.write('numer of total positive seqs: ' + str(npos) + '\n')
+		sys.stderr.write('numer of total negative seqs: ' + str(nneg) + '\n')
+		sys.stderr.write('\n')
+
+	#generate labels
+	labels = [1]*npos + [-1]*nneg
+
+	#pick the kernel specific functions (ktype was validated above)
+	if options.ktype == 1:
+		get_features = get_spectrum_features
+		get_kernel = get_spectrum_kernel
+		get_weights = get_sksvm_weights
+		save_weights = save_sksvm_weights
+		svm_classify = sksvm_classify
+	else:
+		get_features = get_weighted_spectrum_features
+		get_kernel = get_weighted_spectrum_kernel
+		get_weights = get_wsksvm_weights
+		save_weights = save_wsksvm_weights
+		svm_classify = wsksvm_classify
+
+	A = B = 0
+	if options.ncv > 0:
+		if options.quiet == False:
+			sys.stderr.write('..Cross-validation\n')
+
+		cvlist = generate_cv_list(options.ncv, npos, nneg)
+		labels_cv = []
+		preds_cv = []
+		sids_cv = []
+		indices_cv = []
+		for icv in xrange(options.ncv):
+			#split data into training and test set
+			seqs_tr, seqs_te = split_cv_list(cvlist, icv, seqs)
+			labs_tr, labs_te = split_cv_list(cvlist, icv, labels)
+			sids_tr, sids_te = split_cv_list(cvlist, icv, sids)
+			indices_tr, indices_te = split_cv_list(cvlist, icv, range(len(seqs)))
+
+			#train SVM
+			feats_tr = get_features(seqs_tr, options)
+			kernel_tr = get_kernel(feats_tr, options)
+			svm_cv = svm_learn(kernel_tr, labs_tr, options)
+
+			preds_cv = preds_cv + svm_classify(seqs_te, svm_cv, kernel_tr, feats_tr, options)
+
+			labels_cv = labels_cv + labs_te
+			sids_cv = sids_cv + sids_te
+			indices_cv = indices_cv + indices_te
+
+		#bring the predictions back into input order before saving them
+		output_cvpred = options.outputname + "_cvpred.out"
+		prediction_results = sorted(zip(indices_cv, sids_cv, preds_cv, labels_cv), key=lambda p: p[0])
+		save_predictions(output_cvpred, prediction_results, cvlist)
+
+		if options.posteriorp:
+			A, B = LMAI(preds_cv, labels_cv, labels_cv.count(-1), labels_cv.count(1))
+
+			if options.quiet == False:
+				sys.stderr.write('Estimated Parameters:\n')
+				sys.stderr.write('  A: ' + str(A) + '\n')
+				sys.stderr.write('  B: ' + str(B) + '\n')
+
+	if options.quiet == False:
+		sys.stderr.write('..SVM weights\n')
+
+	feats = get_features(seqs, options)
+	kernel = get_kernel(feats, options)
+	svm = svm_learn(kernel, labels, options)
+	#feature counts need a single feature object; with the weighted
+	#kernel feats is a list of feature objects, so the step is skipped
+	if options.ktype == 1:
+		get_feature_counts(svm, feats, options)
+	w = get_weights(svm, feats, options)
+	b = svm.get_bias()
+
+	save_weights(w, b, A, B, options)
+
+if __name__=='__main__': main()
Binary file kmersvm/scripts/libkmersvm.pyc has changed
--- a/kmersvm/scripts/nullseq_generate.py	Mon Aug 20 21:42:29 2012 -0400
+++ b/kmersvm/scripts/nullseq_generate.py	Sun Jun 16 18:06:14 2013 -0400
@@ -71,8 +71,7 @@
 def sample_sequences(positions, buildname, basedir, options):
 	"""
 	"""
-	rpt_err = options.rpt_err
-	gc_err = options.gc_err
+	max_fails = 20
 	max_trys = options.max_trys
 	norpt = options.norpt
 	nogc = options.nogc
@@ -121,6 +120,12 @@
 		else:
 			count = options.count
 
+		#initialize parameters
+		#added by dlee 2/17/13	
+		ncfails = 0
+		rpt_err = options.rpt_err
+		gc_err = options.gc_err
+
 		sampled_positions = []
 		while len(sampled_positions) < count:
 			sampled_prof = random.choice(profiles)
@@ -128,6 +133,15 @@
 			sampled_gc = sampled_prof[2]
 			sampled_rpt = sampled_prof[3]
 
+			#relax rpt_err and gc_err if it keeps failing to sample a region
+			#added by dlee 2/17/13	
+			if ncfails >= max_fails:
+				if options.quiet == False:
+					sys.stderr.write("reached max_fail. relax gc and rpt err criteria\n")
+				ncfails = 0
+				rpt_err += 0.01
+				gc_err += 0.01
+
 			rpt_err_allowed = int(rpt_err*sampled_len)
 			gc_err_allowed = int(gc_err*sampled_len)
 			trys = 0
@@ -156,9 +170,17 @@
 
 				sampled_positions.append((chrom, pos, pos_e))
 
+				#reset the counter of consecutive fails
+				#added by dlee 2/17/13	
+				ncfails = 0
+
 				#print trys, chrom, pos, pos_e, sampled_len, pos_rpt, sampled_rpt, pos_gc, sampled_gc
 				break
 			else:
+				#increase the counter of consecutive fails
+				#added by dlee 2/17/13	
+				ncfails += 1
+
 				if options.quiet == False:
 					sys.stderr.write(' '.join(["fail to sample from", \
 							"len=", str(sampled_len), \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/tomtom.xml	Sun Jun 16 18:06:14 2013 -0400
@@ -0,0 +1,84 @@
+<tool id="tomtom" name="Tomtom" version="1.0.0">
+
+	<description>Tomtom tool for motif searching</description>
+	<command>/home/galaxy/meme/bin/tomtom -no-ssc -internal -text -verbosity 1 -thresh $thresh 
+		#if str($cut.cut_choice) == 'e.value':
+			-evalue
+		#end if
+
+		#if str($dist.dist) == 'ed':
+			-dist ed
+		#elif str($dist.dist) == 'sw':
+			-dist sandelin
+		#else
+			-dist pearson	
+		#end if
+	
+	 $input1 /home/galaxy/meme/db/combined_db.meme > tomtom_out.txt
+	 
+	 </command>
+	 <inputs>
+	 	<param format="txt" name="input1" type="data" label="PWM File"/>
+		<param type="float" value="0.5" label="Threshold" name="thresh"/>
+	 	<conditional name="cut">
+	 		<param name="cut_choice" type="select" label="Threshold Type">
+	 			<option value="q.value" selected="true">q-value</option>
+	 			<option value="e.value">E-value</option>
+	 		</param>	
+	 	</conditional>
+	 	
+	 	<conditional name="dist">
+	 		<param name="dist" type="select" label="Distance Metric">
+	 			<option value="pearson" selected="true">Pearson</option>
+	 			<option value="ed">Euclidean</option>
+	 			<option value="sw">Sandelin-Wasserman Function</option>
+	 		</param>
+	 	</conditional>
+	 </inputs>
+	 
+	 <outputs>
+	 	<data format="txt" name="Tomtom Results" from_work_dir="tomtom_out.txt" label="${tool.name} on ${on_string}: Tomtom Matches"/>
+
+	 </outputs>
+	<help>
+
+Tomtom is a tool for comparing a DNA motif to a database of known motifs.  For an in-depth explanation of the Tomtom software see here_.
+
+----
+
+**Recommended Settings**
+
+We recommend that most users keep the Tomtom defaults: the q-value as the threshold type, a cutoff of 0.5 and the Pearson correlation coefficient as the distance metric.
+
+----
+
+**Parameters**
+
+We offer users the option of choosing which distance metric is used to find matching motifs. Specifically, we offer the Pearson correlation coefficient, the Euclidean distance and the Sandelin-Wasserman function.
+
+  * The Pearson correlation coefficient measures the similarity between columns of position weight matrices (PWMs).
+
+  * The Euclidean distance can be thought of as the length of the straight line between two PWMs.
+
+  * The Sandelin-Wasserman function sums the column-wise differences between PWMs.
+
+We also offer the choice of E-value and q-value to threshold the results returned by Tomtom.
+
+  * The E-value controls the expected number of false positives and can be any number.  
+
+  * The q-value controls the false discovery rate and is a number between 0 and 1.
+
+----
+
+Note that at this time we only offer Tomtom output in txt format.
+
+----
+
+**Citation**
+
+If you use this tool, please cite: Shobhit Gupta, JA Stamatoyannopoulos, Timothy Bailey and William Stafford Noble, "Quantifying similarity between motifs", Genome Biology, 8(2):R24, 2007.
+
+.. _here: http://meme.nbcr.net/meme/tomtom-intro.html
+
+  </help>
+</tool>
--- a/kmersvm/train.xml	Mon Aug 20 21:42:29 2012 -0400
+++ b/kmersvm/train.xml	Sun Jun 16 18:06:14 2013 -0400
@@ -47,8 +47,12 @@
     		<param name="weight" type="float" value="1" label="Input The Value of Positive Set Weight" />   
 		</when>
     </conditional>
-    <param name="SVMC" type="integer" value="1" label="Regularization Param C" />
-    <param name="EPS" type="float" value="0.00001" label="Precision Param E" />
+    <param name="SVMC" type="float" value="1" label="Regularization Param C" >
+	<validator type="in_range" message="SVMC must be in range 0.01 - 1" min="0.01" max="1" />
+    </param>
+    <param name="EPS" type="float" value="0.00001" label="Precision Param E" >
+	<validator type="in_range" message="EPS must be in range 1e-1 to 1e-5" min="0.00001" max="0.1" />
+    </param>
   </inputs>
   <outputs>
     <data format="tabular" name="SVM_weights" from_work_dir="kmersvm_output_weights.out" label="${tool.name} on ${on_string} : Weights" />
@@ -79,11 +83,27 @@
   
 Takes as input 2 FASTA files, 1 of positive sequences and 1 of negative sequences.  Produces 2 outputs: 
   
-  A) Weights: list of sequences of length K ranked by score and posterior probability for that score.
+  A) Weights: list of sequences of length K ranked by score.
   	
-  B) Predictions: results of N-fold cross validation
+  B) Predictions: results of N-fold cross validation.
   
 ----
+
+**Recommended Settings**
+
+Kernel: Spectrum
+
+Kmer length: 6
+
+N-Fold Cross-Validation: 5
+
+Weight: We recommend letting the Positive Set Weight be selected automatically, unless it has been separately optimized.
+
+Regularization Parameter C: We recommend values between 0.1 and 1.
+
+Precision Parameter E: We recommend using the default and staying below 0.1.
+
+----
   
 **Parameters**
   
@@ -91,8 +111,9 @@
   
   A) Spectrum Kernel: Analyzes a sequence using strings of length K.
   	
-  B) Weighted Spectrum Kernel: Analyzes a sequence using strings of range of lengths K1 - Kn.
-  	
+  B) Weighted Spectrum Kernel: Analyzes a sequence using strings of a range of lengths K_min - K_max.
+
+	
 N-Fold Cross Validation: Number of partitions of training data used for cross validation.
   
 Weight: Increases importance of positive data (increase if positive sets are very trustworthy or for training with very large negative sequence sets).
@@ -100,7 +121,7 @@
 Regularization Parameter: Penalty for misclassification.  Trade-off is overfitting (high parameter) versus high error rate (low parameter).
   
 Precision Parameter:  Insensitivity zone.  Affects precision of SVM by altering number of support vectors used.
-  
+
 ----
   
 **Example**