Mercurial > repos > cafletezbrant > kmersvm
changeset 7:fd740d515502 draft default tip
Uploaded revised kmer-SVM to include modules from kmer-visual.
author | cafletezbrant |
---|---|
date | Sun, 16 Jun 2013 18:06:14 -0400 |
parents | 1aea7c1a9ab1 |
children | |
files | kmersvm/README.txt kmersvm/install.sh kmersvm/kmer2meme.pl kmersvm/kmertopwm.xml kmersvm/nullseq.xml kmersvm/scripts/kmersvm_output_weights.out kmersvm/scripts/kmersvm_train.py kmersvm/scripts/kmersvm_train_kfb_copy.py kmersvm/scripts/libkmersvm.pyc kmersvm/scripts/nullseq_generate.py kmersvm/tomtom.xml kmersvm/train.xml |
diffstat | 12 files changed, 3218 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/kmersvm/README.txt Mon Aug 20 21:42:29 2012 -0400 +++ b/kmersvm/README.txt Sun Jun 16 18:06:14 2013 -0400 @@ -68,6 +68,8 @@ <tool file="kmersvm/train.xml"/> <tool file="kmersvm/split_genome.xml"/> <tool file="kmersvm/seqprofile.xml" /> + <tool file="kmersvm/kmertopwm.xml" /> + <tool file="kmersvm/tomtom.xml" /> </section> Tool Tests:
--- a/kmersvm/install.sh Mon Aug 20 21:42:29 2012 -0400 +++ b/kmersvm/install.sh Sun Jun 16 18:06:14 2013 -0400 @@ -1,12 +1,11 @@ #!/bin/bash -cd "$1" -cp tool-data/nullseq_indices.loc.sample ../../tool-data/nullseq_indices.loc -cp tool-data/sample_roc_chen.png ../../tool-data -cp tool-data/classify_output.out ../../test-data -cp tool-data/classify_test.fa ../../test-data -cp tool-data/kmersvm_output_weights.out ../../test-data -cp tool-data/test_positive.fa ../../test-data -cp tool-data/test_negative.fa ../../test-data -cp tool-data/test_weights.out ../../test-data -cp tool-data/train_predictions.out ../../test-data +cp tool-data/nullseq_indices.loc.sample ~/galaxy-dist/tool-data/nullseq_indices.loc +cp tool-data/sample_roc_chen.png ~/galaxy-dist/tool-data +cp tool-data/classify_output.out ~/galaxy-dist/test-data +cp tool-data/classify_test.fa ~/galaxy-dist/test-data +cp tool-data/kmersvm_output_weights.out ~/galaxy-dist/test-data +cp tool-data/test_positive.fa ~/galaxy-dist/test-data +cp tool-data/test_negative.fa ~/galaxy-dist/test-data +cp tool-data/test_weights.out ~/galaxy-dist/test-data +cp tool-data/train_predictions.out ~/galaxy-dist/test-data
# kmersvm/kmer2meme.pl (file added in changeset 7:fd740d515502,
# Sun Jun 16 18:06:14 2013 -0400)
#
# Convert the top and bottom k-mers of a kmer-SVM weight file into
# single-site motifs in MEME format, written to "kmer2meme.meme" in the
# current working directory.
#
# Usage:
#     perl kmer2meme.pl <weights_file> <num_kmers>
#
# Arguments:
#     weights_file  Tab-separated weight file; the first column of every
#                   data row is the k-mer. Lines starting with '#' and
#                   blank lines are treated as header and ignored.
#     num_kmers     Positive integer N. The first N and the last N data
#                   rows are converted.
#
# NOTE(review): rows are taken in file order; the script does not sort.
# It presumably relies on the weight file being ordered by SVM weight so
# that "first N / last N" means "most positive / most negative" --
# confirm against the output of the training tool.
#
# Exits non-zero (die) on bad arguments, an unreadable input file, an
# input file without data rows, or a failure to write the output file.
use strict;
use warnings;

my $OUTPUT_FILE = 'kmer2meme.meme';

# One probability row (A C G T) per nucleotide; anything else falls back
# to the uniform row.
my %ROW_FOR = (
    A => " 1.00 0.00 0.00 0.00\n",
    C => " 0.00 1.00 0.00 0.00\n",
    G => " 0.00 0.00 1.00 0.00\n",
    T => " 0.00 0.00 0.00 1.00\n",
);
my $UNIFORM_ROW = " 0.25 0.25 0.25 0.25\n";

my ($weights_file, $num_kmers) = @ARGV;

die "Usage: $0 <weights_file> <num_kmers>\n"
    unless defined $weights_file && defined $num_kmers;
die "Number of k-mers must be a positive integer, got '$num_kmers'\n"
    unless $num_kmers =~ /\A[0-9]+\z/ && $num_kmers > 0;

# Collect the k-mer column of every data row, skipping the '#' header
# lines instead of assuming a fixed header length.
open(my $w_fh, '<', $weights_file)
    or die "Cannot open '$weights_file' for reading: $!\n";

my @all_kmers;
while (my $line = <$w_fh>) {
    next if $line =~ /\A\s*(?:#|\z)/;    # header comment or blank line
    my ($kmer) = split ' ', $line;
    push @all_kmers, $kmer;
}
close($w_fh) or die "Cannot close '$weights_file': $!\n";

my $available = scalar @all_kmers;
die "No k-mer rows found in '$weights_file'\n" if $available == 0;

# Clamp N to what the file actually contains, and start the bottom slice
# no earlier than the end of the top slice so that a short file never
# yields undefined or duplicated motifs.
my $count      = $num_kmers > $available ? $available : $num_kmers;
my $tail_start = $available - $count;
$tail_start = $count if $tail_start < $count;

my @kmers = (
    @all_kmers[ 0 .. $count - 1 ],
    @all_kmers[ $tail_start .. $available - 1 ],
);

open(my $o_fh, '>', $OUTPUT_FILE)
    or die "Cannot open '$OUTPUT_FILE' for writing: $!\n";

print {$o_fh} "MEME version 4\n"
    . "\n"
    . "ALPHABET= ACGT\n"
    . "\n"
    . "strands: + -\n"
    . "\n"
    . "Background letter frequencies (from no specific genome):\n"
    . "A 0.25 C 0.25 G 0.25 T 0.25\n\n";

for my $kmer (@kmers) {
    my $width = length $kmer;

    print {$o_fh} "MOTIF $kmer\n";
    print {$o_fh}
        "letter-probability matrix: alength= 4 w= $width nsites= 1 E= 0\n";

    for my $nucleotide (split //, $kmer) {
        # The uniform row must go to the output file as well; otherwise
        # the matrix would have fewer rows than the declared width.
        my $row = exists $ROW_FOR{$nucleotide}
            ? $ROW_FOR{$nucleotide}
            : $UNIFORM_ROW;
        print {$o_fh} $row;
    }

    print {$o_fh} "\n";
}

# Buffered write errors only surface at close, so check it.
close($o_fh) or die "Cannot close '$OUTPUT_FILE': $!\n";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kmersvm/kmertopwm.xml Sun Jun 16 18:06:14 2013 -0400 @@ -0,0 +1,25 @@ +<tool id="kmer2meme" name="Kmer To MEME"> + <description>Convert kmers to MEME format for motif finding by Tomtom</description> + <command interpreter="perl">kmer2meme.pl + $weights $N + </command> + + <inputs> + <param format="tabular" name="weights" type="data" label="Kmer Weights"/> + <param type="integer" name="N" value="10" label="Kmer Number"> + <validator type="in_range" message="Kmer number must be in range 1 - 50" min="1" max="50"/> + </param> + </inputs> + + <outputs> + <data format="txt" from_work_dir="kmer2meme.meme" name="MEME for Kmers" label="${tool.name} on ${on_string}: MEME"/> + </outputs> + + <help> +This is a utility function that creates PWMs in MEME format for use with Tomtom. + +'Kmer Weights' is the weight file generated by 'Train SVM'. + +'Kmer Number' is the number of most positive and most negative kmers to be processed. + </help> +</tool>
--- a/kmersvm/nullseq.xml Mon Aug 20 21:42:29 2012 -0400 +++ b/kmersvm/nullseq.xml Sun Jun 16 18:06:14 2013 -0400 @@ -7,9 +7,9 @@ -x $fold -r $rseed -g $gc_err -t $rpt_err $input $dbkey ${indices_path.fields.path} </command> <inputs> - <param name="fold" type="integer" value="1" label="# of Fold-Increase" /> - <param name="gc_err" type="float" value="0.02" label="Allowable GC Error" /> - <param name="rpt_err" type="float" value="0.02" label="Allowable Repeat Error" /> + <param name="fold" type="integer" value="10" label="# of Fold-Increase" min="1" max="50" /> + <param name="gc_err" type="float" value="0.02" label="Allowable GC Error" min="0.01" max="0.1"/> + <param name="rpt_err" type="float" value="0.02" label="Allowable Repeat Error" min="0.01" max="0.1"/> <param name="rseed" type="integer" value="1" label="Random Number Seed" /> <param format="interval" name="input" type="data" label="BED File of Positive Regions" /> <validator type="unspecified_build" /> @@ -44,6 +44,16 @@ **What it does** Takes an input BED file and generates a set of sequences for use as negative data (null sequences) in Train SVM similar in length, GC content and repeat fraction. Uses random sampling for efficiency. + +---- + +**Recommended Settings** + +Fold-Increase: Default is recommended, up to 50x positive set. + +GC Error, Repeat Error: Default is recommended. + +---- **Parameters**
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kmersvm/scripts/kmersvm_output_weights.out Sun Jun 16 18:06:14 2013 -0400 @@ -0,0 +1,2088 @@ +#parameters: +#kernel=1 +#kmerlen=6 +#bias=0.930368454935 +#A=0 +#B=0 +#NOTE: k-mers with large negative weights are also important. They can be found at the bottom of the list. +#k-mer revcomp SVM-weight +AAAAAA TTTTTT 0.553324469582 +AAAAAC GTTTTT 1.0689111563 +AAAAAG CTTTTT 0.386997519222 +AAAAAT ATTTTT 0.371506923691 +AAAACA TGTTTT 0.582941243013 +AAAACC GGTTTT -0.00322550380692 +AAAACG CGTTTT 0.115121834279 +AAAACT AGTTTT 0.64234562623 +AAAAGA TCTTTT 0.180098364822 +AAAAGC GCTTTT -0.370020965708 +AAAAGG CCTTTT -0.148530185678 +AAAAGT ACTTTT 1.19477154105 +AAAATA TATTTT 1.23378644064 +AAAATC GATTTT -0.980691936551 +AAAATG CATTTT 0.221932570601 +AAAATT AATTTT 0.449293989111 +AAACAA TTGTTT -1.57507857322 +AAACAC GTGTTT -2.1383477652 +AAACAG CTGTTT -0.720402198466 +AAACAT ATGTTT -0.915754056705 +AAACCA TGGTTT 0.959609519802 +AAACCC GGGTTT 0.150812627734 +AAACCG CGGTTT -0.204853254781 +AAACCT AGGTTT 0.486872195933 +AAACGA TCGTTT -0.404172254228 +AAACGC GCGTTT 0.471891306908 +AAACGG CCGTTT -0.732914484007 +AAACGT ACGTTT -0.79028442459 +AAACTA TAGTTT -0.200848111441 +AAACTC GAGTTT -0.00260431934722 +AAACTG CAGTTT 0.456381173353 +AAACTT AAGTTT 0.639062115506 +AAAGAA TTCTTT 0.257495713463 +AAAGAC GTCTTT -0.228023730318 +AAAGAG CTCTTT 0.247579662852 +AAAGAT ATCTTT -0.304817901111 +AAAGCA TGCTTT -0.155658358179 +AAAGCC GGCTTT 0.416290507318 +AAAGCG CGCTTT -0.319122803172 +AAAGCT AGCTTT -0.10365386651 +AAAGGA TCCTTT 0.465546368844 +AAAGGC GCCTTT 0.293788204177 +AAAGGG CCCTTT -0.738483493496 +AAAGGT ACCTTT -1.46557110152 +AAAGTA TACTTT -0.487013201424 +AAAGTC GACTTT -0.815561145197 +AAAGTG CACTTT 0.523242409873 +AAAGTT AACTTT 1.49610361616 +AAATAA TTATTT -0.50775903415 +AAATAC GTATTT -0.925034113885 +AAATAG CTATTT -1.3099174763 +AAATAT ATATTT -1.8047214372 +AAATCA TGATTT -0.899342838259 +AAATCC GGATTT -0.146519411262 +AAATCG 
CGATTT -0.267007765303 +AAATCT AGATTT 0.291560176957 +AAATGA TCATTT -0.514145682209 +AAATGC GCATTT 0.954279511728 +AAATGG CCATTT -0.711449233898 +AAATGT ACATTT -0.752526282583 +AAATTA TAATTT 0.00593646027611 +AAATTC GAATTT 1.26182226428 +AAATTG CAATTT -0.0953103902516 +AAATTT AAATTT 0.189613989631 +AACAAA TTTGTT -1.37122264124 +AACAAC GTTGTT 0.00146931165158 +AACAAG CTTGTT -0.803037783522 +AACAAT ATTGTT 0.00385062783094 +AACACA TGTGTT -0.363356864114 +AACACC GGTGTT -0.779849447985 +AACACG CGTGTT -0.97471290289 +AACACT AGTGTT -0.335935444604 +AACAGA TCTGTT -1.28171369495 +AACAGC GCTGTT -0.411448258216 +AACAGG CCTGTT -0.469780016788 +AACAGT ACTGTT -0.453227635948 +AACATA TATGTT -0.945101613087 +AACATC GATGTT -0.0283361724906 +AACATG CATGTT -0.575985749697 +AACATT AATGTT -0.0429091030472 +AACCAA TTGGTT -0.0823228706445 +AACCAC GTGGTT 2.58639657949 +AACCAG CTGGTT -0.276555339554 +AACCAT ATGGTT -0.11357479766 +AACCCA TGGGTT 0.192569792654 +AACCCC GGGGTT -0.0425603516266 +AACCCG CGGGTT -0.404973603501 +AACCCT AGGGTT 0.0764485451656 +AACCGA TCGGTT -0.137853811078 +AACCGC GCGGTT 0.710876928983 +AACCGG CCGGTT 0.272143672682 +AACCGT ACGGTT -1.42589113548 +AACCTA TAGGTT -0.611888789113 +AACCTC GAGGTT 0.837839227815 +AACCTG CAGGTT -0.422972816872 +AACCTT AAGGTT 0.0794552714245 +AACGAA TTCGTT 0.662384258058 +AACGAC GTCGTT -0.711145623237 +AACGAG CTCGTT 0.198654543303 +AACGAT ATCGTT -1.14468704666 +AACGCA TGCGTT -0.143027192823 +AACGCC GGCGTT -0.0833645930753 +AACGCG CGCGTT 0.0613946992336 +AACGCT AGCGTT 0.379426684798 +AACGGA TCCGTT -0.902189680896 +AACGGC GCCGTT 0.725518300654 +AACGGG CCCGTT 0.487999502266 +AACGGT ACCGTT -0.323411669378 +AACGTA TACGTT 0.429654445762 +AACGTC GACGTT -0.392752266586 +AACGTG CACGTT -1.04792887194 +AACGTT AACGTT 0.616207780774 +AACTAA TTAGTT -0.843322479317 +AACTAC GTAGTT 0.184493095017 +AACTAG CTAGTT 0.0179086348231 +AACTAT ATAGTT 0.994586833037 +AACTCA TGAGTT -0.12838936418 +AACTCC GGAGTT 0.726028047244 +AACTCG CGAGTT 0.205501965615 +AACTCT 
AGAGTT 0.78739364499 +AACTGA TCAGTT 0.168022862889 +AACTGC GCAGTT 0.216791948549 +AACTGG CCAGTT -0.314557426071 +AACTGT ACAGTT -0.0111281613254 +AACTTA TAAGTT -0.183787054209 +AACTTC GAAGTT 0.84215541061 +AACTTG CAAGTT 0.376469022105 +AAGAAA TTTCTT 0.832667586229 +AAGAAC GTTCTT 0.93622383333 +AAGAAG CTTCTT 0.271875957941 +AAGAAT ATTCTT 1.43057617592 +AAGACA TGTCTT -0.132932072786 +AAGACC GGTCTT 0.0903286121328 +AAGACG CGTCTT 0.184576127381 +AAGACT AGTCTT 0.222042406341 +AAGAGA TCTCTT -0.0130328461327 +AAGAGC GCTCTT -0.37404789079 +AAGAGG CCTCTT -0.163448118904 +AAGAGT ACTCTT 0.769472446615 +AAGATA TATCTT -0.790403171158 +AAGATC GATCTT -0.120008098951 +AAGATG CATCTT 0.598644235302 +AAGATT AATCTT 1.39556497538 +AAGCAA TTGCTT -0.984888908248 +AAGCAC GTGCTT 0.783311673894 +AAGCAG CTGCTT -0.551197739368 +AAGCAT ATGCTT 0.368035643478 +AAGCCA TGGCTT 0.281990338241 +AAGCCC GGGCTT -0.699900156956 +AAGCCG CGGCTT 0.986454860217 +AAGCCT AGGCTT 0.446570897308 +AAGCGA TCGCTT -0.717502459474 +AAGCGC GCGCTT 0.292841378565 +AAGCGG CCGCTT 0.726632808198 +AAGCGT ACGCTT -0.441284795806 +AAGCTA TAGCTT -1.65918664431 +AAGCTC GAGCTT 0.0882183240244 +AAGCTG CAGCTT -0.134531324525 +AAGCTT AAGCTT 0.499772590447 +AAGGAA TTCCTT 0.608701292821 +AAGGAC GTCCTT 0.174988238866 +AAGGAG CTCCTT 0.56638313976 +AAGGAT ATCCTT 0.86759780737 +AAGGCA TGCCTT -0.0660388079911 +AAGGCC GGCCTT 0.353849453837 +AAGGCG CGCCTT -0.103035156648 +AAGGCT AGCCTT 0.226115108478 +AAGGGA TCCCTT -0.0242048325334 +AAGGGC GCCCTT -0.991808457742 +AAGGGG CCCCTT -0.108713197864 +AAGGGT ACCCTT 0.426314095539 +AAGGTA TACCTT 0.0063082317847 +AAGGTC GACCTT -2.22148605405 +AAGGTG CACCTT -0.171279446553 +AAGTAA TTACTT -0.660359179691 +AAGTAC GTACTT 1.37990716767 +AAGTAG CTACTT 0.00364551059326 +AAGTAT ATACTT 0.0627944758868 +AAGTCA TGACTT -0.0836841889637 +AAGTCC GGACTT -0.0783950838806 +AAGTCG CGACTT -0.331907177283 +AAGTCT AGACTT 0.922862248198 +AAGTGA TCACTT 0.301713638482 +AAGTGC GCACTT 0.372118346492 +AAGTGG CCACTT -0.29435234237 
+AAGTGT ACACTT 0.453500782049 +AAGTTA TAACTT -0.0413529710922 +AAGTTC GAACTT 0.490694081954 +AAGTTG CAACTT 1.43527540302 +AATAAA TTTATT -0.464685282825 +AATAAC GTTATT 0.473126192871 +AATAAG CTTATT -0.361537503398 +AATAAT ATTATT -1.14135793996 +AATACA TGTATT -0.0868676244573 +AATACC GGTATT -0.432019199972 +AATACG CGTATT -0.812104843229 +AATACT AGTATT -0.160253986465 +AATAGA TCTATT -0.819357586187 +AATAGC GCTATT -0.582278240034 +AATAGG CCTATT -0.102936475866 +AATAGT ACTATT 0.64849424254 +AATATA TATATT -0.875327209013 +AATATC GATATT -0.538482532464 +AATATG CATATT 0.442497512442 +AATATT AATATT -1.60012723551 +AATCAA TTGATT -1.37621833951 +AATCAC GTGATT 0.91628767144 +AATCAG CTGATT 0.070484765244 +AATCAT ATGATT 0.650606183815 +AATCCA TGGATT -1.02720580521 +AATCCC GGGATT -0.352811994914 +AATCCG CGGATT 0.0165933980204 +AATCCT AGGATT 0.712428149182 +AATCGA TCGATT -0.592300021647 +AATCGC GCGATT 0.812676084435 +AATCGG CCGATT 0.39632534305 +AATCGT ACGATT -0.342808208442 +AATCTA TAGATT -1.480756961 +AATCTC GAGATT -0.751509737277 +AATCTG CAGATT -0.0237559933613 +AATGAA TTCATT -0.771072829647 +AATGAC GTCATT -0.14602458728 +AATGAG CTCATT 0.492363745269 +AATGAT ATCATT -0.609265638394 +AATGCA TGCATT 0.167952139321 +AATGCC GGCATT 0.965994735545 +AATGCG CGCATT -0.166276358058 +AATGCT AGCATT 1.03827471911 +AATGGA TCCATT -0.187500612316 +AATGGC GCCATT 0.216365462216 +AATGGG CCCATT -0.0888492445946 +AATGGT ACCATT 0.14433579757 +AATGTA TACATT 0.283672586491 +AATGTC GACATT -0.913297517025 +AATGTG CACATT 1.12759664753 +AATTAA TTAATT -2.21736658818 +AATTAC GTAATT 0.126090373031 +AATTAG CTAATT -0.499643372776 +AATTAT ATAATT 0.0590969699364 +AATTCA TGAATT -0.535790423504 +AATTCC GGAATT 1.30916473709 +AATTCG CGAATT 0.0530089957774 +AATTCT AGAATT 1.3354098108 +AATTGA TCAATT -1.66820825185 +AATTGC GCAATT 0.00310194879804 +AATTGG CCAATT 0.419449404673 +AATTGT ACAATT 0.464182538132 +AATTTA TAAATT -0.570808133223 +AATTTC GAAATT 0.929450761295 +AATTTG CAAATT 0.406154967173 +ACAAAA TTTTGT 
0.534003859773 +ACAAAC GTTTGT -0.750597270967 +ACAAAG CTTTGT -0.174225381133 +ACAAAT ATTTGT -0.967477603914 +ACAACA TGTTGT 0.684531901144 +ACAACC GGTTGT 0.306111794846 +ACAACG CGTTGT -0.492170779986 +ACAACT AGTTGT 0.12647703187 +ACAAGA TCTTGT 0.435693866629 +ACAAGC GCTTGT 0.162579020622 +ACAAGG CCTTGT -0.482270829511 +ACAAGT ACTTGT 0.368700538071 +ACAATA TATTGT -0.556960796215 +ACAATC GATTGT 0.447210789307 +ACAATG CATTGT -0.652007172748 +ACACAA TTGTGT -0.58426177344 +ACACAC GTGTGT 0.0976403710637 +ACACAG CTGTGT -0.67562334546 +ACACAT ATGTGT -0.783431510249 +ACACCA TGGTGT -0.0559002312137 +ACACCC GGGTGT -0.279278917913 +ACACCG CGGTGT 0.927647825457 +ACACCT AGGTGT -2.31444811782 +ACACGA TCGTGT -0.290620517011 +ACACGC GCGTGT -0.433731209379 +ACACGG CCGTGT 0.133955133112 +ACACGT ACGTGT -0.0840820691034 +ACACTA TAGTGT -1.53601873195 +ACACTC GAGTGT -0.0411823725391 +ACACTG CAGTGT -0.133117765869 +ACAGAA TTCTGT 0.192830326341 +ACAGAC GTCTGT -0.344297166277 +ACAGAG CTCTGT 0.0995945155779 +ACAGAT ATCTGT -1.71253677969 +ACAGCA TGCTGT 0.103705732884 +ACAGCC GGCTGT -0.141361720091 +ACAGCG CGCTGT 0.0923052988622 +ACAGCT AGCTGT -2.06591471431 +ACAGGA TCCTGT 0.106606820089 +ACAGGC GCCTGT 0.243978095226 +ACAGGG CCCTGT -0.163198751642 +ACAGGT ACCTGT -1.35902898114 +ACAGTA TACTGT 0.330450384923 +ACAGTC GACTGT -0.0441089075653 +ACAGTG CACTGT 0.410210459073 +ACATAA TTATGT -0.124637932465 +ACATAC GTATGT 0.0408944886861 +ACATAG CTATGT -0.281098621777 +ACATAT ATATGT -1.5461561949 +ACATCA TGATGT 0.404823860207 +ACATCC GGATGT -0.0771250376801 +ACATCG CGATGT 0.348036576745 +ACATCT AGATGT -1.0135367165 +ACATGA TCATGT -0.0533364791011 +ACATGC GCATGT -0.230663552067 +ACATGG CCATGT 0.354870946287 +ACATGT ACATGT -0.255821119156 +ACATTA TAATGT -0.305695214437 +ACATTC GAATGT 2.10633976985 +ACATTG CAATGT -0.969761944969 +ACCAAA TTTGGT -0.0894837549998 +ACCAAC GTTGGT -0.310977709975 +ACCAAG CTTGGT 0.126792582447 +ACCAAT ATTGGT -0.208620509384 +ACCACA TGTGGT 3.95883323455 +ACCACC GGTGGT 
-0.550768309866 +ACCACG CGTGGT 1.64662237122 +ACCACT AGTGGT -0.252204442565 +ACCAGA TCTGGT -0.182102497222 +ACCAGC GCTGGT -0.666347426374 +ACCAGG CCTGGT -0.302144138217 +ACCAGT ACTGGT 0.0178732652384 +ACCATA TATGGT -0.128894926297 +ACCATC GATGGT -0.113161940262 +ACCATG CATGGT 0.0567971909973 +ACCCAA TTGGGT -0.112170340264 +ACCCAC GTGGGT -0.0932909430755 +ACCCAG CTGGGT 0.38534995457 +ACCCAT ATGGGT 0.86383897393 +ACCCCA TGGGGT 0.69577964714 +ACCCCC GGGGGT 0.336687664266 +ACCCCG CGGGGT -0.0474965784183 +ACCCCT AGGGGT 0.0583362287737 +ACCCGA TCGGGT -0.651964147142 +ACCCGC GCGGGT 0.430185118239 +ACCCGG CCGGGT 0.0136510502891 +ACCCGT ACGGGT -0.66740789625 +ACCCTA TAGGGT -0.0140774209654 +ACCCTC GAGGGT -0.0866227575362 +ACCCTG CAGGGT 0.831880338582 +ACCGAA TTCGGT 0.162092610395 +ACCGAC GTCGGT -0.213537840127 +ACCGAG CTCGGT -0.798483849782 +ACCGAT ATCGGT -0.171301259624 +ACCGCA TGCGGT 1.12176529563 +ACCGCC GGCGGT -0.0399431587546 +ACCGCG CGCGGT -0.00200779866329 +ACCGCT AGCGGT 0.807679694982 +ACCGGA TCCGGT 0.475032143564 +ACCGGC GCCGGT 0.644168012631 +ACCGGG CCCGGT 0.0514057931436 +ACCGGT ACCGGT -0.201116834029 +ACCGTA TACGGT -0.0510651546867 +ACCGTC GACGGT -1.28871756987 +ACCGTG CACGGT 0.480206522572 +ACCTAA TTAGGT -0.0164396390877 +ACCTAC GTAGGT -0.948871399654 +ACCTAG CTAGGT 0.902143796699 +ACCTAT ATAGGT -0.805157821049 +ACCTCA TGAGGT 0.0288013729329 +ACCTCC GGAGGT -0.123113465161 +ACCTCG CGAGGT 0.262603098349 +ACCTCT AGAGGT -0.105633795468 +ACCTGA TCAGGT -1.25693360781 +ACCTGC GCAGGT -2.19188428198 +ACCTGG CCAGGT -1.22596740891 +ACCTTA TAAGGT -0.397764436469 +ACCTTC GAAGGT -0.365879915113 +ACCTTG CAAGGT -0.468430599887 +ACGAAA TTTCGT 0.264150600855 +ACGAAC GTTCGT -0.264756483319 +ACGAAG CTTCGT 0.204030639912 +ACGAAT ATTCGT -0.0361589742531 +ACGACA TGTCGT -0.610965249538 +ACGACC GGTCGT -0.241773839788 +ACGACG CGTCGT 0.0286911417218 +ACGACT AGTCGT -0.446205245539 +ACGAGA TCTCGT 0.605153633971 +ACGAGC GCTCGT -0.265340587538 +ACGAGG CCTCGT -0.268759016858 +ACGAGT ACTCGT 
0.640180324145 +ACGATA TATCGT 0.155583247136 +ACGATC GATCGT 0.0616053169407 +ACGATG CATCGT -0.458209843991 +ACGCAA TTGCGT 0.525980393513 +ACGCAC GTGCGT -1.07211159219 +ACGCAG CTGCGT 0.297215899525 +ACGCAT ATGCGT -0.62466515887 +ACGCCA TGGCGT -0.802770461001 +ACGCCC GGGCGT 0.447952405036 +ACGCCG CGGCGT -0.115846359904 +ACGCCT AGGCGT -0.238430995845 +ACGCGA TCGCGT 0.095946201326 +ACGCGC GCGCGT -0.0433332223788 +ACGCGG CCGCGT 0.266229064785 +ACGCGT ACGCGT -0.163327119327 +ACGCTA TAGCGT 0.237235200727 +ACGCTC GAGCGT -0.0921458803262 +ACGCTG CAGCGT 0.199715587503 +ACGGAA TTCCGT -0.329353600663 +ACGGAC GTCCGT 0.344570189474 +ACGGAG CTCCGT -0.0548389021114 +ACGGAT ATCCGT -0.545997471972 +ACGGCA TGCCGT 0.280450942962 +ACGGCC GGCCGT 1.41582328399 +ACGGCG CGCCGT -0.196303455354 +ACGGCT AGCCGT 0.227947803931 +ACGGGA TCCCGT 0.280829778818 +ACGGGC GCCCGT 0.00018403437262 +ACGGGG CCCCGT 0.580486853705 +ACGGTA TACCGT 0.075495718471 +ACGGTC GACCGT 0.348584555757 +ACGGTG CACCGT -0.182120731866 +ACGTAA TTACGT -0.0621196361007 +ACGTAC GTACGT 0.466909984366 +ACGTAG CTACGT -0.663033009337 +ACGTAT ATACGT -0.308630159174 +ACGTCA TGACGT -1.99820059064 +ACGTCC GGACGT 0.420415612207 +ACGTCG CGACGT -0.0602876602791 +ACGTCT AGACGT -0.634849462137 +ACGTGA TCACGT -0.198905336876 +ACGTGC GCACGT 0.414998502382 +ACGTGG CCACGT 0.200447884465 +ACGTTA TAACGT 0.185859329067 +ACGTTC GAACGT 0.52815687831 +ACGTTG CAACGT 0.211039795795 +ACTAAA TTTAGT -0.50736915368 +ACTAAC GTTAGT 0.475747236187 +ACTAAG CTTAGT 0.138308785668 +ACTAAT ATTAGT 1.06019516177 +ACTACA TGTAGT 0.319192883857 +ACTACC GGTAGT 0.487897945196 +ACTACG CGTAGT -0.74407054614 +ACTACT AGTAGT 0.0639271446503 +ACTAGA TCTAGT -0.0298376878721 +ACTAGC GCTAGT -0.0429928576347 +ACTAGG CCTAGT 0.333780155394 +ACTAGT ACTAGT -0.16686552786 +ACTATA TATAGT -0.405746674892 +ACTATC GATAGT -0.111727361497 +ACTATG CATAGT -0.162666443308 +ACTCAA TTGAGT 0.0769394839718 +ACTCAC GTGAGT 1.11965808913 +ACTCAG CTGAGT 0.702350819167 +ACTCAT ATGAGT 1.21992890886 
+ACTCCA TGGAGT 0.159797702837 +ACTCCC GGGAGT -0.25369524982 +ACTCCG CGGAGT -0.22211947957 +ACTCCT AGGAGT 0.573765565902 +ACTCGA TCGAGT -0.52369931313 +ACTCGC GCGAGT 0.0889533091085 +ACTCGG CCGAGT 0.228330956989 +ACTCTA TAGAGT 0.143486764445 +ACTCTC GAGAGT 0.112394817019 +ACTCTG CAGAGT -0.15818031926 +ACTGAA TTCAGT -0.255344152434 +ACTGAC GTCAGT -0.494035697197 +ACTGAG CTCAGT 0.356908231789 +ACTGAT ATCAGT -0.39586503844 +ACTGCA TGCAGT 0.67076450454 +ACTGCC GGCAGT -0.33621057783 +ACTGCG CGCAGT 0.397171550083 +ACTGCT AGCAGT -0.0353519946569 +ACTGGA TCCAGT 0.221117097972 +ACTGGC GCCAGT -0.148482643928 +ACTGGG CCCAGT 0.36938530952 +ACTGTA TACAGT 0.387293260858 +ACTGTC GACAGT -0.0178021629868 +ACTGTG CACAGT 0.618305777696 +ACTTAA TTAAGT 0.437644834694 +ACTTAC GTAAGT 0.033919287324 +ACTTAG CTAAGT -0.516377419414 +ACTTAT ATAAGT 0.698841633408 +ACTTCA TGAAGT 0.611347347435 +ACTTCC GGAAGT 0.0973285263686 +ACTTCG CGAAGT -0.0915669240628 +ACTTCT AGAAGT 0.518303185233 +ACTTGA TCAAGT -0.216079683422 +ACTTGC GCAAGT 0.575477942051 +ACTTGG CCAAGT 0.00543146924231 +ACTTTA TAAAGT -0.300213848597 +ACTTTC GAAAGT -0.148863314977 +ACTTTG CAAAGT -0.245595583167 +AGAAAA TTTTCT -0.137268535318 +AGAAAC GTTTCT 0.572093479149 +AGAAAG CTTTCT 0.098472865858 +AGAAAT ATTTCT 0.410453396261 +AGAACA TGTTCT 0.793454650212 +AGAACC GGTTCT 0.0115494156458 +AGAACG CGTTCT 0.82393226583 +AGAACT AGTTCT -0.119807464497 +AGAAGA TCTTCT -0.333758403042 +AGAAGC GCTTCT 0.0844487814689 +AGAAGG CCTTCT -0.205219218112 +AGAATA TATTCT 0.0633993564776 +AGAATC GATTCT -0.538136371548 +AGAATG CATTCT 0.931621596147 +AGACAA TTGTCT -1.50574942362 +AGACAC GTGTCT -0.0167669078258 +AGACAG CTGTCT -0.12906421364 +AGACAT ATGTCT 1.35404000457 +AGACCA TGGTCT 0.328218872799 +AGACCC GGGTCT -0.0200725289044 +AGACCG CGGTCT 0.765626742963 +AGACCT AGGTCT -0.456848517462 +AGACGA TCGTCT -0.802053904091 +AGACGC GCGTCT 0.0253871768361 +AGACGG CCGTCT -0.139266727141 +AGACTA TAGTCT -0.320057583318 +AGACTC GAGTCT -0.834249291646 +AGACTG CAGTCT 
0.193508572354 +AGAGAA TTCTCT 0.0931165815382 +AGAGAC GTCTCT -0.5884698684 +AGAGAG CTCTCT 0.615476951972 +AGAGAT ATCTCT -0.299853214526 +AGAGCA TGCTCT -0.335642528646 +AGAGCC GGCTCT -0.883469618392 +AGAGCG CGCTCT 0.251578584118 +AGAGCT AGCTCT 0.487429375142 +AGAGGA TCCTCT 0.463823218153 +AGAGGC GCCTCT 0.916276432149 +AGAGGG CCCTCT -0.18684096125 +AGAGTA TACTCT -0.495138709385 +AGAGTC GACTCT -0.484655774219 +AGAGTG CACTCT 0.115993902393 +AGATAA TTATCT -1.58907716419 +AGATAC GTATCT 0.339921215435 +AGATAG CTATCT -0.367780034415 +AGATAT ATATCT 0.287484551209 +AGATCA TGATCT -0.123844683894 +AGATCC GGATCT 0.00247282057627 +AGATCG CGATCT -0.0307897262914 +AGATCT AGATCT 0.227651621052 +AGATGA TCATCT 0.386848181305 +AGATGC GCATCT -0.0213670882284 +AGATGG CCATCT -1.81021353589 +AGATTA TAATCT -0.8443233316 +AGATTC GAATCT -0.532888247722 +AGATTG CAATCT -0.107371277313 +AGCAAA TTTGCT -1.94599552918 +AGCAAC GTTGCT -0.0162424474542 +AGCAAG CTTGCT -0.342005889721 +AGCAAT ATTGCT 1.13914459658 +AGCACA TGTGCT -0.00361898399215 +AGCACC GGTGCT -0.315148789185 +AGCACG CGTGCT 0.303979187648 +AGCACT AGTGCT 1.06273224797 +AGCAGA TCTGCT -0.762081808432 +AGCAGC GCTGCT -0.718835795316 +AGCAGG CCTGCT -0.805919785711 +AGCATA TATGCT -1.23962747197 +AGCATC GATGCT -0.72646586189 +AGCATG CATGCT 0.464440738258 +AGCCAA TTGGCT -0.328449603835 +AGCCAC GTGGCT 1.94437802586 +AGCCAG CTGGCT -0.220210493763 +AGCCAT ATGGCT -0.106927471131 +AGCCCA TGGGCT -0.206762784221 +AGCCCC GGGGCT 0.51046992709 +AGCCCG CGGGCT 1.1594447744 +AGCCCT AGGGCT -0.414622923074 +AGCCGA TCGGCT -0.0926922352106 +AGCCGC GCGGCT 0.262876228393 +AGCCGG CCGGCT 0.055499757051 +AGCCTA TAGGCT -0.197316160999 +AGCCTC GAGGCT 0.21702983432 +AGCCTG CAGGCT 0.435309995039 +AGCGAA TTCGCT -1.1066505883e-05 +AGCGAC GTCGCT -0.47360327321 +AGCGAG CTCGCT 0.164778505221 +AGCGAT ATCGCT 0.356498907503 +AGCGCA TGCGCT 0.0505610378921 +AGCGCC GGCGCT -0.0890290036684 +AGCGCG CGCGCT 0.178915191469 +AGCGCT AGCGCT 0.11567687362 +AGCGGA TCCGCT -0.227235460911 
+AGCGGC GCCGCT 0.0584119341573 +AGCGGG CCCGCT 0.144626430325 +AGCGTA TACGCT -0.573602800156 +AGCGTC GACGCT 0.0226782464247 +AGCGTG CACGCT 0.229831864487 +AGCTAA TTAGCT -0.845230674233 +AGCTAC GTAGCT 0.164831388872 +AGCTAG CTAGCT 0.500762258636 +AGCTAT ATAGCT -0.488234536435 +AGCTCA TGAGCT 0.187034773314 +AGCTCC GGAGCT -0.401339822528 +AGCTCG CGAGCT 0.156048007048 +AGCTGA TCAGCT -0.516709892962 +AGCTGC GCAGCT -1.11014090329 +AGCTGG CCAGCT -0.932348608248 +AGCTTA TAAGCT -0.176665598631 +AGCTTC GAAGCT 0.0711278180534 +AGCTTG CAAGCT -0.434225228906 +AGGAAA TTTCCT -0.704291527622 +AGGAAC GTTCCT 0.303469917629 +AGGAAG CTTCCT 0.00524866323846 +AGGAAT ATTCCT 1.58780848354 +AGGACA TGTCCT -0.15505321324 +AGGACC GGTCCT 0.289757945024 +AGGACG CGTCCT 0.481778993922 +AGGACT AGTCCT 0.640823779903 +AGGAGA TCTCCT -0.235490942376 +AGGAGC GCTCCT 0.235768365124 +AGGAGG CCTCCT -0.414415365132 +AGGATA TATCCT -0.0664042613035 +AGGATC GATCCT -0.434947351654 +AGGATG CATCCT 0.366955472562 +AGGCAA TTGCCT -1.01850518506 +AGGCAC GTGCCT 0.736932368634 +AGGCAG CTGCCT 0.256955994847 +AGGCAT ATGCCT 0.369133304411 +AGGCCA TGGCCT -0.561290343016 +AGGCCC GGGCCT 1.03465518609 +AGGCCG CGGCCT 0.522868342351 +AGGCCT AGGCCT 0.564743685355 +AGGCGA TCGCCT 0.424607293004 +AGGCGC GCGCCT -0.236182049346 +AGGCGG CCGCCT 0.0445043432832 +AGGCTA TAGCCT 0.0242969706881 +AGGCTC GAGCCT -0.0471097341017 +AGGCTG CAGCCT 0.456595572887 +AGGGAA TTCCCT 0.498834635405 +AGGGAC GTCCCT -0.260200051734 +AGGGAG CTCCCT 0.240503537084 +AGGGAT ATCCCT -0.535599280412 +AGGGCA TGCCCT -0.00133954725493 +AGGGCC GGCCCT 0.265601558657 +AGGGCG CGCCCT -0.545673685243 +AGGGGA TCCCCT 0.513483423778 +AGGGGC GCCCCT -0.265469144844 +AGGGGG CCCCCT 0.36693029363 +AGGGTA TACCCT 0.104498083465 +AGGGTC GACCCT 0.278328586346 +AGGGTG CACCCT 0.428836989395 +AGGTAA TTACCT -0.725162219135 +AGGTAC GTACCT -1.09752982535 +AGGTAG CTACCT -0.341120431869 +AGGTAT ATACCT 0.314857124255 +AGGTCA TGACCT -1.91741654129 +AGGTCC GGACCT 0.810994316372 +AGGTCG CGACCT 
0.394159438614 +AGGTGA TCACCT -1.07617897961 +AGGTGC GCACCT -1.39197112403 +AGGTGG CCACCT -1.18178211992 +AGGTTA TAACCT -0.510476009759 +AGGTTC GAACCT -0.625187182346 +AGGTTG CAACCT 0.143869769481 +AGTAAA TTTACT -1.50793279028 +AGTAAC GTTACT 0.427455316037 +AGTAAG CTTACT 0.279690202349 +AGTAAT ATTACT 0.291866593552 +AGTACA TGTACT 0.880452697751 +AGTACC GGTACT -0.445269994312 +AGTACG CGTACT 0.579750490517 +AGTACT AGTACT 0.829122008787 +AGTAGA TCTACT -0.191593190325 +AGTAGC GCTACT -0.0893420501576 +AGTAGG CCTACT 0.14556137321 +AGTATA TATACT -0.849140839165 +AGTATC GATACT -0.61282065974 +AGTATG CATACT 0.912596605147 +AGTCAA TTGACT -1.70769354035 +AGTCAC GTGACT 1.34966048832 +AGTCAG CTGACT 1.37319346673 +AGTCAT ATGACT 2.72186586472 +AGTCCA TGGACT -0.0192132003426 +AGTCCC GGGACT 0.578612760429 +AGTCCG CGGACT -0.212053165155 +AGTCGA TCGACT -0.0786424670568 +AGTCGC GCGACT -0.458716046909 +AGTCGG CCGACT 0.248365258652 +AGTCTA TAGACT -0.320914441937 +AGTCTC GAGACT 0.0850550293497 +AGTCTG CAGACT 0.488675888517 +AGTGAA TTCACT 0.0266570650857 +AGTGAC GTCACT -0.387049263014 +AGTGAG CTCACT 0.430045767393 +AGTGAT ATCACT 0.827141172737 +AGTGCA TGCACT 0.070601211825 +AGTGCC GGCACT -0.966563356734 +AGTGCG CGCACT -0.281729870678 +AGTGGA TCCACT -0.381263157411 +AGTGGC GCCACT 0.206378092816 +AGTGGG CCCACT 0.389632179038 +AGTGTA TACACT -0.820978049153 +AGTGTC GACACT -0.600524497894 +AGTGTG CACACT 0.713037712467 +AGTTAA TTAACT -0.0106066013167 +AGTTAC GTAACT 0.0455408515653 +AGTTAG CTAACT 0.466941093618 +AGTTAT ATAACT 0.768829672421 +AGTTCA TGAACT -0.125866517956 +AGTTCC GGAACT 0.668936562441 +AGTTCG CGAACT 0.387968939941 +AGTTGA TCAACT -0.0474971487734 +AGTTGC GCAACT 1.1127899844 +AGTTGG CCAACT -0.465573167102 +AGTTTA TAAACT 0.284055967528 +AGTTTC GAAACT 0.18619349173 +AGTTTG CAAACT 0.12731264248 +ATAAAA TTTTAT 1.02648675816 +ATAAAC GTTTAT 0.732880225054 +ATAAAG CTTTAT -1.87548823673 +ATAAAT ATTTAT -0.873007961387 +ATAACA TGTTAT -1.36018600643 +ATAACC GGTTAT -0.202228583494 +ATAACG 
CGTTAT 0.589468702095 +ATAAGA TCTTAT -0.257841865173 +ATAAGC GCTTAT -0.610042148574 +ATAAGG CCTTAT -1.22287623937 +ATAATA TATTAT -0.164184507604 +ATAATC GATTAT -0.722314659784 +ATAATG CATTAT -0.0511255174539 +ATACAA TTGTAT -0.950800433133 +ATACAC GTGTAT -0.119925598684 +ATACAG CTGTAT -0.0262094856091 +ATACAT ATGTAT 0.249201529788 +ATACCA TGGTAT 0.574806016908 +ATACCC GGGTAT -0.101360243386 +ATACCG CGGTAT -0.611029854896 +ATACGA TCGTAT -0.920389648385 +ATACGC GCGTAT -0.174514324887 +ATACGG CCGTAT -0.51787444727 +ATACTA TAGTAT 0.229563124064 +ATACTC GAGTAT -0.595979909399 +ATACTG CAGTAT 0.236738075475 +ATAGAA TTCTAT -1.07321125666 +ATAGAC GTCTAT -0.918035212801 +ATAGAG CTCTAT 0.338560548219 +ATAGAT ATCTAT 0.330811757112 +ATAGCA TGCTAT -0.923663860458 +ATAGCC GGCTAT 0.16558344397 +ATAGCG CGCTAT 0.462369612811 +ATAGGA TCCTAT 0.425973895672 +ATAGGC GCCTAT 0.314093043047 +ATAGGG CCCTAT 0.219751962566 +ATAGTA TACTAT -0.900554379694 +ATAGTC GACTAT -0.0847074884239 +ATAGTG CACTAT -0.211791001641 +ATATAA TTATAT -0.276221858749 +ATATAC GTATAT -1.19823462791 +ATATAG CTATAT 0.0633032493853 +ATATAT ATATAT 0.429750726604 +ATATCA TGATAT -1.31621436594 +ATATCC GGATAT -0.127518411175 +ATATCG CGATAT -0.1582300913 +ATATGA TCATAT -1.22039654692 +ATATGC GCATAT -0.733718702885 +ATATGG CCATAT -0.410024811352 +ATATTA TAATAT -0.556385064056 +ATATTC GAATAT -0.193606749951 +ATATTG CAATAT -1.84656827598 +ATCAAA TTTGAT -1.48651833301 +ATCAAC GTTGAT -0.177019712196 +ATCAAG CTTGAT 0.993564451911 +ATCAAT ATTGAT -1.73845783884 +ATCACA TGTGAT -0.7187838349 +ATCACC GGTGAT -0.522216302483 +ATCACG CGTGAT -0.717922773896 +ATCAGA TCTGAT -0.432098796815 +ATCAGC GCTGAT -0.177781089389 +ATCAGG CCTGAT -0.185638177915 +ATCATA TATGAT 0.532155893918 +ATCATC GATGAT -0.124663780341 +ATCATG CATGAT -0.00689408482313 +ATCCAA TTGGAT 0.860993420444 +ATCCAC GTGGAT -0.0725321924413 +ATCCAG CTGGAT -0.27103003808 +ATCCAT ATGGAT -1.06148966182 +ATCCCA TGGGAT 0.61836822508 +ATCCCC GGGGAT 0.953992982003 +ATCCCG CGGGAT 
-0.399268842253 +ATCCGA TCGGAT 0.30018596428 +ATCCGC GCGGAT 0.267936181036 +ATCCGG CCGGAT -0.621204560642 +ATCCTA TAGGAT -0.243979632628 +ATCCTC GAGGAT 0.744817578624 +ATCCTG CAGGAT -0.504079419831 +ATCGAA TTCGAT -0.127059008672 +ATCGAC GTCGAT -0.0155499225919 +ATCGAG CTCGAT -0.547153990807 +ATCGAT ATCGAT -0.243041889295 +ATCGCA TGCGAT 0.617626698462 +ATCGCC GGCGAT -0.549200964359 +ATCGCG CGCGAT -0.319928310366 +ATCGGA TCCGAT -0.0213478576162 +ATCGGC GCCGAT 0.01012445378 +ATCGGG CCCGAT -0.16502415031 +ATCGTA TACGAT 0.209770307092 +ATCGTC GACGAT 0.446879950629 +ATCGTG CACGAT -0.0107015895665 +ATCTAA TTAGAT 0.275281219402 +ATCTAC GTAGAT -0.505485858388 +ATCTAG CTAGAT -1.40045656324 +ATCTCA TGAGAT -0.796214522768 +ATCTCC GGAGAT 0.357919780998 +ATCTCG CGAGAT 0.330326321378 +ATCTGA TCAGAT 0.54409789464 +ATCTGC GCAGAT -1.11848953262 +ATCTGG CCAGAT -1.74540660353 +ATCTTA TAAGAT 0.653446604954 +ATCTTC GAAGAT -0.402855143504 +ATCTTG CAAGAT 1.12868592482 +ATGAAA TTTCAT -0.931491848926 +ATGAAC GTTCAT -0.668998480935 +ATGAAG CTTCAT -0.234340414147 +ATGAAT ATTCAT 1.32890513335 +ATGACA TGTCAT -0.407101486566 +ATGACC GGTCAT -0.10517111194 +ATGACG CGTCAT -1.81349916863 +ATGAGA TCTCAT -0.143474073688 +ATGAGC GCTCAT -0.422453674778 +ATGAGG CCTCAT 0.24497065201 +ATGATA TATCAT -0.457971919127 +ATGATC GATCAT -0.956283043587 +ATGATG CATCAT 0.280192974098 +ATGCAA TTGCAT 0.0243701389727 +ATGCAC GTGCAT -0.19959196082 +ATGCAG CTGCAT 0.349978081089 +ATGCAT ATGCAT 0.13057511889 +ATGCCA TGGCAT 0.381740306749 +ATGCCC GGGCAT -0.0217736885435 +ATGCCG CGGCAT 0.62665478233 +ATGCGA TCGCAT 0.335703166837 +ATGCGC GCGCAT 0.783616840941 +ATGCGG CCGCAT 0.0472938578306 +ATGCTA TAGCAT -0.0735655576493 +ATGCTC GAGCAT -0.840560139265 +ATGCTG CAGCAT 0.15741810487 +ATGGAA TTCCAT -0.775848717153 +ATGGAC GTCCAT 0.41782311792 +ATGGAG CTCCAT -0.163619300573 +ATGGCA TGCCAT -0.129694663435 +ATGGCC GGCCAT -0.416279326696 +ATGGCG CGCCAT -0.445757591838 +ATGGGA TCCCAT -0.128926465653 +ATGGGC GCCCAT 0.771709467651 
+ATGGGG CCCCAT -0.0745872493387 +ATGGTA TACCAT 0.246690854305 +ATGGTC GACCAT 0.0673503554008 +ATGGTG CACCAT -0.253032937261 +ATGTAA TTACAT -0.995516665438 +ATGTAC GTACAT -0.525286748875 +ATGTAG CTACAT 0.68550253572 +ATGTCA TGACAT -2.11241919747 +ATGTCC GGACAT 0.845134601462 +ATGTCG CGACAT 0.516595304184 +ATGTGA TCACAT 0.141325506081 +ATGTGC GCACAT 0.0828091804383 +ATGTGG CCACAT 0.947400656873 +ATGTTA TAACAT -0.947214214102 +ATGTTC GAACAT 0.431810543746 +ATGTTG CAACAT -0.125258264151 +ATTAAA TTTAAT -1.31975538764 +ATTAAC GTTAAT -1.41512566175 +ATTAAG CTTAAT -0.199537767266 +ATTAAT ATTAAT -0.436068858224 +ATTACA TGTAAT -0.201133348119 +ATTACC GGTAAT 0.172300094885 +ATTACG CGTAAT -0.417719262734 +ATTAGA TCTAAT 0.93500523245 +ATTAGC GCTAAT -2.10811581856 +ATTAGG CCTAAT -0.467480191357 +ATTATA TATAAT 0.363629728768 +ATTATC GATAAT -0.861960495475 +ATTATG CATAAT 0.562723819118 +ATTCAA TTGAAT 0.0200498460273 +ATTCAC GTGAAT 0.418891749324 +ATTCAG CTGAAT -0.230767178642 +ATTCCA TGGAAT 1.05154227319 +ATTCCC GGGAAT -0.13155797816 +ATTCCG CGGAAT 0.0812893553365 +ATTCGA TCGAAT -0.0838650208544 +ATTCGC GCGAAT 0.222296881979 +ATTCGG CCGAAT -0.139283061422 +ATTCTA TAGAAT -0.224824743806 +ATTCTC GAGAAT -0.557117996579 +ATTCTG CAGAAT 1.0695468056 +ATTGAA TTCAAT -0.463986709186 +ATTGAC GTCAAT -1.69772334912 +ATTGAG CTCAAT -0.117012580083 +ATTGCA TGCAAT 0.257329206735 +ATTGCC GGCAAT 0.0584571835771 +ATTGCG CGCAAT 0.406025923639 +ATTGGA TCCAAT -0.62851834273 +ATTGGC GCCAAT 0.079438863957 +ATTGGG CCCAAT 0.913438482245 +ATTGTA TACAAT 0.450423686846 +ATTGTC GACAAT -1.01726290552 +ATTGTG CACAAT 0.60669560725 +ATTTAA TTAAAT 0.253196011315 +ATTTAC GTAAAT -2.97235568716 +ATTTAG CTAAAT -0.292154315684 +ATTTCA TGAAAT 0.636662944131 +ATTTCC GGAAAT -0.6231521332 +ATTTCG CGAAAT 0.503689389655 +ATTTGA TCAAAT -0.338609456935 +ATTTGC GCAAAT -2.07758131186 +ATTTGG CCAAAT 0.333462478492 +ATTTTA TAAAAT -0.556525692846 +ATTTTC GAAAAT -0.840026641213 +ATTTTG CAAAAT 1.35270004621 +CAAAAA TTTTTG 
1.47166885108 +CAAAAC GTTTTG 0.109633599033 +CAAAAG CTTTTG -0.10115908148 +CAAACA TGTTTG -3.89334182039 +CAAACC GGTTTG 0.963184089308 +CAAACG CGTTTG -0.0395814581096 +CAAAGA TCTTTG -0.0118964430112 +CAAAGC GCTTTG 0.224207599526 +CAAAGG CCTTTG -0.175494095276 +CAAATA TATTTG -3.52207271503 +CAAATC GATTTG 0.660053835664 +CAAATG CATTTG -0.727424259718 +CAACAA TTGTTG -1.19656276713 +CAACAC GTGTTG 0.660747434725 +CAACAG CTGTTG -0.857485267387 +CAACCA TGGTTG 0.404850702022 +CAACCC GGGTTG 0.503951138249 +CAACCG CGGTTG -0.644416232942 +CAACGA TCGTTG -0.224419054868 +CAACGC GCGTTG -0.508914854709 +CAACGG CCGTTG -0.200076485177 +CAACTA TAGTTG 0.0981933677308 +CAACTC GAGTTG 0.150972486588 +CAACTG CAGTTG -0.921035707419 +CAAGAA TTCTTG 0.922374683971 +CAAGAC GTCTTG 0.423676757958 +CAAGAG CTCTTG -0.187883399321 +CAAGCA TGCTTG 0.636543530499 +CAAGCC GGCTTG 0.00071745286602 +CAAGCG CGCTTG -0.254180305152 +CAAGGA TCCTTG 0.821021319418 +CAAGGC GCCTTG 0.0604176049684 +CAAGGG CCCTTG 0.226570746088 +CAAGTA TACTTG 0.216374948595 +CAAGTC GACTTG 0.729235603486 +CAAGTG CACTTG -0.56315324029 +CAATAA TTATTG -0.549020618619 +CAATAC GTATTG -0.208803364217 +CAATAG CTATTG -0.0777355282063 +CAATCA TGATTG -0.263891707036 +CAATCC GGATTG 0.700155593708 +CAATCG CGATTG 0.357897326676 +CAATGA TCATTG 0.638987959753 +CAATGC GCATTG -0.191968495388 +CAATGG CCATTG 0.665776651287 +CAATTA TAATTG -0.271883139449 +CAATTC GAATTG 0.0983322987509 +CAATTG CAATTG -0.158614672809 +CACAAA TTTGTG 0.598792191689 +CACAAC GTTGTG 0.340518077962 +CACAAG CTTGTG 0.510003745313 +CACACA TGTGTG -0.400928902962 +CACACC GGTGTG -1.11996656805 +CACACG CGTGTG -0.718947444703 +CACAGA TCTGTG -0.100922777882 +CACAGC GCTGTG -0.50134148629 +CACAGG CCTGTG 0.252298561921 +CACATA TATGTG -0.110647463006 +CACATC GATGTG -0.651447413877 +CACATG CATGTG 0.00381644641394 +CACCAA TTGGTG 0.70840160186 +CACCAC GTGGTG 0.582686488096 +CACCAG CTGGTG -0.793184758924 +CACCCA TGGGTG -0.0284926248858 +CACCCC GGGGTG 0.893138419231 +CACCCG CGGGTG 0.417490759867 
+CACCGA TCGGTG -0.0699188856569 +CACCGC GCGGTG 0.522058993046 +CACCGG CCGGTG 0.0180860340686 +CACCTA TAGGTG -0.592893592202 +CACCTC GAGGTG 0.676528270972 +CACCTG CAGGTG -5.27584299853 +CACGAA TTCGTG -0.382612993115 +CACGAC GTCGTG 0.032371814243 +CACGAG CTCGTG 0.254629628132 +CACGCA TGCGTG 0.117247261513 +CACGCC GGCGTG 0.00637478726345 +CACGCG CGCGTG -0.0917099552667 +CACGGA TCCGTG 0.368340586739 +CACGGC GCCGTG 1.30441576203 +CACGGG CCCGTG -0.264381491117 +CACGTA TACGTG -1.02682296878 +CACGTC GACGTG -0.219968085956 +CACGTG CACGTG 1.22865752463 +CACTAA TTAGTG 0.253977430376 +CACTAC GTAGTG -0.301750384238 +CACTAG CTAGTG -0.669946880366 +CACTCA TGAGTG -0.734281923173 +CACTCC GGAGTG -0.204493843897 +CACTCG CGAGTG -0.172095955086 +CACTGA TCAGTG -0.318554612147 +CACTGC GCAGTG 0.452355062655 +CACTGG CCAGTG 0.0113022350057 +CACTTA TAAGTG 0.159408091311 +CACTTC GAAGTG 0.111535665099 +CAGAAA TTTCTG 0.013661920057 +CAGAAC GTTCTG -0.34634558884 +CAGAAG CTTCTG -0.268726628204 +CAGACA TGTCTG -0.0884146723771 +CAGACC GGTCTG 0.848596588006 +CAGACG CGTCTG -0.176588448217 +CAGAGA TCTCTG 0.213650101773 +CAGAGC GCTCTG 0.72706788894 +CAGAGG CCTCTG 0.222483758838 +CAGATA TATCTG 0.134724484734 +CAGATC GATCTG 0.135060768551 +CAGATG CATCTG -3.82365153576 +CAGCAA TTGCTG 0.216246738966 +CAGCAC GTGCTG 0.134612451799 +CAGCAG CTGCTG -0.782809242533 +CAGCCA TGGCTG 0.25352023069 +CAGCCC GGGCTG 0.892449121017 +CAGCCG CGGCTG -1.34095163643 +CAGCGA TCGCTG 0.368898676461 +CAGCGC GCGCTG 0.40336227877 +CAGCGG CCGCTG -0.041543776559 +CAGCTA TAGCTG 0.347266756135 +CAGCTC GAGCTG 1.48884499806 +CAGCTG CAGCTG -3.03276014117 +CAGGAA TTCCTG 0.0661704451321 +CAGGAC GTCCTG 0.86939329333 +CAGGAG CTCCTG -0.480221178798 +CAGGCA TGCCTG 0.506805708618 +CAGGCC GGCCTG 0.756166029763 +CAGGCG CGCCTG 0.233566907124 +CAGGGA TCCCTG 0.970287665608 +CAGGGC GCCCTG -0.32000823828 +CAGGGG CCCCTG -0.0409673970414 +CAGGTA TACCTG -1.16422982988 +CAGGTC GACCTG 0.991958843425 +CAGTAA TTACTG -0.456065885023 +CAGTAC GTACTG 
0.866479551705 +CAGTAG CTACTG 0.0492891641913 +CAGTCA TGACTG -0.864906645492 +CAGTCC GGACTG 0.436595236134 +CAGTCG CGACTG -0.0778031521909 +CAGTGA TCACTG 0.095401665363 +CAGTGC GCACTG 0.809574408241 +CAGTGG CCACTG 0.130805602072 +CAGTTA TAACTG 0.102237048367 +CAGTTC GAACTG -0.329116232229 +CATAAA TTTATG 1.09056149453 +CATAAC GTTATG -0.240307894957 +CATAAG CTTATG -0.274744434381 +CATACA TGTATG -0.316826193863 +CATACC GGTATG 0.907504112042 +CATACG CGTATG -0.646410594912 +CATAGA TCTATG 0.0743941919121 +CATAGC GCTATG 0.604402059297 +CATAGG CCTATG 0.132614581735 +CATATA TATATG 0.102595570806 +CATATC GATATG -0.685577322551 +CATATG CATATG -1.85636550314 +CATCAA TTGATG 0.528225242508 +CATCAC GTGATG -0.779076990822 +CATCAG CTGATG 0.0792779772782 +CATCCA TGGATG 0.380324367299 +CATCCC GGGATG 0.476823595932 +CATCCG CGGATG -0.735460908783 +CATCGA TCGATG -0.364712952887 +CATCGC GCGATG -0.445769472916 +CATCGG CCGATG -0.213744964599 +CATCTA TAGATG 0.591942642834 +CATCTC GAGATG 0.175954704824 +CATGAA TTCATG -0.0345496547785 +CATGAC GTCATG 0.128585442856 +CATGAG CTCATG 0.093844395721 +CATGCA TGCATG 0.500579541195 +CATGCC GGCATG 0.534363670334 +CATGCG CGCATG -0.07767446656 +CATGGA TCCATG -0.915298913786 +CATGGC GCCATG 0.496643001128 +CATGGG CCCATG 0.412095419236 +CATGTA TACATG 0.170530452886 +CATGTC GACATG 0.409305171927 +CATTAA TTAATG -1.21674231256 +CATTAC GTAATG 0.0363120647791 +CATTAG CTAATG 0.113980713568 +CATTCA TGAATG 0.211633133094 +CATTCC GGAATG 1.3667834662 +CATTCG CGAATG -0.306612061174 +CATTGA TCAATG 0.261275457012 +CATTGC GCAATG 0.422488072429 +CATTGG CCAATG -0.0683147570932 +CATTTA TAAATG -0.674511635662 +CATTTC GAAATG 0.149816454289 +CCAAAA TTTTGG 0.722553040783 +CCAAAC GTTTGG 0.136597481362 +CCAAAG CTTTGG 0.335940148998 +CCAACA TGTTGG 0.242613946152 +CCAACC GGTTGG 0.193037046526 +CCAACG CGTTGG -0.464564126497 +CCAAGA TCTTGG 0.449792823487 +CCAAGC GCTTGG 0.0105858416551 +CCAAGG CCTTGG 0.750008716703 +CCAATA TATTGG 0.380191080614 +CCAATC GATTGG -0.291650702172 +CCACAA 
TTGTGG 2.48928712228 +CCACAC GTGTGG 0.042494328422 +CCACAG CTGTGG 1.98559943258 +CCACCA TGGTGG 0.139131185109 +CCACCC GGGTGG 0.411865066012 +CCACCG CGGTGG 0.214345177527 +CCACGA TCGTGG 0.329121460372 +CCACGC GCGTGG 0.853862453394 +CCACGG CCGTGG 0.958047076313 +CCACTA TAGTGG 0.990324695059 +CCACTC GAGTGG -0.865871154042 +CCAGAA TTCTGG 1.35087980364 +CCAGAC GTCTGG 0.330013500547 +CCAGAG CTCTGG 0.850843625887 +CCAGCA TGCTGG -0.105656155248 +CCAGCC GGCTGG 0.10376516441 +CCAGCG CGCTGG -0.151192099436 +CCAGGA TCCTGG -0.00188020552734 +CCAGGC GCCTGG 0.477368129975 +CCAGGG CCCTGG 0.330067567163 +CCAGTA TACTGG 0.201689404211 +CCAGTC GACTGG 0.172972219351 +CCATAA TTATGG 0.864471006772 +CCATAC GTATGG -0.053674529194 +CCATAG CTATGG -0.628472779515 +CCATCA TGATGG 0.316290664784 +CCATCC GGATGG 0.541183344469 +CCATCG CGATGG -0.585435010409 +CCATGA TCATGG 0.842999014507 +CCATGC GCATGG 0.885922909323 +CCATGG CCATGG -1.19148404887 +CCATTA TAATGG 0.461535283786 +CCATTC GAATGG -0.448015532577 +CCCAAA TTTGGG 1.00513456604 +CCCAAC GTTGGG 0.0589630047868 +CCCAAG CTTGGG 0.612217890411 +CCCACA TGTGGG 1.49280537935 +CCCACC GGTGGG -0.540327896372 +CCCACG CGTGGG -0.189765626178 +CCCAGA TCTGGG 0.906005293679 +CCCAGC GCTGGG 0.00258121804065 +CCCAGG CCTGGG 0.385280928829 +CCCATA TATGGG 0.580077252911 +CCCATC GATGGG 0.105886687275 +CCCCAA TTGGGG 0.64902551958 +CCCCAC GTGGGG 0.177279574499 +CCCCAG CTGGGG 1.0367549673 +CCCCCA TGGGGG -0.220124853692 +CCCCCC GGGGGG -0.180081108721 +CCCCCG CGGGGG 0.373246680376 +CCCCGA TCGGGG 0.635830201709 +CCCCGC GCGGGG 0.000651576611126 +CCCCGG CCGGGG 0.854947986857 +CCCCTA TAGGGG 0.325441244643 +CCCCTC GAGGGG 0.483696397734 +CCCGAA TTCGGG 0.714032030274 +CCCGAC GTCGGG -0.208075368152 +CCCGAG CTCGGG 0.440636877923 +CCCGCA TGCGGG 0.402712812803 +CCCGCC GGCGGG 0.476459210594 +CCCGCG CGCGGG 0.023503459093 +CCCGGA TCCGGG -0.538869263981 +CCCGGC GCCGGG 0.346443397541 +CCCGGG CCCGGG 0.548834083398 +CCCGTA TACGGG 0.139729969322 +CCCGTC GACGGG -0.214648757928 +CCCTAA 
TTAGGG -0.290368330245 +CCCTAC GTAGGG 0.144663483304 +CCCTAG CTAGGG 0.0802948074314 +CCCTCA TGAGGG 0.408278749762 +CCCTCC GGAGGG -0.517676283937 +CCCTCG CGAGGG 0.0815315059399 +CCCTGA TCAGGG 0.0384525153021 +CCCTGC GCAGGG 0.376976503211 +CCCTTA TAAGGG -0.343987739741 +CCCTTC GAAGGG 0.152780345376 +CCGAAA TTTCGG 0.701489171099 +CCGAAC GTTCGG 0.371848478306 +CCGAAG CTTCGG 0.108859525197 +CCGACA TGTCGG -0.0475200223019 +CCGACC GGTCGG 0.190388885966 +CCGACG CGTCGG -0.43295891756 +CCGAGA TCTCGG -0.851488238354 +CCGAGC GCTCGG 0.402946122489 +CCGAGG CCTCGG 0.447649638689 +CCGATA TATCGG -0.583698466004 +CCGATC GATCGG 0.214495804491 +CCGCAA TTGCGG 0.704274885421 +CCGCAC GTGCGG -0.124930120604 +CCGCAG CTGCGG 0.796608954246 +CCGCCA TGGCGG 0.0933037773855 +CCGCCC GGGCGG -0.761788986379 +CCGCCG CGGCGG 0.118893616093 +CCGCGA TCGCGG -0.214050849808 +CCGCGC GCGCGG 0.427088586284 +CCGCGG CCGCGG -0.163354064297 +CCGCTA TAGCGG 0.075663139979 +CCGCTC GAGCGG -0.0840465139463 +CCGGAA TTCCGG -0.815717995392 +CCGGAC GTCCGG -0.0199681503018 +CCGGAG CTCCGG -0.142607842875 +CCGGCA TGCCGG -0.211088707568 +CCGGCC GGCCGG 0.20978106433 +CCGGCG CGCCGG 0.186483274301 +CCGGGA TCCCGG -0.120892474243 +CCGGGC GCCCGG 0.286207705144 +CCGGTA TACCGG -0.364061582654 +CCGGTC GACCGG 0.537401184601 +CCGTAA TTACGG -0.368122547637 +CCGTAC GTACGG 0.388891995455 +CCGTAG CTACGG -0.14921907099 +CCGTCA TGACGG -0.731374154141 +CCGTCC GGACGG 0.142640525033 +CCGTCG CGACGG -0.219815741986 +CCGTGA TCACGG 0.351505600383 +CCGTGC GCACGG 0.0466167726132 +CCGTTA TAACGG -0.177534573202 +CCGTTC GAACGG 0.930924136276 +CCTAAA TTTAGG -0.437162767665 +CCTAAC GTTAGG 0.715494549297 +CCTAAG CTTAGG 0.834398157528 +CCTACA TGTAGG 0.352513677162 +CCTACC GGTAGG -0.188597324766 +CCTACG CGTAGG -0.527855146408 +CCTAGA TCTAGG -0.358834707149 +CCTAGC GCTAGG -0.48327631422 +CCTAGG CCTAGG 0.712974479815 +CCTATA TATAGG -0.00662770590644 +CCTATC GATAGG 0.321915138696 +CCTCAA TTGAGG 0.478167716035 +CCTCAC GTGAGG -0.603310279315 +CCTCAG CTGAGG 
1.18451331899 +CCTCCA TGGAGG -0.156185760342 +CCTCCC GGGAGG 0.196588926242 +CCTCCG CGGAGG 0.34975369542 +CCTCGA TCGAGG -0.12704382375 +CCTCGC GCGAGG -0.690558364248 +CCTCTA TAGAGG 0.666120847354 +CCTCTC GAGAGG 0.326083338601 +CCTGAA TTCAGG 0.0108115210728 +CCTGAC GTCAGG 0.0764004186754 +CCTGAG CTCAGG 0.140494023385 +CCTGCA TGCAGG -0.350864542641 +CCTGCC GGCAGG -0.156177205702 +CCTGCG CGCAGG 0.202361500118 +CCTGGA TCCAGG -0.165955838374 +CCTGGC GCCAGG 0.0156098804816 +CCTGTA TACAGG -0.631427205006 +CCTGTC GACAGG -0.29284206294 +CCTTAA TTAAGG 0.313884227175 +CCTTAC GTAAGG 0.666799661556 +CCTTAG CTAAGG 0.577269508525 +CCTTCA TGAAGG -0.228064410555 +CCTTCC GGAAGG 0.127618019043 +CCTTCG CGAAGG 0.233061732027 +CCTTGA TCAAGG 0.697814875901 +CCTTGC GCAAGG 0.163330658666 +CCTTTA TAAAGG -1.10202858977 +CCTTTC GAAAGG -0.15641139741 +CGAAAA TTTTCG 0.570946981635 +CGAAAC GTTTCG 0.71891048699 +CGAAAG CTTTCG 0.00936335629174 +CGAACA TGTTCG -0.544911197699 +CGAACC GGTTCG 0.236185790077 +CGAACG CGTTCG 0.0439294539998 +CGAAGA TCTTCG 0.0635388313512 +CGAAGC GCTTCG -0.312490378281 +CGAATA TATTCG 0.205254003141 +CGAATC GATTCG 0.0901499283283 +CGACAA TTGTCG 0.238050713336 +CGACAC GTGTCG -0.109116574917 +CGACAG CTGTCG -0.769009564377 +CGACCA TGGTCG 0.0360443347963 +CGACCC GGGTCG 0.216209586146 +CGACCG CGGTCG -0.00608417190027 +CGACGA TCGTCG 0.109094760764 +CGACGC GCGTCG -0.0821660131027 +CGACTA TAGTCG -0.154626936131 +CGACTC GAGTCG -0.246273419905 +CGAGAA TTCTCG 0.612418964386 +CGAGAC GTCTCG -0.0448240636466 +CGAGAG CTCTCG -0.751517796087 +CGAGCA TGCTCG -0.330608606967 +CGAGCC GGCTCG 0.693245611149 +CGAGCG CGCTCG 0.278683035903 +CGAGGA TCCTCG -0.187835599873 +CGAGGC GCCTCG -0.952734572307 +CGAGTA TACTCG 0.184943150492 +CGAGTC GACTCG 0.375230854406 +CGATAA TTATCG -0.393190761045 +CGATAC GTATCG 0.0742054445512 +CGATAG CTATCG -0.519678403883 +CGATCA TGATCG 0.0526959654511 +CGATCC GGATCG 0.0664840380862 +CGATCG CGATCG 0.15660945983 +CGATGA TCATCG -0.774933636073 +CGATGC GCATCG 
-0.391779119657 +CGATTA TAATCG -0.0474517133734 +CGATTC GAATCG 0.235116843424 +CGCAAA TTTGCG -0.0809989053442 +CGCAAC GTTGCG 0.914321615152 +CGCAAG CTTGCG 0.902729943974 +CGCACA TGTGCG -0.643522000175 +CGCACC GGTGCG -0.127073746796 +CGCACG CGTGCG -0.771541292094 +CGCAGA TCTGCG 0.165580560798 +CGCAGC GCTGCG 0.0469455940157 +CGCATA TATGCG 0.335290623274 +CGCATC GATGCG 0.437930163851 +CGCCAA TTGGCG -0.109846650695 +CGCCAC GTGGCG -0.994485238576 +CGCCAG CTGGCG -0.142749553444 +CGCCCA TGGGCG 1.05328824356 +CGCCCC GGGGCG 0.0137809042937 +CGCCCG CGGGCG -0.583692912482 +CGCCGA TCGGCG 0.403802926462 +CGCCGC GCGGCG 0.0867379096548 +CGCCTA TAGGCG -0.0424714321252 +CGCCTC GAGGCG -0.196079793222 +CGCGAA TTCGCG 0.198826174488 +CGCGAC GTCGCG 0.0519079932143 +CGCGAG CTCGCG -0.0095714045129 +CGCGCA TGCGCG 0.29733863628 +CGCGCC GGCGCG 0.00534076408975 +CGCGCG CGCGCG 0.133525541683 +CGCGGA TCCGCG 0.0295590736422 +CGCGGC GCCGCG 0.0211820057043 +CGCGTA TACGCG -0.00215667699398 +CGCGTC GACGCG 0.177954856123 +CGCTAA TTAGCG -0.241135973823 +CGCTAC GTAGCG 0.239902173491 +CGCTAG CTAGCG 0.0596735417366 +CGCTCA TGAGCG -0.0533438512221 +CGCTCC GGAGCG 0.0208450744523 +CGCTGA TCAGCG 0.68121260517 +CGCTGC GCAGCG 0.319030638925 +CGCTTA TAAGCG 0.331742647103 +CGCTTC GAAGCG -0.220266241715 +CGGAAA TTTCCG -1.28580550145 +CGGAAC GTTCCG 0.926154437465 +CGGAAG CTTCCG -0.532165941151 +CGGACA TGTCCG 0.26801379322 +CGGACC GGTCCG 0.208564671836 +CGGACG CGTCCG 0.185658917881 +CGGAGA TCTCCG -0.0783602636003 +CGGAGC GCTCCG 0.213447050966 +CGGATA TATCCG 0.180933559441 +CGGATC GATCCG 0.0115933607686 +CGGCAA TTGCCG -0.141333522478 +CGGCAC GTGCCG -0.0989810302207 +CGGCAG CTGCCG -0.315504532197 +CGGCCA TGGCCG 0.520189196688 +CGGCCC GGGCCG 0.0770477004768 +CGGCCG CGGCCG -0.0100293295169 +CGGCGA TCGCCG 0.335335269266 +CGGCGC GCGCCG 0.142338129608 +CGGCTA TAGCCG 0.483570773894 +CGGCTC GAGCCG 0.386036576617 +CGGGAA TTCCCG -0.464940358426 +CGGGAC GTCCCG 0.447833530318 +CGGGAG CTCCCG 0.0731420049918 +CGGGCA TGCCCG 
0.282329634235 +CGGGCC GGCCCG 0.812549782764 +CGGGGA TCCCCG 0.869420112553 +CGGGGC GCCCCG 0.86056528281 +CGGGTA TACCCG -0.203275385127 +CGGGTC GACCCG -0.444672989957 +CGGTAA TTACCG -0.649508330752 +CGGTAC GTACCG -0.117712116536 +CGGTAG CTACCG 0.281181384004 +CGGTCA TGACCG 1.13661174882 +CGGTCC GGACCG 0.0903119623658 +CGGTGA TCACCG -0.00769608361013 +CGGTGC GCACCG -0.800050077623 +CGGTTA TAACCG -0.240948411238 +CGGTTC GAACCG 0.347248429131 +CGTAAA TTTACG 0.413018459532 +CGTAAC GTTACG 0.133628748349 +CGTAAG CTTACG -0.181667545798 +CGTACA TGTACG 0.487388635308 +CGTACC GGTACG -0.0604415421058 +CGTACG CGTACG -0.23751294459 +CGTAGA TCTACG -0.125539098097 +CGTAGC GCTACG 0.386530915361 +CGTATA TATACG -0.194131124774 +CGTATC GATACG -0.267160757641 +CGTCAA TTGACG -1.00863955903 +CGTCAC GTGACG -0.521674552853 +CGTCAG CTGACG -1.76038907438 +CGTCCA TGGACG 0.0777766757824 +CGTCCC GGGACG 0.803357191448 +CGTCGA TCGACG 0.0477564437913 +CGTCGC GCGACG 0.103336677444 +CGTCTA TAGACG -0.60967168838 +CGTCTC GAGACG -0.791340367 +CGTGAA TTCACG 0.634022023138 +CGTGAC GTCACG 0.443531335362 +CGTGAG CTCACG -0.289769453913 +CGTGCA TGCACG 0.109747972914 +CGTGCC GGCACG 0.464253873914 +CGTGGA TCCACG 0.275127549406 +CGTGGC GCCACG 0.531223592152 +CGTGTA TACACG 0.642083015658 +CGTGTC GACACG 0.136541839783 +CGTTAA TTAACG -0.497209435042 +CGTTAC GTAACG -0.0943435759699 +CGTTAG CTAACG -0.242661499343 +CGTTCA TGAACG 0.504828978427 +CGTTCC GGAACG 0.656549347858 +CGTTGA TCAACG 0.482938477348 +CGTTGC GCAACG -0.32248741119 +CGTTTA TAAACG -0.413518821035 +CGTTTC GAAACG -0.819946564926 +CTAAAA TTTTAG -0.474998981835 +CTAAAC GTTTAG -0.0266242942178 +CTAAAG CTTTAG 0.297713951577 +CTAACA TGTTAG -0.621545633382 +CTAACC GGTTAG 0.63074726461 +CTAAGA TCTTAG 0.597123137729 +CTAAGC GCTTAG 0.459779530522 +CTAATA TATTAG 0.051814051491 +CTAATC GATTAG -0.192871916975 +CTACAA TTGTAG 0.415390381047 +CTACAC GTGTAG 0.314508278002 +CTACAG CTGTAG -0.112671340464 +CTACCA TGGTAG -0.264705309307 +CTACCC GGGTAG 0.552961906046 
+CTACGA TCGTAG 0.489104112382 +CTACGC GCGTAG -0.684139469516 +CTACTA TAGTAG -0.106050671268 +CTACTC GAGTAG -0.218538947551 +CTAGAA TTCTAG 0.76196340125 +CTAGAC GTCTAG -0.314054292133 +CTAGAG CTCTAG 0.170762667208 +CTAGCA TGCTAG -0.201084106705 +CTAGCC GGCTAG 0.657253973787 +CTAGGA TCCTAG -0.343732437303 +CTAGGC GCCTAG -0.399983888794 +CTAGTA TACTAG 0.279272209924 +CTAGTC GACTAG 0.63773352209 +CTATAA TTATAG -0.540591918194 +CTATAC GTATAG 0.0316033486072 +CTATAG CTATAG -0.110441744258 +CTATCA TGATAG -0.986583008494 +CTATCC GGATAG 0.0938549592711 +CTATGA TCATAG 1.19060286606 +CTATGC GCATAG 0.77976599654 +CTATTA TAATAG 0.494683632528 +CTATTC GAATAG -0.0659811786152 +CTCAAA TTTGAG -0.435334739428 +CTCAAC GTTGAG 0.673295862992 +CTCAAG CTTGAG 0.814663961939 +CTCACA TGTGAG -0.125711909834 +CTCACC GGTGAG -0.0607596963595 +CTCAGA TCTGAG 0.550689436444 +CTCAGC GCTGAG -0.272692249315 +CTCATA TATGAG 0.669202456635 +CTCATC GATGAG -0.214653088571 +CTCCAA TTGGAG -0.404875066778 +CTCCAC GTGGAG 0.544704103667 +CTCCAG CTGGAG 0.300968023705 +CTCCCA TGGGAG -0.166515521703 +CTCCCC GGGGAG -0.0107883337468 +CTCCGA TCGGAG 0.475523145763 +CTCCGC GCGGAG -0.234881416584 +CTCCTA TAGGAG -0.440390954167 +CTCCTC GAGGAG 0.252207292196 +CTCGAA TTCGAG 0.458096440119 +CTCGAC GTCGAG -0.648461079866 +CTCGAG CTCGAG 0.238421232408 +CTCGCA TGCGAG -0.770225157474 +CTCGCC GGCGAG 0.367453385031 +CTCGGA TCCGAG 0.339650543736 +CTCGGC GCCGAG 0.421157785256 +CTCGTA TACGAG 0.362284718227 +CTCGTC GACGAG -0.209973906057 +CTCTAA TTAGAG -0.585120608238 +CTCTAC GTAGAG -0.0381226169544 +CTCTCA TGAGAG 0.412400836814 +CTCTCC GGAGAG -0.500495460815 +CTCTGA TCAGAG -0.0509816398658 +CTCTGC GCAGAG -0.137150029459 +CTCTTA TAAGAG -0.0995119601555 +CTCTTC GAAGAG 0.0293315576627 +CTGAAA TTTCAG 0.637905924395 +CTGAAC GTTCAG 0.0801852652274 +CTGAAG CTTCAG 1.29884632524 +CTGACA TGTCAG -1.45442440761 +CTGACC GGTCAG 1.08710062922 +CTGAGA TCTCAG -0.182920626709 +CTGAGC GCTCAG -0.261916169948 +CTGATA TATCAG -1.5286762778 +CTGATC GATCAG 
-0.261819591815 +CTGCAA TTGCAG -0.367828379023 +CTGCAC GTGCAG -0.0851053710219 +CTGCAG CTGCAG 0.017373979076 +CTGCCA TGGCAG -0.328262173072 +CTGCCC GGGCAG 0.396136640158 +CTGCGA TCGCAG -0.236577233975 +CTGCGC GCGCAG -0.0340134961712 +CTGCTA TAGCAG -0.90401510954 +CTGCTC GAGCAG -0.482167659855 +CTGGAA TTCCAG 0.480592453838 +CTGGAC GTCCAG 0.124783447514 +CTGGCA TGCCAG -0.744896236415 +CTGGCC GGCCAG 0.508550304958 +CTGGGA TCCCAG 0.5595983837 +CTGGGC GCCCAG -0.420503142996 +CTGGTA TACCAG 0.351352305298 +CTGGTC GACCAG -0.333524602904 +CTGTAA TTACAG -0.344930581819 +CTGTAC GTACAG 0.422072688223 +CTGTCA TGACAG -1.32137018932 +CTGTCC GGACAG -0.0204497524117 +CTGTGA TCACAG -0.318233094975 +CTGTGC GCACAG -0.452689661808 +CTGTTA TAACAG -0.682203584522 +CTGTTC GAACAG -0.032586371998 +CTTAAA TTTAAG 0.522514091698 +CTTAAC GTTAAG -1.0306648626 +CTTAAG CTTAAG 1.38874859741 +CTTACA TGTAAG 0.301805562881 +CTTACC GGTAAG 0.0844070155044 +CTTAGA TCTAAG 0.170474445016 +CTTAGC GCTAAG 0.138185199667 +CTTATA TATAAG 0.454300274475 +CTTATC GATAAG -1.35801301952 +CTTCAA TTGAAG 0.19993761629 +CTTCAC GTGAAG 0.108019941224 +CTTCCA TGGAAG 0.362648019349 +CTTCCC GGGAAG 0.131998098522 +CTTCGA TCGAAG -0.105193206331 +CTTCGC GCGAAG -0.390704103225 +CTTCTA TAGAAG -0.155899121808 +CTTCTC GAGAAG -0.0739598398458 +CTTGAA TTCAAG -0.0719916676968 +CTTGAC GTCAAG 0.068351491377 +CTTGCA TGCAAG 0.463268254042 +CTTGCC GGCAAG -0.766831412922 +CTTGGA TCCAAG 0.314061693873 +CTTGGC GCCAAG 0.405573462214 +CTTGTA TACAAG 0.65851026313 +CTTGTC GACAAG 0.421432544956 +CTTTAA TTAAAG 0.0423904357395 +CTTTAC GTAAAG 0.655866345313 +CTTTCA TGAAAG -0.707971281847 +CTTTCC GGAAAG -0.305665628725 +CTTTGA TCAAAG -0.390594122457 +CTTTGC GCAAAG -0.253785395266 +CTTTTA TAAAAG 0.520772205443 +CTTTTC GAAAAG -0.0834230798663 +GAAAAA TTTTTC -0.394594492275 +GAAAAC GTTTTC -0.889752597605 +GAAACA TGTTTC 0.981733612947 +GAAACC GGTTTC -0.0721495314535 +GAAAGA TCTTTC -0.475164148563 +GAAAGC GCTTTC -0.0120812417699 +GAAATA TATTTC 
0.128630511111 +GAAATC GATTTC -0.258506502151 +GAACAA TTGTTC 0.072282920944 +GAACAC GTGTTC 0.701467646999 +GAACCA TGGTTC -0.542881494194 +GAACCC GGGTTC 0.340658710073 +GAACGA TCGTTC -0.199593582018 +GAACGC GCGTTC 0.454300760466 +GAACTA TAGTTC -0.295720723936 +GAACTC GAGTTC 1.20394131743 +GAAGAA TTCTTC 0.506500337578 +GAAGAC GTCTTC 0.0907880643499 +GAAGCA TGCTTC -1.19156928449 +GAAGCC GGCTTC 0.397646726543 +GAAGGA TCCTTC 0.253377264863 +GAAGGC GCCTTC -0.525939145222 +GAAGTA TACTTC 0.662195710792 +GAAGTC GACTTC -0.419934443362 +GAATAA TTATTC 0.123387568539 +GAATAC GTATTC -0.383087785228 +GAATCA TGATTC 1.27939625155 +GAATCC GGATTC -0.0498963377435 +GAATGA TCATTC -0.620744593633 +GAATGC GCATTC 1.04194079095 +GAATTA TAATTC -0.149020713486 +GAATTC GAATTC 0.644297830158 +GACAAA TTTGTC -0.134936102515 +GACAAC GTTGTC -0.309021420318 +GACACA TGTGTC 1.29887852736 +GACACC GGTGTC -0.787922948829 +GACAGA TCTGTC -0.863814253112 +GACAGC GCTGTC -0.676822231763 +GACATA TATGTC 0.827736434537 +GACATC GATGTC -0.130237675939 +GACCAA TTGGTC -0.475509904338 +GACCAC GTGGTC 2.00597812344 +GACCCA TGGGTC 0.594605764824 +GACCCC GGGGTC -0.25038724278 +GACCGA TCGGTC 0.077158904808 +GACCGC GCGGTC 0.996665204047 +GACCTA TAGGTC -0.0140856102395 +GACCTC GAGGTC -0.0141400877685 +GACGAA TTCGTC -0.24855256907 +GACGAC GTCGTC -0.787091955547 +GACGCA TGCGTC -0.79119100016 +GACGCC GGCGTC -0.064943799736 +GACGGA TCCGTC 0.507946533831 +GACGGC GCCGTC 0.0996726291695 +GACGTA TACGTC -1.129683455 +GACGTC GACGTC -0.325314874102 +GACTAA TTAGTC 1.9212514778 +GACTAC GTAGTC -0.489310082414 +GACTCA TGAGTC 3.18513477299 +GACTCC GGAGTC 0.00318724773961 +GACTGA TCAGTC -0.284654331124 +GACTGC GCAGTC 0.253234563706 +GACTTA TAAGTC 0.53783328072 +GAGAAA TTTCTC -0.107943987489 +GAGAAC GTTCTC 0.763065903804 +GAGACA TGTCTC 0.0238226221677 +GAGACC GGTCTC 0.000346769354435 +GAGAGA TCTCTC -0.366063998677 +GAGAGC GCTCTC -0.0716698934044 +GAGATA TATCTC -0.479728321524 +GAGATC GATCTC 0.647198822351 +GAGCAA TTGCTC 0.278077669471 
+GAGCAC GTGCTC 0.442986942774 +GAGCCA TGGCTC -0.0889483545925 +GAGCCC GGGCTC -0.310590547764 +GAGCGA TCGCTC 0.294194487089 +GAGCGC GCGCTC 0.0827346373539 +GAGCTA TAGCTC -0.730332859895 +GAGCTC GAGCTC -0.199506840782 +GAGGAA TTCCTC 0.543305692157 +GAGGAC GTCCTC -0.349616541062 +GAGGCA TGCCTC -0.105697066717 +GAGGCC GGCCTC 0.858282680338 +GAGGGA TCCCTC -0.124510451688 +GAGGGC GCCCTC 0.323767770759 +GAGGTA TACCTC -0.844332688078 +GAGTAA TTACTC 0.758120437701 +GAGTAC GTACTC 0.104306241785 +GAGTCA TGACTC 3.58563459353 +GAGTCC GGACTC 0.339341967911 +GAGTGA TCACTC 0.066165066026 +GAGTGC GCACTC -0.514233890221 +GAGTTA TAACTC 0.671276468041 +GATAAA TTTATC -1.16448423447 +GATAAC GTTATC -0.476107186466 +GATACA TGTATC -0.172248071576 +GATACC GGTATC 0.922018577146 +GATAGA TCTATC -0.678310902513 +GATAGC GCTATC -1.0739725457 +GATATA TATATC 0.323693340333 +GATATC GATATC -0.13829875248 +GATCAA TTGATC -0.263370217811 +GATCAC GTGATC -0.383180034094 +GATCCA TGGATC -0.418998551659 +GATCCC GGGATC 0.459522351525 +GATCGA TCGATC 0.174830497643 +GATCGC GCGATC -0.0493224221696 +GATCTA TAGATC -0.114539458553 +GATGAA TTCATC 0.113123798729 +GATGAC GTCATC 0.420960424606 +GATGCA TGCATC -0.740595126486 +GATGCC GGCATC -0.0146831740442 +GATGGA TCCATC -0.44789540412 +GATGGC GCCATC -0.976518189565 +GATGTA TACATC 0.315735783162 +GATTAA TTAATC -0.768397991554 +GATTAC GTAATC -0.159295927856 +GATTCA TGAATC 1.91569273635 +GATTCC GGAATC 0.028441642914 +GATTGA TCAATC 0.586533848838 +GATTGC GCAATC 0.0359466523255 +GATTTA TAAATC -0.43347973413 +GCAAAA TTTTGC 1.57245669278 +GCAAAC GTTTGC -2.41567256424 +GCAACA TGTTGC -0.0212126198682 +GCAACC GGTTGC 0.167308203586 +GCAAGA TCTTGC 0.230496794516 +GCAAGC GCTTGC -0.279906494617 +GCAATA TATTGC 1.03289974237 +GCACAA TTGTGC 0.168080718089 +GCACAC GTGTGC -0.126747337538 +GCACCA TGGTGC -0.370948557776 +GCACCC GGGTGC 0.486126157128 +GCACGA TCGTGC 0.115382004439 +GCACGC GCGTGC -0.384704484018 +GCACTA TAGTGC -0.732242903933 +GCAGAA TTCTGC -0.441638279756 +GCAGAC GTCTGC 
0.577048501416 +GCAGCA TGCTGC 0.0643315561072 +GCAGCC GGCTGC 0.0719820075452 +GCAGGA TCCTGC 0.034078237827 +GCAGGC GCCTGC 0.0751087587256 +GCAGTA TACTGC -0.140907462619 +GCATAA TTATGC -0.216901949377 +GCATAC GTATGC -0.417062652314 +GCATCA TGATGC -0.509813438412 +GCATCC GGATGC 0.157724061262 +GCATGA TCATGC 0.311792502655 +GCATGC GCATGC 0.0273955313285 +GCATTA TAATGC -0.038383608378 +GCCAAA TTTGGC -0.0363591138599 +GCCAAC GTTGGC -0.48002846426 +GCCACA TGTGGC 0.478891420952 +GCCACC GGTGGC 0.046888332347 +GCCAGA TCTGGC -0.325597697717 +GCCAGC GCTGGC -0.199926154665 +GCCATA TATGGC -0.951578017522 +GCCCAA TTGGGC 1.10276199076 +GCCCAC GTGGGC 0.676887327364 +GCCCCA TGGGGC 0.35936393068 +GCCCCC GGGGGC -0.398755700607 +GCCCGA TCGGGC 1.06470764147 +GCCCGC GCGGGC 0.47965310734 +GCCCTA TAGGGC 0.435710100752 +GCCGAA TTCGGC 0.0377072993252 +GCCGAC GTCGGC 0.22768512467 +GCCGCA TGCGGC 0.115534020858 +GCCGCC GGCGGC -0.732735770282 +GCCGGA TCCGGC -0.38520088123 +GCCGGC GCCGGC -0.185586613226 +GCCGTA TACGGC -0.332516669945 +GCCTAA TTAGGC 0.0973747347692 +GCCTAC GTAGGC 0.0804171248438 +GCCTCA TGAGGC 0.64675754844 +GCCTCC GGAGGC -0.235791468531 +GCCTGA TCAGGC 0.871312603586 +GCCTTA TAAGGC 0.66398535763 +GCGAAA TTTCGC 0.216500601374 +GCGAAC GTTCGC 0.176665221767 +GCGACA TGTCGC -0.206525750848 +GCGACC GGTCGC 0.867020226992 +GCGAGA TCTCGC 0.39834591882 +GCGAGC GCTCGC -0.0047486305465 +GCGATA TATCGC -0.212587858109 +GCGCAA TTGCGC 0.249470449698 +GCGCAC GTGCGC -0.576649184299 +GCGCCA TGGCGC -0.370226281141 +GCGCCC GGGCGC 0.381404303337 +GCGCGA TCGCGC 0.244534402725 +GCGCGC GCGCGC 0.0601779542866 +GCGCTA TAGCGC -0.184211933615 +GCGGAA TTCCGC -0.14917985125 +GCGGAC GTCCGC -0.0437387199239 +GCGGCA TGCCGC 0.124099157921 +GCGGCC GGCCGC -0.868770297365 +GCGGGA TCCCGC -0.330802769301 +GCGGTA TACCGC -0.293792191657 +GCGTAA TTACGC -0.308730784191 +GCGTAC GTACGC -0.493705045035 +GCGTCA TGACGC -1.40894601949 +GCGTCC GGACGC 0.650092537073 +GCGTGA TCACGC 0.485604781064 +GCGTTA TAACGC -0.16348782665 
+GCTAAA TTTAGC 0.0633281870082 +GCTAAC GTTAGC 0.0446987046769 +GCTACA TGTAGC 0.139951985661 +GCTACC GGTAGC -0.582191094114 +GCTAGA TCTAGC 0.425355245083 +GCTAGC GCTAGC 0.544395057547 +GCTATA TATAGC -0.122878741617 +GCTCAA TTGAGC 0.357260216643 +GCTCAC GTGAGC -0.362077448999 +GCTCCA TGGAGC 0.234865357242 +GCTCCC GGGAGC 0.410356067134 +GCTCGA TCGAGC 0.523443780445 +GCTCTA TAGAGC -0.503747504809 +GCTGAA TTCAGC 0.292869298061 +GCTGAC GTCAGC -0.149757550449 +GCTGCA TGCAGC 0.0619331271517 +GCTGCC GGCAGC 0.116465057744 +GCTGGA TCCAGC 0.00864608267586 +GCTGTA TACAGC -0.0817336518524 +GCTTAA TTAAGC 0.664269635099 +GCTTAC GTAAGC -0.573448259095 +GCTTCA TGAAGC -0.527498202661 +GCTTCC GGAAGC 0.0466074141633 +GCTTGA TCAAGC -0.223589414042 +GCTTTA TAAAGC 0.213691377209 +GGAAAA TTTTCC -2.05613895077 +GGAAAC GTTTCC -0.31843375495 +GGAACA TGTTCC 0.878948290896 +GGAACC GGTTCC -0.41733947832 +GGAAGA TCTTCC 0.292117518108 +GGAATA TATTCC -0.310943681072 +GGACAA TTGTCC 0.390414984809 +GGACAC GTGTCC 0.174292489707 +GGACCA TGGTCC -0.523110637041 +GGACCC GGGTCC 0.5472877955 +GGACGA TCGTCC 0.53114236268 +GGACTA TAGTCC 0.793132421151 +GGAGAA TTCTCC -0.163792630984 +GGAGAC GTCTCC 0.206727520829 +GGAGCA TGCTCC 0.456407748447 +GGAGCC GGCTCC 0.295421561451 +GGAGGA TCCTCC 0.328364640098 +GGAGTA TACTCC -0.151742588951 +GGATAA TTATCC -0.0585638459289 +GGATAC GTATCC -0.309412420714 +GGATCA TGATCC -0.480562203251 +GGATCC GGATCC 0.335487965753 +GGATGA TCATCC 0.177410986413 +GGATTA TAATCC -1.1939070998 +GGCAAA TTTGCC 0.000624112947654 +GGCAAC GTTGCC -1.15978921165 +GGCACA TGTGCC 0.693216860903 +GGCACC GGTGCC -0.881907424195 +GGCAGA TCTGCC 0.202002559673 +GGCATA TATGCC 0.0739074460809 +GGCCAA TTGGCC 0.233613871256 +GGCCAC GTGGCC 0.349420517541 +GGCCCA TGGGCC 0.76801497851 +GGCCCC GGGGCC 0.27541536492 +GGCCGA TCGGCC 0.663089975748 +GGCCTA TAGGCC 0.219005607787 +GGCGAA TTCGCC 0.226797469513 +GGCGAC GTCGCC 0.127570597429 +GGCGCA TGCGCC -0.0446136938549 +GGCGCC GGCGCC -0.0745645953993 +GGCGGA TCCGCC 
-0.226505856389 +GGCGTA TACGCC -0.566154746416 +GGCTAA TTAGCC 0.410090602753 +GGCTAC GTAGCC 0.167521133316 +GGCTCA TGAGCC -0.182448764472 +GGCTGA TCAGCC 0.305315803898 +GGCTTA TAAGCC 0.0549165179339 +GGGAAA TTTCCC 0.211185115876 +GGGAAC GTTCCC 0.0891726153306 +GGGACA TGTCCC 0.798353329738 +GGGACC GGTCCC 0.327460829345 +GGGAGA TCTCCC -0.00772602123998 +GGGATA TATCCC 0.042485553485 +GGGCAA TTGCCC -0.398865388366 +GGGCAC GTGCCC 0.531369020502 +GGGCCA TGGCCC 0.421850727294 +GGGCCC GGGCCC 0.13715245724 +GGGCGA TCGCCC -0.207341953811 +GGGCTA TAGCCC 0.860581648468 +GGGGAA TTCCCC 0.414828826681 +GGGGAC GTCCCC 1.09337858056 +GGGGCA TGCCCC 0.188790419324 +GGGGGA TCCCCC 0.564505418018 +GGGGTA TACCCC 0.337102528271 +GGGTAA TTACCC 0.152772959878 +GGGTAC GTACCC 0.0850292801245 +GGGTCA TGACCC -0.277523149632 +GGGTGA TCACCC 0.389877602576 +GGGTTA TAACCC -1.1488463254 +GGTAAA TTTACC -1.93485319848 +GGTAAC GTTACC -0.444349645812 +GGTACA TGTACC 0.533161182312 +GGTACC GGTACC -0.307030306627 +GGTAGA TCTACC 0.0409651099541 +GGTATA TATACC -1.16022327914 +GGTCAA TTGACC 0.139153520691 +GGTCAC GTGACC -0.450515748733 +GGTCCA TGGACC 0.169460723222 +GGTCGA TCGACC -0.122888767331 +GGTCTA TAGACC -0.0850652207286 +GGTGAA TTCACC 0.524001085704 +GGTGAC GTCACC -0.17925252457 +GGTGCA TGCACC -0.7324301946 +GGTGGA TCCACC 0.377800190353 +GGTGTA TACACC 0.617772381551 +GGTTAA TTAACC 0.156217996285 +GGTTAC GTAACC -0.890319968576 +GGTTCA TGAACC -0.551493468635 +GGTTGA TCAACC 0.213366284121 +GGTTTA TAAACC -0.015482073629 +GTAAAA TTTTAC 1.18200993615 +GTAAAC GTTTAC -3.59764423275 +GTAACA TGTTAC 0.863881272261 +GTAAGA TCTTAC 0.696118619435 +GTAATA TATTAC -0.0790481100302 +GTACAA TTGTAC 0.959255249776 +GTACAC GTGTAC -0.381103239165 +GTACCA TGGTAC 0.673058575835 +GTACGA TCGTAC 0.303418617214 +GTACTA TAGTAC 0.368286056561 +GTAGAA TTCTAC 0.777466438187 +GTAGAC GTCTAC -0.260392489063 +GTAGCA TGCTAC -0.557171886797 +GTAGGA TCCTAC 0.249764914617 +GTAGTA TACTAC 1.10613154186 +GTATAA TTATAC -0.225205794143 +GTATAC 
GTATAC -0.709411981795 +GTATCA TGATAC -0.102084079336 +GTATGA TCATAC 0.883480966804 +GTATTA TAATAC 0.112117340424 +GTCAAA TTTGAC 0.143115014765 +GTCAAC GTTGAC -2.14211821008 +GTCACA TGTGAC 0.438253073163 +GTCAGA TCTGAC -0.426249226609 +GTCATA TATGAC 0.201153560659 +GTCCAA TTGGAC 0.0470205002071 +GTCCAC GTGGAC -0.21284193494 +GTCCCA TGGGAC 1.6176606758 +GTCCGA TCGGAC 0.114722967227 +GTCCTA TAGGAC 0.771952812089 +GTCGAA TTCGAC 0.724088020994 +GTCGAC GTCGAC 0.251614152656 +GTCGCA TGCGAC 0.675711700536 +GTCGGA TCCGAC 0.193393621986 +GTCGTA TACGAC 0.0186019337447 +GTCTAA TTAGAC -0.187936713171 +GTCTCA TGAGAC -0.491335152028 +GTCTGA TCAGAC 0.360208311696 +GTCTTA TAAGAC 0.0625579178154 +GTGAAA TTTCAC -0.563337549375 +GTGAAC GTTCAC -0.349524060758 +GTGACA TGTCAC -0.410586341492 +GTGAGA TCTCAC -1.0355009926 +GTGATA TATCAC -0.736410349306 +GTGCAA TTGCAC -0.260755290625 +GTGCAC GTGCAC -0.744407650846 +GTGCCA TGGCAC -1.07262578178 +GTGCGA TCGCAC -0.164782650071 +GTGCTA TAGCAC -0.662274879805 +GTGGAA TTCCAC 0.176989759501 +GTGGCA TGCCAC -0.159364738365 +GTGGGA TCCCAC 0.988068726378 +GTGGTA TACCAC -0.246902705808 +GTGTAA TTACAC -0.891262927583 +GTGTCA TGACAC -0.0379293872633 +GTGTGA TCACAC -1.95184795358 +GTGTTA TAACAC -1.3607239668 +GTTAAA TTTAAC -0.186873285101 +GTTAAC GTTAAC 0.482940767605 +GTTACA TGTAAC 0.329573937825 +GTTAGA TCTAAC -1.00236498509 +GTTATA TATAAC -0.163258685865 +GTTCAA TTGAAC 0.577729784916 +GTTCCA TGGAAC 0.42827050466 +GTTCGA TCGAAC -0.160584230436 +GTTCTA TAGAAC 0.628168497995 +GTTGAA TTCAAC 0.0122596223788 +GTTGCA TGCAAC 1.28935831187 +GTTGGA TCCAAC 0.501609010251 +GTTGTA TACAAC 0.536022722898 +GTTTAA TTAAAC -0.00554204032662 +GTTTCA TGAAAC -0.106888470485 +GTTTGA TCAAAC -0.313019133275 +GTTTTA TAAAAC 0.992384413805 +TAAAAA TTTTTA 0.579463327305 +TAAACA TGTTTA -2.43807962232 +TAAAGA TCTTTA -0.0559249115822 +TAAATA TATTTA -2.16727500814 +TAACAA TTGTTA 0.5362496048 +TAACCA TGGTTA 1.25918224499 +TAACGA TCGTTA -0.245420018049 +TAACTA TAGTTA 0.608128184593 
+TAAGAA TTCTTA 1.02556160172 +TAAGCA TGCTTA -0.00178390331913 +TAAGGA TCCTTA 0.837198657402 +TAAGTA TACTTA 0.0721724774768 +TAATAA TTATTA -0.189941492074 +TAATCA TGATTA 0.004309724706 +TAATGA TCATTA -0.927698594817 +TAATTA TAATTA -1.12948086023 +TACAAA TTTGTA -0.668160668233 +TACACA TGTGTA -1.5497729664 +TACAGA TCTGTA -0.00599134182996 +TACATA TATGTA -1.12195560298 +TACCAA TTGGTA -0.281401801063 +TACCCA TGGGTA 0.279140164003 +TACCGA TCGGTA -0.674861229256 +TACCTA TAGGTA 0.232148610987 +TACGAA TTCGTA -0.107764774043 +TACGCA TGCGTA -0.519175400062 +TACGGA TCCGTA -0.402472215133 +TACGTA TACGTA 0.541316396612 +TACTAA TTAGTA -0.135239105598 +TACTCA TGAGTA 0.639564666607 +TACTGA TCAGTA -0.482886609395 +TAGAAA TTTCTA 0.0445682798936 +TAGACA TGTCTA -0.45261373791 +TAGAGA TCTCTA -0.0954516438137 +TAGATA TATCTA -0.532370750539 +TAGCAA TTGCTA -0.652336647837 +TAGCCA TGGCTA 0.291364926411 +TAGCGA TCGCTA 0.149053684558 +TAGCTA TAGCTA 0.686821117845 +TAGGAA TTCCTA 0.597312086206 +TAGGCA TGCCTA -0.0392374245184 +TAGGGA TCCCTA -0.898776106474 +TAGTAA TTACTA -0.0409191001809 +TAGTCA TGACTA 1.32002868308 +TAGTGA TCACTA 0.306784253534 +TATAAA TTTATA -0.121630582814 +TATACA TGTATA -0.557527238557 +TATAGA TCTATA -0.29108877277 +TATATA TATATA -0.239193588677 +TATCAA TTGATA -1.21281286807 +TATCCA TGGATA -0.143758324237 +TATCGA TCGATA -0.316281900517 +TATGAA TTCATA 0.567777510974 +TATGCA TGCATA 0.382062812319 +TATGGA TCCATA 0.18823076363 +TATTAA TTAATA 0.370202791227 +TATTCA TGAATA -0.0890453547066 +TATTGA TCAATA -3.17710597328 +TCAAAA TTTTGA 0.303875411059 +TCAACA TGTTGA -2.00254784618 +TCAAGA TCTTGA 1.4720641259 +TCACAA TTGTGA 0.295459860043 +TCACCA TGGTGA 0.210574562759 +TCACGA TCGTGA -0.515739048241 +TCAGAA TTCTGA -0.292759366807 +TCAGCA TGCTGA -0.620868456293 +TCAGGA TCCTGA 0.0391644847042 +TCATAA TTATGA 0.46490835305 +TCATCA TGATGA 0.156189576126 +TCATGA TCATGA -0.28992182543 +TCCAAA TTTGGA 0.372353469926 +TCCACA TGTGGA -0.303324084643 +TCCAGA TCTGGA 1.11791545491 +TCCCAA TTGGGA 
1.0120690282 +TCCCCA TGGGGA 0.475206609554 +TCCCGA TCGGGA -0.218933163821 +TCCGAA TTCGGA 0.231675785391 +TCCGCA TGCGGA -0.0565746658942 +TCCGGA TCCGGA -0.534349085125 +TCCTAA TTAGGA 0.426713370402 +TCCTCA TGAGGA -0.101009274429 +TCGAAA TTTCGA 0.385602827192 +TCGACA TGTCGA 0.567716035117 +TCGAGA TCTCGA -0.0811772342038 +TCGCAA TTGCGA 0.471042033643 +TCGCCA TGGCGA -0.433031956525 +TCGCGA TCGCGA -0.10259765071 +TCGGAA TTCCGA 0.152092661722 +TCGGCA TGCCGA -0.277526003969 +TCGTAA TTACGA 0.6872310151 +TCGTCA TGACGA -0.633071677798 +TCTAAA TTTAGA -0.119569291318 +TCTACA TGTAGA 0.531537891496 +TCTAGA TCTAGA -0.582228401295 +TCTCAA TTGAGA 0.0181823636774 +TCTCCA TGGAGA -0.0990443536722 +TCTGAA TTCAGA 0.818735079074 +TCTGCA TGCAGA -0.708985771217 +TCTTAA TTAAGA 0.684573742868 +TCTTCA TGAAGA 0.231633308785 +TGAAAA TTTTCA -0.590753014937 +TGAACA TGTTCA 0.143205522462 +TGACAA TTGTCA -0.109523687459 +TGACCA TGGTCA 1.47045933497 +TGAGAA TTCTCA -0.555364387516 +TGAGCA TGCTCA -0.498941620454 +TGATAA TTATCA -1.78189703088 +TGATCA TGATCA -0.58090561671 +TGCAAA TTTGCA -0.642834024972 +TGCACA TGTGCA -1.10325139721 +TGCCAA TTGGCA -0.349619012912 +TGCCCA TGGGCA 0.294732505972 +TGCGAA TTCGCA -0.0604088395824 +TGCGCA TGCGCA 0.0208781532991 +TGCTAA TTAGCA -0.902952269579 +TGGAAA TTTCCA -1.65646848688 +TGGACA TGTCCA 0.212014771381 +TGGCAA TTGCCA -0.299231706691 +TGGCCA TGGCCA 0.0133271697043 +TGGGAA TTCCCA -0.0169051652559 +TGGTAA TTACCA -0.741148671428 +TGTAAA TTTACA -1.90833491575 +TGTACA TGTACA -0.781209085217 +TGTCAA TTGACA -1.54128600175 +TGTGAA TTCACA -2.24458672601 +TGTTAA TTAACA -1.12490394498 +TTAAAA TTTTAA -0.0119631686901 +TTACAA TTGTAA 0.81347268468 +TTAGAA TTCTAA 0.271221556202 +TTATAA TTATAA 0.774386643995 +TTCAAA TTTGAA 0.809237031692 +TTCCAA TTGGAA 0.158724969294 +TTCGAA TTCGAA -0.405674192258 +TTGAAA TTTCAA -0.306551492839 +TTGCAA TTGCAA 0.851414898595 +TTTAAA TTTAAA 0.479575745295
--- a/kmersvm/scripts/kmersvm_train.py Mon Aug 20 21:42:29 2012 -0400 +++ b/kmersvm/scripts/kmersvm_train.py Sun Jun 16 18:06:14 2013 -0400 @@ -754,7 +754,8 @@ sids = sids_pos + sids_neg if options.weight == 0: - options.weight = 1 + log(nneg/npos) + #DEBUGGED by dlee 02/17/13 + options.weight = 1 + log(nneg/float(npos)) if options.quiet == False: sys.stderr.write('SVM parameters:\n')
#!/usr/bin/env python
# File: kmersvm/scripts/kmersvm_train_kfb_copy.py
"""
    kmersvm_train.py; train a support vector machine using shogun toolbox
    Copyright (C) 2011 Dongwon Lee

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""



import sys
import optparse
import random
import numpy
from math import log, exp

# revcomp, generate_kmers, generate_rcmap_table and read_fastafile are used
# below without being defined here; they are expected to come from this
# star import.
from libkmersvm import *
try:
    from shogun.PreProc import SortWordString, SortUlongString
except ImportError:
    from shogun.Preprocessor import SortWordString, SortUlongString
from shogun.Kernel import CommWordStringKernel, CommUlongStringKernel, \
        CombinedKernel

from shogun.Features import StringWordFeatures, StringUlongFeatures, \
        StringCharFeatures, CombinedFeatures, DNA, Labels
from shogun.Classifier import MSG_INFO, MSG_ERROR
try:
    from shogun.Classifier import SVMLight
except ImportError:
    # svm_learn() falls back to LibSVM when the SVMLight name is missing
    from shogun.Classifier import LibSVM

# global variables
# g_kmers -- list of all k-mers for the current k (only filled when k <= 8)
# g_rcmap -- table mapping a kmerid to its reverse-complement representative
#            (only filled when k <= 8)
g_kmers = []
g_rcmap = []


def kmerid2kmer(kmerid, kmerlen):
    """convert integer kmerid to kmer string

    Arguments:
    kmerid -- integer, id of k-mer
    kmerlen -- integer, length of k-mer

    Return:
    kmer string
    """

    nts = "ACGT"
    kmernts = []
    kmerid2 = kmerid

    # peel off base-4 digits, least significant first
    for i in xrange(kmerlen):
        ntid = kmerid2 % 4
        kmernts.append(nts[ntid])
        # floor division keeps the id an exact integer for long k-mers
        kmerid2 = (kmerid2-ntid) // 4

    return ''.join(reversed(kmernts))


def kmer2kmerid(kmer, kmerlen):
    """convert kmer string to integer kmerid

    Arguments:
    kmer -- string, k-mer made of the upper-case letters A, C, G, T
    kmerlen -- integer, length of k-mer (not used by the computation)

    Return:
    id of k-mer
    """

    nt2id = {'A':0, 'C':1, 'G':2, 'T':3}

    return reduce(lambda x, y: (4*x+y), [nt2id[x] for x in kmer])


def get_rcmap(kmerid, kmerlen):
    """mapping kmerid to its reverse complement k-mer on-the-fly

    Arguments:
    kmerid -- integer, id of k-mer
    kmerlen -- integer, length of k-mer

    Return:
    integer kmerid after mapping to its reverse complement
    (the smaller of the k-mer's id and its reverse complement's id)
    """

    #1. get kmer from kmerid
    #2. get reverse complement kmer
    #3. get kmerid from revcomp kmer
    rckmerid = kmer2kmerid(revcomp(kmerid2kmer(kmerid, kmerlen)), kmerlen)

    if rckmerid < kmerid:
        return rckmerid

    return kmerid


def non_redundant_word_features(feats, kmerlen):
    """convert the features from Shogun toolbox to non-redundant word features (handle reverse complements)

    Uses the precomputed global table g_rcmap, so it must be filled for the
    current k before this function is called.

    Arguments:
    feats -- StringWordFeatures
    kmerlen -- integer, length of k-mer

    Return:
    StringWordFeatures after converting reverse complement k-mer ids
    """

    rcmap = g_rcmap

    for i in xrange(feats.get_num_vectors()):
        nf = [rcmap[int(kmerid)] for kmerid in feats.get_feature_vector(i)]

        feats.set_feature_vector(numpy.array(nf, numpy.dtype('u2')), i)

    preproc = SortWordString()
    preproc.init(feats)
    # the method names differ between shogun versions; try the older API first
    try:
        feats.add_preproc(preproc)
        feats.apply_preproc()
    except AttributeError:
        feats.add_preprocessor(preproc)
        feats.apply_preprocessor()

    return feats


def non_redundant_ulong_features(feats, kmerlen):
    """convert the features from Shogun toolbox to non-redundant ulong features

    Arguments:
    feats -- StringUlongFeatures
    kmerlen -- integer, length of k-mer

    Return:
    StringUlongFeatures after converting reverse complement k-mer ids
    """

    for i in xrange(feats.get_num_vectors()):
        nf = [get_rcmap(int(kmerid), kmerlen) \
                for kmerid in feats.get_feature_vector(i)]

        feats.set_feature_vector(numpy.array(nf, numpy.dtype('u8')), i)

    preproc = SortUlongString()
    preproc.init(feats)
    # the method names differ between shogun versions; try the older API first
    try:
        feats.add_preproc(preproc)
        feats.apply_preproc()
    except AttributeError:
        feats.add_preprocessor(preproc)
        feats.apply_preprocessor()

    return feats


def svm_learn(kernel, labels, options):
    """train SVM using SVMLight or LibSVM

    Arguments:
    kernel -- kernel object from Shogun toolbox
    labels -- list of labels
    options -- object containing option data

    Return:
    trained svm object
    """

    # SVMLight is undefined when its import failed at module load time
    try:
        svm=SVMLight(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))
    except NameError:
        svm=LibSVM(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))

    if options.quiet == False:
        svm.io.set_loglevel(MSG_INFO)
        svm.io.set_target_to_stderr()

    svm.set_epsilon(options.epsilon)
    svm.parallel.set_num_threads(1)
    if options.weight != 1.0:
        # presumably set_C takes (C_neg, C_pos), so the positive class gets
        # the weighted C -- TODO confirm against the shogun API in use
        svm.set_C(options.svmC, options.svmC*options.weight)
    svm.train()

    if options.quiet == False:
        svm.io.set_loglevel(MSG_ERROR)

    return svm


def _get_spectrum_features(seqs, kmerlen):
    """generate spectrum features (internal)

    Arguments:
    seqs -- list of sequences
    kmerlen -- integer, length of k-mer

    Return:
    StringWord(Ulong)Features after treatment of redundant reverse complement k-mers
    """

    char_feats = StringCharFeatures(seqs, DNA)

    # word features (and the g_rcmap table) are used for k <= 8,
    # ulong features with on-the-fly mapping for longer k-mers
    if kmerlen <= 8:
        string_features = StringWordFeatures
        non_redundant_features = non_redundant_word_features
    else:
        string_features = StringUlongFeatures
        non_redundant_features = non_redundant_ulong_features

    feats = string_features(DNA)
    feats.obtain_from_char(char_feats, kmerlen-1, kmerlen, 0, False)
    return non_redundant_features(feats, kmerlen)


def get_spectrum_features(seqs, options):
    """generate spectrum features (wrapper)
    """
    return _get_spectrum_features(seqs, options.kmerlen)


def get_weighted_spectrum_features(seqs, options):
    """generate weighted spectrum features

    Builds one spectrum feature object per k in [kmerlen, kmerlen2].
    As a side effect, g_kmers and g_rcmap are regenerated for every k <= 8.

    Return:
    list of feature objects, one per k
    """
    global g_kmers
    global g_rcmap

    subfeats_list = []

    for k in xrange(options.kmerlen, options.kmerlen2+1):
        if k <= 8:
            g_kmers = generate_kmers(k)
            g_rcmap = generate_rcmap_table(k, g_kmers)

        subfeats = _get_spectrum_features(seqs, k)
        subfeats_list.append(subfeats)

    return subfeats_list


def get_spectrum_kernel(feats, options):
    """build spectrum kernel with non-redundant k-mer list (removing reverse complement)

    Arguments:
    feats -- feature object
    options -- object containing option data

    Return:
    CommWord(Ulong)StringKernel initialized with feats on both sides
    """
    if options.kmerlen <= 8:
        return CommWordStringKernel(feats, feats)
    else:
        return CommUlongStringKernel(feats, feats)


def get_weighted_spectrum_kernel(subfeats_list, options):
    """build weighted spectrum kernel with non-redundant k-mer list (removing reverse complement)

    Arguments:
    subfeats_list -- list of sub-feature objects
    options -- object containing option data

    Return:
    CombinedKernel of CommWord(Ulong)StringKernel, with equal subkernel weights
    """
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    subkernels = 0
    kernel = CombinedKernel()
    feats = CombinedFeatures()

    for subfeats in subfeats_list:
        feats.append_feature_obj(subfeats)

    for k in xrange(kmerlen, kmerlen2+1):
        if k <= 8:
            subkernel = CommWordStringKernel(10, False)
        else:
            subkernel = CommUlongStringKernel(10, False)

        kernel.append_kernel(subkernel)
        subkernels+=1

    kernel.init(feats, feats)

    # every subkernel contributes equally
    kernel.set_subkernel_weights(numpy.array([1/float(subkernels)]*subkernels, numpy.dtype('float64')))

    return kernel


def init_spectrum_kernel(kern, feats_lhs, feats_rhs):
    """initialize spectrum kernel (wrapper function)
    """
    kern.init(feats_lhs, feats_rhs)


def init_weighted_spectrum_kernel(kern, subfeats_list_lhs, subfeats_list_rhs):
    """initialize weighted spectrum kernel (wrapper function)
    """
    feats_lhs = CombinedFeatures()
    feats_rhs = CombinedFeatures()

    for subfeats in subfeats_list_lhs:
        feats_lhs.append_feature_obj(subfeats)

    for subfeats in subfeats_list_rhs:
        feats_rhs.append_feature_obj(subfeats)

    kern.init(feats_lhs, feats_rhs)


def get_sksvm_weights(svm, feats, options):
    """calculate the SVM weight vector of spectrum kernel

    Each support vector's k-mer count vector is L2-normalized, scaled by its
    alpha and summed.

    Return:
    numpy array of length 4**kmerlen
    """
    kmerlen = options.kmerlen
    alphas = svm.get_alphas()
    support_vector_ids = svm.get_support_vectors()

    w = numpy.array([0]*(2**(2*kmerlen)), numpy.double)

    for i in xrange(len(alphas)):
        x = [0]*(2**(2*kmerlen))
        for kmerid in feats.get_feature_vector(int(support_vector_ids[i])):
            x[int(kmerid)] += 1
        x = numpy.array(x, numpy.double)
        w += (alphas[i]*x/numpy.sqrt(numpy.sum(x**2)))

    return w

def get_feature_counts(svm, feats, options):
    """calculate feature counts for SVs and save them

    Sums the k-mer counts over all support vectors and writes them to
    "<outputname>_counts.out", one tab-separated line per non-redundant
    k-mer: k-mer, reverse complement, count.

    Arguments:
    svm -- trained svm object
    feats -- feature object of the spectrum kernel used for training
    options -- object containing option data (kmerlen, outputname, sort)
    """
    kmerlen = options.kmerlen
    support_vector_ids = svm.get_support_vectors()
    output = options.outputname + "_counts.out"

    w = numpy.zeros(2**(2*kmerlen), numpy.double)

    for svid in support_vector_ids:
        for kmerid in feats.get_feature_vector(int(svid)):
            w[int(kmerid)] += 1

    if options.sort:
        w_sorted = sorted(zip(range(len(w)), w), key=lambda x: x[1], reverse=True)
    else:
        w_sorted = zip(range(len(w)), w)

    # the output file was previously never opened before being written to
    f = open(output, 'w')
    try:
        if kmerlen <= 8:
            for i in map(lambda x: x[0], w_sorted):
                if i == g_rcmap[i]:
                    f.write('\t'.join( [g_kmers[i], revcomp(g_kmers[i]), str(w[i])] ) + '\n')
        else:
            # g_kmers/g_rcmap are not generated for k > 8; map on-the-fly,
            # the same way save_sksvm_weights does
            for i in map(lambda x: x[0], w_sorted):
                if i == get_rcmap(i, kmerlen):
                    kmer = kmerid2kmer(i, kmerlen)
                    f.write('\t'.join( [kmer, revcomp(kmer), str(w[i])] ) + '\n')
    finally:
        f.close()


def get_wsksvm_weights(svm, subfeats_list, options):
    """calculate the SVM weight vector of weighted spectrum kernel

    Return:
    list of numpy arrays, one weight vector per k in [kmerlen, kmerlen2],
    each divided by the number of k-mer lengths
    """
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2
    alphas = svm.get_alphas()
    support_vector_ids = svm.get_support_vectors()
    kmerlens = range(kmerlen, kmerlen2+1)

    weights = []
    for idx in xrange(len(kmerlens)):
        subfeats = subfeats_list[idx]

        k = kmerlens[idx]
        w = numpy.array([0]*(2**(2*k)), numpy.double)

        for i in xrange(len(alphas)):
            x = [0]*(2**(2*k))
            for kmerid in subfeats.get_feature_vector(int(support_vector_ids[i])):
                x[int(kmerid)] += 1
            x = numpy.array(x, numpy.double)
            w += (alphas[i]*x/numpy.sqrt(numpy.sum(x**2)))

        w /= len(kmerlens)
        weights.append(w)

    return weights


def save_header(f, bias, A, B, options):
    """write the '#'-prefixed parameter header of a weight file

    Arguments:
    f -- file object opened for writing
    bias -- SVM bias term
    A, B -- sigmoid parameters for posterior probability
    options -- object containing option data
    """
    f.write("#parameters:\n")
    f.write("#kernel=" + str(options.ktype) + "\n")
    f.write("#kmerlen=" + str(options.kmerlen) + "\n")
    if options.ktype == 2:
        f.write("#kmerlen2=" + str(options.kmerlen2) + "\n")
    f.write("#bias=" + str(bias) + "\n")
    f.write("#A=" + str(A) + "\n")
    f.write("#B=" + str(B) + "\n")
    f.write("#NOTE: k-mers with large negative weights are also important. They can be found at the bottom of the list.\n")
    f.write("#k-mer\trevcomp\tSVM-weight\n")


def save_sksvm_weights(w, bias, A, B, options):
    """save the SVM weight vector from spectrum kernel

    Writes "<outputname>_weights.out"; only k-mers that are their own
    reverse-complement representative are written.
    """
    output = options.outputname + "_weights.out"
    kmerlen = options.kmerlen

    f = open(output, 'w')
    save_header(f, bias, A, B, options)

    global g_kmers
    global g_rcmap

    if options.sort:
        w_sorted = sorted(zip(range(len(w)), w), key=lambda x: x[1], reverse=True)
    else:
        w_sorted = zip(range(len(w)), w)

    if kmerlen <= 8:
        for i in map(lambda x: x[0], w_sorted):
            if i == g_rcmap[i]:
                f.write('\t'.join( [g_kmers[i], revcomp(g_kmers[i]), str(w[i])] ) + '\n')
    else:
        for i in map(lambda x: x[0], w_sorted):
            if i == get_rcmap(i, kmerlen):
                kmer = kmerid2kmer(i, kmerlen)
                f.write('\t'.join( [kmer, revcomp(kmer), str(w[i])] ) + '\n')

    f.close()


def save_wsksvm_weights(w, bias, A, B, options):
    """save the SVM weight vector from weighted spectrum kernel

    Writes "<outputname>_weights.out" with the weights of every k in
    [kmerlen, kmerlen2], one k after another.
    """
    output = options.outputname + "_weights.out"
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    f = open(output, 'w')
    save_header(f, bias, A, B, options)

    global g_kmers
    global g_rcmap

    kmerlens = range(kmerlen, kmerlen2+1)
    for idx in xrange(len(kmerlens)):
        k = kmerlens[idx]
        subw = w[idx]

        if options.sort:
            subw_sorted = sorted(zip(range(len(subw)), subw), key=lambda x: x[1], reverse=True)
        else:
            subw_sorted = zip(range(len(subw)), subw)

        if k <= 8:
            g_kmers = generate_kmers(k)
            g_rcmap = generate_rcmap_table(k, g_kmers)
            for i in map(lambda x: x[0], subw_sorted):
                if i == g_rcmap[i]:
                    f.write('\t'.join( [g_kmers[i], revcomp(g_kmers[i]), str(subw[i])] ) + "\n")
        else:
            for i in map(lambda x: x[0], subw_sorted):
                if i == get_rcmap(i, k):
                    kmer = kmerid2kmer(i, k)
                    # was the undefined name 'kmers', which raised NameError
                    f.write('\t'.join( [kmer, revcomp(kmer), str(subw[i])] ) + "\n")

    f.close()


def save_predictions(output, preds, cvs):
    """save prediction

    Arguments:
    output -- string, name of the output file
    preds -- list of (index, seq_id, SVM score, label) tuples
    cvs -- list of cross-validation set numbers, parallel to preds
    """
    f = open(output, 'w')
    f.write('\t'.join(["#seq_id", "SVM score", "label", "NCV"]) + "\n")
    for i in xrange(len(preds)):
        f.write('\t'.join([preds[i][1], str(preds[i][2]), str(preds[i][3]), str(cvs[i])]) + "\n")
    f.close()


def generate_cv_list(ncv, n1, n2):
    """generate the N-fold cross validation list

    Positives and negatives are shuffled separately and then dealt out
    round-robin to the ncv sets.

    Arguments:
    ncv -- integer, number of cross-validation
    n1 -- integer, number of positives
    n2 -- integer, number of negatives

    Return:
    a list of N-fold cross validation
    """

    shuffled_idx_list1 = range(n1)
    shuffled_idx_list2 = range(n1,n1+n2)

    random.shuffle(shuffled_idx_list1)
    random.shuffle(shuffled_idx_list2)

    shuffled_idx_list = shuffled_idx_list1 + shuffled_idx_list2

    idx = 0
    icv = 0
    cv = [0] * (n1+n2)
    while(idx < (n1+n2)):
        cv[shuffled_idx_list[idx]] = icv

        idx += 1
        icv += 1
        if icv == ncv:
            icv = 0

    return cv


def split_cv_list(cvlist, icv, data):
    """split data into training and test based on cross-validation list

    Arguments:
    cvlist -- list, cross-validation list
    icv -- integer, cross-validation set of interest
    data -- list, data set to be split

    Return:
    a list of training set and a list of test set
    """

    tr_data = []
    te_data = []

    for i in xrange(len(data)):
        if cvlist[i] == icv:
            te_data.append(data[i])
        else:
            tr_data.append(data[i])

    return tr_data, te_data


def LMAI(svms, labels, prior0, prior1):
    """fitting svms to sigmoid function (improved version introduced by Lin 2003)

    Arguments:
    svms -- list of svm scores
    labels -- list of labels
    prior0 -- prior of negative set
    prior1 -- prior of positive set

    Return:
    A, B parameter of 1/(1+exp(A*SVM+B))
    """

    #parameter settings
    maxiter = 100
    minstep = 1e-10
    sigma = 1e-3

    hiTarget = (prior1+1.0)/float(prior1+2.0)
    loTarget = 1/float(prior0+2.0)

    t = [0]*len(labels)
    for i in xrange(len(labels)):
        if labels[i] == 1:
            t[i] = hiTarget
        else:
            t[i] = loTarget

    A = 0.0
    B = log((prior0+1.0)/float(prior1+1.0))
    fval = 0.0

    # initial objective value; the two branches avoid overflow in exp()
    for i in xrange(len(labels)):
        fApB = svms[i]*A+B
        if fApB >= 0:
            fval += (t[i]*fApB+log(1+exp(-fApB)))
        else:
            fval += ((t[i]-1)*fApB+log(1+exp(fApB)))


    for it in xrange(maxiter):
        #print "iteration:", it
        #Update Gradient and Hessian (use H'= H + sigma I)
        h11 = sigma
        h22 = sigma
        h21 = 0.0
        g1 = 0.0
        g2 = 0.0

        for i in xrange(len(labels)):
            fApB = svms[i]*A+B
            if fApB >= 0:
                p = exp(-fApB) / float(1.0+exp(-fApB))
                q = 1.0 / float(1.0 + exp(-fApB))
            else:
                p = 1.0 / float(1.0 + exp(fApB))
                q = exp(fApB) / float(1.0+exp(fApB))
            d2 = p*q
            h11 += (svms[i]*svms[i]*d2)
            h22 += d2
            h21 += (svms[i]*d2)
            d1 = t[i]-p
            g1 += (svms[i]*d1)
            g2 += d1

        #Stopping criteria
        if (abs(g1)<1e-5) and (abs(g2)<1e-5):
            break

        # Newton direction from the 2x2 regularized Hessian
        det = h11*h22-h21*h21
        dA = -(h22*g1-h21*g2)/float(det)
        dB = -(-h21*g1+h11*g2)/float(det)
        gd = g1*dA+g2*dB
        stepsize=1
        # backtracking line search
        while stepsize >= minstep:
            newA = A+stepsize*dA
            newB = B+stepsize*dB
            newf = 0.0

            for i in xrange(len(labels)):
                fApB = svms[i]*newA+newB
                if fApB >= 0:
                    newf += (t[i]*fApB + log(1+exp(-fApB)))
                else:
                    newf += ((t[i]-1)*fApB + log(1+exp(fApB)))

            if newf < (fval+0.0001*stepsize*gd):
                A=newA
                B=newB
                fval=newf
                break
            else:
                stepsize=stepsize/float(2.0)

        #Line search fails
        if stepsize < minstep:
            #print "Line search fails"
            break

    #if it >= maxiter:
    #    print "Reaching maximum iterations"

    return A, B


def wsksvm_classify(seqs, svm, kern, feats, options):
    """classify the given sequences using the weighted spectrum kernel

    Return:
    list of SVM scores
    """
    feats_te = get_weighted_spectrum_features(seqs, options)
    init_weighted_spectrum_kernel(kern, feats, feats_te)

    return svm.apply().get_labels().tolist()


def score_seq(s, svmw, kmerlen):
    """calculate SVM score of given sequence using single set of svm weights

    Relies on the global g_rcmap table for the given kmerlen.

    Arguments:
    s -- string, DNA sequence
    svmw -- numpy array, SVM weights
    kmerlen -- integer, length of k-mer of SVM weight

    Return:
    SVM score
    """

    global g_rcmap
    kmer2kmerid_func = kmer2kmerid

    x = [0]*(2**(2*kmerlen))
    for j in xrange(len(s)-kmerlen+1):
        x[ g_rcmap[kmer2kmerid_func(s[j:j+kmerlen], kmerlen)] ] += 1

    x = numpy.array(x, numpy.double)
    score_norm = numpy.dot(svmw, x)/numpy.sqrt(numpy.sum(x**2))

    return score_norm


def sksvm_classify(seqs, svm, kern, feats, options):
    """classify the given sequences

    Return:
    list of SVM scores
    """
    if options.kmerlen <= 8:
        #this is much faster when the length of kmer is short, and SVs are many
        svmw = get_sksvm_weights(svm, feats, options)
        return [score_seq(s, svmw, options.kmerlen)+svm.get_bias() for s in seqs]
    else:
        feats_te = get_spectrum_features(seqs, options)
        init_spectrum_kernel(kern, feats, feats_te)

        return svm.apply().get_labels().tolist()


def main(argv = sys.argv):
    """command line entry point: train an SVM and save its weights

    Arguments:
    argv -- list of command line arguments, program name first
    """
    global g_kmers
    global g_rcmap

    usage = "Usage: %prog [options] POSITIVE_SEQ NEGATIVE_SEQ"
    desc  = "1. take two files(FASTA format) as input, 2. train an SVM and store the trained SVM weights"
    parser = optparse.OptionParser(usage=usage, description=desc)
    parser.add_option("-t", dest="ktype", type="int", default=1, \
            help="set the type of kernel, 1:Spectrum, 2:Weighted Spectrums (default=1.Spectrum)")

    parser.add_option("-C", dest="svmC", type="float", default=1, \
            help="set the regularization parameter svmC (default=1)")

    parser.add_option("-e", dest="epsilon", type="float", default=0.00001, \
            help="set the precision parameter epsilon (default=0.00001)")

    parser.add_option("-w", dest="weight", type="float", default=0.0, \
            help="set the weight for positive set (default=auto, 1+log(N/P))")

    parser.add_option("-k", dest="kmerlen", type="int",default=6, \
            help="set the (min) length of k-mer for (weighted) spectrum kernel (default = 6)")

    parser.add_option("-K", dest="kmerlen2", type="int",default=8, \
            help="set the max length of k-mer for weighted spectrum kernel (default = 8)")

    parser.add_option("-n", dest="outputname", default="kmersvm_output", \
            help="set the name of output files (default=kmersvm_output)")

    parser.add_option("-v", dest="ncv", type="int", default=0, \
            help="if set, it will perform N-fold cross-validation and generate a prediction file (default = 0)")

    parser.add_option("-p", dest="posteriorp", default=False, action="store_true", \
            help="estimate parameters for posterior probability with N-CV. this option requires -v option to be set (default=false)")

    parser.add_option("-r", dest="rseed", type="int", default=1, \
            help="set the random number seed for cross-validation (-v option) (default=1)")

    parser.add_option("-q", dest="quiet", default=False, action="store_true", \
            help="supress messages (default=false)")

    # the weights are sorted by signed value, largest first (see save_header)
    parser.add_option("-s", dest="sort", default=False, action="store_true", \
            help="sort the kmers by SVM weights in descending order (default=false)")

    ktype_str = ["", "Spectrum", "Weighted Spectrums"]

    # honor the argv parameter instead of always reading sys.argv
    (options, args) = parser.parse_args(argv[1:])

    if len(args) == 0:
        parser.print_help()
        sys.exit(0)

    # parser.error() prints the usage and exits, nothing after it runs
    if len(args) != 2:
        parser.error("incorrect number of arguments")

    if options.posteriorp and options.ncv == 0:
        parser.error("posterior probability estimation requires N-fold CV process (-v option should be set)")

    # reject unknown kernels before ktype is used as an index into ktype_str
    if options.ktype not in (1, 2):
        sys.stderr.write('..unknown kernel..\n')
        sys.exit(0)

    # an empty k-mer range would leave the weighted kernel without subkernels
    if options.ktype == 2 and options.kmerlen2 < options.kmerlen:
        parser.error("max length of k-mer (-K) must not be smaller than min length (-k)")

    random.seed(options.rseed)

    # set global variable
    if (options.ktype == 1) and (options.kmerlen <= 8):
        g_kmers = generate_kmers(options.kmerlen)
        g_rcmap = generate_rcmap_table(options.kmerlen, g_kmers)

    posf = args[0]
    negf = args[1]

    seqs_pos, sids_pos = read_fastafile(posf)
    seqs_neg, sids_neg = read_fastafile(negf)
    npos = len(seqs_pos)
    nneg = len(seqs_neg)
    seqs = seqs_pos + seqs_neg
    sids = sids_pos + sids_neg

    # both classes are needed for training and for the automatic weight
    if npos == 0 or nneg == 0:
        parser.error("both positive and negative sequence files must contain at least one sequence")

    if options.weight == 0:
        #DEBUGGED by dlee 02/17/13
        options.weight = 1 + log(nneg/float(npos))

    if options.quiet == False:
        sys.stderr.write('SVM parameters:\n')
        sys.stderr.write('  kernel-type: ' + str(options.ktype) + "." + ktype_str[options.ktype] + '\n')
        sys.stderr.write('  svm-C: ' + str(options.svmC) + '\n')
        sys.stderr.write('  epsilon: ' + str(options.epsilon) + '\n')
        sys.stderr.write('  weight: ' + str(options.weight) + '\n')
        sys.stderr.write('\n')

        sys.stderr.write('Other options:\n')
        sys.stderr.write('  kmerlen: ' + str(options.kmerlen) + '\n')
        if options.ktype == 2:
            sys.stderr.write('  kmerlen2: ' + str(options.kmerlen2) + '\n')
        sys.stderr.write('  outputname: ' + options.outputname + '\n')
        sys.stderr.write('  posteriorp: ' + str(options.posteriorp) + '\n')
        if options.ncv > 0:
            sys.stderr.write('  ncv: ' + str(options.ncv) + '\n')
            sys.stderr.write('  rseed: ' + str(options.rseed) + '\n')
        sys.stderr.write('  sorted-weight: ' + str(options.sort) + '\n')

        sys.stderr.write('\n')

        sys.stderr.write('Input args:\n')
        sys.stderr.write('  positive sequence file: ' + posf + '\n')
        sys.stderr.write('  negative sequence file: ' + negf + '\n')
        sys.stderr.write('\n')

        sys.stderr.write('numer of total positive seqs: ' + str(npos) + '\n')
        sys.stderr.write('numer of total negative seqs: ' + str(nneg) + '\n')
        sys.stderr.write('\n')

    #generate labels
    labels = [1]*npos + [-1]*nneg

    if options.ktype == 1:
        get_features = get_spectrum_features
        get_kernel = get_spectrum_kernel
        get_weights = get_sksvm_weights
        save_weights = save_sksvm_weights
        svm_classify = sksvm_classify
    else:
        get_features = get_weighted_spectrum_features
        get_kernel = get_weighted_spectrum_kernel
        get_weights = get_wsksvm_weights
        save_weights = save_wsksvm_weights
        svm_classify = wsksvm_classify

    A = B = 0
    if options.ncv > 0:
        if options.quiet == False:
            sys.stderr.write('..Cross-validation\n')

        cvlist = generate_cv_list(options.ncv, npos, nneg)
        labels_cv = []
        preds_cv = []
        sids_cv = []
        indices_cv = []
        for icv in xrange(options.ncv):
            #split data into training and test set
            seqs_tr, seqs_te = split_cv_list(cvlist, icv, seqs)
            labs_tr, labs_te = split_cv_list(cvlist, icv, labels)
            sids_tr, sids_te = split_cv_list(cvlist, icv, sids)
            indices_tr, indices_te = split_cv_list(cvlist, icv, range(len(seqs)))

            #train SVM
            feats_tr = get_features(seqs_tr, options)
            kernel_tr = get_kernel(feats_tr, options)
            svm_cv = svm_learn(kernel_tr, labs_tr, options)

            preds_cv = preds_cv + svm_classify(seqs_te, svm_cv, kernel_tr, feats_tr, options)

            labels_cv = labels_cv + labs_te
            sids_cv = sids_cv + sids_te
            indices_cv = indices_cv + indices_te

        output_cvpred = options.outputname + "_cvpred.out"
        # restore the input order so that rows line up with cvlist
        prediction_results = sorted(zip(indices_cv, sids_cv, preds_cv, labels_cv), key=lambda p: p[0])
        save_predictions(output_cvpred, prediction_results, cvlist)

        if options.posteriorp:
            A, B = LMAI(preds_cv, labels_cv, labels_cv.count(-1), labels_cv.count(1))

            if options.quiet == False:
                sys.stderr.write('Estimated Parameters:\n')
                sys.stderr.write('  A: ' + str(A) + '\n')
                sys.stderr.write('  B: ' + str(B) + '\n')

    if options.quiet == False:
        sys.stderr.write('..SVM weights\n')

    feats = get_features(seqs, options)
    kernel = get_kernel(feats, options)
    svm = svm_learn(kernel, labels, options)
    # feature counts need a single feature object; the weighted kernel
    # yields a list of them, so counts are only written for the spectrum kernel
    if options.ktype == 1:
        get_feature_counts(svm, feats, options)
    w = get_weights(svm, feats, options)
    b = svm.get_bias()

    save_weights(w, b, A, B, options)

if __name__=='__main__': main()
--- a/kmersvm/scripts/nullseq_generate.py Mon Aug 20 21:42:29 2012 -0400 +++ b/kmersvm/scripts/nullseq_generate.py Sun Jun 16 18:06:14 2013 -0400 @@ -71,8 +71,7 @@ def sample_sequences(positions, buildname, basedir, options): """ """ - rpt_err = options.rpt_err - gc_err = options.gc_err + max_fails = 20 max_trys = options.max_trys norpt = options.norpt nogc = options.nogc @@ -121,6 +120,12 @@ else: count = options.count + #initialize paramter + #added by dlee 2/17/13 + ncfails = 0 + rpt_err = options.rpt_err + gc_err = options.gc_err + sampled_positions = [] while len(sampled_positions) < count: sampled_prof = random.choice(profiles) @@ -128,6 +133,15 @@ sampled_gc = sampled_prof[2] sampled_rpt = sampled_prof[3] + #relax rpt_err and gc_err if it keep fail to sample a region + #added by dlee 2/17/13 + if ncfails >= max_fails: + if options.quiet == False: + sys.stderr.write("reached max_fail. relax gc and rpt err criteria\n") + ncfails = 0 + rpt_err += 0.01 + gc_err += 0.01 + rpt_err_allowed = int(rpt_err*sampled_len) gc_err_allowed = int(gc_err*sampled_len) trys = 0 @@ -156,9 +170,17 @@ sampled_positions.append((chrom, pos, pos_e)) + #reset the counter of consecutive fails + #added by dlee 2/17/13 + ncfails = 0 + #print trys, chrom, pos, pos_e, sampled_len, pos_rpt, sampled_rpt, pos_gc, sampled_gc break else: + #increase the counter of consecutive fails + #added by dlee 2/17/13 + ncfails += 1 + if options.quiet == False: sys.stderr.write(' '.join(["fail to sample from", \ "len=", str(sampled_len), \
<!-- File: kmersvm/tomtom.xml
     Galaxy tool wrapper that runs Tomtom on a PWM file against a combined
     motif database and returns the text output.
     The tomtom binary and the motif database are referenced by absolute
     paths under /home/galaxy/meme. -->
<tool id="tomtom" name="Tomtom" version="1.0.0">

  <description>Tomtom tool for motif searching</description>
  <command>/home/galaxy/meme/bin/tomtom -no-ssc -internal -text -verbosity 1 -thresh $thresh
  #if str($cut.cut_choice) == 'e.value':
    -evalue
  #end if

  #if str($dist.dist) == 'ed':
    -dist ed
  #elif str($dist.dist) == 'sw':
    -dist sandelin
  #else
    -dist pearson
  #end if

  $input1 /home/galaxy/meme/db/combined_db.meme > tomtom_out.txt

  </command>
  <inputs>
    <param format="txt" name="input1" type="data" label="PWM File"/>
    <param type="float" value="0.5" label="Threshold" name="thresh"/>
    <!-- No extra flag is passed for the q-value choice; only the E-value
         choice adds -evalue. Neither choice has additional inputs, so the
         cases are declared explicitly and left empty. -->
    <conditional name="cut">
      <param name="cut_choice" type="select" label="Threshold Type">
        <option value="q.value" selected="true">q-value</option>
        <option value="e.value">E-value</option>
      </param>
      <when value="q.value"/>
      <when value="e.value"/>
    </conditional>

    <!-- Distance metric; the selected value is mapped to a -dist argument
         in the command template. No case has additional inputs. -->
    <conditional name="dist">
      <param name="dist" type="select" label="Distance Metric">
        <option value="pearson" selected="true">Pearson</option>
        <option value="ed">Euclidean</option>
        <option value="sw">Sandelin-Wasserman Function</option>
      </param>
      <when value="pearson"/>
      <when value="ed"/>
      <when value="sw"/>
    </conditional>
  </inputs>

  <outputs>
    <data format="txt" name="Tomtom Results" from_work_dir="tomtom_out.txt" label="${tool.name} on ${on_string}: Tomtom Matches"/>

  </outputs>
  <help>

Tomtom is a tool for comparing a DNA motif to a database of known motifs. For an in-depth explanation of the Tomtom software see here_.

----

**Recommended Settings**

We recommend most users use the Tomtom defaults of q-value for score, the cutoff of 0.5 and the Pearson correlation coefficient for distance metric.

----

**Parameters**

We offer users the option of choosing which distance metric is used to find matching motifs.  Specifically, we offer the Pearson correlation coefficient, the Euclidean distance and the Sandelin-Wasserman Function.

  * The Pearson correlation coefficient measures the similarity between columns of position weight matrices (PWMs).

  * The Euclidean distance can be thought of as the length of the straight line between two PWMs.

  * The Sandelin-Wasserman function sums the column-wise differences between PWMs.

We also offer the choice of E-value and q-value to threshold the results returned by Tomtom.

  * The E-value controls the expected number of false positives and can be any number.

  * The q-value controls the false discovery rate and is a number between 0 and 1.

----

Note that at this time we only offer Tomtom output in txt format.

----

**Citation**

If you use this tool, please cite: Shobhit Gupta, JA Stamatoyannopoulos, Timothy Bailey and William Stafford Noble, "Quantifying similarity between motifs", Genome Biology, 8(2):R24, 2007.

.. _here: http://meme.nbcr.net/meme/tomtom-intro.html

  </help>
</tool>
--- a/kmersvm/train.xml Mon Aug 20 21:42:29 2012 -0400 +++ b/kmersvm/train.xml Sun Jun 16 18:06:14 2013 -0400 @@ -47,8 +47,12 @@ <param name="weight" type="float" value="1" label="Input The Value of Positive Set Weight" /> </when> </conditional> - <param name="SVMC" type="integer" value="1" label="Regularization Param C" /> - <param name="EPS" type="float" value="0.00001" label="Precision Param E" /> + <param name="SVMC" type="float" value="1" label="Regularization Param C" > + <validator type="in_range" message="SVMC must be in range 1 - 10" min="0.01" max="1" /> + </param> + <param name="EPS" type="float" value="0.00001" label="Precision Param E" > + <validator type="in_range" message="EPS must be in range 1e-1 to 1e-5" min="0.00001" max="0.1" /> + </param> </inputs> <outputs> <data format="tabular" name="SVM_weights" from_work_dir="kmersvm_output_weights.out" label="${tool.name} on ${on_string} : Weights" /> @@ -79,11 +83,27 @@ Takes as input 2 FASTA files, 1 of positive sequences and 1 of negative sequences. Produces 2 outputs: - A) Weights: list of sequences of length K ranked by score and posterior probability for that score. + A) Weights: list of sequences of length K ranked by score. - B) Predictions: results of N-fold cross validation + B) Predictions: results of N-fold cross validation. ---- + +**Recommended Settings** + +Kernel: Spectrum + +Kmer length: 6 + +N-Fold Cross-Validation: 5 + +Weight: We recommend letting the Positive Set Weight be selected automatically, unless it has been separately optimized. + +Regularization Parameter C: We recommend values between 0.1 and 1. + +Precision Parameter E: We recommend using the default and staying below 0.1. + +---- **Parameters** @@ -91,8 +111,9 @@ A) Spectrum Kernel: Analyzes a sequence using strings of length K. - B) Weighted Spectrum Kernel: Analyzes a sequence using strings of range of lengths K1 - Kn. - + B) Weighted Spectrum Kernel: Analyzes a sequence using strings of range of lengths K_min - K_max. 
+ + N-Fold Cross Validation: Number of partitions of training data used for cross validation. Weight: Increases importance of positive data (increase if positive sets are very trustworthy or for training with very large negative sequence sets). @@ -100,7 +121,7 @@ Regularization Parameter: Penalty for misclassification. Trade-off is overfitting (high parameter) versus high error rate (low parameter). Precision Parameter: Insensitivity zone. Affects precision of SVM by altering number of support vectors used. - + ---- **Example**