Mercurial > repos > cafletezbrant > kmersvm
annotate kmersvm/scripts/kmersvm_train_kfb_copy.py @ 7:fd740d515502 draft default tip
Uploaded revised kmer-SVM to include modules from kmer-visual.
author | cafletezbrant |
---|---|
date | Sun, 16 Jun 2013 18:06:14 -0400 |
parents | |
children |
rev | line source |
---|---|
7
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
1 #!/usr/bin/env python |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
2 """ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
3 kmersvm_train.py; train a support vector machine using shogun toolbox |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
4 Copyright (C) 2011 Dongwon Lee |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
5 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
6 This program is free software: you can redistribute it and/or modify |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
7 it under the terms of the GNU General Public License as published by |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
8 the Free Software Foundation, either version 3 of the License, or |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
9 (at your option) any later version. |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
10 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
11 This program is distributed in the hope that it will be useful, |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
14 GNU General Public License for more details. |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
15 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
16 You should have received a copy of the GNU General Public License |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
17 along with this program. If not, see <http://www.gnu.org/licenses/>. |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
18 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
19 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
20 """ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
21 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
22 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
23 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
24 import sys |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
25 import optparse |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
26 import random |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
27 import numpy |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
28 from math import log, exp |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
29 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
30 from libkmersvm import * |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
31 try: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
32 from shogun.PreProc import SortWordString, SortUlongString |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
33 except ImportError: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
34 from shogun.Preprocessor import SortWordString, SortUlongString |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
35 from shogun.Kernel import CommWordStringKernel, CommUlongStringKernel, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
36 CombinedKernel |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
37 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
38 from shogun.Features import StringWordFeatures, StringUlongFeatures, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
39 StringCharFeatures, CombinedFeatures, DNA, Labels |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
40 from shogun.Classifier import MSG_INFO, MSG_ERROR |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
41 try: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
42 from shogun.Classifier import SVMLight |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
43 except ImportError: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
44 from shogun.Classifier import LibSVM |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
45 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
46 """ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
47 global variables |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
48 """ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
# g_kmers is not referenced in the visible part of this file; presumably it
# holds the list of k-mer strings -- TODO confirm against the rest of the file.
g_kmers = []
# g_rcmap is indexed by integer k-mer id in non_redundant_word_features() to
# obtain the id each k-mer is mapped to. It starts out empty here, so it is
# presumably filled in elsewhere before that function runs -- TODO confirm.
g_rcmap = []
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
51 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
52 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def kmerid2kmer(kmerid, kmerlen):
    """convert integer kmerid to kmer string

    The id is read as a base-4 number whose digits 0, 1, 2, 3 stand for
    A, C, G, T; the most significant digit is the first nucleotide.

    Arguments:
    kmerid -- integer, id of k-mer
    kmerlen -- integer, length of k-mer

    Return:
    kmer string of length kmerlen (empty string when kmerlen is 0)
    """

    nts = "ACGT"
    kmernts = []
    remaining = kmerid

    for _ in range(kmerlen):
        # divmod keeps everything in exact integer arithmetic; the former
        # int((id - digit) / 4) goes through a float under true division
        # and silently corrupts ids above 2**53 (long k-mers).
        remaining, ntid = divmod(remaining, 4)
        kmernts.append(nts[ntid])

    # digits were produced least-significant first
    return ''.join(reversed(kmernts))
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
74 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
75 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def kmer2kmerid(kmer, kmerlen):
    """convert kmer string to integer kmerid

    The k-mer is read as a base-4 number with A, C, G, T as the digits
    0, 1, 2, 3; the first nucleotide is the most significant digit.

    Arguments:
    kmer -- string, k-mer made of the upper-case letters A, C, G and T
    kmerlen -- integer, length of k-mer (not used by the computation;
               kept so existing callers keep working)

    Return:
    id of k-mer (0 for an empty k-mer)

    Raises:
    KeyError if kmer contains a character other than A, C, G or T
    """

    nt2id = {'A': 0, 'C': 1, 'G': 2, 'T': 3}

    # Explicit fold starting at 0: needs no `reduce` (not a builtin on
    # Python 3) and is well defined for an empty k-mer.
    kmerid = 0
    for nt in kmer:
        kmerid = 4 * kmerid + nt2id[nt]

    return kmerid
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
90 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
91 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_rcmap(kmerid, kmerlen):
    """map a kmerid to the smaller of itself and its reverse complement id

    Computed on the fly for every call (no lookup table involved).

    Arguments:
    kmerid -- integer, id of k-mer
    kmerlen -- integer, length of k-mer

    Return:
    integer, the reverse complement's id if it is smaller than kmerid,
    otherwise kmerid itself
    """

    # id -> k-mer string -> reverse complement string -> id
    # (revcomp is not defined in this file; presumably it comes from the
    # libkmersvm star-import)
    kmer = kmerid2kmer(kmerid, kmerlen)
    rc_kmerid = kmer2kmerid(revcomp(kmer), kmerlen)

    # min() keeps kmerid unless the reverse complement id is strictly smaller
    return min(kmerid, rc_kmerid)
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
112 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
113 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def non_redundant_word_features(feats, kmerlen):
    """rewrite word features in place so reverse-complement k-mers share one id

    Every k-mer id in every feature vector is replaced by its entry in the
    module-level g_rcmap table, then the vectors are passed through a
    SortWordString preprocessor.

    Arguments:
    feats -- StringWordFeatures
    kmerlen -- integer, length of k-mer (not used here; the mapping comes
               from g_rcmap)

    Return:
    the same StringWordFeatures object, after remapping and preprocessing
    """

    lookup = g_rcmap
    word_dtype = numpy.dtype('u2')

    for vec_idx in xrange(feats.get_num_vectors()):
        mapped = numpy.array(
            [lookup[int(kmerid)] for kmerid in feats.get_feature_vector(vec_idx)],
            word_dtype)
        feats.set_feature_vector(mapped, vec_idx)

    sorter = SortWordString()
    sorter.init(feats)
    try:
        feats.add_preproc(sorter)
        feats.apply_preproc()
    except AttributeError:
        # features object has no *_preproc methods; use the *_preprocessor
        # spelling instead (presumably a renamed shogun API -- confirm)
        feats.add_preprocessor(sorter)
        feats.apply_preprocessor()

    return feats
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
141 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
142 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def non_redundant_ulong_features(feats, kmerlen):
    """rewrite ulong features in place so reverse-complement k-mers share one id

    Every k-mer id in every feature vector is replaced by the result of
    get_rcmap(), then the vectors are passed through a SortUlongString
    preprocessor.

    Arguments:
    feats -- StringUlongFeatures
    kmerlen -- integer, length of k-mer

    Return:
    the same StringUlongFeatures object, after remapping and preprocessing
    """

    ulong_dtype = numpy.dtype('u8')

    for vec_idx in xrange(feats.get_num_vectors()):
        mapped = []
        for kmerid in feats.get_feature_vector(vec_idx):
            mapped.append(get_rcmap(int(kmerid), kmerlen))
        feats.set_feature_vector(numpy.array(mapped, ulong_dtype), vec_idx)

    sorter = SortUlongString()
    sorter.init(feats)
    try:
        feats.add_preproc(sorter)
        feats.apply_preproc()
    except AttributeError:
        # features object has no *_preproc methods; use the *_preprocessor
        # spelling instead (presumably a renamed shogun API -- confirm)
        feats.add_preprocessor(sorter)
        feats.apply_preprocessor()

    return feats
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
169 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
170 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def svm_learn(kernel, labels, options):
    """train an SVM with SVMLight, or with LibSVM if SVMLight is unavailable

    Arguments:
    kernel -- kernel object from Shogun toolbox
    labels -- list of labels
    options -- object containing option data; the attributes read here are
               svmC, quiet, epsilon and weight

    Return:
    trained svm object
    """

    def make_labels():
        return Labels(numpy.array(labels, dtype=numpy.double))

    try:
        svm = SVMLight(options.svmC, kernel, make_labels())
    except NameError:
        # the SVMLight import at the top of the file failed, so the name
        # does not exist; LibSVM was imported in its place
        svm = LibSVM(options.svmC, kernel, make_labels())

    # deliberately `== False` rather than `not quiet`, so a quiet value of
    # None is still treated as "do not log"
    verbose = (options.quiet == False)

    if verbose:
        svm.io.set_loglevel(MSG_INFO)
        svm.io.set_target_to_stderr()

    svm.set_epsilon(options.epsilon)
    svm.parallel.set_num_threads(1)

    if options.weight != 1.0:
        # second cost is scaled by the weight; presumably this is the
        # positive-class C in shogun's set_C -- confirm against shogun docs
        svm.set_C(options.svmC, options.svmC*options.weight)

    svm.train()

    if verbose:
        # training is done; lower the log level again
        svm.io.set_loglevel(MSG_ERROR)

    return svm
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
202 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
203 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def _get_spectrum_features(seqs, kmerlen):
    """build k-mer spectrum features for a list of sequences (internal)

    Arguments:
    seqs -- list of sequences
    kmerlen -- integer, length of k-mer

    Return:
    StringWordFeatures (kmerlen <= 8) or StringUlongFeatures (kmerlen > 8),
    after treatment of redundant reverse complement k-mers
    """

    # up to k = 8 the word features are used (their vectors are written as
    # 16-bit 'u2' values); longer k-mers use the ulong variant
    if kmerlen <= 8:
        feature_cls, dedupe = StringWordFeatures, non_redundant_word_features
    else:
        feature_cls, dedupe = StringUlongFeatures, non_redundant_ulong_features

    dna_chars = StringCharFeatures(seqs, DNA)

    kmer_feats = feature_cls(DNA)
    kmer_feats.obtain_from_char(dna_chars, kmerlen-1, kmerlen, 0, False)

    return dedupe(kmer_feats, kmerlen)
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
227 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
228 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_spectrum_features(seqs, options):
    """build spectrum features for a single k-mer length (wrapper)

    Arguments:
    seqs -- list of sequences
    options -- object containing option data; only kmerlen is read here

    Return:
    whatever _get_spectrum_features returns for (seqs, options.kmerlen)
    """
    kmerlen = options.kmerlen
    return _get_spectrum_features(seqs, kmerlen)
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
233 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
234 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_weighted_spectrum_features(seqs, options):
    """generate weighted spectrum features

    One sub-feature object is produced for every k-mer length in the
    inclusive range options.kmerlen .. options.kmerlen2.

    Arguments:
    seqs -- list of sequences
    options -- object containing option data (kmerlen and kmerlen2 are read)

    Return:
    list of feature objects, one per k-mer length, in increasing order of k

    Side effects:
    for every k <= 8 the module-level tables g_kmers and g_rcmap are
    rebuilt for that k, so after the call they describe the largest
    k <= 8 in the range (if any)
    """
    global g_kmers
    global g_rcmap

    subfeats_list = []

    for k in range(options.kmerlen, options.kmerlen2 + 1):
        if k <= 8:
            # Rebuild the lookup tables before the features for this k are
            # generated. NOTE(review): presumably the non-redundant feature
            # step reads these tables -- confirm against
            # non_redundant_word_features.
            g_kmers = generate_kmers(k)
            g_rcmap = generate_rcmap_table(k, g_kmers)

        # The original also built an unused StringCharFeatures(seqs, DNA)
        # object on every iteration; that dead work has been dropped.
        subfeats_list.append(_get_spectrum_features(seqs, k))

    return subfeats_list
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
253 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
254 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_spectrum_kernel(feats, options):
    """create the spectrum kernel over a non-redundant k-mer feature object

    Arguments:
    feats -- feature object, used as both left- and right-hand side
    options -- object containing option data; only kmerlen is read here

    Return:
    CommWordStringKernel when kmerlen <= 8, CommUlongStringKernel otherwise
    """
    # The kernel class has to match the feature width chosen for this k.
    kernel_cls = CommWordStringKernel if options.kmerlen <= 8 else CommUlongStringKernel
    return kernel_cls(feats, feats)
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
269 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
270 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_weighted_spectrum_kernel(subfeats_list, options):
    """assemble the weighted spectrum kernel from one sub-kernel per k-mer length

    Arguments:
    subfeats_list -- list of sub-feature objects
    options -- object containing option data (kmerlen and kmerlen2 are read)

    Return:
    CombinedKernel initialized on a CombinedFeatures built from
    subfeats_list, with every sub-kernel given the same weight
    """
    first_k = options.kmerlen
    last_k = options.kmerlen2

    kernel = CombinedKernel()
    combined_feats = CombinedFeatures()
    for part in subfeats_list:
        combined_feats.append_feature_obj(part)

    n_subkernels = 0
    for k in range(first_k, last_k + 1):
        # Word kernels are used up to k = 8, ulong kernels beyond that.
        # NOTE(review): (10, False) is presumably (cache size, use_sign) --
        # confirm against the shogun constructor.
        subkernel_cls = CommWordStringKernel if k <= 8 else CommUlongStringKernel
        kernel.append_kernel(subkernel_cls(10, False))
        n_subkernels += 1

    kernel.init(combined_feats, combined_feats)

    # Uniform weighting: each sub-kernel contributes 1/n of the total.
    uniform = 1 / float(n_subkernels)
    kernel.set_subkernel_weights(numpy.array([uniform] * n_subkernels, numpy.dtype('float64')))

    return kernel
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
305 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
306 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def init_spectrum_kernel(kern, feats_lhs, feats_rhs):
    """(re)initialize a spectrum kernel on the given feature pair (wrapper)

    Arguments:
    kern -- kernel object exposing init(lhs, rhs)
    feats_lhs -- left-hand side feature object
    feats_rhs -- right-hand side feature object

    Return:
    None; the result of kern.init is discarded
    """
    initialize = kern.init
    initialize(feats_lhs, feats_rhs)
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
311 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
312 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def init_weighted_spectrum_kernel(kern, subfeats_list_lhs, subfeats_list_rhs):
    """(re)initialize a weighted spectrum kernel on two lists of sub-features (wrapper)

    Arguments:
    kern -- kernel object exposing init(lhs, rhs)
    subfeats_list_lhs -- list of sub-feature objects for the left-hand side
    subfeats_list_rhs -- list of sub-feature objects for the right-hand side

    Return:
    None; each list is packed into its own CombinedFeatures before kern.init
    """
    lhs, rhs = CombinedFeatures(), CombinedFeatures()

    for combined, parts in ((lhs, subfeats_list_lhs), (rhs, subfeats_list_rhs)):
        for part in parts:
            combined.append_feature_obj(part)

    kern.init(lhs, rhs)
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
326 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
327 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_sksvm_weights(svm, feats, options):
    """derive the SVM weight vector of the spectrum kernel

    For every support vector the k-mer ids of its feature vector are
    counted, the count vector is divided by its Euclidean norm, scaled by
    the matching alpha and added to the running total.

    Arguments:
    svm -- object exposing get_alphas() and get_support_vectors()
    feats -- feature object exposing get_feature_vector(index)
    options -- object containing option data; only kmerlen is read here

    Return:
    numpy array of doubles with 2**(2*kmerlen) entries, indexed by k-mer id
    """
    n_kmers = 2**(2*options.kmerlen)
    coefficients = svm.get_alphas()
    sv_indices = svm.get_support_vectors()

    weights = numpy.zeros(n_kmers, numpy.double)

    for pos, alpha in enumerate(coefficients):
        counts = numpy.zeros(n_kmers, numpy.double)
        for kmerid in feats.get_feature_vector(int(sv_indices[pos])):
            counts[int(kmerid)] += 1
        weights += (alpha*counts/numpy.sqrt(numpy.sum(counts**2)))

    return weights
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
345 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_feature_counts(svm, feats, options):
    """calculate feature counts for SVs and write them to a file

    The k-mer ids in the feature vector of every support vector are
    counted and summed (no alpha weighting, no normalization). One
    tab-separated row "k-mer, reverse complement, count" is written for
    every k-mer id that maps to itself in the reverse-complement mapping.

    Arguments:
    svm -- object exposing get_support_vectors()
    feats -- feature object exposing get_feature_vector(index)
    options -- object containing option data (kmerlen, outputname and
               sort are read)

    Return:
    None; output goes to options.outputname + "_counts.out"
    """
    kmerlen = options.kmerlen
    support_vector_ids = svm.get_support_vectors()
    output = options.outputname + "_counts.out"

    w = numpy.zeros(2**(2*kmerlen), numpy.double)
    for sv_id in support_vector_ids:
        for kmerid in feats.get_feature_vector(int(sv_id)):
            w[int(kmerid)] += 1

    if options.sort:
        # Highest counts first; the sort is stable, so ties keep id order.
        order = sorted(range(len(w)), key=lambda i: w[i], reverse=True)
    else:
        order = range(len(w))

    if kmerlen <= 8:
        # g_kmers / g_rcmap are module-level lookup tables.
        rows = ((i, g_kmers[i]) for i in order if i == g_rcmap[i])
    else:
        # Same fallback save_sksvm_weights uses for k > 8, where the
        # mapping is computed on the fly instead of read from the tables.
        rows = ((i, kmerid2kmer(i, kmerlen)) for i in order
                if i == get_rcmap(i, kmerlen))

    # The original wrote to an 'f' that was never opened (NameError);
    # 'with' opens the output file and guarantees it is closed.
    with open(output, 'w') as f:
        for i, kmer in rows:
            f.write('\t'.join([kmer, revcomp(kmer), str(w[i])]) + '\n')
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
377 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
378 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
379 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def get_wsksvm_weights(svm, subfeats_list, options):
    """derive the SVM weight vectors of the weighted spectrum kernel

    For each k-mer length the computation matches the single-k case
    (alpha-scaled, norm-divided k-mer count vectors summed over the
    support vectors); the result is then divided by the number of
    k-mer lengths.

    Arguments:
    svm -- object exposing get_alphas() and get_support_vectors()
    subfeats_list -- list of feature objects, one per k-mer length, each
                     exposing get_feature_vector(index)
    options -- object containing option data (kmerlen and kmerlen2 are read)

    Return:
    list of numpy double arrays, one per k in kmerlen..kmerlen2, the
    array for k holding 2**(2*k) entries
    """
    kmerlens = range(options.kmerlen, options.kmerlen2 + 1)
    coefficients = svm.get_alphas()
    sv_indices = svm.get_support_vectors()

    weights = []
    for idx, k in enumerate(kmerlens):
        subfeats = subfeats_list[idx]
        n_kmers = 2**(2*k)
        w = numpy.zeros(n_kmers, numpy.double)

        for pos, alpha in enumerate(coefficients):
            counts = numpy.zeros(n_kmers, numpy.double)
            for kmerid in subfeats.get_feature_vector(int(sv_indices[pos])):
                counts[int(kmerid)] += 1
            w += (alpha*counts/numpy.sqrt(numpy.sum(counts**2)))

        # every sub-kernel carries the same share of the combined kernel
        w /= len(kmerlens)
        weights.append(w)

    return weights
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
407 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
408 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def save_header(f, bias, A, B, options):
    """write the '#'-prefixed parameter header of a weight file

    Arguments:
    f -- writable file-like object
    bias -- value written on the "#bias=" line
    A -- value written on the "#A=" line
    B -- value written on the "#B=" line
    options -- object containing option data (ktype and kmerlen are read;
               kmerlen2 is read only when ktype == 2)
    """
    f.write("#parameters:\n")

    entries = [("#kernel=", options.ktype), ("#kmerlen=", options.kmerlen)]
    if options.ktype == 2:
        # the second k-mer length is only reported for kernel type 2
        entries.append(("#kmerlen2=", options.kmerlen2))
    entries.extend([("#bias=", bias), ("#A=", A), ("#B=", B)])

    for prefix, value in entries:
        f.write(prefix + str(value) + "\n")

    f.write("#NOTE: k-mers with large negative weights are also important. They can be found at the bottom of the list.\n")
    f.write("#k-mer\trevcomp\tSVM-weight\n")
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
420 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
421 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def save_sksvm_weights(w, bias, A, B, options):
    """save the SVM weight vector from spectrum kernel

    Writes the parameter header followed by one tab-separated row
    "k-mer, reverse complement, weight" for every k-mer id that maps to
    itself in the reverse-complement mapping.

    Arguments:
    w -- weight vector indexed by k-mer id
    bias -- value passed through to the header
    A -- value passed through to the header
    B -- value passed through to the header
    options -- object containing option data (outputname, kmerlen and
               sort are read here; the header reads further fields)

    Return:
    None; output goes to options.outputname + "_weights.out"
    """
    output = options.outputname + "_weights.out"
    kmerlen = options.kmerlen

    # 'with' closes the file even when the header or a row write raises;
    # the original open()/close() pair leaked the handle in that case.
    with open(output, 'w') as f:
        save_header(f, bias, A, B, options)

        if options.sort:
            # Largest weights first; the sort is stable, so ties keep id order.
            order = sorted(range(len(w)), key=lambda i: w[i], reverse=True)
        else:
            order = range(len(w))

        if kmerlen <= 8:
            # g_kmers / g_rcmap are module-level lookup tables.
            for i in order:
                if i == g_rcmap[i]:
                    kmer = g_kmers[i]
                    f.write('\t'.join([kmer, revcomp(kmer), str(w[i])]) + '\n')
        else:
            # For longer k-mers the mapping and the k-mer string are
            # computed on the fly instead of being read from the tables.
            for i in order:
                if i == get_rcmap(i, kmerlen):
                    kmer = kmerid2kmer(i, kmerlen)
                    f.write('\t'.join([kmer, revcomp(kmer), str(w[i])]) + '\n')
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
450 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
451 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def save_wsksvm_weights(w, bias, A, B, options):
    """save the SVM weight vectors from weighted spectrum kernel

    Arguments:
    w -- sequence of weight vectors; w[idx] is used for the k-mer length
         options.kmerlen + idx
    bias -- passed through to save_header
    A -- passed through to save_header
    B -- passed through to save_header
    options -- object providing outputname, kmerlen, kmerlen2 and sort

    Side effects:
    writes "<options.outputname>_weights.out" and, for every k <= 8,
    rebinds the module-level g_kmers and g_rcmap tables to the ones for k.
    """
    output = options.outputname + "_weights.out"

    global g_kmers
    global g_rcmap

    # the context manager closes the file even when a write or helper raises
    with open(output, 'w') as f:
        save_header(f, bias, A, B, options)

        for idx, k in enumerate(range(options.kmerlen, options.kmerlen2 + 1)):
            subw = w[idx]

            # visit indices in natural order, or by descending weight
            indices = range(len(subw))
            if options.sort:
                indices = sorted(indices, key=lambda i: subw[i], reverse=True)

            # only indices that map to themselves in the reverse-complement
            # map are written -- presumably one representative per
            # k-mer / reverse-complement pair (confirm against get_rcmap)
            if k <= 8:
                # short k-mers use precomputed lookup tables
                g_kmers = generate_kmers(k)
                g_rcmap = generate_rcmap_table(k, g_kmers)
                for i in indices:
                    if i == g_rcmap[i]:
                        kmer = g_kmers[i]
                        f.write('\t'.join([kmer, revcomp(kmer), str(subw[i])]) + "\n")
            else:
                # longer k-mers are decoded from their index on the fly
                for i in indices:
                    if i == get_rcmap(i, k):
                        kmer = kmerid2kmer(i, k)
                        # the original wrote the undefined name `kmers` here
                        f.write('\t'.join([kmer, revcomp(kmer), str(subw[i])]) + "\n")
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
488 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
489 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def save_predictions(output, preds, cvs):
    """save prediction

    Arguments:
    output -- string, name of the file to write
    preds -- sequence of records; element [1] is written as the sequence id
             (must already be a string), [2] as the SVM score and [3] as
             the label
    cvs -- sequence indexed like preds, written as the "NCV" column

    Writes one header line followed by one tab-separated line per record.
    """
    # the context manager closes the file even when a write raises
    with open(output, 'w') as f:
        f.write('\t'.join(["#seq_id", "SVM score", "label", "NCV"]) + "\n")
        for i, pred in enumerate(preds):
            f.write('\t'.join([pred[1], str(pred[2]), str(pred[3]), str(cvs[i])]) + "\n")
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
498 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
499 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def generate_cv_list(ncv, n1, n2):
    """generate the N-fold cross validation list

    Arguments:
    ncv -- integer, number of cross-validation
    n1 -- integer, number of positives
    n2 -- integer, number of negatives

    Return:
    a list of length n1+n2 holding the fold number assigned to each sample;
    indices 0..n1-1 are the positives, n1..n1+n2-1 the negatives
    """
    # materialise the index lists explicitly: random.shuffle needs a mutable
    # sequence, which a bare range() is only on Python 2
    positives = list(range(n1))
    negatives = list(range(n1, n1 + n2))

    # shuffle each class separately, then deal folds round-robin over the
    # concatenation so the fold counter carries on from positives to negatives
    random.shuffle(positives)
    random.shuffle(negatives)

    cv = [0] * (n1 + n2)
    icv = 0
    for sample_idx in positives + negatives:
        cv[sample_idx] = icv

        icv += 1
        if icv == ncv:
            icv = 0

    return cv
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
532 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
533 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def split_cv_list(cvlist, icv, data):
    """split data into training and test based on cross-validation list

    Arguments:
    cvlist -- list, cross-validation list (fold number of each element of data)
    icv -- integer, cross-validation set of interest
    data -- list, data set to be split

    Return:
    a list of training set and a list of test set; elements whose fold
    equals icv go to the test set, all others to the training set
    """
    tr_data = []
    te_data = []

    for i, item in enumerate(data):
        if cvlist[i] == icv:
            te_data.append(item)
        else:
            tr_data.append(item)

    return tr_data, te_data
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
556 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
557 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def _lmai_objective(svms, t, A, B):
    """return the cross-entropy between targets t and 1/(1+exp(A*svm+B))"""
    total = 0.0
    for i, target in enumerate(t):
        fApB = svms[i]*A + B
        # both branches are the same quantity; each form only ever passes a
        # non-positive argument to exp(), so it cannot overflow
        if fApB >= 0:
            total += (target*fApB + log(1 + exp(-fApB)))
        else:
            total += ((target - 1)*fApB + log(1 + exp(fApB)))
    return total


def LMAI(svms, labels, prior0, prior1):
    """fitting svms to sigmoid function (improved version introduced by Lin 2003)

    Arguments:
    svms -- list of svm scores
    labels -- list of labels (a label equal to 1 is treated as positive,
              anything else as negative)
    prior0 -- prior of negative set
    prior1 -- prior of positive set

    Return:
    A, B parameter of 1/(1+exp(A*SVM+B))
    """

    #parameter settings
    maxiter = 100       # maximum number of Newton iterations
    minstep = 1e-10     # smallest step size tried by the line search
    sigma = 1e-3        # added to the Hessian diagonal (H' = H + sigma I)

    # smoothed targets used instead of hard 0/1 labels
    hiTarget = (prior1 + 1.0)/float(prior1 + 2.0)
    loTarget = 1/float(prior0 + 2.0)
    t = [hiTarget if label == 1 else loTarget for label in labels]

    A = 0.0
    B = log((prior0 + 1.0)/float(prior1 + 1.0))
    fval = _lmai_objective(svms, t, A, B)

    for _ in range(maxiter):
        #Update Gradient and Hessian (use H'= H + sigma I)
        h11 = sigma
        h22 = sigma
        h21 = 0.0
        g1 = 0.0
        g2 = 0.0

        for i, target in enumerate(t):
            score = svms[i]
            fApB = score*A + B
            # p = 1/(1+exp(fApB)) and q = 1-p, written per branch so that
            # exp() only ever receives a non-positive argument
            if fApB >= 0:
                p = exp(-fApB) / float(1.0 + exp(-fApB))
                q = 1.0 / float(1.0 + exp(-fApB))
            else:
                p = 1.0 / float(1.0 + exp(fApB))
                q = exp(fApB) / float(1.0 + exp(fApB))
            d2 = p*q
            h11 += (score*score*d2)
            h22 += d2
            h21 += (score*d2)
            d1 = target - p
            g1 += (score*d1)
            g2 += d1

        #Stopping criteria
        if (abs(g1) < 1e-5) and (abs(g2) < 1e-5):
            break

        # Newton direction: solve the 2x2 system H' * (dA, dB) = -g
        det = h11*h22 - h21*h21
        dA = -(h22*g1 - h21*g2)/float(det)
        dB = -(-h21*g1 + h11*g2)/float(det)
        gd = g1*dA + g2*dB

        # backtracking line search: halve the step until the objective
        # decreases sufficiently
        stepsize = 1.0
        while stepsize >= minstep:
            newA = A + stepsize*dA
            newB = B + stepsize*dB
            newf = _lmai_objective(svms, t, newA, newB)

            if newf < (fval + 0.0001*stepsize*gd):
                A = newA
                B = newB
                fval = newf
                break
            stepsize = stepsize/2.0

        #Line search fails
        if stepsize < minstep:
            break

    return A, B
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
661 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
662 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def wsksvm_classify(seqs, svm, kern, feats, options):
    """apply an SVM with the weighted spectrum kernel to the given sequences

    Arguments:
    seqs -- sequences handed to get_weighted_spectrum_features
    svm -- object whose apply() result provides get_labels()
    kern -- kernel object re-initialised for this call
    feats -- features passed as the first side of the kernel
             (presumably the training features -- confirm against callers)
    options -- options forwarded to get_weighted_spectrum_features

    Return:
    the result of svm.apply().get_labels() converted with tolist()
    """
    test_features = get_weighted_spectrum_features(seqs, options)

    # the kernel must be initialised with the new features before svm.apply()
    init_weighted_spectrum_kernel(kern, feats, test_features)

    applied = svm.apply()
    return applied.get_labels().tolist()
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
668 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
669 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def score_seq(s, svmw, kmerlen):
    """calculate SVM score of given sequence using single set of svm weights

    Every k-mer of the sequence is counted (its id is looked up with
    kmer2kmerid and remapped through the module-level g_rcmap table), and
    the dot product of the counts with the SVM weights is divided by the
    Euclidean norm of the count vector.

    Arguments:
    s -- string, DNA sequence
    svmw -- numpy array, SVM weights
    kmerlen -- integer, length of k-mer of SVM weight

    Return:
    SVM score. 0.0 is returned when the sequence contains no k-mer at all
    (e.g. it is empty or shorter than kmerlen); the normalization would
    otherwise be 0/0 and yield NaN.
    """
    # one counter per possible k-mer id: 2**(2*kmerlen) == 4**kmerlen
    counts = [0]*(2**(2*kmerlen))
    # range() instead of xrange() so the function runs on Python 2 and 3
    for j in range(len(s)-kmerlen+1):
        counts[g_rcmap[kmer2kmerid(s[j:j+kmerlen], kmerlen)]] += 1

    x = numpy.array(counts, numpy.double)
    norm = numpy.sqrt(numpy.sum(x**2))
    if norm == 0:
        # no k-mer was counted: avoid dividing zero by zero
        return 0.0

    return numpy.dot(svmw, x)/norm
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
693 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
694 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
def sksvm_classify(seqs, svm, kern, feats, options):
    """classify the given sequences

    For options.kmerlen <= 8 the SVM weights are obtained once with
    get_sksvm_weights and every sequence is scored directly with score_seq;
    otherwise features are built for the sequences, the kernel is
    re-initialized with them and svm.apply() is used.

    Arguments:
    seqs -- sequences to classify
    svm -- SVM object (provides get_bias() and apply())
    kern -- kernel object, only used when options.kmerlen > 8
    feats -- features passed to get_sksvm_weights / init_spectrum_kernel
    options -- options object; options.kmerlen selects the scoring path

    Return:
    list of scores, one per sequence
    """
    if options.kmerlen <= 8:
        #this is much faster when the length of kmer is short, and SVs are many
        svmw = get_sksvm_weights(svm, feats, options)
        # the bias is the same for every sequence, so fetch it only once
        bias = svm.get_bias()
        return [score_seq(s, svmw, options.kmerlen)+bias for s in seqs]

    feats_te = get_spectrum_features(seqs, options)
    init_spectrum_kernel(kern, feats, feats_te)

    return svm.apply().get_labels().tolist()
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
707 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
708 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
709 def main(argv = sys.argv): |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
710 usage = "Usage: %prog [options] POSITIVE_SEQ NEGATIVE_SEQ" |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
711 desc = "1. take two files(FASTA format) as input, 2. train an SVM and store the trained SVM weights" |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
712 parser = optparse.OptionParser(usage=usage, description=desc) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
713 parser.add_option("-t", dest="ktype", type="int", default=1, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
714 help="set the type of kernel, 1:Spectrum, 2:Weighted Spectrums (default=1.Spectrum)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
715 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
716 parser.add_option("-C", dest="svmC", type="float", default=1, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
717 help="set the regularization parameter svmC (default=1)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
718 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
719 parser.add_option("-e", dest="epsilon", type="float", default=0.00001, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
720 help="set the precision parameter epsilon (default=0.00001)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
721 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
722 parser.add_option("-w", dest="weight", type="float", default=0.0, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
723 help="set the weight for positive set (default=auto, 1+log(N/P))") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
724 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
725 parser.add_option("-k", dest="kmerlen", type="int",default=6, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
726 help="set the (min) length of k-mer for (weighted) spectrum kernel (default = 6)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
727 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
728 parser.add_option("-K", dest="kmerlen2", type="int",default=8, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
729 help="set the max length of k-mer for weighted spectrum kernel (default = 8)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
730 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
731 parser.add_option("-n", dest="outputname", default="kmersvm_output", \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
732 help="set the name of output files (default=kmersvm_output)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
733 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
734 parser.add_option("-v", dest="ncv", type="int", default=0, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
735 help="if set, it will perform N-fold cross-validation and generate a prediction file (default = 0)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
736 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
737 parser.add_option("-p", dest="posteriorp", default=False, action="store_true", \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
738 help="estimate parameters for posterior probability with N-CV. this option requires -v option to be set (default=false)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
739 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
740 parser.add_option("-r", dest="rseed", type="int", default=1, \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
741 help="set the random number seed for cross-validation (-p option) (default=1)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
742 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
743 parser.add_option("-q", dest="quiet", default=False, action="store_true", \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
744 help="supress messages (default=false)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
745 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
746 parser.add_option("-s", dest="sort", default=False, action="store_true", \ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
747 help="sort the kmers by absolute values of SVM weights (default=false)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
748 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
749 ktype_str = ["", "Spectrum", "Weighted Spectrums"] |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
750 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
751 (options, args) = parser.parse_args() |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
752 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
753 if len(args) == 0: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
754 parser.print_help() |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
755 sys.exit(0) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
756 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
757 if len(args) != 2: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
758 parser.error("incorrect number of arguments") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
759 parser.print_help() |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
760 sys.exit(0) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
761 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
762 if options.posteriorp and options.ncv == 0: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
763 parser.error("posterior probability estimation requires N-fold CV process (-v option should be set)") |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
764 parser.print_help() |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
765 sys.exit(0) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
766 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
767 random.seed(options.rseed) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
768 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
769 """ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
770 set global variable |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
771 """ |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
772 if (options.ktype == 1) and (options.kmerlen <= 8): |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
773 global g_kmers |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
774 global g_rcmap |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
775 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
776 g_kmers = generate_kmers(options.kmerlen) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
777 g_rcmap = generate_rcmap_table(options.kmerlen, g_kmers) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
778 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
779 posf = args[0] |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
780 negf = args[1] |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
781 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
782 seqs_pos, sids_pos = read_fastafile(posf) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
783 seqs_neg, sids_neg = read_fastafile(negf) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
784 npos = len(seqs_pos) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
785 nneg = len(seqs_neg) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
786 seqs = seqs_pos + seqs_neg |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
787 sids = sids_pos + sids_neg |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
788 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
789 if options.weight == 0: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
790 #DEBUGGED by dlee 02/17/13 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
791 options.weight = 1 + log(nneg/float(npos)) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
792 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
793 if options.quiet == False: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
794 sys.stderr.write('SVM parameters:\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
795 sys.stderr.write(' kernel-type: ' + str(options.ktype) + "." + ktype_str[options.ktype] + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
796 sys.stderr.write(' svm-C: ' + str(options.svmC) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
797 sys.stderr.write(' epsilon: ' + str(options.epsilon) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
798 sys.stderr.write(' weight: ' + str(options.weight) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
799 sys.stderr.write('\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
800 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
801 sys.stderr.write('Other options:\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
802 sys.stderr.write(' kmerlen: ' + str(options.kmerlen) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
803 if options.ktype == 2: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
804 sys.stderr.write(' kmerlen2: ' + str(options.kmerlen2) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
805 sys.stderr.write(' outputname: ' + options.outputname + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
806 sys.stderr.write(' posteriorp: ' + str(options.posteriorp) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
807 if options.ncv > 0: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
808 sys.stderr.write(' ncv: ' + str(options.ncv) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
809 sys.stderr.write(' rseed: ' + str(options.rseed) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
810 sys.stderr.write(' sorted-weight: ' + str(options.sort) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
811 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
812 sys.stderr.write('\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
813 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
814 sys.stderr.write('Input args:\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
815 sys.stderr.write(' positive sequence file: ' + posf + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
816 sys.stderr.write(' negative sequence file: ' + negf + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
817 sys.stderr.write('\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
818 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
819 sys.stderr.write('numer of total positive seqs: ' + str(npos) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
820 sys.stderr.write('numer of total negative seqs: ' + str(nneg) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
821 sys.stderr.write('\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
822 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
823 #generate labels |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
824 labels = [1]*npos + [-1]*nneg |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
825 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
826 if options.ktype == 1: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
827 get_features = get_spectrum_features |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
828 get_kernel = get_spectrum_kernel |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
829 get_weights = get_sksvm_weights |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
830 save_weights = save_sksvm_weights |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
831 svm_classify = sksvm_classify |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
832 elif options.ktype == 2: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
833 get_features = get_weighted_spectrum_features |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
834 get_kernel = get_weighted_spectrum_kernel |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
835 get_weights = get_wsksvm_weights |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
836 save_weights = save_wsksvm_weights |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
837 svm_classify = wsksvm_classify |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
838 else: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
839 sys.stderr.write('..unknown kernel..\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
840 sys.exit(0) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
841 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
842 A = B = 0 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
843 if options.ncv > 0: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
844 if options.quiet == False: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
845 sys.stderr.write('..Cross-validation\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
846 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
847 cvlist = generate_cv_list(options.ncv, npos, nneg) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
848 labels_cv = [] |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
849 preds_cv = [] |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
850 sids_cv = [] |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
851 indices_cv = [] |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
852 for icv in xrange(options.ncv): |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
853 #split data into training and test set |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
854 seqs_tr, seqs_te = split_cv_list(cvlist, icv, seqs) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
855 labs_tr, labs_te = split_cv_list(cvlist, icv, labels) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
856 sids_tr, sids_te = split_cv_list(cvlist, icv, sids) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
857 indices_tr, indices_te = split_cv_list(cvlist, icv, range(len(seqs))) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
858 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
859 #train SVM |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
860 feats_tr = get_features(seqs_tr, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
861 kernel_tr = get_kernel(feats_tr, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
862 svm_cv = svm_learn(kernel_tr, labs_tr, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
863 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
864 preds_cv = preds_cv + svm_classify(seqs_te, svm_cv, kernel_tr, feats_tr, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
865 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
866 labels_cv = labels_cv + labs_te |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
867 sids_cv = sids_cv + sids_te |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
868 indices_cv = indices_cv + indices_te |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
869 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
870 output_cvpred = options.outputname + "_cvpred.out" |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
871 prediction_results = sorted(zip(indices_cv, sids_cv, preds_cv, labels_cv), key=lambda p: p[0]) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
872 save_predictions(output_cvpred, prediction_results, cvlist) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
873 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
874 if options.posteriorp: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
875 A, B = LMAI(preds_cv, labels_cv, labels_cv.count(-1), labels_cv.count(1)) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
876 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
877 if options.quiet == False: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
878 sys.stderr.write('Estimated Parameters:\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
879 sys.stderr.write(' A: ' + str(A) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
880 sys.stderr.write(' B: ' + str(B) + '\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
881 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
882 if options.quiet == False: |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
883 sys.stderr.write('..SVM weights\n') |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
884 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
885 feats = get_features(seqs, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
886 kernel = get_kernel(feats, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
887 svm = svm_learn(kernel, labels, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
888 jj = get_feature_counts(svm, feats, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
889 w = get_weights(svm, feats, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
890 b = svm.get_bias() |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
891 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
892 save_weights(w, b, A, B, options) |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
893 |
fd740d515502
Uploaded revised kmer-SVM to include modules from kmer-visual.
cafletezbrant
parents:
diff
changeset
|
894 if __name__=='__main__': main() |