annotate TEisotools-1.1.a/commons/core/seq/Bioseq.py @ 13:feef9a0db09d draft

Uploaded
author urgi-team
date Wed, 20 Jul 2016 09:04:42 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
2 # http://www.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
4 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
9 # "http://www.cecill.info".
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
10 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
15 # liability.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
16 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
26 # same conditions as regards security.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
27 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
30
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
31
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
32 import re
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
33 import sys
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
34 import random
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
35 import string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
36 import cStringIO
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
37 from commons.core.coord.Map import Map
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
38 from commons.core.checker.RepetException import RepetException
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
39
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
# Unambiguous DNA letters plus the 'N' wildcard.
DNA_ALPHABET_WITH_N = set("ATGCN")
# Nucleotide codes accepted as IUPAC symbols by this module (includes 'U').
IUPAC = set("ATGCURYMKWSBDHVN")
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
42
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
43
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
44 ## Record a sequence with its header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
45 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
46 class Bioseq(object):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
47
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
48 __slots__ = ("header", "sequence", '__dict__')
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
49
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
50 ## constructor
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
51 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
52 # @param name the header of sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
53 # @param seq sequence (DNA, RNA, protein)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
54 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def __init__(self, name = "", seq = ""):
    """Create a record from a header and a sequence.

    @param name header of the sequence (stored in ``self.header``)
    @param seq  sequence string (stored in ``self.sequence``)
    """
    self.sequence = seq
    self.header = name
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
58
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
59
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
60 ## Equal operator
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
61 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def __eq__(self, o):
    """Return True only when ``o`` has exactly the same type, header and sequence."""
    same = (type(o) is type(self)
            and self.header == o.header
            and self.sequence == o.sequence)
    return bool(same)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
66
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
67 ## Not equal operator
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
68 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def __ne__(self, o):
    """Return the negation of ``self.__eq__(o)``."""
    equal = self.__eq__(o)
    return not equal
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
71
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
72 ## overload __repr__
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
73 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def __repr__(self):
    """Return the header and the sequence joined by a semicolon."""
    fields = (self.header, self.sequence)
    return "%s;%s" % fields
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
76
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
77
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
78 ## set attribute header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
79 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
80 # @param header a string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
81 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def setHeader(self, header):
    """Store ``header`` as the header of this record.

    @param header a string
    """
    self.header = header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
84
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
85
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
86 ## get attribute header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
87 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
88 # @return header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getHeader(self):
    """Return the header of this record."""
    return self.header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
91
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
92
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
93 ## set attribute sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
94 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
95 # @param sequence a string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
96 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def setSequence(self, sequence):
    """Store ``sequence`` as the sequence of this record.

    @param sequence a string
    """
    self.sequence = sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
99
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
100
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getSequence(self):
    """Return the sequence of this record."""
    return self.sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
103
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
104 ## reset
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
105 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def reset(self):
    """Set header, then sequence, to the empty string through the setters."""
    for clear in (self.setHeader, self.setSequence):
        clear("")
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
109
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
110
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
111 ## Test if bioseq is empty
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
112 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def isEmpty(self):
    """Return True when both the header and the sequence are empty strings."""
    return (self.header, self.sequence) == ("", "")
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
115
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
116
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
117 ## Reverse the sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
118 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def reverse(self):
    """Reverse the sequence in place (no complementation)."""
    self.sequence = self.sequence[::-1]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
122
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
123
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
124 ## Turn the sequence into its complement
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
125 # Force upper case letters
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
126 # @warning: old name in pyRepet.Bioseq realComplement
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
127 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def complement(self):
    """Replace the sequence by its complement, in upper case.

    The sequence is first upper-cased via ``self.upCase()``. Gaps ('-') are
    kept. Any symbol without an entry in the table below triggers a warning
    on stdout and is replaced by 'N'.
    """
    pairs = {
        "A": "T", "T": "A", "C": "G", "G": "C",
        "M": "K", "K": "M", "R": "Y", "Y": "R",
        "W": "W", "S": "S",
        "V": "B", "B": "V", "H": "D", "D": "H",
        "N": "N", "-": "-",
    }
    self.upCase()
    # Collect pieces in a list and join once: repeated "+=" on a string is
    # quadratic in the worst case.
    pieces = []
    for symbol in self.sequence:
        if symbol in pairs:
            pieces.append(pairs[symbol])
        else:
            print("WARNING: unknown symbol '%s', replacing it by N" % (symbol))
            pieces.append("N")
    self.sequence = "".join(pieces)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
168
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
169
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
170 ## Reverse and complement the sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
171 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
172 # Force upper case letters
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
173 # @warning: old name in pyRepet.Bioseq : complement
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
174 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def reverseComplement(self):
    """Reverse the sequence, then complement it, both in place."""
    for step in (self.reverse, self.complement):
        step()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
178
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
179
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
180 ## Remove gap in the sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
181 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def cleanGap(self):
    """Remove every '-' character from the sequence, in place."""
    ungapped = self.sequence.split("-")
    self.sequence = "".join(ungapped)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
184
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
185
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
186 ## Copy current Bioseq Instance
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
187 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
188 # @return: a Bioseq instance, a copy of current sequence.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
189 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def copyBioseqInstance(self):
    """Return a new Bioseq carrying the same header and sequence.

    @return a Bioseq instance, a copy of the current one
    """
    return Bioseq(self.header, self.sequence)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
195
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
196
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
197 ## Add phase information after the name of sequence in header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
198 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
199 # @param phase integer representing phase (1, 2, 3, -1, -2, -3)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
200 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def setFrameInfoOnHeader(self, phase):
    """Append "_<phase>" to the name part of the header.

    The name is everything before the first space; whatever follows the
    first space is kept unchanged after the suffixed name.

    @param phase value converted with ``str()`` and appended after "_"
    """
    name, separator, desc = self.header.partition(" ")
    self.header = name + "_" + str(phase) + separator + desc
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
208
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
209
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
210 ## Fill Bioseq attributes with fasta file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
211 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
212 # @param faFileHandler file handler of a fasta file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
213 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def read(self, faFileHandler):
    """Fill header and sequence with the next record of a fasta file.

    Lines equal to "\\n" before the header are skipped. At end of file
    (including when only such blank lines remain) header and sequence are
    both set to None. If the first non-blank line does not start with '>',
    an error is printed and the attributes are left untouched. Sequence
    lines are right-stripped and concatenated until the next '>' line, and
    the handler is rewound to the start of that line so the next call can
    read it.

    @param faFileHandler file handler of a fasta file; must support
                         readline(), tell() and seek()
    """
    line = faFileHandler.readline()
    while line == "\n":
        line = faFileHandler.readline()
    # Test for EOF after the blank-line skip: a file ending in blank lines
    # would otherwise reach line[0] with an empty string.
    if line == "":
        self.header = None
        self.sequence = None
        return
    if line[0] != '>':
        print("error, line is %s" % line.rstrip())
        return
    self.header = line[1:].rstrip()
    chunks = []
    while True:
        prev_pos = faFileHandler.tell()
        line = faFileHandler.readline()
        if line == "":
            break
        if line[0] == '>':
            # Next record: rewind so the following read() sees its header.
            faFileHandler.seek(prev_pos)
            break
        chunks.append(line.rstrip())
    self.sequence = "".join(chunks)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
239
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
240
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
241 ## Create a subsequence with a modified header
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
242 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
243 # @param s integer start a required subsequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
244 # @param e integer end a required subsequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
245 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
246 # @return a Bioseq instance, a subsequence of current sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
247 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def subseq(self, s, e = 0):
    """Build a sub-sequence with a header describing the extracted range.

    Coordinates are 1-based and inclusive. ``e == 0`` means "up to the end
    of the sequence". On invalid coordinates an error is printed and None
    is returned.

    @param s integer start of the required subsequence
    @param e integer end of the required subsequence

    @return a Bioseq instance, or None on error
    """
    end = len(self.sequence) if e == 0 else e
    if s > end:
        print("error: start must be < or = to end")
        return
    if s <= 0:
        print("error: start must be > 0")
        return
    fragment_header = self.header + " fragment " + str(s) + ".." + str(end)
    return Bioseq(fragment_header, self.sequence[(s - 1):end])
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
261
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
262
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
263 ## Get the nucleotide or aminoacid at the given position
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
264 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
265 # @param pos integer nucleotide or aminoacid position
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
266 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
267 # @return a string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
268 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getNtFromPosition(self, pos):
    """Return the symbol at 1-based position ``pos``.

    @param pos integer nucleotide or aminoacid position

    @return a one-character string, or None when ``pos`` is outside
            1..getLength()
    """
    if 1 <= pos <= self.getLength():
        return self.sequence[pos - 1]
    return None
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
274
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
275
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
276 ## Print in stdout the Bioseq in fasta format with 60 characters lines
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
277 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
278 # @param l length of required sequence default is whole sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
279 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def view(self, l = 0):
    """Print the record on stdout in fasta format, 60 characters per line.

    @param l number of leading characters to print; 0 prints the whole
             sequence
    """
    print('>' + self.header)
    if l == 0:
        l = len(self.sequence)
    shown = self.sequence[0:l]
    offset = 0
    total = len(shown)
    while offset < total:
        print(shown[offset:offset + 60])
        offset += 60
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
290
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
291
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
292 ## Get length of sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
293 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
294 # @param countN boolean if False, 'N' nucleotides are not counted
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
295 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
296 # @return length of current sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
297 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getLength(self, countN = True):
    """Return the length of the sequence.

    @param countN boolean; when false, occurrences of 'N' (as reported by
                  ``self.countNt('N')``) are subtracted from the length

    @return length of the current sequence
    """
    total = len(self.sequence)
    if not countN:
        total = total - self.countNt('N')
    return total
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
303
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
304
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
305 ## Return the proportion of a specific character
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
306 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
307 # @param nt character that we want to count
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
308 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def propNt(self, nt):
    """Return the proportion of ``nt`` in the sequence.

    @param nt character that we want to count

    @return ``countNt(nt) / getLength()`` as a float; 0.0 for an empty
            sequence (avoids a division by zero)
    """
    length = self.getLength()
    if length == 0:
        return 0.0
    return self.countNt(nt) / float(length)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
311
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
312
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
313 ## Count occurrence of specific character
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
314 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
315 # @param nt character that we want to count
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
316 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
317 # @return nb of occurrences
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
318 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def countNt(self, nt):
    """Count the occurrences of ``nt`` in the sequence (case-sensitive).

    @param nt character that we want to count

    @return nb of occurrences
    """
    occurrences = self.sequence.count(nt)
    return occurrences
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
321
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
322
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
323 ## Count occurrence of each nucleotide in current seq
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
324 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
325 # @return a dict, keys are nucleotides, values are nb of occurrences
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
326 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def countAllNt(self):
    """Count the occurrences of A, T, G, C and N in the sequence.

    @return a dict, keys are the five nucleotides, values are nb of
            occurrences as given by ``self.countNt``
    """
    return dict((nt, self.countNt(nt)) for nt in "ATGCN")
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
332
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
333
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
334 ## Return a dict with the number of occurrences for each combination of ATGC of specified size and number of word found
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
335 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
336 # @param size integer required length word
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
337 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def occ_word(self, size):
    """Count every word of length ``size`` made only of A, T, G, C.

    A window of ``size`` characters slides over the upper-cased sequence.
    When a window contains a run of other symbols, it is not counted and
    the scan restarts just after that run.

    @param size integer required length word

    @return a tuple (dict word -> nb of occurrences, total nb of counted
            words); ``({}, 0)`` when size is 0
    """
    if size == 0:
        return {}, 0
    not_atgc = re.compile('[^ATGC]+')
    # Every word from self._createWordList starts with a zero count.
    counts = dict.fromkeys(self._createWordList(size), 0)
    total = 0
    last_start = len(self.sequence) - size
    pos = 0
    while pos <= last_start:
        window = self.sequence[pos:pos + size].upper()
        hit = not_atgc.search(window)
        if hit is not None:
            # Jump past the offending run instead of sliding through it.
            pos += hit.end(0)
            continue
        counts[window] += 1
        total += 1
        pos += 1
    return counts, total
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
359
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
360
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
361 ## Return a dictionary with the frequency of occurs for each combination of ATGC of specified size
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
362 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
363 # @param size integer required length word
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
364 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def freq_word(self, size):
    """Return the frequency of each word of length ``size``.

    Frequencies are the counts from ``self.occ_word(size)`` divided by the
    number of counted words. When no word was counted (for instance a
    sequence shorter than ``size``), every frequency is 0.0 instead of
    raising ZeroDivisionError.

    @param size integer required length word

    @return a dict, keys are words, values are frequencies (float)
    """
    dOcc, nbWords = self.occ_word(size)
    if nbWords == 0:
        return dict((word, 0.0) for word in dOcc)
    freq = {}
    for word, count in dOcc.items():
        freq[word] = float(count) / nbWords
    return freq
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
371
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
372
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
373 ## Find ORF in each phase
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
374 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
375 # @return: a dict, keys are phases, values are stop codon positions.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
376 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def findORF (self):
    """Locate the stop codons TAA, TAG and TGA in each of the three phases.

    The comparison is case-sensitive and only the forward strand is scanned.

    @return a dict, keys are phases (0, 1, 2), values are lists of 0-based
            start positions of stop codons, in increasing order
    """
    stop_codons = ("TAA", "TAG", "TGA")
    by_phase = {0:[], 1:[], 2:[]}
    seq = self.sequence
    # Positions past len - 3 cannot hold a full triplet, so they never match.
    last_start = len(seq) - 3
    pos = 0
    while pos <= last_start:
        if seq[pos:pos + 3] in stop_codons:
            by_phase[pos % 3].append(pos)
        pos += 1
    return by_phase
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
386
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
387
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
388 ## Convert the sequence into upper case
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
389 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def upCase(self):
    """Replace the stored sequence by its upper-case version (in place)."""
    upperSeq = self.sequence.upper()
    self.sequence = upperSeq
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
392
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
393
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
394 ## Convert the sequence into lower case
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
395 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def lowCase(self):
    """Replace the stored sequence by its lower-case version (in place)."""
    lowerSeq = self.sequence.lower()
    self.sequence = lowerSeq
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
398
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
399
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
400 ## Extract the cluster of the fragment (output from Grouper)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
401 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
402 # @return cluster id (string)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
403 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getClusterID(self):
    """Extract the cluster identifier from a Grouper-style header.

    The first whitespace-separated field of the header is split on "Cl"
    and the piece following the first "Cl" is returned.

    @return cluster id (string)
    @raise IndexError if the header is blank or its first field has no "Cl"
    """
    firstField = self.header.split()[0]
    pieces = firstField.split("Cl")
    return pieces[1]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
407
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
408
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
409 ## Extract the group of the sequence (output from Grouper)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
410 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
411 # @return group id (string)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
412 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getGroupID(self):
    """Extract the group identifier from a Grouper-style header.

    In the first whitespace-separated field of the header, the text
    located after the first "Gr" and before the next "Cl" is returned.

    @return group id (string)
    @raise IndexError if the header is blank or its first field has no "Gr"
    """
    firstField = self.header.split()[0]
    afterGr = firstField.split("Gr")[1]
    return afterGr.split("Cl")[0]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
416
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
417
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
418 ## Get the header of the full sequence (output from Grouper)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
419 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
420 # @example 'Dmel_Grouper_3091_Malign_3:LARD' from '>MbS1566Gr81Cl81 Dmel_Grouper_3091_Malign_3:LARD {Fragment} 1..5203'
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
421 # @return header (string)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
422 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getHeaderFullSeq(self):
    """Return the second whitespace-separated field of the header.

    For a Grouper header such as
    'MbS1566Gr81Cl81 Dmel_Grouper_3091_Malign_3:LARD {Fragment} 1..5203'
    this is 'Dmel_Grouper_3091_Malign_3:LARD'.

    @return header (string)
    @raise IndexError if the header has fewer than two fields
    """
    fields = self.header.split()
    return fields[1]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
426
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
427
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
428 ## Get the strand of the fragment (output from Grouper)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
429 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
430 # @return: strand (+ or -)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
431 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getFragStrand(self):
    """Deduce the strand of the fragment from its Grouper-style header.

    The fourth whitespace-separated field of the header is split on "..";
    its first and last items are compared as integers.

    @return: strand, "+" when the first coordinate is strictly lower than
             the last one, "-" otherwise (including equal coordinates)
    """
    fields = self.header.split()
    bounds = fields[3].split("..")
    first = int(bounds[0])
    last = int(bounds[-1])
    return "+" if first < last else "-"
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
439
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
440
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
441 ## Get A, T, G, C or N from an IUPAC letter
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
442 # IUPAC = ['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N']
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
443 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
444 # @return A, T, G, C or N
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
445 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getATGCNFromIUPAC(self, nt):
    """Translate one upper-case IUPAC letter into A, T, G, C or N.

    A, T, G, C and N are returned unchanged and U becomes T.  An ambiguity
    code is resolved by drawing one of the nucleotides it stands for with
    random.choice, so the result is not deterministic for those codes.
    Any other symbol (lower-case letters included) gives "N".

    @param nt the symbol to translate
    @return A, T, G, C or N
    """
    if nt in ("A", "T", "G", "C", "N"):
        return nt
    if nt == "U":
        return "T"
    # Nucleotides represented by each ambiguity code.
    ambiguityCodes = {
        "R": "AG",
        "Y": "CT",
        "M": "CA",
        "K": "TG",
        "W": "TA",
        "S": "CG",
        "B": "CTG",
        "D": "ATG",
        "H": "ATC",
        "V": "ACG",
    }
    candidates = ambiguityCodes.get(nt)
    if candidates is None:
        return "N"
    return random.choice(candidates)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
475
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
476 ## Get nucleotide from an IUPAC letter and a nucleotide
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
477 # Works only for IUPAC code with two possibilities ['R','Y','M','K','W','S']
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
478 # Examples:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
479 # Y and C returns T
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
480 # Y and T returns C
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
481 # B and C throws RepetException
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
482 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
483 # @return A, T, G, C
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
484 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getATGCNFromIUPACandATGCN(self, IUPACCode, nt):
    """Return the other nucleotide of a two-fold IUPAC ambiguity code.

    Only the codes standing for exactly two nucleotides are supported:
    R, Y, M, K, W and S.  For instance Y and C give T, Y and T give C.

    @param IUPACCode one of 'R', 'Y', 'M', 'K', 'W', 'S'
    @param nt one of the two nucleotides represented by IUPACCode
    @return A, T, G, C
    @raise RepetException if IUPACCode is not a two-fold code, or if nt is
           not one of the two nucleotides it represents
    """
    twoFoldCodes = {
        "R": ("A", "G"),
        "Y": ("C", "T"),
        "M": ("A", "C"),
        "K": ("T", "G"),
        "W": ("A", "T"),
        "S": ("C", "G"),
    }
    if IUPACCode not in twoFoldCodes:
        raise RepetException("Can't retrieve the third nucleotide from IUPAC code '%s' and nucleotide '%s'" % (IUPACCode, nt))

    first, second = twoFoldCodes[IUPACCode]
    if nt == first:
        return second
    if nt == second:
        return first
    raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
524
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getSeqWithOnlyATGCN(self):
    """Build a copy of the sequence containing only A, T, G, C and N.

    Each symbol of self.sequence is translated with
    self.getATGCNFromIUPAC(); the stored sequence is left untouched.

    @return the translated sequence (string)
    """
    # Join once instead of growing a string symbol by symbol, which may be
    # quadratic on long sequences.
    return "".join([self.getATGCNFromIUPAC(nt) for nt in self.sequence])
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
530
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
531
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
532 ## Replace any symbol not in (A,T,G,C,N) by another nucleotide it represents
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
533 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def partialIUPAC(self):
    """Replace, in place, the stored sequence by the result of
    self.getSeqWithOnlyATGCN().
    """
    onlyATGCN = self.getSeqWithOnlyATGCN()
    self.sequence = onlyATGCN
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
536
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
537
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
538 ## Remove non Unix end-of-line symbols, if any
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
539 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def checkEOF(self):
    """Remove the carriage returns from the sequence, if there are any.

    When at least one is found, a warning naming the header is written to
    the standard output and the stored sequence is cleaned in place;
    otherwise nothing happens.
    """
    symbol = "\r" # corresponds to '^M' from Windows
    if symbol not in self.sequence:
        return
    # sys.stdout.write produces the same output as the former print
    # statement while remaining valid syntax on Python 3.
    sys.stdout.write("WARNING: Windows EOF removed in '%s'\n" % (self.header,))
    sys.stdout.flush()
    self.sequence = self.sequence.replace(symbol, "")
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
547
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
548
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
549 ## Write Bioseq instance into a fasta file handler
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
550 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
551 # @param faFileHandler file handler of a fasta file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
552 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def write(self, faFileHandler):
    """Write the instance in fasta format into an open file handler.

    The header line (">" followed by the header) is written first, then
    the sequence is written by self.writeSeqInFasta().

    @param faFileHandler file handler of a fasta file
    """
    headerLine = ">%s\n" % (self.header)
    faFileHandler.write(headerLine)
    self.writeSeqInFasta(faFileHandler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
556
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
557
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
558 ## Write only the sequence of Bioseq instance into a fasta file handler
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
559 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
560 # @param faFileHandler file handler of a fasta file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
561 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def writeSeqInFasta(self, faFileHandler):
    """Write the sequence alone, as lines of at most 60 symbols.

    Nothing is written when self.getLength() is 0.

    @param faFileHandler file handler of a fasta file
    """
    lineWidth = 60
    offset = 0
    total = self.getLength()
    while offset < total:
        chunk = self.sequence[offset:offset + lineWidth]
        faFileHandler.write("%s\n" % (chunk))
        offset += lineWidth
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
567
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
568
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
569 ## Save Bioseq instance into a fasta file (appended by default)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
570 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
571 # @param faFile name of a fasta file as a string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
572 # @param mode file opening mode given to open(): 'w' (write) or 'a' (append, default)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
573 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def save(self, faFile, mode = "a"):
    """Write the instance in fasta format into the file named faFile.

    @param faFile name of a fasta file as a string
    @param mode opening mode handed to open(): "a" (default) appends to
           the file, "w" overwrites it
    """
    # The with-statement closes the file even when self.write() raises,
    # which the former open()/close() pair did not guarantee.
    with open(faFile, mode) as faFileHandler:
        self.write(faFileHandler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
578
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
579
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
580 ## Append Bioseq instance to a fasta file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
581 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
582 # @param faFile name of a fasta file as a string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
583 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def appendBioseqInFile(self, faFile):
    """Append the instance, in fasta format, to the file named faFile.

    Shortcut for self.save() called with the "a" mode.

    @param faFile name of a fasta file as a string
    """
    appendMode = "a"
    self.save(faFile, appendMode)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
586
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
587
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
588 ## Write Bioseq instance into a fasta file handler
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
589 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
590 # @param faFileHandler file handler on a file with writing right
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
591 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def writeABioseqInAFastaFile(self, faFileHandler):
    """Write the instance in fasta format into an open file handler.

    Plain alias of self.write().

    @param faFileHandler file handler on a file with writing right
    """
    handler = faFileHandler
    self.write(handler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
594
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
595
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
596 ## Write Bioseq instance with other header into a fasta file handler
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
597 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
598 # @param faFileHandler file handler on a file with writing right
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
599 # @param otherHeader a string representing a new header (without the > and the \n)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
600 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def writeWithOtherHeader(self, faFileHandler, otherHeader):
    """Write the instance in fasta format under another header.

    Note that the header of the instance is overwritten by otherHeader
    before writing and is not restored afterwards.

    @param faFileHandler file handler on a file with writing right
    @param otherHeader a string representing a new header (without the > and the \\n)
    """
    setattr(self, "header", otherHeader)
    self.write(faFileHandler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
604
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
605
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
606 ## Append Bioseq header and Bioseq sequence in a fasta file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
607 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
608 # @param faFileHandler file handler on a file with writing right
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
609 # @param otherHeader a string representing a new header (without the > and the \n)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
610 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def writeABioseqInAFastaFileWithOtherHeader(self, faFileHandler, otherHeader):
    """Write the instance in fasta format under another header.

    Plain alias of self.writeWithOtherHeader().

    @param faFileHandler file handler on a file with writing right
    @param otherHeader a string representing a new header (without the > and the \\n)
    """
    handler, newHeader = faFileHandler, otherHeader
    self.writeWithOtherHeader(handler, newHeader)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
613
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
614
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
615 ## get the list of Maps corresponding to seq without gap
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
616 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
617 # @warning This method was called getMap() in pyRepet.Bioseq
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
618 # @return a list of Map object
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
619 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getLMapWhithoutGap(self):
    """Describe every gap-free stretch of the sequence as a Map.

    The sequence is scanned once; each maximal run of sites different from
    "-" yields Map("<header>_subSeq<k>", header, start, end) where k counts
    the runs from 1 and start/end are the 1-based, inclusive positions of
    the run in the gapped sequence.

    An empty or all-gap sequence gives an empty list, and a run made of a
    single site (even when it is the whole sequence) gives a Map whose
    start equals its end.

    @warning This method was called getMap() in pyRepet.Bioseq
    @return a list of Map object
    """
    lMaps = []
    startMap = None  # start of the run being scanned, None inside a gap
    countSite = 0
    for countSite, site in enumerate(self.sequence, 1):
        if site == "-":
            if startMap is not None:
                # beginning of a gap: record the run that just ended
                lMaps.append(Map("%s_subSeq%i" % (self.header, len(lMaps) + 1), self.header, startMap, countSite - 1))
                startMap = None
        elif startMap is None:
            # first site after a gap (or first site of the sequence)
            startMap = countSite

    # the sequence ends inside a run: close it at the last site
    if startMap is not None:
        lMaps.append(Map("%s_subSeq%i" % (self.header, len(lMaps) + 1), self.header, startMap, countSite))

    return lMaps
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
662
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
663
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
664 ## get the percentage of GC
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
665 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
666 # @return a percentage
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
667 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getGCpercentage(self):
    """Compute the percentage of G and C over the whole sequence length.

    The count is made on the result of self.getSeqWithOnlyATGCN() and only
    looks for upper-case "G" and "C"; the total is self.getLength().

    @return a percentage (float), 0.0 for an empty sequence
    """
    length = self.getLength()
    if length == 0:
        # avoid a ZeroDivisionError on an empty sequence
        return 0.0
    tmpSeq = self.getSeqWithOnlyATGCN()
    nbGC = tmpSeq.count("G") + tmpSeq.count("C")
    return 100 * nbGC / float(length)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
672
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
673 ## get the percentage of GC of a sequence without counting N in sequence length
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
674 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
675 # @return a percentage
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
676 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getGCpercentageInSequenceWithoutCountNInLength(self):
    """Compute the percentage of G and C, leaving the N out of the length.

    G and C are counted (upper case only) on the result of
    self.getSeqWithOnlyATGCN(); the total is self.getLength() minus
    self.countNt("N").
    NOTE(review): countNt is defined elsewhere; it presumably counts the
    "N" of the stored sequence - confirm.

    @return a percentage (float), 0.0 when no site remains once the N are
            discarded
    """
    nbSites = self.getLength() - self.countNt("N")
    if nbSites == 0:
        # empty or N-only sequence: avoid a ZeroDivisionError
        return 0.0
    tmpSeq = self.getSeqWithOnlyATGCN()
    nbGC = tmpSeq.count("G") + tmpSeq.count("C")
    return 100 * nbGC / float(nbSites)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
681
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
682 ## get the 5 prime subsequence of a given length at the given position
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
683 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
684 # @param position integer
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
685 # @param flankLength integer subsequence length
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
686 # @return a sequence string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
687 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def get5PrimeFlank(self, position, flankLength):
    """Return the subsequence located just before a given position.

    The flank ends at position - 1 and starts flankLength sites before
    position, or at site 1 when fewer sites are available.  Coordinates
    are those expected by self.subseq().

    @param position integer
    @param flankLength integer subsequence length
    @return a sequence string, empty when position is 1
    """
    if position == 1:
        return ""

    # never start before the first site
    startOfFlank = max(1, position - flankLength)
    endOfFlank = position - 1
    flank = self.subseq(startOfFlank, endOfFlank)
    return flank.sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
701
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
702
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
703 ## get the 3 prime subsequence of a given length at the given position
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
704 # In the case of indels, the polymorphism length can be specified
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
705 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
706 # @param position integer
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
707 # @param flankLength integer subsequence length
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
708 # @param polymLength integer polymorphism length
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
709 # @return a sequence string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
710 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def get3PrimeFlank(self, position, flankLength, polymLength = 1):
    """Return the subsequence located just after a polymorphism.

    The flank starts at position + polymLength and spans flankLength
    sites, or stops at the last site of the sequence when fewer sites are
    available.  Coordinates are those expected by self.subseq().

    @param position integer
    @param flankLength integer subsequence length
    @param polymLength integer polymorphism length
    @return a sequence string, empty when the polymorphism reaches the end
            of the sequence
    """
    seqLength = len(self.sequence)
    startOfFlank = position + polymLength
    if startOfFlank > seqLength:
        return ""

    # never go past the last site
    endOfFlank = min(seqLength, startOfFlank + flankLength - 1)
    flank = self.subseq(startOfFlank, endOfFlank)
    return flank.sequence
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
723
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
724
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def _createWordList(self, size, l = None):
    """List every word obtained by extending prefixes with A, T, G and C.

    Starting from the prefixes in l, each prefix is extended size - 1
    times with each of 'A', 'T', 'G', 'C' (in that order), so that with
    the default prefixes all the 4**size words of length size are
    returned.

    @param size integer number of extension rounds plus one; with the
           default prefixes, the word length
    @param l list of starting prefixes, ['A', 'T', 'G', 'C'] when omitted
    @return a new list of words (never the list given as l); empty when
            size is lower than 1
    """
    alphabet = ['A', 'T', 'G', 'C']
    if size < 1:
        # the recursive version never terminated for such sizes
        return []
    # Work on a copy: the former mutable default list was handed back to
    # the caller as-is when size == 1, so modifying the result corrupted
    # every later call.
    words = list(alphabet if l is None else l)
    for _ in range(size - 1):
        words = [prefix + nt for prefix in words for nt in alphabet]
    return words
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
734
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
735
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def removeSymbol(self, symbol):
    """Delete, in place, every occurrence of symbol from the sequence.

    @param symbol the string to remove from self.sequence
    """
    cleaned = self.sequence.replace(symbol, "")
    self.sequence = cleaned