Mercurial > repos > davidmurphy > codonlogo
comparison corebio/seq_io/genbank_io.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c55bdc2fb9fa |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 | |
4 """Read GenBank flat files. | |
5 | |
6 Currently only reads sequence data and not annotations. | |
7 | |
8 """ | |
9 from corebio.utils import * | |
10 from corebio.seq import * | |
11 | |
12 | |
# Names identifying the GenBank format (presumably consulted by the
# surrounding seq_io package when selecting a parser -- confirm).
names = ('genbank',)
# File name extensions associated with GenBank flat files.
extensions = ('gb', 'genbank', 'gbk')
15 | |
16 | |
17 | |
def read(fin, alphabet=None):
    """Parse every genbank record in a file and collect the sequences.

    This is the eager counterpart of iterseq(): the whole input is
    consumed before the result is returned.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given

    Returns:
        SeqList -- A list of sequences, one per record, in file order

    Raises:
        ValueError -- If the file is unparsable
    """
    records = list(iterseq(fin, alphabet))
    return SeqList(records)
33 | |
34 | |
def iterseq(fin, alphabet=None):
    """Iterate over genbank records.

    Only the sequence data of each record is read; everything between
    the 'LOCUS' line and the 'ORIGIN' line is skipped.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given

    Yields:
        Seq -- One alphabetic sequence at a time. A record that is
               terminated by '//' without an 'ORIGIN' section yields
               an empty sequence.

    Raises:
        ValueError -- If the file is unparsable (a record does not
                      start with a 'LOCUS' line)
    """
    alphabet = Alphabet(alphabet)

    def notblank(string):
        return not isblank(string)

    def origin_or_end(string):
        # str.startswith accepts a tuple of alternatives.
        return string.startswith(('ORIGIN', '//'))

    lines = Reiterate(iter(fin))

    while True:
        # lines.filter() is expected to raise StopIteration once the input
        # runs out (the loop has no other exit). Under PEP 479 a
        # StopIteration that escapes a generator body is turned into a
        # RuntimeError, so end the generator explicitly instead.
        try:
            line = lines.filter(notblank)
        except StopIteration:
            return

        if not line.startswith('LOCUS'):
            raise ValueError(
                "Cannot find start of record at line %d" % lines.index())

        try:
            line = lines.filter(origin_or_end)
        except StopIteration:
            # Input ended inside a record header. Keep the historical
            # behaviour: stop iterating without yielding this record.
            return

        if line.startswith('//'):
            # No sequence data
            yield Seq('', alphabet)
            continue

        # Sequence lines look like "<position> <chunk> <chunk> ...";
        # drop the leading position field and keep the residue chunks.
        seq = []
        for line in lines:
            if line.startswith('//'):
                yield Seq(''.join(seq), alphabet)
                break
            seq.extend(line.split()[1:])
        # If the input ends before the closing '//', the partial record is
        # dropped: the next pass through the loop hits end of input above.
77 | |
78 | |
79 | |
80 | |
81 | |
82 | |
83 | |
84 | |
85 |