Mercurial > repos > davidmurphy > codonlogo
comparison corebio/seq_io/table_io.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c55bdc2fb9fa |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
4 # | |
5 # This software is distributed under the MIT Open Source License. | |
6 # <http://www.opensource.org/licenses/mit-license.html> | |
7 # | |
8 # Permission is hereby granted, free of charge, to any person obtaining a | |
9 # copy of this software and associated documentation files (the "Software"), | |
10 # to deal in the Software without restriction, including without limitation | |
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 # and/or sell copies of the Software, and to permit persons to whom the | |
13 # Software is furnished to do so, subject to the following conditions: | |
14 # | |
15 # The above copyright notice and this permission notice shall be included | |
16 # in all copies or substantial portions of the Software. | |
17 # | |
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 # THE SOFTWARE. | |
25 # | |
26 | |
27 """Read and write sequence information in tab delimited format. | |
28 | |
29 This very simple format has two columns per line. The first column is a sequence name, the second column is the sequence itself. The columns are separated by a single tab ("\\t") character. | |
30 | |
31 """ | |
32 from corebio.utils import * | |
33 from corebio.seq import * | |
34 from corebio.seq_io import * | |
35 | |
36 | |
# Names by which this file format is known.
names = ('table', 'tab')
# File extensions associated with this format. The trailing comma is
# required: ('tbl') is merely the parenthesised string 'tbl', and iterating
# over it would yield the single characters 't', 'b' and 'l'.
extensions = ('tbl',)
39 | |
40 | |
41 example = """ | |
42 EC0001 MKRISTTITTTITITTGNGAG | |
43 EC0002 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAM | |
44 EC0003 MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLG | |
45 EC0004 MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLD | |
46 EC0005 MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGH | |
47 EC0006 MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLM | |
48 EC0007 MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNS | |
49 EC0008 MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRK | |
50 EC0009 MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDE | |
51 EC0010 MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQ | |
52 """ | |
53 | |
54 | |
55 | |
56 | |
def read(fin, alphabet=None):
    """Parse a whole tab delimited file and collect its sequences.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns:
        SeqList -- A list of sequences
    Raises:
        ValueError -- If the file is unparsable
    """
    # iterseq() does the parsing lazily; drain it completely before
    # wrapping the result in a SeqList.
    parsed = list(iterseq(fin, alphabet))
    return SeqList(parsed)
70 | |
71 | |
def iterseq(fin, alphabet=None):
    """Parse a tab delimited file and generate sequences.

    Each non-blank line must hold exactly two tab separated columns: the
    sequence name followed by the sequence itself. Leading and trailing
    whitespace (tabs included) is stripped from every line before it is
    split, and lines that are empty after stripping are skipped.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Yields:
        Seq -- One alphabetic sequence at a time.
    Raises:
        ValueError -- If a non-blank line does not contain exactly two
            tab separated columns. The line number in the message is
            1-based.
    """
    alphabet = Alphabet(alphabet)

    for lineno, line in enumerate(fin):
        line = line.strip()
        if line == '':
            continue

        columns = line.split('\t')
        if len(columns) != 2:
            # enumerate() counts from zero; report the 1-based line number
            # that a text editor would show for the offending line.
            raise ValueError("Parse failed on line %d: did not find two "
                             "columns seperated by a tab." % (lineno + 1))
        yield Seq(columns[1], alphabet=alphabet, name=columns[0])
94 | |
95 | |
def write(fout, seqs):
    """Write sequences to a two column, tab delimited file.

    Every sequence is handed to writeseq() in turn.

    Args:
        fout -- A writable stream.
        seqs -- A list of Seq's
    """
    for sequence in seqs:
        writeseq(fout, sequence)
104 | |
105 | |
def writeseq(fout, seq):
    """Write a single sequence as one line of a tab delimited table.

    The line consists of the sequence name (empty if the sequence has no
    name), a single tab character, the sequence itself and a newline.

    Args:
        fout -- A writable stream.
        seq  -- A Seq instance
    """
    name = seq.name or ''
    # Write the line explicitly: a print statement with several items puts
    # a space between each of them, which would surround the tab with
    # spaces and break the "single tab" column separator.
    fout.write("%s\t%s\n" % (name, seq))
116 | |
117 | |
118 | |
119 | |
120 | |
121 | |
122 | |
123 | |
124 | |
125 | |
126 |