Mercurial > repos > yufei-luo > s_mart
comparison commons/core/sql/TableSeqAdaptator.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 | |
32 import sys | |
33 from commons.core.sql.TableAdaptator import TableAdaptator | |
34 from commons.core.sql.ITableSeqAdaptator import ITableSeqAdaptator | |
35 from commons.core.coord.SetUtils import SetUtils | |
36 from commons.core.seq.Bioseq import Bioseq | |
37 | |
38 | |
39 ## Adaptator for a Seq table | |
40 # | |
41 class TableSeqAdaptator( TableAdaptator, ITableSeqAdaptator ): | |
42 | |
43 ## Retrieve all the distinct accession names in a list. | |
44 # | |
45 # @return lAccessions list of accessions | |
46 # | |
def getAccessionsList(self):
    """Return every distinct value found in the 'accession' column.

    @return list of accessions, as built by _getStringListWithSQLCmd()
    """
    query = "SELECT DISTINCT accession FROM %s;" % (self._table)
    return self._getStringListWithSQLCmd(query)
51 | |
52 ## Save sequences in a fasta file from a list of accession names. | |
53 # | |
54 # @param lAccessions list of accessions | |
55 # @param outFileName string Fasta file | |
56 # | |
def saveAccessionsListInFastaFile(self, lAccessions, outFileName):
    """Write the sequences named in lAccessions to a fasta file.

    Each accession is turned into a bioseq with getBioseqFromHeader() and
    that bioseq writes itself to the output file, in list order.

    @param lAccessions list of accessions
    @param outFileName string name of the fasta file, opened in "w" mode
    """
    # 'with' guarantees the handle is closed even when a lookup or a write
    # raises half-way through the list.
    with open(outFileName, "w") as outFile:
        for ac in lAccessions:
            bs = self.getBioseqFromHeader(ac)
            bs.write(outFile)
63 | |
64 ## Get a bioseq instance given its header | |
65 # | |
66 # @param header string name of the sequence ('accession' field in the 'seq' table) | |
67 # @return bioseq instance | |
68 # | |
def getBioseqFromHeader(self, header):
    """Build a bioseq from the table row whose accession equals header.

    The query text is assembled by plain string formatting, so header is
    inserted into the SQL command unescaped.

    @param header string name of the sequence ('accession' field of the table)
    @return Bioseq built from the first two columns of the first fetched row
    """
    query = "SELECT * FROM %s WHERE accession='%s';" % (self._table, header)
    db = self._iDb
    db.execute(query)
    # Indexing the first row raises IndexError when nothing matched.
    firstRow = db.fetchall()[0]
    return Bioseq(firstRow[0], firstRow[1])
74 | |
75 ## Retrieve the length of a sequence given its name. | |
76 # | |
77 # @param accession name of the sequence | |
78 # @return seqLength integer length of the sequence | |
79 # | |
def getSeqLengthFromAccession(self, accession):
    """Look up the 'length' field of the row matching an accession.

    @param accession name of the sequence
    @return value produced by the database helper getIntegerWithSQLCmd()
    """
    query = 'SELECT length FROM %s WHERE accession="%s"' % (self._table, accession)
    return self._iDb.getIntegerWithSQLCmd(query)
84 | |
85 ## Retrieve the length of a sequence given its description. | |
86 # | |
87 # @param description of the sequence | |
88 # @return seqLength integer length of the sequence | |
89 # | |
def getSeqLengthFromDescription(self, description):
    """Look up the 'length' field of the row matching a description.

    @param description description of the sequence
    @return value produced by the database helper getIntegerWithSQLCmd()
    """
    query = 'SELECT length FROM %s WHERE description="%s"' % (self._table, description)
    return self._iDb.getIntegerWithSQLCmd(query)
94 | |
95 ## Retrieve all the accessions with length in a list of tuples | |
96 # | |
97 # @return lAccessionLengthTuples list of tuples | |
98 # | |
def getAccessionAndLengthList(self):
    """Fetch the 'accession' and 'length' fields of every row of the table.

    @return list holding the fetched rows, in the order they were returned
    """
    query = 'SELECT accession, length FROM %s' % self._table
    self._iDb.execute(query)
    # Copy the fetched rows into a fresh list, one entry per row.
    return list(self._iDb.fetchall())
107 | |
108 ## get subsequence according to given parameters | |
109 # | |
110 # @param accession | |
111 # @param start integer | |
112 # @param end integer | |
113 # @return bioseq.sequence string | |
114 # | |
def getSubSequence(self, accession, start, end):
    """Return the part of a stored sequence lying between two coordinates.

    Both coordinates must be strictly positive and no larger than the
    length of the sequence, and the accession must be listed by
    getAccessionsList(). Any violation prints an error message and
    terminates the program through sys.exit(1).

    When start is greater than end, the extracted string is passed through
    Bioseq.reverseComplement() before being returned.

    @param accession name of the sequence
    @param start integer
    @param end integer
    @return bioseq.sequence string
    """
    # A single parenthesised argument prints the same text under the Python 2
    # print statement and the Python 3 print function.
    if start <= 0 or end <= 0:
        print("ERROR with coordinates start=%i or end=%i" % (start, end))
        sys.exit(1)

    if accession not in self.getAccessionsList():
        print("ERROR: accession '%s' absent from table '%s'" % (accession, self._table))
        sys.exit(1)

    lengthAccession = self.getSeqLengthFromAccession(accession)
    if start > lengthAccession or end > lengthAccession:
        print("ERROR: coordinates start=%i end=%i out of sequence '%s' range (%i bp)" % (start, end, accession, lengthAccession))
        sys.exit(1)

    # The substring always starts at the smaller coordinate; orientation is
    # restored afterwards by the reverse-complement step.
    sqlCmd = "SELECT SUBSTRING(sequence,%i,%i) FROM %s WHERE accession='%s'" % (min(start, end), abs(end - start) + 1, self._table, accession)
    self._iDb.execute(sqlCmd)
    res = self._iDb.fetchall()

    # The scratch bioseq is only needed once the query has succeeded.
    bs = Bioseq()
    bs.setSequence(res[0][0])
    if start > end:
        bs.reverseComplement()
    return bs.sequence
137 | |
138 ## get bioseq from given set list | |
139 # | |
140 # @param lSets set list of sets | |
141 # @return bioseq instance | |
142 # | |
def getBioseqFromSetList(self, lSets):
    """Assemble one bioseq from the sub-sequences covered by a list of sets.

    The header starts with the name, id and seqname of the first set and is
    followed by the comma-separated 'start..end' ranges of the sets, in the
    order in which their sub-sequences are concatenated.

    @param lSets list of sets
    @return bioseq instance
    """
    prefix = "%s::%i %s " % (lSets[0].name, lSets[0].id, lSets[0].seqname)
    lOrdered = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets)
    # lSets[0] is looked up again rather than cached: the sort helper is
    # defined elsewhere and may or may not reorder lSets itself.
    if not lSets[0].isOnDirectStrand():
        lOrdered.reverse()
    lRanges = []
    lChunks = []
    for iSet in lOrdered:
        lRanges.append("%i..%i," % (iSet.getStart(), iSet.getEnd()))
        lChunks.append(self.getSubSequence(iSet.seqname, iSet.getStart(), iSet.getEnd()))
    fullHeader = prefix + "".join(lRanges)
    # Drop the last character of the header (the comma after the final range).
    return Bioseq(fullHeader[:-1], "".join(lChunks))
153 | |
154 ## Return True if the given accession is present in the table | |
155 # | |
def isAccessionInTable(self, name):
    """Tell whether at least one row of the table has the given accession.

    @param name accession to look for
    @return True if the query fetched at least one row, False otherwise
    """
    query = "SELECT accession FROM %s WHERE accession='%s'" % (self._table, name)
    self._iDb.execute(query)
    rows = self._iDb.fetchall()
    if rows:
        return True
    return False
161 | |
162 ## Export the sequences of all the distinct accessions of the table to a fasta file. | |
163 # | |
164 # @param outFileName string Fasta file | |
165 # | |
def exportInFastaFile(self, outFileName):
    """Save the sequence of every accession of the table in a fasta file.

    The accessions come from getAccessionsList() and the writing is
    delegated to saveAccessionsListInFastaFile().

    @param outFileName string Fasta file
    """
    self.saveAccessionsListInFastaFile(self.getAccessionsList(), outFileName)
169 | |
170 def _getStringListWithSQLCmd( self, sqlCmd ): | |
171 self._iDb.execute(sqlCmd) | |
172 res = self._iDb.fetchall() | |
173 lString = [] | |
174 for i in res: | |
175 lString.append(i[0]) | |
176 return lString | |
177 | |
178 def _getTypeAndAttr2Insert(self, bs): | |
179 type2Insert = ( "'%s'", "'%s'", "'%s'", "'%i'" ) | |
180 attr2Insert = (bs.header.split()[0], bs.sequence, bs.header, bs.getLength()) | |
181 return type2Insert, attr2Insert | |
182 | |
183 def _escapeAntislash(self, obj): | |
184 pass | |
185 |