annotate commons/tools/AlignTEOnGenomeAccordingToAnnotation.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 # Copyright INRA (Institut National de la Recherche Agronomique)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # http://www.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # http://urgi.versailles.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # This software is governed by the CeCILL license under French law and
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # abiding by the rules of distribution of free software. You can use,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # modify and/ or redistribute the software under the terms of the CeCILL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # license as circulated by CEA, CNRS and INRIA at the following URL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # "http://www.cecill.info".
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 # As a counterpart to the access to the source code and rights to copy,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 # modify and redistribute granted by the license, users are provided only
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 # with a limited warranty and the software's author, the holder of the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 # economic rights, and the successive licensors have only limited
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 # liability.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 # In this respect, the user's attention is drawn to the risks associated
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 # with loading, using, modifying and/or developing or reproducing the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 # software by the user in light of its specific status of free software,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 # that may mean that it is complicated to manipulate, and that also
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 # therefore means that it is reserved for developers and experienced
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 # professionals having in-depth computer knowledge. Users are therefore
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 # encouraged to load and test the software's suitability as regards their
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 # requirements in conditions enabling the security of their systems and/or
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 # data to be ensured and, more generally, to use and operate it in the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 # same conditions as regards security.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 # The fact that you are presently reading this means that you have had
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 # knowledge of the CeCILL license and that you accept its terms.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 import re
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 from commons.core.LoggerFactory import LoggerFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 from commons.core.utils.RepetOptionParser import RepetOptionParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 from commons.core.checker.ConfigChecker import ConfigRules, ConfigChecker
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 import subprocess
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 from commons.core.coord.PathUtils import PathUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42 from commons.core.coord.SetUtils import SetUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44 from commons.core.coord.Set import Set
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45 from commons.core.sql.DbFactory import DbFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47 ## Align a TE on genome according to annotation
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
48
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Prefix of the logger name; the class appends its own name to it
# ("<LOG_DEPTH>.<ClassName>") when creating its logger.
LOG_DEPTH = "repet.tools"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class AlignTEOnGenomeAccordingToAnnotation(object):
    """Align a TE on genome according to annotation.

    For every path identifier found in a path table, the matching query and
    subject sequences are fetched from two sequence tables, aligned with the
    external ``NWalign`` program, and the aligned sequences together with
    their score and identity are inserted into an output table.
    """

    def __init__(self, pathTableName = "", queryTableName = "", subjectTableName = "", mergeSamePathId = False, outTableName = "", matchPenality=10, mism=8, gapo=16, gape=4, gapl=20, configFileName = "", doClean = False, verbosity = 0):
        """Store the parameters and create the logger.

        @param pathTableName: name of the path table
        @param queryTableName: name of the query sequence table
        @param subjectTableName: name of the subject sequence table
        @param mergeSamePathId: if True, merge the (overlapping) query sets of
            a path instead of aligning the whole span of the path
        @param outTableName: name of the output table; when empty it defaults
            to "<pathTableName>_align"
        @param matchPenality: value passed to NWalign option -m
        @param mism: value passed to NWalign option -d
        @param gapo: value passed to NWalign option -g
        @param gape: value passed to NWalign option -e
        @param gapl: value passed to NWalign option -l
        @param configFileName: optional configuration file name
        @param doClean: stored only; not used by the visible code
        @param verbosity: logger verbosity
        """
        self._pathTableName = pathTableName
        self._queryTableName = queryTableName
        self._subjectTableName = subjectTableName
        self._mergeSamePathId = mergeSamePathId
        # Must come after _pathTableName is set: the default name derives from it.
        self.setOutTableName(outTableName)
        self._matchPenality = matchPenality
        self._mismatch = mism
        self._gapOpening = gapo
        self._gapExtend = gape
        self._gapLength = gapl
        self._configFileName = configFileName
        self._doClean = doClean
        self._verbosity = verbosity
        self._iDb = None
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """Parse the command line and set the attributes from its options."""
        description = "Align a TE on genome according to annotation."
        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
        epilog += "\t$ python AlignTEOnGenomeAccordingToAnnotation.py -p DmelChr4_chr_allTEs_nr_noSSR_join_path -q DmelChr4_chr_seq -s DmelChr4_refTEs_seq -v 0"
        epilog += "\n"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-p", "--path", dest = "pathTableName", action = "store", type = "string", help = "path table name [compulsory] [format: path]", default = "")
        parser.add_option("-q", "--query", dest = "queryTableName", action = "store", type = "string", help = "query table name [compulsory] [format: seq]", default = "")
        parser.add_option("-s", "--subject", dest = "subjectTableName", action = "store", type = "string", help = "subject table name [compulsory] [format: seq]", default = "")
        parser.add_option("-m", "--merge", dest = "mergeSamePathId", action = "store_true", help = "merge joined matchs [optional] [default: False]", default = False)
        parser.add_option("-o", "--out", dest = "outTableName", action = "store", type = "string", help = "output table name [default: <pathTableName>_align]", default = "")
        #TODO: add options for : matchPenality=10, mism=8, gapo=16, gape=4, gapl=20
        parser.add_option("-C", "--config", dest = "configFileName", action = "store", type = "string", help = "configuration file name (e.g. TEannot.cfg)", default = "")
        #NOTE: doClean unused
#        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy the parsed command-line options into the attributes."""
        # The path table name is set before the output table name because the
        # default output name is built from it.
        self.setPathTableName(options.pathTableName)
        self.setQueryTableName(options.queryTableName)
        self.setSubjectTableName(options.subjectTableName)
        self.setMergeSamePathId(options.mergeSamePathId)
        self.setOutTableName(options.outTableName)
        self.setConfigFileName(options.configFileName)
#        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def _checkConfig(self):
        """Check the configuration file against the rules and load it."""
        iConfigRules = ConfigRules()
        iConfigRules.addRuleSection(section="", mandatory=True)
        iConfigRules.addRuleOption(section="", option ="fasta_name", mandatory=True, type="string")
        iConfigRules.addRuleOption(section="", option ="clean", mandatory=True, type="bool")
        iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)
        iConfig = iConfigChecker.getConfig()
        self._setAttributesFromConfig(iConfig)

    def _setAttributesFromConfig(self, iConfig):
        """Set the attributes that come from the configuration object."""
        self.setOutTableName(self._outTableName)
        self.setDoClean(iConfig.get("", "clean"))

    def setPathTableName(self, pathTableName):
        """Set the path table name (does not recompute the output table name)."""
        self._pathTableName = pathTableName

    def setQueryTableName(self, queryTableName):
        """Set the query sequence table name."""
        self._queryTableName = queryTableName

    def setSubjectTableName(self, subjectTableName):
        """Set the subject sequence table name."""
        self._subjectTableName = subjectTableName

    def setMergeSamePathId(self, mergeSamePathId):
        """Set whether the query sets of a path are merged before alignment."""
        self._mergeSamePathId = mergeSamePathId

    def setOutTableName(self, outTableName):
        """Set the output table name.

        An empty name selects the default "<pathTableName>_align", built from
        the path table name known at the time of the call.
        """
        if outTableName == "":
            self._outTableName = "%s_align" % self._pathTableName
        else:
            self._outTableName = outTableName

    def setConfigFileName(self, configFileName):
        """Set the configuration file name."""
        self._configFileName = configFileName

    def setDoClean(self, doClean):
        """Set the 'clean' flag."""
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        """Set the verbosity (applied to the logger by run())."""
        self._verbosity = verbosity

    def setDbInstance(self, iDb):
        """Set the database instance used by the table checks and queries."""
        self._iDb = iDb

    def _checkOptions(self):
        """Check that the three input tables are named and exist.

        Requires the database instance to be set.
        Raises Exception (after logging) for the first missing table.
        """
        if self._pathTableName == "" or not self._iDb.doesTableExist(self._pathTableName):
            self._logAndRaise("ERROR: Missing path table")
        if self._queryTableName == "" or not self._iDb.doesTableExist(self._queryTableName):
            self._logAndRaise("ERROR: Missing query table")
        if self._subjectTableName == "" or not self._iDb.doesTableExist(self._subjectTableName):
            self._logAndRaise("ERROR: Missing subject table")

    def _logAndRaise(self, errorMsg):
        """Log the message as an error, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def alignBioseqWithNWalign(self, iBioseq1, iBioseq2):
        """Align two sequences with the external NWalign program.

        Both sequences are saved to temporary files in the current directory,
        NWalign is run on them, and its output file is loaded.

        @param iBioseq1: first sequence (object providing save(fileName))
        @param iBioseq2: second sequence (object providing save(fileName))
        @return: AlignedBioseqDB loaded from the NWalign output file
        Raises Exception (after logging) when NWalign exits with a non-zero
        status.
        """
        fastaFileName1 = "seqtoalign1.tmp"
        fastaFileName2 = "seqtoalign2.tmp"
        alignBioseqDBFileName = "aligned.tmp"
        try:
            iBioseq1.save(fastaFileName1)
            iBioseq2.save(fastaFileName2)
            cmd = "NWalign"
            cmd += " %s" % fastaFileName1
            cmd += " %s" % fastaFileName2
            cmd += " -m %s" % self._matchPenality
            cmd += " -d %s" % self._mismatch
            cmd += " -g %s" % self._gapOpening
            cmd += " -e %s" % self._gapExtend
            cmd += " -l %s" % self._gapLength
            cmd += " -D"
            cmd += " -o %s" % alignBioseqDBFileName
            self._log.debug("Running : %s" % cmd)
            process = subprocess.Popen(cmd, shell = True)
            process.communicate()
            if process.returncode != 0:
                self._logAndRaise("ERROR when launching '%s'" % cmd)
            iAlignedBioseqDB = AlignedBioseqDB()
            iAlignedBioseqDB.load(alignBioseqDBFileName)
        finally:
            # The file names are fixed, so stale files must never survive a
            # failure: they could be picked up by the next alignment.
            for tmpFileName in (fastaFileName1, fastaFileName2, alignBioseqDBFileName):
                if os.path.exists(tmpFileName):
                    os.remove(tmpFileName)
        return iAlignedBioseqDB

    def _getMergedSetLists(self, lPaths):
        """Return (query sets, subject sets) of a path, merging the query sets."""
        #TODO: getSetListFromQueries() call getSubjectAsSetOfQuery() => "reverse complement" the query (coordinates are inversed, so getBioseq() will take the reverse-comp.) : is it correct ?
        lQuerySets = PathUtils.getSetListFromQueries(lPaths)
        #NOTE: merge sets if overlap
        #TODO: getBioseqFromSetList() build a sequence that does not exist : is it correct ?
        lQueryMergedSets = SetUtils.mergeSetsInList(lQuerySets)
        #TODO: no merge for subjects : is it correct ? matcher allow overlap on query and not on subject ?
        lSubjectSets = PathUtils.getSetListFromSubjects(lPaths)
        return lQueryMergedSets, lSubjectSets

    def _getSpanningSetLists(self, pathNum, lPaths):
        """Return (query sets, subject sets) holding one set each, spanning the whole path."""
        firstPath = lPaths[0]
        queryName = firstPath.getQueryName()
        subjectName = firstPath.getSubjectName()
        isReversed = not firstPath.isSubjectOnDirectStrand()
        queryStart = min([iPath.getQueryStart() for iPath in lPaths])
        queryEnd = max([iPath.getQueryEnd() for iPath in lPaths])
        lSubjectStart = [iPath.getSubjectStart() for iPath in lPaths]
        lSubjectEnd = [iPath.getSubjectEnd() for iPath in lPaths]
        if isReversed:
            # Strand taken from the first path only: start is then the
            # largest coordinate and end the smallest.
            subjectStart = max(lSubjectStart)
            subjectEnd = min(lSubjectEnd)
        else:
            subjectStart = min(lSubjectStart)
            subjectEnd = max(lSubjectEnd)
        lQuerySets = [Set(pathNum,subjectName, queryName,queryStart,queryEnd)]
        lSubjectSets = [Set(pathNum,queryName, subjectName,subjectStart,subjectEnd)]
        return lQuerySets, lSubjectSets

    def alignSeqAccordingToPathAndBuildAlignedSeqTable(self):
        """Create the output table and fill it with one alignment per path id.

        Requires the database instance to be set.
        Raises Exception (after logging) when the output table already exists.
        """
        if self._iDb.doesTableExist(self._outTableName):
            self._logAndRaise("ERROR: out table %s already exists" % self._outTableName)

        #TODO: create alignedSeq table in DbMySql...
        sqlCmd="CREATE TABLE %s (path int unsigned, query_aligned_seq longtext, subject_aligned_seq longtext, score int unsigned, identity float unsigned)" % self._outTableName
        self._iDb.execute(sqlCmd)

        iQueryTSA = TableSeqAdaptator(self._iDb, self._queryTableName)
        iSubjectTSA = TableSeqAdaptator(self._iDb, self._subjectTableName)
        iTPA = TablePathAdaptator(self._iDb, self._pathTableName)

        lPathId = iTPA.getIdList()
        pathNb = len(lPathId)
        for count, pathNum in enumerate(lPathId, 1):
            self._log.debug("%s / %s => path %s ..." % (count, pathNb, pathNum))
            lPaths = iTPA.getPathListFromId(pathNum)
            if self._mergeSamePathId:
                lQuerySets, lSubjectSets = self._getMergedSetLists(lPaths)
            else:
                lQuerySets, lSubjectSets = self._getSpanningSetLists(pathNum, lPaths)
            iQueryBioseq = iQueryTSA.getBioseqFromSetList(lQuerySets)
            iSubjectBioseq = iSubjectTSA.getBioseqFromSetList(lSubjectSets)
            iAlignedBioseqDB = self.alignBioseqWithNWalign(iQueryBioseq, iSubjectBioseq)
            self._insertAlignedBioseqDBWithScoreAndIdentityInTable(pathNum, iAlignedBioseqDB)

    def run(self):
        """Check the inputs, run all the alignments and close the database.

        A new database instance is created for the run and is always closed,
        even when a check or an alignment fails.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        if self._configFileName:
            self._checkConfig()
        self._iDb = DbFactory.createInstance()
        try:
            self._checkOptions()
            self._log.info("START AlignTEOnGenomeAccordingToAnnotation")
            self._log.debug("path table name: %s" % self._pathTableName)
            self._log.debug("query table name: %s" % self._queryTableName)
            self._log.debug("subject table name: %s" % self._subjectTableName)
            self.alignSeqAccordingToPathAndBuildAlignedSeqTable()
        finally:
            self._iDb.close()
        self._log.info("END AlignTEOnGenomeAccordingToAnnotation")

    def _insertAlignedBioseqDBWithScoreAndIdentityInTable(self, pathNum, iAlignedBioseqDB):
        """Insert one alignment, with its score and identity, in the output table.

        The score and identity are read from the header of the first aligned
        sequence, which must contain "Score=<int>" followed by
        "Identity=<float>". The identity is stored multiplied by 100; a NaN
        identity is stored as 0.0.

        @param pathNum: path identifier (integer)
        @param iAlignedBioseqDB: object whose db[0] and db[1] provide the
            aligned sequences (attributes header and sequence)
        """
        header = iAlignedBioseqDB.db[0].header
        querySeq = iAlignedBioseqDB.db[0].sequence
        subjectSeq = iAlignedBioseqDB.db[1].sequence

        scoreWithEndLine = re.split("Score=", header)[1]
        score = int(scoreWithEndLine.split()[0])

        # Parse first and test numerically: the token may carry a trailing
        # end of line or be spelled "-nan", which a string comparison misses.
        identity = float(re.split("Identity=", scoreWithEndLine)[1].strip())
        if identity != identity:
            # NaN is the only float different from itself.
            identity = 0.0
        identity = identity*100.0

        #TODO: create TableAlignedSeqAdaptator (to use insert...)
        #NOTE(review): SQL built by string formatting; the inserted sequences
        # come from the NWalign output file, not from user input.
        sqlCmd = 'INSERT INTO %s VALUES (%d,"%s","%s", %d,%f);' % (self._outTableName, pathNum, querySeq, subjectSeq, score, identity)
        self._iDb.execute(sqlCmd)

        self._log.debug("header: %s" % header)
        self._log.debug("path %s Score= %s Identity= %s ok" % (pathNum, score, identity))
        self._log.debug(querySeq[:80])
        self._log.debug(subjectSeq[:80])
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: configure the tool from the command line, then run it.
    launcher = AlignTEOnGenomeAccordingToAnnotation()
    launcher.setAttributesFromCmdLine()
    launcher.run()