annotate commons/tools/GetMultAlignAndPhylogenyPerTErefSeq.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 ##@file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # usage: GetMultAlignAndPhylogenyPerTErefSeq.py [ options ]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 # options:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # -h: this help
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # -S: step (0: all steps [default], 1:file generation, 2:multiple alignments, 3:phylogenies)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # -p: table with the annotations (format=path)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # -s: table with the TE reference sequences (format=seq)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # -g: table with the genome sequence (format=seq)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 # -r: name or file with TE reference sequence(s) (all by default)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 # -m: MSA method (RefAlign [default] or Map)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 # -l: minimum length of copies (default=100)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 # -n: number of longest copies to use (default=20)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 # -y: minimum copy proportion compared to the reference sequence (default=0.5)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 # -R: keep the reference sequence (only with Refalign)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 # -C: configuration file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 # -q: queue name
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 # -c: clean
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 # -d: temporary directory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 # -v: verbosity level (default=0/1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 import glob
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 import ConfigParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 import pyRepet.launcher.programLauncher
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 from commons.core.coord.PathUtils import PathUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32 from commons.core.seq.FastaUtils import FastaUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 from commons.core.coord.SetUtils import SetUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 from commons.tools.OrientSequences import OrientSequences
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 from ConfigParser import MissingSectionHeaderError
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 from commons.core.utils.RepetOptionParser import RepetOptionParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 from commons.core.LoggerFactory import LoggerFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 from commons.launcher import LaunchMap
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42 from commons.core.sql.DbFactory import DbFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44 from commons.core.launcher.Launcher import Launcher
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45 from commons.core.utils.FileUtils import FileUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Prefix of the logger name used by the tools in this module.
LOG_DEPTH = "repet.tools"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
49
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50 ## For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52 class GetMultAlignAndPhylogenyPerTErefSeq(object):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
53
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def __init__(self, pathTableName="", refSeqTableName="", genomeSeqTableName="",
             step=0, mSAmethod="RefAlign", keepRefseq=False,
             configFileName="", clean=True, verbosity=3):
    """
    Constructor.

    @param pathTableName: table with the annotations (format=path)
    @param refSeqTableName: table with the TE reference sequences (format=seq)
    @param genomeSeqTableName: table with the genome sequence (format=seq)
    @param step: step to run (0 means all steps)
    @param mSAmethod: name of the multiple-alignment method
    @param keepRefseq: whether the reference sequence is kept
    @param configFileName: name of the configuration file
    @param clean: whether to clean up
    @param verbosity: verbosity level, also handed to the logger
    """
    # Values supplied by the caller.
    self.step = step
    self._pathTable = pathTableName
    self._refSeqTable = refSeqTableName
    self._genomeSeqTable = genomeSeqTableName
    self._MSAmethod = mSAmethod
    self._keepRefseq = keepRefseq
    self.setConfigFileName(configFileName)
    self._clean = clean
    self._verbosity = verbosity

    # Settings that only have built-in defaults here; the setters change them.
    self._TErefseq = ""
    self._minCopyLength = 100
    self._nbLongestCopies = 20
    self._minPropCopy = 0.5
    self._queue = ""
    self._tmpDir = ""

    # Database handles, filled in by connectSql().
    self._db = None
    self._tpaAnnot = None
    self._tsaRef = None

    self._pL = pyRepet.launcher.programLauncher.programLauncher()
    loggerName = "%s.%s" % (LOG_DEPTH, self.__class__.__name__)
    self._log = LoggerFactory.createLogger(loggerName, self._verbosity)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
78
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _logAndRaise(self, errorMsg):
    """
    Record errorMsg through the logger at error level, then raise it.

    @param errorMsg: message to log and to carry in the exception
    @raise Exception: always, with errorMsg as its message
    """
    self._log.error(errorMsg)
    raise Exception(errorMsg)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
82
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setAttributesFromCmdLine(self):
    """
    Declare the command-line options, parse the command line and store the
    parsed values on this object through _setAttributesFromOptions().
    """
    desc = "For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.\n"
    # The description does not mention retrieving the database connection
    # from the environment: the configuration file is mandatory.

    #TODO: format options as other scripts (have a look at LaunchTemplate)
    parser = RepetOptionParser(description = desc, epilog = "")

    # One entry per option: (short flag, long flag, keyword arguments).
    lOptionSpecs = [
        ("-S", "--step", dict(dest = "step", action = "store", type = "int",
                              help = "step (0: all steps [default], 1:file generation, 2:multiple alignments, 3:phylogenies)",
                              default = 0)),
        ("-p", "--pathTable", dict(dest = "path", action = "store", type = "string",
                                   help = "(mandatory) table with the annotations (format=path)",
                                   default = "")),
        ("-s", "--refSeqTable", dict(dest = "refSeqTable", action = "store", type = "string",
                                     help = "(mandatory) table with the TE reference sequences (format=seq)",
                                     default = "")),
        ("-g", "--genomeSeqTable", dict(dest = "genomeSeqTable", action = "store", type = "string",
                                        help = "(mandatory) table with the genome sequence (format=seq)",
                                        default = "")),
        ("-r", "--TErefseq", dict(dest = "TErefseq", action = "store", type = "string",
                                  help = "name or file with TE reference sequence(s) (all by default)",
                                  default = "")),
        ("-m", "--MSAmethod", dict(dest = "MSAmethod", action = "store", type = "string",
                                   help = "MSA method (default=RefAlign/Map)",
                                   default = "RefAlign")),
        ("-l", "--minCopyLength", dict(dest = "minCopyLength", action = "store", type = "int",
                                       help = "minimum length of copies (default=100)",
                                       default = 100)),
        ("-n", "--nbLongestCopies", dict(dest = "nbLongestCopies", action = "store", type = "int",
                                         help = "number of longest copies to use (default=20)",
                                         default = 20)),
        ("-y", "--minPropCopy", dict(dest = "minPropCopy", action = "store", type = "float",
                                     help = "minimum copy proportion compare to references (default=0.5)",
                                     default = 0.5)),
        ("-R", "--keepRefseq", dict(dest = "keepRefseq", action = "store_true",
                                    help = "keep the reference sequence (only with Refalign)",
                                    default = False)),
        ("-C", "--config", dict(dest = "configFileName", action = "store", type = "string",
                                help = "(mandatory) config file name to set database connection",
                                default = "")),
        ("-q", "--queue", dict(dest = "queue", action = "store", type = "string",
                               help = "queue name",
                               default = "")),
        # Giving -c switches cleaning OFF (store_false with default True).
        ("-c", "--clean", dict(action = "store_false", dest = "clean",
                               help = "don't clean",
                               default = True)),
        ("-d", "--tmpDir", dict(dest = "tmpDir", action = "store", type = "string",
                                help = "temporary directory",
                                default = "")),
        ("-v", "--verbosity", dict(dest = "verbosity", action = "store", type = "int",
                                   help = "verbosity level (default=0)",
                                   default = 0)),
    ]
    for shortFlag, longFlag, dKeywords in lOptionSpecs:
        parser.add_option(shortFlag, longFlag, **dKeywords)

    # Only the parsed option values are used, not the second element returned.
    parsedOptions = parser.parse_args()[0]
    self._setAttributesFromOptions(parsedOptions)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
107
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _setAttributesFromOptions(self, options):
    """
    Copy every parsed command-line value onto this object via its setter.

    @param options: object exposing the parsed option values as attributes
    """
    # (setter name, option attribute), applied in this order.
    lSetterAndOption = (
        ("setConfigFileName", "configFileName"),
        ("setStep", "step"),
        ("setPathTable", "path"),
        ("setRefSeqTable", "refSeqTable"),
        ("setGenomeSeqTable", "genomeSeqTable"),
        ("setTErefseq", "TErefseq"),
        ("setMSAmethod", "MSAmethod"),
        ("setMinCopyLength", "minCopyLength"),
        ("setNbLongestCopies", "nbLongestCopies"),
        ("setMinPropCopy", "minPropCopy"),
        ("setKeepRefseq", "keepRefseq"),
        ("setQueue", "queue"),
        ("setClean", "clean"),
        ("setTmpDir", "tmpDir"),
        ("setVerbosity", "verbosity"),
    )
    for setterName, optionName in lSetterAndOption:
        getattr(self, setterName)(getattr(options, optionName))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
124
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setStep(self, step):
    """Set the step to run (stored in the public 'step' attribute)."""
    self.step = step
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
127
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setPathTable(self, path):
    """Set the name of the table holding the annotations."""
    self._pathTable = path
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
130
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setRefSeqTable(self, refSeqTable):
    """Set the name of the table holding the TE reference sequences."""
    self._refSeqTable = refSeqTable
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
133
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setGenomeSeqTable(self, genomeSeqTable):
    """Set the name of the table holding the genome sequence."""
    self._genomeSeqTable = genomeSeqTable
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
136
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setTErefseq(self, TErefseq):
    """Set the TE reference sequence name, or the file listing such names."""
    self._TErefseq = TErefseq
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
139
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setMSAmethod(self, MSAmethod):
    """Set the name of the multiple-alignment method."""
    self._MSAmethod = MSAmethod
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
142
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setMinCopyLength(self, minCopyLength):
    """Set the minimum length a copy must have to be kept."""
    self._minCopyLength = minCopyLength
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
145
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setNbLongestCopies(self, nbLongestCopies):
    """Set how many of the longest copies are used."""
    self._nbLongestCopies = nbLongestCopies
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
148
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setMinPropCopy(self, minPropCopy):
    """Set the minimum copy length as a proportion of the reference length."""
    self._minPropCopy = minPropCopy
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
151
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setKeepRefseq(self, keepRefseq):
    """Set whether the reference sequence is kept."""
    self._keepRefseq = keepRefseq
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
154
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setQueue(self, queue):
    """Set the queue name."""
    self._queue = queue
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
157
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setClean(self, clean):
    """Set whether to clean up."""
    self._clean = clean
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
160
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setTmpDir(self, tmpDir):
    """Set the temporary directory."""
    self._tmpDir = tmpDir
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
163
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setVerbosity(self, verbosity):
    """
    Set the verbosity level.

    Only the stored value changes; this method does not touch the logger.
    """
    self._verbosity = verbosity
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
166
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setup_env(self):
    """
    Export the REPET settings found in the configuration file as environment
    variables.

    Each option read from the 'repet_env' section is copied to the
    environment variable whose name is the upper-cased option name
    (repet_host -> REPET_HOST, ...).

    @raise Exception: if the configuration file does not begin with a
                      section header (raised through _logAndRaise)
    """
    # Use RepetConfigParser?
    config = ConfigParser.ConfigParser()
    # The 'with' block closes the handle even when parsing fails.
    with open(self._configFileName) as configFileHandle:
        try:
            config.readfp(configFileHandle)
        except MissingSectionHeaderError:
            self._logAndRaise("Configuration file %s must begin with a section header" % self._configFileName)

    lOptionNames = ("repet_host", "repet_user", "repet_pw", "repet_db",
                    "repet_port", "repet_job_manager")
    for optionName in lOptionNames:
        os.environ[optionName.upper()] = config.get("repet_env", optionName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
182
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setConfigFileName(self, configFileName):
    """Set the name of the configuration file."""
    self._configFileName = configFileName
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
185
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def checkAttributes( self ):
    """
    Check the attributes are valid before running the algorithm.

    The three table names and the configuration file are mandatory. When the
    configuration file exists, setup_env() is called to load it. An empty
    temporary directory is replaced by the current working directory.

    @raise Exception: (through _logAndRaise) if a mandatory value is missing,
                      if the configuration file does not exist, or if the MSA
                      method is missing or unknown when step is 2 or 3
    """
    if self._pathTable == "":
        self._logAndRaise("PathTable is mandatory")
    if self._refSeqTable == "":
        self._logAndRaise("RefSeqTable is mandatory")
    if self._genomeSeqTable == "":
        self._logAndRaise("GenomeSeqTable is mandatory")

    if self._configFileName == "":
        self._logAndRaise("Configuration file is mandatory")
    elif FileUtils.isRessourceExists(self._configFileName):
        self.setup_env()
    else:
        self._logAndRaise("Configuration file '%s' does not exist!" % self._configFileName)

    # NOTE(review): the method is only validated for steps 2 and 3, not for
    # step 0 ("all steps") -- confirm whether that is intended.
    if self.step in (2, 3) and self._MSAmethod not in ["RefAlign","Map"]:
        if self._MSAmethod is None or self._MSAmethod == "":
            self._logAndRaise("Missing method option")
        self._logAndRaise("Method '%s' not yet available" % ( self._MSAmethod ))

    if self._tmpDir == "":
        self._tmpDir = os.getcwd()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
211
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def connectSql(self):
    """
    Create the database instance from the configuration file, then the
    adaptators on the annotation table and on the reference-sequence table.
    """
    database = DbFactory().createInstance(configFileName = self._configFileName, verbosity = 1)
    self._db = database
    self._tpaAnnot = TablePathAdaptator(database, self._pathTable)
    self._tsaRef = TableSeqAdaptator(database, self._refSeqTable)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
216
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getNamesOfTErefSeq( self ):
    """
    Return a sorted list with the names of reference TEs.

    - If no name was given ('_TErefseq' is empty), every accession of the
      reference-sequence table is a candidate.
    - If '_TErefseq' is an existing file, the first tab-separated field of
      each non-empty line is a candidate.
    - Otherwise '_TErefseq' itself is the single candidate.

    Candidates absent from the reference-sequence table are reported with a
    warning and left out of the result.

    @return: sorted list of names
    """
    if self._TErefseq == "":
        lCandidates = self._tsaRef.getAccessionsList()
    elif os.path.isfile( self._TErefseq ):
        lCandidates = []
        with open( self._TErefseq, "r" ) as refseqFileHandler:
            for line in refseqFileHandler:
                # Strip only the line terminator, so a last line without a
                # trailing newline keeps its final character.
                name = line.rstrip("\r\n").split("\t")[0]
                if name != "":
                    lCandidates.append( name )
    else:
        lCandidates = [ self._TErefseq ]

    # Build a new list rather than removing from the list being iterated:
    # removing in place skips the element following each removed one.
    lNamesTErefSeq = []
    for name in lCandidates:
        if self._tsaRef.isAccessionInTable( name ):
            lNamesTErefSeq.append( name )
        else:
            self._log.warning("'%s' not in table '%s'" % (name, self._refSeqTable))
    lNamesTErefSeq.sort()
    self._log.info("nb of TE reference sequences: %d" % (len(lNamesTErefSeq)))
    return lNamesTErefSeq
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
242
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
243
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getTErefSeqInFastaFiles( self, lNamesTErefSeq ):
    """
    Write each reference TE sequence to its own fasta file '<name>.fa'.

    @param lNamesTErefSeq: names of the reference TEs to save
    """
    for refseqName in lNamesTErefSeq:
        fastaFileName = refseqName + ".fa"
        self._log.debug("save sequence of '%s'..." % ( refseqName ))
        self._tsaRef.saveAccessionsListInFastaFile( [refseqName], fastaFileName )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
251
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getCopiesInFastaFilesPerTErefSeq( self, lNamesTErefSeq ):
    """
    Save sequences of TE copies in fasta files, one '<name>_copies.fa' file
    per reference TE.

    A reference whose output file already exists is skipped. For the others,
    the path identifiers matching the reference are walked in the order
    returned by the annotation adaptator (presumably longest chain first,
    judging by the method name -- confirm). A copy is written when its
    cumulative length is at least '_minCopyLength' and at least
    '_minPropCopy' times the reference length. Writing stops once
    '_nbLongestCopies' copies have been saved.

    @param lNamesTErefSeq: names of the reference TEs
    """
    self._log.info("retrieve the copies...")
    tsaChr = TableSeqAdaptator( self._db, self._genomeSeqTable )
    totalNbCopies = 0
    totalNbSavedCopies = 0
    for name in lNamesTErefSeq:
        copiesFileName = name+"_copies.fa"
        if os.path.exists(copiesFileName):
            continue
        nbSavedCopies = 0
        nbSavedFragments = 0
        # The 'with' block closes the output file even if a query or a write
        # raises. NOTE(review): a partially written file is still left on
        # disk and will be skipped by the existence check on the next run.
        with open( copiesFileName, "w" ) as outFile:
            self._log.debug("Fetching path nums for subject: %s " % name)
            lPathNums = self._tpaAnnot.getIdListSortedByDecreasingChainLengthFromSubject(name)
            nbCopies = len(lPathNums)
            totalNbCopies += nbCopies
            self._log.debug("refseq '%s': %d copies" % ( name, nbCopies ))
            lengthRefseq = self._tsaRef.getSeqLengthFromAccession( name )
            for pathNum in lPathNums:
                if nbSavedCopies >= self._nbLongestCopies:
                    break
                lPaths = self._tpaAnnot.getPathListFromId( pathNum )
                lSets = PathUtils.getSetListFromQueries( lPaths )
                copyLength = SetUtils.getCumulLength( lSets )
                if copyLength >= self._minCopyLength \
                and copyLength >= self._minPropCopy * lengthRefseq:
                    bs = tsaChr.getBioseqFromSetList( lSets )
                    bs.write(outFile)
                    nbSavedCopies += 1
                    nbSavedFragments += len(lPaths)
            self._log.debug(" (saved: %d copies, %d fragments)\n" % ( nbSavedCopies, nbSavedFragments ) )
        totalNbSavedCopies += nbSavedCopies
        if nbSavedCopies == 0 and nbCopies != 0:
            # This message names only the length threshold, although copies
            # can also be rejected by the proportion criterion.
            self._log.warning("No copy >= %d" % ( self._minCopyLength ))
    self._log.info("nb of copies: %d (%d saved)" % ( totalNbCopies, totalNbSavedCopies ))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
292
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def filter4Alignments( self, lNamesTErefSeq ):
    """
    Filter the TE copy files by sequence length and keep the longest copies,
    producing one '<file>.filtered' per input file.

    With exactly one reference name, only its '<name>_copies.fa' file is
    used. Otherwise every '*_copies.fa' file of the current directory is
    used. Files that already have a '.filtered' counterpart are skipped.

    @param lNamesTErefSeq: names of the reference TEs
    @raise Exception: (through _logAndRaise) if the expected input file(s)
                      cannot be found
    """
    self._log.info("filtering copies...")
    if len(lNamesTErefSeq) == 1:
        onlyCopiesFile = "%s_copies.fa" % ( lNamesTErefSeq[0] )
        if not os.path.exists( onlyCopiesFile ):
            self._logAndRaise("first run step 1")
        lInFiles = [ onlyCopiesFile ]
    else:
        lInFiles = glob.glob( "*_copies.fa" )
    if not lInFiles:
        self._logAndRaise("no file '*_copies.fa'")

    nbFiltered = 0
    for copiesFile in lInFiles:
        filteredFile = copiesFile + ".filtered"
        if os.path.exists( filteredFile ):
            continue
        nbFiltered += 1
        self._log.debug("TE %d --> %s" %(nbFiltered,copiesFile))
        # Intermediate files named after the length threshold; they are
        # renamed or removed below.
        supFile = copiesFile + ".Sup" + str(self._minCopyLength)
        infFile = copiesFile + ".Inf" + str(self._minCopyLength)
        FastaUtils.dbLengthFilter( self._minCopyLength, copiesFile, verbose=self._verbosity )
        FastaUtils.dbLongestSequences( self._nbLongestCopies, supFile, verbose=self._verbosity )
        os.rename( supFile + ".best" + str(self._nbLongestCopies), filteredFile )
        os.remove( supFile )
        os.remove( infFile )
    self._log.info("files filtered: %d" % (nbFiltered))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
318
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def buildInFiles4Launcher( self, lNamesTErefSeq ):
    """
    Save sequences of TE copies with reference in fasta files for launcher usage.

    For each name that has no '<name>_all.fa.oriented' file yet and whose
    '<name>_copies.fa.filtered' file is not empty (according to FastaUtils.dbSize),
    '<name>.fa' and '<name>_copies.fa.filtered' are concatenated into '<name>_all.fa',
    which is then given to OrientSequences (prgToOrient="mummer").

    @param lNamesTErefSeq: names of the TE reference sequences to process
    @type lNamesTErefSeq: list of strings
    @raise IOError: if one of the two files to concatenate cannot be read
    @note: calls self._logAndRaise("no copies") when no '*_all.fa.oriented' file
           exists in the current directory at the end
    """
    # local import: the file-level import block is not touched by this change
    import shutil

    self._log.info("building input files for launcher...")
    for name in lNamesTErefSeq:
        if os.path.exists( "%s_all.fa.oriented" % ( name ) ):
            # oriented file already present: nothing to do for this name
            continue
        if FastaUtils.dbSize( "%s_copies.fa.filtered" % ( name ) ) > 0:
            allFileName = "%s_all.fa" % ( name )
            # Concatenate in Python instead of 'cat' through a shell: names holding
            # spaces or shell metacharacters are handled correctly, and a missing
            # input file is reported instead of being silently ignored.
            with open( allFileName, "wb" ) as allFile:
                for partFileName in [ "%s.fa" % ( name ), "%s_copies.fa.filtered" % ( name ) ]:
                    with open( partFileName, "rb" ) as partFile:
                        shutil.copyfileobj( partFile, allFile )
            ors = OrientSequences(inFileName=allFileName, prgToOrient="mummer", clean=True, verbosity=self._verbosity - 1)
            ors.run()
            ors.clean()
    if len( glob.glob("*_all.fa.oriented") ) == 0:
        self._logAndRaise("no copies")
    self._log.info("done building input files for launcher...")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
335
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def renameFile( self, fromName, toName):
    """
    Rename the files of the current directory whose name ends with 'fromName'
    so that they end with 'toName' instead.

    Only the trailing suffix is rewritten: an earlier occurrence of 'fromName'
    inside the file name is left untouched.

    @param fromName: suffix of the files to rename
    @type fromName: string
    @param toName: suffix replacing 'fromName'
    @type toName: string
    """
    for f in glob.glob( "*%s" % fromName ):
        # 'fromName' is used as a glob pattern, hence a match may not literally
        # end with it (e.g. if it holds '[' or '?'); such files are left as is.
        if not f.endswith( fromName ):
            continue
        os.rename( f, f[:len(f) - len(fromName)] + toName )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
340
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _preparejobs(self, iLauncher, cDir):
    """
    Prepare one Align job per file matching 'self.alignFileSuffix' in 'self.queriesDir'.

    @param iLauncher: object providing prepareCommands_withoutIndentation(); it is also
                      forwarded to self._createLaunchAlignCommands()
    @param cDir: not used in this method, kept so that the signature stays unchanged
    @return: list holding, for each query file, the value returned by
             iLauncher.prepareCommands_withoutIndentation()
    """
    self._log.info("Preparing Align jobs")
    self._log.debug("queriesDir: %s, alignFileSuffix: %s" % (self.queriesDir, self.alignFileSuffix))
    queryFiles = glob.glob("%s/%s" % (self.queriesDir, self.alignFileSuffix))
    self._log.debug("queryFiles: %s" % " ".join(queryFiles))
    lCmdsTuples = []
    # The paths returned by glob already start with 'self.queriesDir': they are used
    # as is, since joining them to the directory again would duplicate it whenever
    # 'self.queriesDir' is a relative path.
    for queryFilePath in queryFiles:
        lCmds = [ self._createLaunchAlignCommands(iLauncher, queryFilePath) ]
        lCmdStart = []
        lCmdFinish = []
        lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
    self._log.info("Finished preparing Align jobs")
    return lCmdsTuples
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
357
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _preparePhyMljobs(self, iLauncher, cDir):
    """
    Prepare one PhyML job per file matching 'self.phyloFileSuffix' in 'self.queriesDir'.

    @param iLauncher: object providing prepareCommands_withoutIndentation(); it is also
                      forwarded to self._createLaunchPhyMLCommands()
    @param cDir: not used in this method, kept so that the signature stays unchanged
    @return: list holding, for each query file, the value returned by
             iLauncher.prepareCommands_withoutIndentation()
    """
    self._log.info("Preparing PhyMl jobs")
    queryFiles = glob.glob("%s/%s" % (self.queriesDir, self.phyloFileSuffix))
    self._log.debug("queryFiles: %s" % " ".join(queryFiles))
    lCmdsTuples = []
    # The paths returned by glob already start with 'self.queriesDir': they are used
    # as is, since joining them to the directory again would duplicate it whenever
    # 'self.queriesDir' is a relative path.
    for queryFilePath in queryFiles:
        lCmds = [ self._createLaunchPhyMLCommands(iLauncher, queryFilePath) ]
        lCmdStart = []
        lCmdFinish = []
        lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
    self._log.info("Finished preparing PhyMl jobs")
    return lCmdsTuples
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
372
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _createLaunchAlignCommands(self, iLauncher, query):
    """
    Build the command launching the multiple alignment program on one query file.

    The program is 'LaunchMap.py' when self._MSAmethod is "Map" and
    'LaunchRefAlign.py' when it is "RefAlign"; any other value is reported through
    self._logAndRaise() instead of failing later on an unbound variable.

    @param iLauncher: object providing getSystemCommand()
    @param query: path of the fasta file to align; the output is '<query>.fa_aln'
    @type query: string
    @return: the value returned by iLauncher.getSystemCommand()
    """
    dMethod2Prg = { "Map": "LaunchMap.py", "RefAlign": "LaunchRefAlign.py" }
    if self._MSAmethod not in dMethod2Prg:
        self._logAndRaise("unknown MSA method '%s' (expected 'Map' or 'RefAlign')" % self._MSAmethod)
    prg = dMethod2Prg[self._MSAmethod]

    lArgs = []
    lArgs.append("-i %s" % query)
    lArgs.append(" -o %s.fa_aln" % query)
    lArgs.append("-v 1" )

    # '-r' is only passed to LaunchRefAlign.py, when the reference sequence is kept
    if self._MSAmethod == "RefAlign" and self._keepRefseq:
        lArgs.append("-r " )

    self._log.debug("Prepared Align commands : %s %s" % (prg, " ".join(lArgs)))

    return iLauncher.getSystemCommand("%s" % prg, lArgs)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
391
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def launchMultAlignments(self, lNamesTErefSeq):
    """
    Launch the multiple alignment jobs on the oriented fasta files of the current directory.

    A single reference name restricts the jobs to '<name>_all.fa.oriented'; otherwise
    every '*_all.fa.oriented' file is used. Once the jobs are done, the
    '*_all.fa.oriented.fa_aln' files are renamed so that their suffix holds the
    lower-cased MSA method.

    @param lNamesTErefSeq: names of the TE reference sequences
    @type lNamesTErefSeq: list of strings
    """
    self.queriesDir = os.getcwd()

    hasSingleRefSeq = ( len(lNamesTErefSeq) == 1 )
    self.alignFileSuffix = ( "%s_all.fa.oriented" % (lNamesTErefSeq[0]) ) if hasSingleRefSeq else "*_all.fa.oriented"

    currentDir = os.getcwd()
    jobGroupId = "%s_%s" % ( self._refSeqTable, self._MSAmethod )
    iDb = DbFactory.createInstance(configFileName=self._configFileName)
    iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")
    iLauncher = Launcher(iTJA, os.getcwd(), "", "", currentDir, self._tmpDir, "jobs", self._queue, jobGroupId)
    lCmdsTuples = self._preparejobs(iLauncher, currentDir)
    iLauncher.runLauncherForMultipleJobs("Align", lCmdsTuples, self._clean)

    # tag the alignment files with the method that produced them
    taggedSuffix = "_all.fa.oriented_%s.fa_aln" % (self._MSAmethod.lower())
    self.renameFile("_all.fa.oriented.fa_aln", taggedSuffix)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
415
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
416 # def __makeMultAlignments( self, lNamesTErefSeq ):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
417 # """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
418 # Make multiple alignments via Map or Refalign for each TE family
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
419 # """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
420 # self._pL.reset("")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
421 # if self._MSAmethod == "Map":
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
422 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
423 # prg = os.environ["REPET_PATH"] + "/bin/srptMAP.py"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
424 # elif self._MSAmethod == "RefAlign":
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
425 # prg = os.environ["REPET_PATH"] + "/bin/srptRefalign.py"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
426 # cmd = prg
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
427 # cmd += " -g %s_%s" % ( self._refSeqTable, self._MSAmethod )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
428 # cmd += " -q %s" % ( os.getcwd() )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
429 # if len(lNamesTErefSeq) == 1:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
430 # cmd += " -S %s_all.fa.oriented" % ( lNamesTErefSeq[0] )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
431 # else:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
432 # cmd += " -S '*_all.fa.oriented'"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
433 # cmd += " -Q %s" % ( self._queue )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
434 # cmd += " -C %s" % ( self._configFileName )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
435 # if self._MSAmethod == "Refalign" and self._keepRefseq:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
436 # cmd += " -r"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
437 # if not self._clean :
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
438 # cmd += " -c"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
439 # if self._tmpDir != "":
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
440 # cmd += " -d %s" % ( self._tmpDir )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
441 # cmd += " -v %d" % ( self._verbosity )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
442 # self._pL.launch( prg, cmd )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
443 # lFiles = glob.glob( "*_all.fa.oriented.fa_aln" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
444 # for f in lFiles:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
445 # os.rename( f, f.replace("_all.fa.oriented.fa_aln","_all.fa.oriented_%s.fa_aln" % (self._MSAmethod.lower()) ) )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
446
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
447
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def filter4phylogenies( self, verbosity=0 ):
    """
    Clean the multiple alignments of the current directory before building phylogenies.

    Each '*_all.fa.oriented_<method>.fa_aln' file is loaded into an AlignedBioseqDB and
    cleaned with cleanMSA(); the result is saved as '<file>.clean' only when its size
    (getSize()) is greater than 2. The final log message counts every alignment
    processed, including the skipped ones.

    @param verbosity: not used in this method
    @type verbosity: integer
    """
    self._log.info("Filtering MSA")
    alnPattern = "*_all.fa.oriented_%s.fa_aln" % ( self._MSAmethod.lower() )
    count = 0
    for count, inFileName in enumerate( glob.glob( alnPattern ), 1 ):
        self._log.debug("clean MSA %d --> %s" % (count,inFileName))
        alignDB = AlignedBioseqDB()
        alignDB.load(inFileName)
        alignDB.cleanMSA()
        if alignDB.getSize() <= 2:
            self._log.debug("skip!")
            continue
        alignDB.save( inFileName + ".clean" )
        self._log.debug("clean!")
    self._log.info("MSA cleaned: %d" % count)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
467
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
468
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _createLaunchPhyMLCommands(self, iLauncher, query):
    """
    Build the command launching 'LaunchPhyML.py' on one cleaned alignment.

    @param iLauncher: object providing getSystemCommand()
    @param query: path of the input file; the output is '<query>.fa_phylo'
    @type query: string
    @return: the value returned by iLauncher.getSystemCommand()
    """
    prg = "LaunchPhyML.py"
    lArgs = [
        "-i %s" % query,
        "-o %s.fa_phylo" % query,
        "-v %d" % (self._verbosity-1),
    ]
    self._log.debug("Prepared Phyml commands : %s %s" % (prg, " ".join(lArgs)))
    return iLauncher.getSystemCommand(prg, lArgs)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
486
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def makePhylogenies( self ):
    """
    Launch PhyML on each TE family.

    One job is prepared (see _preparePhyMljobs) for each
    '*_all.fa.oriented_<method>.fa_aln.clean' file of the current directory, then all
    the jobs are run through the launcher.
    """
    # _preparePhyMljobs() reads 'self.queriesDir', which is otherwise only assigned in
    # launchMultAlignments() (as far as this part of the file shows): set it here too,
    # so that step 3 can be run on its own.
    self.queriesDir = os.getcwd()
    self.phyloFileSuffix = "*_all.fa.oriented_%s.fa_aln.clean" % ( self._MSAmethod.lower() )

    queue = self._queue
    cDir = os.getcwd()
    tmpDir = self._tmpDir
    # the process id is part of the group id given to the launcher
    groupid = "%s_PHY_%s" % ( self._refSeqTable, os.getpid() )
    acronym = "Phylo"
    iDb = DbFactory.createInstance(configFileName=self._configFileName)
    iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")
    iLauncher = Launcher(iTJA, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid)
    lCmdsTuples = self._preparePhyMljobs(iLauncher, cDir)
    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, self._clean)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
503
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
504
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def start( self ):
    """
    Commands run before the steps: check the attributes, log the START message
    with the current step, then call connectSql().
    """
    self.checkAttributes()
    startMsg = "START GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step
    self._log.info(startMsg)
    self.connectSql()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
512
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def end( self ):
    """
    Commands run after the steps: close 'self._db', then log the END message
    with the current step.
    """
    self._db.close()
    endMsg = "END GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step
    self._log.info(endMsg)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
519
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def run( self ):
    """
    Run the program.

    According to 'self.step' (0 runs all the steps):
      - 1: save the TE reference sequences and their copies in fasta files
      - 2: filter the copies, build the launcher input files, launch the multiple alignments
      - 3: filter the alignments, launch the phylogenies
    end() is always called once start() has succeeded, even when a step raises, so
    that 'self._db' gets closed.
    """
    LoggerFactory.setLevel(self._log, self._verbosity)
    self.start()
    try:
        lNamesTErefSeq = self.getNamesOfTErefSeq()
        self._log.debug("lNamesTErefSeq: %s" % " ".join(lNamesTErefSeq))

        if self.step in [0, 1]:
            self.getTErefSeqInFastaFiles( lNamesTErefSeq )
            self.getCopiesInFastaFilesPerTErefSeq( lNamesTErefSeq )

        if self.step in [0, 2]:
            self.filter4Alignments(lNamesTErefSeq)
            self.buildInFiles4Launcher(lNamesTErefSeq)
            self.launchMultAlignments(lNamesTErefSeq)

        if self.step in [0, 3]:
            self.filter4phylogenies(verbosity=self._verbosity)
            self.makePhylogenies()
    finally:
        # without this, a failing step would skip end() and leave 'self._db' open
        self.end()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
542
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: build the program object, read its options from the
    # command line, then run it.
    iProgram = GetMultAlignAndPhylogenyPerTErefSeq()
    iProgram.setAttributesFromCmdLine()
    iProgram.run()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
547