Mercurial > repos > vmarcon > repet_tedenovo
comparison TEdenovo_lite.py @ 0:baea09e6722b draft default tip
1st Uploaded
| author | vmarcon |
|---|---|
| date | Mon, 06 Feb 2017 13:31:53 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:baea09e6722b |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 | |
| 4 import os | |
| 5 import sys | |
| 6 import time | |
| 7 import glob | |
| 8 import shutil | |
| 9 import ConfigParser | |
| 10 from commons.core.seq.FastaUtils import * | |
| 11 import operator | |
| 12 import re | |
| 13 | |
| 14 | |
| 15 | |
| 16 if not "REPET_PATH" in os.environ.keys(): | |
| 17 print "ERROR: no environment variable REPET_PATH" | |
| 18 sys.exit(1) | |
| 19 | |
| 20 if (not "REPET_DB" in os.environ.keys()) or (not "REPET_HOST" in os.environ.keys()) or (not "REPET_PORT" in os.environ.keys()) or (not "REPET_USER" in os.environ.keys()) or (not "REPET_PW" in os.environ.keys()): | |
| 21 print "ERROR: there is at least one environment database variable missing : REPET_DB, REPET_PORT, REPET_HOST, REPET_USER or REPET_PW" | |
| 22 sys.exit(1) | |
| 23 | |
| 24 if not "REPET_JOB_MANAGER" in os.environ.keys(): | |
| 25 print "ERROR: no environment variable REPET_JOB_MANAGER" | |
| 26 sys.exit(1) | |
| 27 | |
| 28 | |
| 29 if not "%s/bin" % os.environ["REPET_PATH"] in os.environ["PATH"]: | |
| 30 os.environ["PATH"] = "%s/bin:%s" % (os.environ["REPET_PATH"], os.environ["PATH"]) | |
| 31 | |
| 32 sys.path.append(os.environ["REPET_PATH"]) | |
| 33 if not "PYTHONPATH" in os.environ.keys(): | |
| 34 os.environ["PYTHONPATH"] = os.environ["REPET_PATH"] | |
| 35 else: | |
| 36 os.environ["PYTHONPATH"] = "%s:%s" % (os.environ["REPET_PATH"], os.environ["PYTHONPATH"]) | |
| 37 | |
| 38 from commons.core.LoggerFactory import LoggerFactory | |
| 39 from commons.core.checker.RepetException import RepetException | |
| 40 from commons.core.utils.FileUtils import FileUtils | |
| 41 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
| 42 from commons.core.seq.FastaUtils import FastaUtils | |
| 43 from commons.core.sql.DbFactory import DbFactory | |
| 44 from itertools import islice | |
| 45 | |
| 46 LOG_DEPTH = "TEdenovo.pipeline" | |
| 47 | |
| 48 class TEdenovo_lite(object): | |
| 49 | |
| 50 def __init__(self, configFileName = "", fastaFileName = "", verbosity = 0): | |
| 51 self._configFileName = configFileName | |
| 52 self._fastaFileName = os.path.abspath(fastaFileName) | |
| 53 self._projectName = time.strftime("%Y%m%d%H%M%S") | |
| 54 self._limitSeqSize = 200000000 | |
| 55 | |
| 56 if "REPET_NUCL_BANK" in os.environ.keys(): | |
| 57 if os.path.exists(os.environ["REPET_NUCL_BANK"]): | |
| 58 self._nucl_bank = os.environ["REPET_NUCL_BANK"] | |
| 59 else : | |
| 60 print "ERROR : the nucleotides bank configured doesn't exist. Please correct it in the REPET_NUCL_BANK variable" | |
| 61 sys.exit(1) | |
| 62 else : | |
| 63 self._nucl_bank = "" | |
| 64 if "REPET_PROT_BANK" in os.environ.keys(): | |
| 65 if os.path.exists(os.environ["REPET_PROT_BANK"]): | |
| 66 self._prot_bank = os.environ["REPET_PROT_BANK"] | |
| 67 else : | |
| 68 print "ERROR : the proteins bank configured doesn't exist. Please correct it in the REPET_PROT_BANK variable" | |
| 69 sys.exit(1) | |
| 70 else : | |
| 71 self._prot_bank = "" | |
| 72 if "REPET_HMM_PROFILES" in os.environ.keys(): | |
| 73 if os.path.exists(os.environ["REPET_HMM_PROFILES"]): | |
| 74 self._HMM_profiles = os.environ["REPET_HMM_PROFILES"] | |
| 75 else : | |
| 76 print "ERROR : the hmm profiles bank configured doesn't exist. Please correct it in the REPET_HMM_PROFILES variable" | |
| 77 sys.exit(1) | |
| 78 else : | |
| 79 self._HMM_profiles = "" | |
| 80 if "REPET_RDNA_BANK" in os.environ.keys(): | |
| 81 if os.path.exists(os.environ["REPET_RDNA_BANK"]): | |
| 82 self._rdna_bank = os.environ["REPET_RDNA_BANK"] | |
| 83 else : | |
| 84 print "ERROR : the rDNA bank configured doesn't exist. Please correct it in the REPET_PROT_BANK variable" | |
| 85 sys.exit(1) | |
| 86 else : | |
| 87 self._rdna_bank = "" | |
| 88 if self._nucl_bank == "" and self._prot_bank == "" and self._HMM_profiles == "" and self._rdna_bank == "" : | |
| 89 print "WARNING : No bank are configured ... To set banks please add REPET_NUCL_BANK, REPET_PROT_BANK, REPET_HMM_PROFILES and/or REPET_RDNA_BANK in your environment" | |
| 90 if "REPET_TMP_DIR" in os.environ.keys(): | |
| 91 self._tmp_dir = os.environ["REPET_TMP_DIR"] | |
| 92 else : | |
| 93 self._tmp_dir = "" | |
| 94 self._outputFasta = "" | |
| 95 self._classif = False | |
| 96 self._outputClassif = "" | |
| 97 self._outputStats = "" | |
| 98 self._verbosity = verbosity | |
| 99 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity) | |
| 100 | |
| 101 def setAttributesFromCommandLine(self): | |
| 102 description = "This script is a ligth version of TEdenovo. It writes configuration file and launches TEdenovo." | |
| 103 epilog = "Example: TEdenovo_lite.py -i fastaFileName \n" | |
| 104 version = "2.0" | |
| 105 parser = RepetOptionParser(description = description, epilog = epilog, version = version) | |
| 106 parser.add_option("-i", "--fasta", dest = "fastaFileName" , action = "store" , type = "string", help ="input fasta file name ", default = "") | |
| 107 parser.add_option("-c", "--withClassif", dest="withClassif", action="store_true", help = " Get classification files in output.", default = False) | |
| 108 parser.add_option("-o", "--output", dest="outputLabel" , action = "store", type = "string", help = "[optional] Prefix label for output file(s).", default = "") | |
| 109 parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "Verbosity [optional] [default: 2]", default = 2) | |
| 110 options = parser.parse_args()[0] | |
| 111 self._setAttributesFromOptions(options) | |
| 112 | |
| 113 def _setAttributesFromOptions(self, options): | |
| 114 self.setConfigFileName("") | |
| 115 if options.fastaFileName=="": | |
| 116 print "ERROR : You have to enter an input fasta file" | |
| 117 print "Example: TEdenovo_lite.py -i fastaFileName \n" | |
| 118 print "More option : TEdenovo_lite.py --help " | |
| 119 exit(1) | |
| 120 else : | |
| 121 self._fastaFileName = os.path.abspath(options.fastaFileName) | |
| 122 if options.outputLabel=="": | |
| 123 fastaBaseName=os.path.abspath(re.search(r'([^\/\\]*)\.[fa|fasta|fsa|fas]',options.fastaFileName).groups()[0]) | |
| 124 options.outputLabel=fastaBaseName | |
| 125 self._outputFasta = os.path.abspath(options.outputLabel+"-%s-denovoLibTEs_filtered.fa"%self._projectName[:8]) | |
| 126 self._outputStats = os.path.abspath(options.outputLabel+"-%s-classif_stats.txt"%self._projectName[:8]) | |
| 127 self._verbosity = options.verbosity | |
| 128 if options.withClassif : | |
| 129 self._classif=True | |
| 130 self._outputClassif = os.path.abspath(options.outputLabel+'-%s.classif'%self._projectName[:8]) | |
| 131 | |
| 132 def setConfigFileName(self, configFileName): | |
| 133 self._configFileName = configFileName | |
| 134 if not self._configFileName: | |
| 135 self._configFileName = "TEdenovo_Galaxy_config_%s" % self._projectName | |
| 136 | |
| 137 def setAttributesFromConfigFile(self, configFileName): | |
| 138 config = ConfigParser.ConfigParser() | |
| 139 config.readfp( open(configFileName) ) | |
| 140 | |
| 141 | |
| 142 def _writeConfigFile(self): | |
| 143 if FileUtils.isRessourceExists(self._configFileName): | |
| 144 self._logAndRaise("Configuration file '%s' already exists. Won't be overwritten.") | |
| 145 | |
| 146 shutil.copy("%s/config/TEdenovo.cfg" % os.environ.get("REPET_PATH"), self._configFileName) | |
| 147 self.setAttributesFromConfigFile(self._configFileName) | |
| 148 | |
| 149 os.system("sed -i 's|repet_host: <your_MySQL_host>|repet_host: %s|' %s" % (os.environ["REPET_HOST"], self._configFileName)) | |
| 150 os.system("sed -i 's|repet_user: <your_MySQL_login>|repet_user: %s|' %s" % (os.environ["REPET_USER"], self._configFileName)) | |
| 151 os.system("sed -i 's|repet_pw: <your_MySQL_password>|repet_pw: %s|' %s" % (os.environ["REPET_PW"], self._configFileName)) | |
| 152 os.system("sed -i 's|repet_db: <your_MySQL_db>|repet_db: %s|' %s" % (os.environ["REPET_DB"], self._configFileName)) | |
| 153 os.system("sed -i 's|repet_port: 3306|repet_port: %s|' %s" % (os.environ["REPET_PORT"], self._configFileName)) | |
| 154 os.system("sed -i 's|repet_job_manager: SGE|repet_job_manager: %s|' %s" % (os.environ["REPET_JOB_MANAGER"], self._configFileName)) | |
| 155 os.system("sed -i 's|project_name: <your_project_name>|project_name: %s|' %s" % (self._projectName, self._configFileName)) | |
| 156 os.system("sed -i 's|project_dir: <absolute_path_to_your_project_directory>|project_dir: %s|' %s" % (os.getcwd().replace("/", "\/"), self._configFileName)) | |
| 157 os.system("sed -i 's|tmpDir:|tmpDir: %s|g' %s" % (self._tmp_dir, self._configFileName)) | |
| 158 | |
| 159 if self._nucl_bank != "" and self._nucl_bank != None: | |
| 160 os.system("sed -i 's|TE_BLRn: no|TE_BLRn: yes|' %s" % self._configFileName) | |
| 161 os.system("sed -i 's|TE_BLRtx: no|TE_BLRtx: yes|' %s" % self._configFileName) | |
| 162 os.system("sed -i 's|TE_nucl_bank: <bank_of_TE_nucleotide_sequences_such_as_Repbase>|TE_nucl_bank: %s|' %s" % (os.path.basename(self._nucl_bank), self._configFileName)) | |
| 163 | |
| 164 if self._prot_bank != "" and self._prot_bank != None: | |
| 165 os.system("sed -i 's|TE_BLRx: no|TE_BLRx: yes|' %s" % self._configFileName) | |
| 166 os.system("sed -i 's|TE_prot_bank: <bank_of_TE_amino-acid_sequences_such_as_Repbase>|TE_prot_bank: %s|' %s" % (os.path.basename(self._prot_bank), self._configFileName)) | |
| 167 | |
| 168 if self._HMM_profiles != "" and self._HMM_profiles != None: | |
| 169 os.system("sed -i 's|TE_HMMER: no|TE_HMMER: yes|' %s" % self._configFileName) | |
| 170 os.system("sed -i 's|TE_HMM_profiles: <bank_of_HMM_profiles>|TE_HMM_profiles: %s|' %s" % (os.path.basename(self._HMM_profiles),self._configFileName)) | |
| 171 | |
| 172 if self._rdna_bank != "" and self._rdna_bank != None: | |
| 173 os.system("sed -i 's|rDNA_BLRn: no|rDNA_BLRn: yes|' %s" % self._configFileName) | |
| 174 os.system("sed -i 's|rDNA_bank: <bank_of_rDNA_sequences_from_eukaryota>|rDNA_bank: %s|' %s" % (os.path.basename(self._rdna_bank),self._configFileName)) | |
| 175 | |
| 176 os.system("sed -i 's|filter_host_gene: no|filter_host_gene: yes|' %s" % (self._configFileName)) | |
| 177 | |
| 178 | |
| 179 def removeNstretches(self,maxNstretchesSize=11,minContigsize=10000): | |
| 180 if self._verbosity > 0: | |
| 181 print "Removing Nstretches longer than %d pb and removing conting shorter than %d pb"%(maxNstretchesSize,minContigsize) | |
| 182 t0=time.time() | |
| 183 Nstretches=FastaUtils.getNstretchesRangesList(self._fastaFileName,maxNstretchesSize) | |
| 184 t1=time.time() | |
| 185 refBSDB = BioseqDB(self._fastaFileName) | |
| 186 t3=time.time() | |
| 187 debut=1 | |
| 188 t2=time.time() | |
| 189 if len(Nstretches)>0: | |
| 190 currentchrom=Nstretches[0].seqname | |
| 191 refBS=refBSDB.fetch(currentchrom) | |
| 192 t3=time.time() | |
| 193 newBSDB = BioseqDB() | |
| 194 i=0 | |
| 195 seqInNstretches = [] | |
| 196 for Nstretch in Nstretches : | |
| 197 i+=1 | |
| 198 tmpBS="" | |
| 199 if Nstretch.seqname not in seqInNstretches : | |
| 200 seqInNstretches.append(Nstretch.seqname) | |
| 201 if currentchrom==Nstretch.seqname: | |
| 202 fin=Nstretch.start-1 | |
| 203 size=fin-debut+1 | |
| 204 if size>minContigsize : | |
| 205 tmpBS=refBS.subseq(debut,fin) | |
| 206 newBSDB.add(tmpBS) | |
| 207 | |
| 208 debut=Nstretch.end+1 | |
| 209 | |
| 210 else : | |
| 211 fin = refBSDB.getSeqLength(currentchrom) | |
| 212 size=fin-debut+1 | |
| 213 if size>minContigsize : | |
| 214 tmpBS=refBS.subseq(debut,fin) | |
| 215 newBSDB.add(tmpBS) | |
| 216 currentchrom=Nstretch.seqname | |
| 217 refBS=refBSDB.fetch(currentchrom) | |
| 218 debut=1 | |
| 219 fin==Nstretch.start | |
| 220 size=fin-debut+1 | |
| 221 if size>minContigsize : | |
| 222 tmpBS=refBS.subseq(debut,fin) | |
| 223 newBSDB.add(tmpBS) | |
| 224 debut=Nstretch.end+1 | |
| 225 | |
| 226 if len(Nstretches)>0: | |
| 227 fin = refBSDB.getSeqLength(currentchrom) | |
| 228 size=fin-debut+1 | |
| 229 if size>minContigsize : | |
| 230 tmpBS=refBS.subseq(debut,fin) | |
| 231 newBSDB.add(tmpBS) | |
| 232 | |
| 233 for refName in refBSDB.getHeaderList() : | |
| 234 if refName not in seqInNstretches: | |
| 235 debut=1 | |
| 236 fin=refBSDB.getSeqLength(refName) | |
| 237 size=fin-debut+1 | |
| 238 if size>minContigsize : | |
| 239 refBS=refBSDB.fetch(refName) | |
| 240 tmpBS=refBS.subseq(debut,fin) | |
| 241 newBSDB.add(tmpBS) | |
| 242 | |
| 243 | |
| 244 t5b=time.time() | |
| 245 if self._verbosity >= 2: | |
| 246 print "%s contigs selected from %s scaffolds"%(newBSDB.getSize(),refBSDB.getSize()) | |
| 247 #newBSDB.sortByLength(reverse=True) | |
| 248 | |
| 249 return newBSDB | |
| 250 | |
| 251 | |
| 252 | |
| 253 #TODO refactoring about min size of genome for preprocess | |
| 254 def selectContigs4givenSize(self,BSDB,limit=200000000): | |
| 255 if self._verbosity > 0: | |
| 256 print "Selecting contigs to reach %s pb "%limit | |
| 257 contigsHeadersAndLength=zip(BSDB.getHeaderList(),BSDB.getListOfSequencesLength()) | |
| 258 size=0 | |
| 259 size_small=0 | |
| 260 size_big=500000000 | |
| 261 lselectedContigs=[] | |
| 262 | |
| 263 for seq in BSDB.db : | |
| 264 size+=seq.getLength() | |
| 265 if size<limit: | |
| 266 lselectedContigs.append(seq) | |
| 267 size_small=size | |
| 268 else : | |
| 269 size_big=size | |
| 270 break | |
| 271 | |
| 272 if size_big-limit<limit-size_small : | |
| 273 lselectedContigs.append(seq) | |
| 274 | |
| 275 if self._verbosity > 0: | |
| 276 print "%s contigs selected to reach %s pb (%s contigs initially) "%(len(lselectedContigs),limit,len(contigsHeadersAndLength)) | |
| 277 | |
| 278 selectedContigsBSDB=BioseqDB() | |
| 279 selectedContigsBSDB.setData(lselectedContigs) | |
| 280 return selectedContigsBSDB | |
| 281 | |
| 282 def writeFastaInput(self,BSDB,outFileName=''): | |
| 283 if self._verbosity > 0: | |
| 284 print "Writing fasta file" | |
| 285 | |
| 286 if not outFileName: | |
| 287 outFileName = self._projectName + ".fastaExtract" | |
| 288 | |
| 289 BSDB.save(outFileName) | |
| 290 if self._verbosity > 0: | |
| 291 print '%d sequences saved.'%BSDB.getSize() | |
| 292 | |
| 293 return outFileName | |
| 294 | |
| 295 def correctHeader(self,BSDB): | |
| 296 if self._verbosity > 0: | |
| 297 print "Correcting fasta headers" | |
| 298 replacedSeqNb=0 | |
| 299 for header in BSDB.getHeaderList() : | |
| 300 p = re.compile('[^a-zA-Z0-9_:\.\-]', re.IGNORECASE) | |
| 301 if p.search(header): | |
| 302 sub=list(set(p.findall(header))) | |
| 303 correctedHeader=header | |
| 304 for s in sub : | |
| 305 correctedHeader=correctedHeader.replace(s,'_') | |
| 306 if self._verbosity>2: | |
| 307 print "Correct Header : '%s' replaced by '%s'"%(header,correctedHeader) | |
| 308 BSDB.fetch(header).setHeader(correctedHeader) | |
| 309 replacedSeqNb+=1 | |
| 310 if self._verbosity > 0: | |
| 311 print '%s sequence headers corrected'%replacedSeqNb | |
| 312 return BSDB | |
| 313 | |
| 314 | |
| 315 def _launchTEdenovo(self): | |
| 316 print "START time: %s" % time.strftime("%Y-%m-%d %H:%M:%S") | |
| 317 lCmds = [] | |
| 318 lCmds.append( "TEdenovo.py -P %s -C %s -S 1 -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 319 lCmds.append( "TEdenovo.py -P %s -C %s -S 2 -s Blaster -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 320 lCmds.append( "TEdenovo.py -P %s -C %s -S 3 -s Blaster -c Grouper -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 321 lCmds.append( "TEdenovo.py -P %s -C %s -S 3 -s Blaster -c Recon -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 322 lCmds.append( "TEdenovo.py -P %s -C %s -S 3 -s Blaster -c Piler -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 323 lCmds.append( "TEdenovo.py -P %s -C %s -S 4 -s Blaster -c Grouper -m Map -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 324 lCmds.append( "TEdenovo.py -P %s -C %s -S 4 -s Blaster -c Recon -m Map -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 325 lCmds.append( "TEdenovo.py -P %s -C %s -S 4 -s Blaster -c Piler -m Map -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 326 lCmds.append( "TEdenovo.py -P %s -C %s -S 5 -s Blaster -c GrpRecPil -m Map -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 327 lCmds.append( "TEdenovo.py -P %s -C %s -S 6 -s Blaster -c GrpRecPil -m Map -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 328 lCmds.append( "TEdenovo.py -P %s -C %s -S 7 -s Blaster -c GrpRecPil -m Map -v %i" % (self._projectName, self._configFileName, self._verbosity) ) | |
| 329 | |
| 330 for cmd in lCmds: | |
| 331 returnValue = os.system(cmd) | |
| 332 if returnValue != 0: | |
| 333 print "ERROR: command '%s' returned %i" % (cmd, returnValue) | |
| 334 self._cleanTables() | |
| 335 sys.exit(1) | |
| 336 | |
| 337 print "END time: %s" % time.strftime("%Y-%m-%d %H:%M:%S") | |
| 338 outFastaFile = glob.glob("%s_Blaster_GrpRecPil_Map_TEclassif_Filtered/*_denovoLibTEs_filtered.fa"%self._projectName) | |
| 339 shutil.copy(outFastaFile[0], self._outputFasta) | |
| 340 outStatsFile = glob.glob("%s_Blaster_GrpRecPil_Map_TEclassif_Filtered/*.classif_stats.txt"%self._projectName) | |
| 341 shutil.copy(outStatsFile[0], self._outputStats) | |
| 342 if self._classif: | |
| 343 outClassifFile = glob.glob("%s_Blaster_GrpRecPil_Map_TEclassif/classifConsensus/*_withoutRedundancy_negStrandReversed_WickerH.classif"%self._projectName) | |
| 344 shutil.copy(outClassifFile[0], self._outputClassif) | |
| 345 self._renameTE() | |
| 346 | |
| 347 def _renameTE(self): | |
| 348 name=re.search(r'([^\/\\]*)-\d{8}-denovoLibTEs_filtered\.[fa|fasta|fsa|fas]',self._outputFasta).groups()[0] | |
| 349 os.system("sed -i 's|%s|%s|' %s" % (self._projectName,name, self._outputFasta)) | |
| 350 if self._classif: | |
| 351 os.system("sed -i 's|%s|%s|' %s" % (self._projectName,name, self._outputClassif)) | |
| 352 | |
| 353 def preprocessFastaFile(self): | |
| 354 inFileHandler = open(self._fastaFileName, "r") | |
| 355 cumulLength = FastaUtils.dbCumLength(inFileHandler) | |
| 356 inFileHandler.close() | |
| 357 if cumulLength >= self._limitSeqSize: | |
| 358 print "Preprocess lauched" | |
| 359 allContigsBSDB=self.removeNstretches() | |
| 360 selectedContigsBSDB=self.selectContigs4givenSize(allContigsBSDB) | |
| 361 self.correctHeader(selectedContigsBSDB) | |
| 362 fastaFile=self.writeFastaInput(selectedContigsBSDB) | |
| 363 print "Preprocess finished" | |
| 364 else: | |
| 365 fastaFile=self._fastaFileName | |
| 366 print "No preprocess : the genome size %s lower than %s Mbp" % (cumulLength, self._limitSeqSize/1000000) | |
| 367 os.symlink(fastaFile,"%s/%s.fa" %(os.getcwd(),self._projectName)) #creer repertoire projet | |
| 368 | |
| 369 def _launchListAndDropTables(self): | |
| 370 cmd = "ListAndDropTables.py" | |
| 371 cmd += " -C %s" % self._configFileName | |
| 372 cmd += " -d '%s'" % self._projectName | |
| 373 os.system(cmd) | |
| 374 | |
| 375 def _cleanJobsTable(self): | |
| 376 db = DbFactory.createInstance( configFileName = self._configFileName ) | |
| 377 sql_cmd="DELETE FROM jobs WHERE groupid like '%s%%';"%self._projectName | |
| 378 db.execute( sql_cmd ) | |
| 379 db.close() | |
| 380 | |
| 381 def _cleanTables(self): | |
| 382 self._launchListAndDropTables() | |
| 383 self. _cleanJobsTable() | |
| 384 | |
| 385 | |
| 386 def run(self): | |
| 387 os.mkdir(self._projectName) | |
| 388 os.chdir(self._projectName) | |
| 389 self._writeConfigFile() | |
| 390 self.preprocessFastaFile() | |
| 391 if self._nucl_bank != "" and self._nucl_bank != None: | |
| 392 os.symlink(self._nucl_bank,"%s/%s" %(os.getcwd(),os.path.basename(self._nucl_bank))) | |
| 393 if self._prot_bank != "" and self._prot_bank != None: | |
| 394 os.symlink(self._prot_bank,"%s/%s" %(os.getcwd(),os.path.basename(self._prot_bank))) | |
| 395 if self._HMM_profiles != "" and self._HMM_profiles != None: | |
| 396 os.symlink(self._HMM_profiles,"%s/%s" %(os.getcwd(),os.path.basename(self._HMM_profiles))) | |
| 397 if self._rdna_bank != "" and self._rdna_bank != None: | |
| 398 os.symlink(self._rdna_bank,"%s/%s" %(os.getcwd(),os.path.basename(self._rdna_bank))) | |
| 399 | |
| 400 self._launchTEdenovo() | |
| 401 self._cleanTables() | |
| 402 | |
| 403 if __name__ == '__main__': | |
| 404 iTEdenovo = TEdenovo_lite() | |
| 405 iTEdenovo.setAttributesFromCommandLine() | |
| 406 iTEdenovo.run() |
