Mercurial > repos > yufei-luo > s_mart
view commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
"""
Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.
"""

import os
import ConfigParser

from commons.core.utils.FileUtils import FileUtils
from commons.core.LoggerFactory import LoggerFactory

LOG_DEPTH = "repet.tools"


class RepbaseBLRnForClassifierStep1( object ):
    """
    Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.

    @param inFileName: name of the input fasta file
    @type inFileName: string

    @param launch_1: generic command at the beginning of a specific command
    @type launch_1: string

    @param launch_2: generic command at the end of a specific command
    @type launch_2: string

    @param cDir: current directory (where to retrieve the result files)
    @type cDir: string

    @param tmpDir: temporary directory (where the job will run)
    @type tmpDir: string

    @param configFileName: configuration file name
    @type configFileName: string

    @param verbose: verbose(0/1/2)
    @type verbose: int

    @param pL: program launcher
    @type pL: programLauncher Instance

    @param project: project name
    @type project: string
    """

    def __init__(self, inFileName, launch_1, launch_2, cDir, tmpDir, configFileName, verbose, pL, project):
        """
        Constructor

        Reads the configuration file, retrieves the 'TE_nucl_bank' option of
        section 'detect_features' and creates the logger.
        """
        self._inFileName = inFileName
        self._launch_1 = launch_1
        self._launch_2 = launch_2
        self._cDir = cDir
        self._tmpDir = tmpDir
        self._verbose = verbose
        self._pL = pL
        self._project = project
        self._fileUtils = FileUtils()
        self._config = ConfigParser.ConfigParser()
        self._configFileName = configFileName
        # Close the configuration file as soon as it has been parsed.
        with open(self._configFileName) as configFile:
            self._config.readfp( configFile )
        self._bank = self._config.get("detect_features","TE_nucl_bank")
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose)

    def formatRepbase_ntIfNecessary( self ):
        """
        Format Repbase (make 'cut' files).

        Nothing is done when the '<bank>_cut' file already exists.
        """
        if os.path.exists( "%s_cut" % ( self._bank ) ):
            return
        self._log.debug("prepare bank '%s'..." % ( self._bank ))
        prg = os.environ["REPET_PATH"] + "/bin/blaster"
        cmd = prg
        cmd += " -s %s" % ( self._bank )
        cmd += " -n blastn"
        if self._config.get("detect_features","wublast") == "yes":
            cmd += " -W"
        cmd += " -r"
        cmd += " -P"
        self._pL.launch( prg, cmd )
        # Remove the parameter files left next to the bank by the run above.
        os.system( "rm -f %s-blastn-*.param" % ( self._bank ) )

    @staticmethod
    def _moveIfAbsentSnippet( fileName, destDir ):
        """
        Return the Python snippet moving 'fileName' into 'destDir' unless a
        file with that name is already there.
        """
        snippet = "if not os.path.exists( \"%s/%s\" ):\n" % ( destDir, fileName )
        snippet += "\tos.system( \"mv %s %s\" )\n" % ( fileName, destDir )
        return snippet

    @staticmethod
    def _removeIfPresentSnippet( fileName ):
        """
        Return the Python snippet removing 'fileName' when it exists.
        """
        snippet = "if os.path.exists( \"%s\" ):\n" % ( fileName )
        snippet += "\tos.remove( \"%s\" )\n" % ( fileName )
        return snippet

    def createCmdToLaunch( self ):
        """
        Build the text of the job: a blaster command then a matcher command,
        each wrapped between 'launch_1' and 'launch_2' and followed by Python
        snippets that move the result files to 'cDir' and remove the
        intermediate files.

        @return: all the commands to run the job
        @rtype: string
        """
        binDir = os.environ["REPET_PATH"] + "/bin"
        # Common prefix of every file produced for this input and this bank.
        prefix = "%s_BLRn_%s" % ( self._inFileName, self._bank )

        # blaster step
        cmd = self._launch_1 + binDir + "/blaster"
        cmd += " -q %s" % ( self._inFileName )
        cmd += " -s %s/%s" % ( self._cDir, self._bank )
        cmd += " -B %s" % ( prefix )
        cmd += " -n blastn"
        if self._config.get("detect_features","wublast") == "yes":
            cmd += " -W"
        cmd += " -r"
        cmd += " -v 1"
        cmd += self._launch_2

        cmd += self._moveIfAbsentSnippet( "%s.param" % ( prefix ), self._cDir )
        # The 'cut' files are removed through a shell wildcard, hence os.system.
        cmd += "if os.path.exists( \"%s_cut\" ):\n" % ( self._inFileName )
        cmd += "\tos.system( \"rm -f %s_cut*\" )\n" % ( self._inFileName )
        cmd += self._removeIfPresentSnippet( "%s.Nstretch.map" % ( self._inFileName ) )
        cmd += self._removeIfPresentSnippet( "%s.raw" % ( prefix ) )
        cmd += self._removeIfPresentSnippet( "%s.seq_treated" % ( prefix ) )

        # matcher step
        cmd += self._launch_1
        cmd += binDir + "/matcher"
        cmd += " -m %s.align" % ( prefix )
        cmd += " -q %s" % ( self._inFileName )
        cmd += " -s %s/%s" % ( self._cDir, self._bank )
        cmd += " -j"
        cmd += " -v 1"
        cmd += self._launch_2

        cmd += self._moveIfAbsentSnippet( "%s.align.clean_match.path" % ( prefix ), self._cDir )
        cmd += self._moveIfAbsentSnippet( "%s.align.clean_match.param" % ( prefix ), self._cDir )
        cmd += self._removeIfPresentSnippet( "%s.align" % ( prefix ) )
        cmd += self._removeIfPresentSnippet( "%s.align.clean_match.fa" % ( prefix ) )
        cmd += self._removeIfPresentSnippet( "%s.align.clean_match.map" % ( prefix ) )
        cmd += self._removeIfPresentSnippet( "%s.align.clean_match.tab" % ( prefix ) )

        # The bank is only removed when the job runs outside the current directory.
        if self._tmpDir != self._cDir:
            cmd += self._removeIfPresentSnippet( self._bank )

        return cmd

    def collectRepbaseBLRn( self ):
        """
        Concatenate the outputs of blastn, adapt the ID and load the results into a table.
        """
        # Only the file name of the bank appears in the result file names.
        bank = os.path.basename( self._bank )
        self._concatPathFile(bank)
        self._adaptIDInPathFile(bank)
        self._loadPathFileInTable(bank)
        self._findAndRemoveUselessFiles(bank)

    def _concatPathFile(self, bank):
        """
        Concatenate the 'path' files of all the batches (found in the parent
        directory) into a single temporary file.
        """
        FileUtils.catFilesByPattern("../batch_*.fa_BLRn_%s.align.clean_match.path" % bank,
                                    "%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank))

    def _adaptIDInPathFile(self, bank):
        """
        Run 'pathnum2id' on the concatenated 'path' file.

        '$REPET_PATH/bin/pathnum2id' is used when it exists, otherwise
        'pathnum2id.py'. Only the former is given the '-v' option.
        """
        prg = os.environ["REPET_PATH"] + "/bin/pathnum2id"
        hasBinary = os.path.exists(prg)
        if not hasBinary:
            prg += ".py"
        cmd = prg
        cmd += " -i %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)
        cmd += " -o %s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        if hasBinary:
            cmd += " -v %i" % (self._verbose - 1)
        self._pL.launch(prg, cmd)

    def _loadPathFileInTable(self, bank):
        """
        Load the 'path' file into the table '<project>_TE_BLRn_path' via 'srptCreateTable.py'.
        """
        prg = os.environ["REPET_PATH"] + "/bin/srptCreateTable.py"
        cmd = prg
        cmd += " -f %s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        cmd += " -n %s_TE_BLRn_path" % (self._project)
        cmd += " -t path"
        cmd += " -c ../%s" % (self._configFileName)
        self._pL.launch(prg, cmd)

    def _findAndRemoveUselessFiles(self, bank):
        """
        Remove the per-batch result files (in the parent directory) and the
        temporary concatenated 'path' file.
        """
        prg = "find"
        cmd = prg
        # The backslash is doubled so that the shell receives a literal '\;'.
        cmd += " .. -name \"batch_*.fa_BLRn_%s.*\" -exec rm {} \\;" % (bank)
        self._pL.launch(prg, cmd)

        prg = "rm"
        cmd = prg
        cmd += " %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)
        self._pL.launch(prg, cmd)