view commons/pyRepetUnit/hmmer/tests/TestAcceptanceHmmpfamAndParse2alignInparallel.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

import os
import sys
import unittest
import pyRepet.seq.fastaDB
from sets import Set
import ConfigParser 
from pyRepet.launcher.programLauncher import *
from pyRepet.launcher.Launcher import HmmpfamAndParse2alignLauncher
from pyRepet.sql.RepetJobMySQL import *
from pyRepet.util.file.FileUtils import *
import commons.pyRepetUnit.dbSplit.LauncherDbSplit
import commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align

# Configuration file driving this acceptance test. The path is relative, so it
# is resolved against the working directory in effect when the file is opened.
CONFIG_FILE = "datas/configTestAcceptanceHmmpfamAndParse2alignLauncherInparallel.cfg"
# Working directory at import time; setUp() changes back to it for every test.
CURRENT_DIR = os.getcwd()

class TestAcceptanceHmmpfamLauncherAndParse2alignInparallel(unittest.TestCase):
    """Acceptance test: parallel versus sequential hmmpfam + parse-to-align.

    The single test runs the same pipeline twice on the same input:

    * once through HmmpfamAndParse2alignLauncher on the input split into
      batches, and
    * once through programLauncher.launchHmmpfam followed by
      HmmpfamOutput2align on the whole input,

    and asserts that both runs produce the same total number of output lines.

    Every path, database credential and tool option is read from CONFIG_FILE.
    Generated files are removed in tearDown(), so a failed run cannot leave
    stale result files that would be counted by the next run.
    """

    def setUp(self):
        """Load the configuration and build the helpers for the sequential run."""
        # Go back to the start-up directory *before* opening the relative
        # CONFIG_FILE path, so a directory change made elsewhere cannot break
        # the lookup.
        os.chdir(CURRENT_DIR)
        self._config = ConfigParser.ConfigParser()
        # Close the handle deterministically instead of leaking it.
        with open(CONFIG_FILE) as configHandle:
            self._config.readfp(configHandle)
        self._batchFilesList = []
        self._batchFilesBatchesList = []
        self._inputFile = self._config.get("dbSplit_config", "input_file")
        self._programLauncherInstance = programLauncher()
        self._outputFileNotInparallel = self._config.get("hmmer_config", "output_file")
        # Resolved here (not in the sequential launcher) so that _clean() can
        # always refer to it, even when the test stops before that step.
        self._outputFileParsingNotInparallel = self._config.get("hmmpfam2align_config", "output_file")
        self._hmmpfamOutput2align = commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align.HmmpfamOutput2align()

    def tearDown(self):
        """Remove the generated files whether the test passed or failed."""
        self._clean()

    def _launchHmmpfamAndParseNotInParallel(self):
        """Run hmmpfam on the whole input file, then convert its output.

        The hmmpfam output goes to self._outputFileNotInparallel and the
        converted output to self._outputFileParsingNotInparallel.
        """
        launcher = self._programLauncherInstance
        launcher.reset(self._inputFile)
        launcher.setOutputFileName(self._outputFileNotInparallel)
        launcher.launchHmmpfam(
            evalFilter=self._config.get("profil_search", "Evalue"),
            inputFormat=self._config.get("profil_search", "InputFormat"),
            profilDatabank=self._config.get("profil_search", "ProfilDatabank"),
        )

        parser = self._hmmpfamOutput2align
        parser.setInputFile(self._outputFileNotInparallel)
        parser.setOutputFile(self._outputFileParsingNotInparallel)
        parser.run()

    def _countNblinesInAllResultsFilesInParallel(self, fileUtils):
        """Return the line count reported for all files of the result directory.

        fileUtils -- object providing listFilesInDir() and countLinesInFiles().
        """
        resultDir = self._config.get("hmmer_config", "result_dir")
        # Hand over a fresh list, as the original element-by-element copy did.
        resultPaths = list(fileUtils.listFilesInDir(resultDir))
        return fileUtils.countLinesInFiles(resultPaths)

    def _launchHmmpfamAndParseInParallel(self):
        """Run the pipeline through HmmpfamAndParse2alignLauncher.

        The launcher receives a RepetJob built from the [db_config] section
        and a parameter dictionary assembled from the other sections.
        NOTE(review): the sge_config queue/groupid options suggest jobs are
        submitted to a cluster queue -- confirm against the launcher.
        """
        get = self._config.get
        jobDb = RepetJob(
            get("db_config", "user"),
            get("db_config", "host"),
            get("db_config", "passwd"),
            get("db_config", "dbname"),
        )
        params = {
            "param": "--informat " + get("profil_search", "InputFormat") + " -E " + get("profil_search", "Evalue"),
            "profilDB": get("profil_search", "ProfilDatabank"),
            "outputDir": get("hmmer_config", "result_dir"),
            "query": get("hmmer_config", "query_dir"),
            "job_table": "jobs",
            "queue": get("sge_config", "queue"),
            "groupid": get("sge_config", "groupid"),
            "tmpDir": get("hmmer_config", "tmp_dir"),
            "scriptToLaunch": get("parse_config", "scriptToLaunch"),
            "cDir": ".",
        }
        self.hmmpfamAndParse2align = HmmpfamAndParse2alignLauncher(jobDb, params)
        self.hmmpfamAndParse2align.run()

    def _splitInputFile(self, dbSplit):
        """Configure dbSplit for the test input file and run it.

        dbSplit -- a LauncherDbSplit instance.
        """
        dbSplit.setInFileName(self._inputFile)
        dbSplit.setIsNewDir(True)
        # presumably three sequences per batch file -- confirm in LauncherDbSplit
        dbSplit.setNbSequences(3)
        dbSplit.run()

    def testAcceptanceHmmpfamAndParse2alignHasRunInparallel(self):
        """The parallel and sequential runs must yield the same number of lines."""
        dbSplit = commons.pyRepetUnit.dbSplit.LauncherDbSplit.LauncherDbSplit()
        self._splitInputFile(dbSplit)
        try:
            self._launchHmmpfamAndParseInParallel()
            fileUtils = FileUtils()
            nbLinesInAlignFilesInparallel = self._countNblinesInAllResultsFilesInParallel(fileUtils)
        finally:
            # Discard the split input even when the parallel run fails.
            dbSplit.clean()
        self._launchHmmpfamAndParseNotInParallel()
        nbLinesInAlignFileNotInparallel = fileUtils.countLinesInAFile(self._outputFileParsingNotInparallel)
        self.assertEqual(nbLinesInAlignFilesInparallel, nbLinesInAlignFileNotInparallel)
        # Generated files are removed by tearDown().

    def _removeMatchingFiles(self, pattern):
        """Delete every file or symlink matching the glob pattern.

        A pattern that matches nothing is not an error, and directories are
        left alone (as a plain ``rm`` would leave them).
        """
        import glob  # local import: keeps the module-level import block untouched
        for path in glob.glob(pattern):
            if os.path.isfile(path) or os.path.islink(path):
                os.remove(path)

    def _clean(self):
        """Delete everything the test may have written.

        Safe to call at any point: files that were never produced are skipped.
        """
        for option in ("tmp_dir", "result_dir"):
            dirToClean = self._config.get("hmmer_config", option)
            self._removeMatchingFiles(os.path.join(dirToClean, "*"))
        # Job stderr/stdout files left in the working directory.
        self._removeMatchingFiles("HmmpfamJob*.e*")
        self._removeMatchingFiles("HmmpfamJob*.o*")
        for outputFile in (self._outputFileNotInparallel, self._outputFileParsingNotInparallel):
            if os.path.exists(outputFile):
                os.remove(outputFile)

# Allow this acceptance test to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()