Mercurial > repos > yufei-luo > s_mart
diff commons/tools/PrepareBatches.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/tools/PrepareBatches.py Mon Apr 29 03:20:15 2013 -0400 @@ -0,0 +1,126 @@ +#!/usr/bin/env python + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 

import os
import sys
from ConfigParser import NoSectionError, NoOptionError
from commons.core.checker.CheckerUtils import CheckerUtils
from commons.core.checker.CheckerException import CheckerException
from commons.core.utils.FileUtils import FileUtils
from commons.core.seq.FastaUtils import FastaUtils


class PrepareBatches(object):
    """Step 1 of a pipeline: chunk the project genome and split it into batches.

    The work is done inside a freshly created "<projectName>_db" directory,
    using the "prepare_batches" section of the supplied configuration object.
    Every fatal problem is reported on stdout and ends with sys.exit(1).
    """

    # Options that must exist in the configuration section, in the order in
    # which they are checked (and therefore reported).
    _REQUIRED_OPTIONS = ("chunk_length", "chunk_overlap", "nb_seq_per_batch", "resources", "tmpDir")

    def __init__(self, pipelineName, projectDir, projectName, iConfig, verbose):
        """Store the parameters of the step.

        @param pipelineName: name of the pipeline (stored, not used in this class)
        @param projectDir: project directory (stored, not used in this class)
        @param projectName: prefix of the genome file "<projectName>.fa" and
                            of the "<projectName>_db" working directory
        @param iConfig: configuration object queried with get(section, option)
        @param verbose: verbosity level; progress is printed when > 0
        """
        self._pipelineName = pipelineName
        self._projectDir = projectDir
        self._projectName = projectName
        self._iConfig = iConfig
        self._verbose = verbose

    def run(self):
        """Create "<projectName>_db", then chunk and batch the genome in it.

        The current directory must contain "<projectName>.fa" and must not
        already contain "<projectName>_db". The process exits with status 1
        when the directory exists, when the configuration is incomplete or
        when the fasta headers are rejected. The original working directory
        is restored on success and on failure.
        """
        sectionName = "prepare_batches"
        dbDirName = "%s_db" % self._projectName
        genomeFastaFileName = "%s.fa" % self._projectName

        if self._verbose > 0:
            print("beginning of step 1")
            sys.stdout.flush()
        if FileUtils.isRessourceExists(dbDirName):
            print("ERROR: directory '%s_db' already exists" % self._projectName)
            sys.exit(1)

        # Validate the configuration BEFORE touching the file system: when it
        # was checked after mkdir, a configuration error left a stale
        # directory behind and the next attempt failed with "already exists".
        self._checkConfig(sectionName)

        os.mkdir(dbDirName)
        os.chdir(dbDirName)
        try:
            os.symlink("../%s" % genomeFastaFileName, genomeFastaFileName)
            self._checkGenomeHeaders(genomeFastaFileName)

            # NOTE(review): "clean" is not among the options verified by
            # _checkConfig, so a missing option still raises NoOptionError here.
            doClean = self._iConfig.get(sectionName, "clean") == "yes"
            chunkFilePrefix = "%s_chunks" % self._projectName
            chunkLength = int(self._iConfig.get(sectionName, "chunk_length"))
            chunkOverlap = int(self._iConfig.get(sectionName, "chunk_overlap"))
            FastaUtils.dbChunks(genomeFastaFileName, chunkLength, chunkOverlap, 0, chunkFilePrefix, doClean, self._verbose)

            nbSeq = int(self._iConfig.get(sectionName, "nb_seq_per_batch"))
            FastaUtils.splitFastaFileInBatches("%s.fa" % chunkFilePrefix, nbSeq * chunkLength)

            if doClean:
                FileUtils.removeFilesByPattern("%s.fa*" % self._projectName)
        finally:
            # Leave the caller in its original directory even when the step fails.
            os.chdir("..")

        if self._verbose > 0:
            print("step 1 finished successfully")
            sys.stdout.flush()

    def _checkGenomeHeaders(self, genomeFastaFileName):
        """Exit with status 1 if CheckerUtils rejects the headers of the fasta file."""
        separator = "\n"
        # The context manager closes the file on every path, including sys.exit().
        with open(genomeFastaFileName, "r") as inGenomeFileHandler:
            try:
                CheckerUtils.checkHeaders(inGenomeFileHandler)
            except CheckerException as e:
                print("Error in file %s. Wrong headers are :" % genomeFastaFileName)
                print(separator.join(e.messages))
                print("Authorized characters are : a-z A-Z 0-9 - . : _\n")
                sys.exit(1)

    def _checkConfig(self, sectionName):
        """Exit with status 1 unless the section and its required options exist.

        @param sectionName: name of the configuration section to validate
        """
        try:
            CheckerUtils.checkSectionInConfigFile(self._iConfig, sectionName)
        except NoSectionError:
            print("ERROR: the section %s must be in your configuration file" % sectionName)
            sys.exit(1)
        for optionName in self._REQUIRED_OPTIONS:
            try:
                CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, optionName)
            except NoOptionError:
                print("ERROR: the option '%s' must be defined in %s in your configuration file" % (optionName, sectionName))
                sys.exit(1)