| 18 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 # Copyright INRA (Institut National de la Recherche Agronomique) | 
|  | 4 # http://www.inra.fr | 
|  | 5 # http://urgi.versailles.inra.fr | 
|  | 6 # | 
|  | 7 # This software is governed by the CeCILL license under French law and | 
|  | 8 # abiding by the rules of distribution of free software.  You can  use, | 
|  | 9 # modify and/ or redistribute the software under the terms of the CeCILL | 
|  | 10 # license as circulated by CEA, CNRS and INRIA at the following URL | 
|  | 11 # "http://www.cecill.info". | 
|  | 12 # | 
|  | 13 # As a counterpart to the access to the source code and  rights to copy, | 
|  | 14 # modify and redistribute granted by the license, users are provided only | 
|  | 15 # with a limited warranty  and the software's author,  the holder of the | 
|  | 16 # economic rights,  and the successive licensors  have only  limited | 
|  | 17 # liability. | 
|  | 18 # | 
|  | 19 # In this respect, the user's attention is drawn to the risks associated | 
|  | 20 # with loading,  using,  modifying and/or developing or reproducing the | 
|  | 21 # software by the user in light of its specific status of free software, | 
|  | 22 # that may mean  that it is complicated to manipulate,  and  that  also | 
|  | 23 # therefore means  that it is reserved for developers  and  experienced | 
|  | 24 # professionals having in-depth computer knowledge. Users are therefore | 
|  | 25 # encouraged to load and test the software's suitability as regards their | 
|  | 26 # requirements in conditions enabling the security of their systems and/or | 
|  | 27 # data to be ensured and,  more generally, to use and operate it in the | 
|  | 28 # same conditions as regards security. | 
|  | 29 # | 
|  | 30 # The fact that you are presently reading this means that you have had | 
|  | 31 # knowledge of the CeCILL license and that you accept its terms. | 
|  | 32 | 
|  | 33 | 
|  | 34 import os | 
|  | 35 import sys | 
|  | 36 from commons.core.sql.DbMySql import DbMySql | 
|  | 37 from commons.core.utils.RepetOptionParser import RepetOptionParser | 
|  | 38 from commons.core.utils.FileUtils import FileUtils | 
|  | 39 from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator | 
|  | 40 from commons.core.LoggerFactory import LoggerFactory | 
|  | 41 | 
# Prefix of the logger name; the class name is appended to it when the logger is created.
LOG_DEPTH = "repet.tools"
# Format string handed to LoggerFactory.createLogger: only the message itself is emitted.
LOG_FORMAT = "%(message)s"
#TODO: use configuration file
|  | 45 | 
## Build 3 annotation files (and the matching TE libraries) from the GiveInfoTEannot "statsPerTE" output:
#- consensus with at least one full-length copy,
#- consensus with at least one full-length fragment,
#- consensus with at least one copy
|  | 50 | 
class GetSpecificTELibAccordingToAnnotation(object):
    """Split a GiveInfoTEannot "statsPerTE" file into 3 sub-files and export one fasta file per sub-file.

    A consensus (one row of the input file) is copied into:
    - <input basename>_FullLengthCopy.txt when its "fullLgthCopies" column is > 0,
    - <input basename>_OneCopyAndMore.txt when its "copies" column is > 0,
    - <input basename>_FullLengthFrag.txt when its "fullLgthFrags" column is > 0.
    For each sub-file, the names found in its first column are handed to
    TableSeqAdaptator.saveAccessionsListInFastaFile together with the sequence
    table name, producing a ".fa" file next to the sub-file.
    All output files are written in the current working directory.
    """

    # (column of the input file used as selector, suffix of the matching output file),
    # in the order in which the output files are produced.
    _CATEGORIES = (
        ("fullLgthCopies", "FullLengthCopy"),
        ("copies", "OneCopyAndMore"),
        ("fullLgthFrags", "FullLengthFrag"),
    )

    def __init__(self, inInfoFileName = "", tableName = "", verbose = 0):
        """Store the parameters and create the logger.

        @param inInfoFileName: path of the GiveInfoTEannot "statsPerTE" file
        @param tableName: name of the table holding the TE sequences
        @param verbose: verbosity level passed to the logger
        """
        self._inInfoFileName = inInfoFileName
        self._tableName = tableName
        self._verbose = verbose
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose, LOG_FORMAT)

    def setAttributesFromCmdLine(self):
        """Parse the command line (-i, -t, -v) and set the attributes accordingly."""
        desc = "Splits a GiveInfoTEannot \"statsPerTE.txt\" file in 3 subfiles containing consensus which have at least one copy, one full length fragment or one full length copy. "
        desc += "A TEs library is built according to each category. Connection to the database parameters are retrieved from the environment"

        examples = "\nExample : with a project called \"MyTEannotAnalysis\":\n"
        examples += "\t$ python GetSpecificTELibAccordingToAnnotation.py -i MyTEannotAnalysis_chr_allTEs_nr_noSSR_join_path_statsPerTE.txt -t MyTEannotAnalysis_refTEs_seq"
        examples += "\n\t"
        examples += "\n\n"

        parser = RepetOptionParser(description = desc, epilog = examples)
        parser.add_option("-i", "--file",     dest = "inInfoFileName",  action = "store", type = "string", help = "input file (mandatory) = output file from GiveInfoTEannot.py (e.g. <project_name>_chr_allTEs_nr_noSSR_join_path_statsPerTE.txt)",  default = "")
        parser.add_option("-t", "--table",    dest = "tableName",       action = "store", type = "string", help = "table name of TEs sequences (mandatory, seq format, e.g. <project_name>_refTEs_seq)", default = "")
        parser.add_option("-v", "--verbose",  dest = "verbose",         action = "store", type = "int",    help = "verbosity level (default=0, else 1)", default = 0)
        # Positional arguments are not used by this tool.
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy the parsed options into the instance attributes."""
        self.setInInfoFileName(options.inInfoFileName)
        self.setTableName(options.tableName)
        self.setVerbose(options.verbose)

    def setTableName(self, tableName):
        """Set the name of the table holding the TE sequences."""
        self._tableName = tableName

    def setInInfoFileName(self, inInfoFileName):
        """Set the path of the input "statsPerTE" file."""
        self._inInfoFileName = inInfoFileName

    def setVerbose(self, verbose):
        """Set the verbosity level (applied to the logger when run() starts)."""
        self._verbose = verbose

    def checkOptions(self):
        """Check that the input file and the sequence table are given and exist.

        @raise Exception: when -i or -t is missing, when the input file does
        not exist or when the table does not exist. The message is logged first.
        """
        if self._inInfoFileName == "":
            self._logAndRaise("ERROR: No specified -i option!")
        # The existence test must be done on the attribute actually set by -i.
        if not FileUtils.isRessourceExists(self._inInfoFileName):
            self._logAndRaise("ERROR: Input GiveInfoTEannot.txt output file does not exist!")

        if self._tableName == "":
            self._logAndRaise("ERROR: No specified -t option!")
        iDb = DbMySql()
        try:
            tableExists = iDb.doesTableExist(self._tableName)
        finally:
            # Release the connection before raising, whatever the answer is.
            iDb.close()
        if not tableExists:
            self._logAndRaise("ERROR: table does not exist!")

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    @staticmethod
    def _readConsensusNames(fileName):
        """Return the first token of every non-blank line of fileName, header line excluded."""
        lConsensusName = []
        with open(fileName, "r") as fileHandler:
            # The first line is the column header, not a consensus.
            next(fileHandler, None)
            for line in fileHandler:
                lTokens = line.split()
                # Blank lines carry no consensus name: skip them instead of failing.
                if lTokens:
                    lConsensusName.append(lTokens[0])
        return lConsensusName

    def writeFastaFileFromGiveInfoTEAnnot(self, fileName):
        """Export to "<fileName without extension>.fa" the sequences named in fileName.

        The names are read from the first column of fileName (header skipped) and
        given to TableSeqAdaptator.saveAccessionsListInFastaFile, working on the
        table set with -t.

        @param fileName: path of a "statsPerTE"-like file
        """
        lConsensusName = self._readConsensusNames(fileName)
        outPutFileName = "%s.fa" % os.path.splitext(fileName)[0]
        iDb = DbMySql()
        try:
            iTSA = TableSeqAdaptator(iDb, self._tableName)
            iTSA.saveAccessionsListInFastaFile(lConsensusName, outPutFileName)
        finally:
            # Close the connection even if the export fails.
            iDb.close()

    def _splitInfoFile(self):
        """Dispatch the rows of the input file into the 3 category files.

        Each output file starts with the header line of the input file. Blank
        lines are ignored; tokens beyond the number of header columns are ignored.

        @return: names of the 3 output files, in _CATEGORIES order
        @raise Exception: when one of the selector columns is missing from the header
        """
        baseName = os.path.splitext(os.path.basename(self._inInfoFileName))[0]
        lOutFileNames = ["%s_%s.txt" % (baseName, suffix) for _, suffix in self._CATEGORIES]

        with open(self._inInfoFileName, "r") as inFileFh:
            headerLine = next(inFileFh, "")
            lHeaders = headerLine.split()
            # Validate before creating any output file, so that a bad input leaves nothing behind.
            for column, _ in self._CATEGORIES:
                if column not in lHeaders:
                    self._logAndRaise("ERROR: No headers in %s!" % self._inInfoFileName)

            lOutFiles = []
            try:
                for outFileName in lOutFileNames:
                    outFile = open(outFileName, "w")
                    lOutFiles.append(outFile)
                    outFile.write(headerLine)

                for line in inFileFh:
                    lTokens = line.split()
                    if not lTokens:
                        continue
                    dTokens = dict(zip(lHeaders, lTokens))
                    for (column, _), outFile in zip(self._CATEGORIES, lOutFiles):
                        if int(dTokens[column]) > 0:
                            outFile.write(line)
            finally:
                for outFile in lOutFiles:
                    outFile.close()

        return lOutFileNames

    def run(self):
        """Split the input file, then write one fasta file per category file.

        @return: 0
        """
        LoggerFactory.setLevel(self._log, self._verbose)
        self._log.info("START GetSpecificTELibAccordingToAnnotation\n input info file: %s" % self._inInfoFileName)

        for outInfoFileName in self._splitInfoFile():
            self.writeFastaFileFromGiveInfoTEAnnot(outInfoFileName)

        self._log.info("END GetSpecificTELibAccordingToAnnotation\n" )

        return 0
|  | 173 | 
if __name__ == "__main__":
    # Script entry point: read the parameters from the command line, then launch the tool.
    teLibBuilder = GetSpecificTELibAccordingToAnnotation()
    teLibBuilder.setAttributesFromCmdLine()
    teLibBuilder.run()
|  | 178 |