| 
13
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 # Copyright INRA (Institut National de la Recherche Agronomique)
 | 
| 
 | 
     4 # http://www.inra.fr
 | 
| 
 | 
     5 # http://urgi.versailles.inra.fr
 | 
| 
 | 
     6 #
 | 
| 
 | 
     7 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     8 # abiding by the rules of distribution of free software.  You can  use,
 | 
| 
 | 
     9 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
    10 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
    11 # "http://www.cecill.info".
 | 
| 
 | 
    12 #
 | 
| 
 | 
    13 # As a counterpart to the access to the source code and  rights to copy,
 | 
| 
 | 
    14 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    15 # with a limited warranty  and the software's author,  the holder of the
 | 
| 
 | 
    16 # economic rights,  and the successive licensors  have only  limited
 | 
| 
 | 
    17 # liability.
 | 
| 
 | 
    18 #
 | 
| 
 | 
    19 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    20 # with loading,  using,  modifying and/or developing or reproducing the
 | 
| 
 | 
    21 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    22 # that may mean  that it is complicated to manipulate,  and  that  also
 | 
| 
 | 
    23 # therefore means  that it is reserved for developers  and  experienced
 | 
| 
 | 
    24 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    25 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    26 # requirements in conditions enabling the security of their systems and/or
 | 
| 
 | 
    27 # data to be ensured and,  more generally, to use and operate it in the
 | 
| 
 | 
    28 # same conditions as regards security.
 | 
| 
 | 
    29 #
 | 
| 
 | 
    30 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    31 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    32 
 | 
| 
 | 
    33 from commons.core.LoggerFactory             import LoggerFactory
 | 
| 
 | 
    34 from commons.core.utils.RepetOptionParser   import RepetOptionParser
 | 
| 
 | 
    35 from commons.core.checker.RepetException    import RepetException
 | 
| 
 | 
    36 from commons.core.utils.FileUtils           import FileUtils
 | 
| 
 | 
    37 import os
 | 
| 
 | 
    38 from commons.core.parsing.GffParser import GffParser
 | 
| 
 | 
    39 LOG_NAME = "TEiso"
 | 
| 
 | 
    40 
 | 
| 
 | 
    41 class GFFToBed(object):
 | 
| 
 | 
    42     
 | 
| 
 | 
    43     def __init__(self, inputFile = "", outputFile = "", verbosity = 3):
 | 
| 
 | 
    44         self._inputFile = inputFile
 | 
| 
 | 
    45         self._outputFile = outputFile
 | 
| 
 | 
    46         self._verbosity = verbosity
 | 
| 
 | 
    47         self._log = LoggerFactory.createLogger("%s.%s" % (LOG_NAME, self.__class__.__name__), self._verbosity)
 | 
| 
 | 
    48             
 | 
| 
 | 
    49     def setAttributesFromCmdLine(self):
 | 
| 
 | 
    50         self._toolVersion = "1.1.a"
 | 
| 
 | 
    51         description = "GFFToBed version %s" % self._toolVersion
 | 
| 
 | 
    52         epilog = "\n parses a GFF3 file and create a bed file. \n"
 | 
| 
 | 
    53         epilog += "example: GFFToBed.py -i <inputFile> -o  <outputFile>\n"
 | 
| 
 | 
    54         parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) 
 | 
| 
 | 
    55         parser.add_option("-i", "--inputFile",  dest = "inputFile",  action = "store", type = "string", help = "Input GFF3 File name.",  default = "")
 | 
| 
 | 
    56         parser.add_option("-o", "--outputFile", dest = "outputFile", action = "store", type = "string", help = "output Bed File name", default = "")
 | 
| 
 | 
    57         parser.add_option("-v", "--verbosity",  dest = "verbosity",  action = "store", type = "int",    help = "Verbosity [optional] [default: 3]",default = 3)
 | 
| 
 | 
    58         options = parser.parse_args()[0]
 | 
| 
 | 
    59         self._setAttributesFromOptions(options)
 | 
| 
 | 
    60 
 | 
| 
 | 
    61     def _setAttributesFromOptions(self, options):
 | 
| 
 | 
    62         self._inputFile  = options.inputFile
 | 
| 
 | 
    63         self._outputFile = options.outputFile
 | 
| 
 | 
    64         self._verbosity  = options.verbosity
 | 
| 
 | 
    65                     
 | 
| 
 | 
    66     def _logAndRaise(self, errorMsg):
 | 
| 
 | 
    67         self._log.error(errorMsg)
 | 
| 
 | 
    68         raise RepetException(errorMsg)
 | 
| 
 | 
    69 
 | 
| 
 | 
    70     def checkoption(self):
 | 
| 
 | 
    71         if self._outputFile == "":
 | 
| 
 | 
    72             #self._log.info("Missing output file destination")
 | 
| 
 | 
    73             self._outputFile = "%s.bed" % os.path.splitext(self._inputFile)[0]
 | 
| 
 | 
    74         else:
 | 
| 
 | 
    75             if FileUtils.isRessourceExists(self._outputFile):
 | 
| 
 | 
    76                 self._log.info("Output file '%s' already exists!" % self._outputFile)
 | 
| 
 | 
    77             
 | 
| 
 | 
    78         if self._inputFile == "":
 | 
| 
 | 
    79             self._log.info("Missing input file")
 | 
| 
 | 
    80         
 | 
| 
 | 
    81     def getGFFToBed (self, inputFile ,outputFile):
 | 
| 
 | 
    82         try:
 | 
| 
 | 
    83             filewrite=open(outputFile, "w")
 | 
| 
 | 
    84             gffParser = GffParser(inputFile)
 | 
| 
 | 
    85             for transcript in gffParser.getIterator():
 | 
| 
 | 
    86                 if(transcript.getDirection()==1):
 | 
| 
 | 
    87                     strand="+"
 | 
| 
 | 
    88                 else:
 | 
| 
 | 
    89                     strand="-"
 | 
| 
 | 
    90                 filewrite.write("%s\t%s\t%s\t%s\t%s\t%s\n"  % (transcript.getChromosome(),transcript.getStart(),
 | 
| 
 | 
    91                 transcript.getEnd(), transcript.getTagValue("ID"), transcript.getTagValue("Target"), strand) )
 | 
| 
 | 
    92             filewrite.close()
 | 
| 
 | 
    93         except:
 | 
| 
 | 
    94             raise Exception("Couldn't open %s for writing" % outputFile)
 | 
| 
 | 
    95 
 | 
| 
 | 
    96                 
 | 
| 
 | 
    97     def run(self):
 | 
| 
 | 
    98         self.checkoption()
 | 
| 
 | 
    99         self.getGFFToBed(self._inputFile, self._outputFile)
 | 
| 
 | 
   100         
 | 
| 
 | 
   101 
 | 
| 
 | 
   102 if __name__== "__main__":
 | 
| 
 | 
   103     iGFFToBed = GFFToBed()
 | 
| 
 | 
   104     iGFFToBed.setAttributesFromCmdLine()
 | 
| 
 | 
   105     iGFFToBed.run()
 | 
| 
 | 
   106 
 | 
| 
 | 
   107 
 |