| 
6
 | 
     1 #! /usr/bin/env python
 | 
| 
 | 
     2 #
 | 
| 
 | 
     3 # Copyright INRA-URGI 2009-2012
 | 
| 
 | 
     4 # 
 | 
| 
 | 
     5 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     6 # abiding by the rules of distribution of free software. You can use,
 | 
| 
 | 
     7 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
     8 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
     9 # "http://www.cecill.info".
 | 
| 
 | 
    10 # 
 | 
| 
 | 
    11 # As a counterpart to the access to the source code and rights to copy,
 | 
| 
 | 
    12 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    13 # with a limited warranty and the software's author, the holder of the
 | 
| 
 | 
    14 # economic rights, and the successive licensors have only limited
 | 
| 
 | 
    15 # liability.
 | 
| 
 | 
    16 # 
 | 
| 
 | 
    17 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    18 # with loading, using, modifying and/or developing or reproducing the
 | 
| 
 | 
    19 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    20 # that may mean that it is complicated to manipulate, and that also
 | 
| 
 | 
    21 # therefore means that it is reserved for developers and experienced
 | 
| 
 | 
    22 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    23 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    24 # requirements in conditions enabling the security of their systems and/or
 | 
| 
 | 
    25 # data to be ensured and, more generally, to use and operate it in the
 | 
| 
 | 
    26 # same conditions as regards security.
 | 
| 
 | 
    27 # 
 | 
| 
 | 
    28 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    29 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    30 #
 | 
| 
 | 
    31 import os
 | 
| 
 | 
    32 from optparse import OptionParser, OptionGroup
 | 
| 
 | 
    33 from commons.core.parsing.ParserChooser import ParserChooser
 | 
| 
 | 
    34 from commons.core.writer.Gff3Writer import Gff3Writer
 | 
| 
 | 
    35 from SMART.Java.Python.structure.Transcript import Transcript
 | 
| 
 | 
    36 from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
 | 
| 
 | 
    37 from SMART.Java.Python.ncList.FileSorter import FileSorter
 | 
| 
 | 
    38 from SMART.Java.Python.misc.Progress import Progress
 | 
| 
 | 
    39 from SMART.Java.Python.misc import Utils
 | 
| 
 | 
    40 
 | 
| 
 | 
    41 
 | 
| 
 | 
    42 class GetUpDownStream(object):
 | 
| 
 | 
    43 
 | 
| 
 | 
    44     def __init__(self, verbosity = 0):
 | 
| 
 | 
    45         self.verbosity         = verbosity
 | 
| 
 | 
    46         self.inputReader       = None
 | 
| 
 | 
    47         self.outputWriter      = None
 | 
| 
 | 
    48         self.nbRead            = 0
 | 
| 
 | 
    49         self.nbWritten         = 0
 | 
| 
 | 
    50         self.nbMerges          = 0
 | 
| 
 | 
    51         self.splittedFileNames = {}
 | 
| 
 | 
    52 
 | 
| 
 | 
    53     def __del__(self):
 | 
| 
 | 
    54         for fileName in self.splittedFileNames.values():
 | 
| 
 | 
    55             os.remove(fileName)
 | 
| 
 | 
    56             
 | 
| 
 | 
    57     def setInputFile(self, fileName, format):
 | 
| 
 | 
    58         parserChooser = ParserChooser(self.verbosity)
 | 
| 
 | 
    59         parserChooser.findFormat(format, "transcript")
 | 
| 
 | 
    60         self.parser = parserChooser.getParser(fileName)
 | 
| 
 | 
    61         self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])
 | 
| 
 | 
    62 
 | 
| 
 | 
    63     def setOutputFile(self, fileName):
 | 
| 
 | 
    64         self.outputWriter = Gff3Writer(fileName, self.verbosity)
 | 
| 
 | 
    65 
 | 
| 
 | 
    66     def setDistances(self, up, down):
 | 
| 
 | 
    67         self.upDistance   = up
 | 
| 
 | 
    68         self.downDistance = down
 | 
| 
 | 
    69 
 | 
| 
 | 
    70     def _sortFile(self):
 | 
| 
 | 
    71         fs = FileSorter(self.parser, self.verbosity-4)
 | 
| 
 | 
    72         fs.perChromosome(True)
 | 
| 
 | 
    73         fs.setOutputFileName(self.sortedFileName)
 | 
| 
 | 
    74         fs.sort()
 | 
| 
 | 
    75         self.splittedFileNames       = fs.getOutputFileNames()
 | 
| 
 | 
    76         self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
 | 
| 
 | 
    77         self.nbRead                  = fs.getNbElements()
 | 
| 
 | 
    78 
 | 
| 
 | 
    79     def _write(self, start, end, reference, after):
 | 
| 
 | 
    80         if start > end:
 | 
| 
 | 
    81             return
 | 
| 
 | 
    82         transcript = Transcript()
 | 
| 
 | 
    83         transcript.setChromosome(reference.getChromosome())
 | 
| 
 | 
    84         transcript.setStart(start)
 | 
| 
 | 
    85         transcript.setEnd(end)
 | 
| 
 | 
    86         transcript.setDirection("+")
 | 
| 
 | 
    87         transcript.setName("%s_%s" % ("up" if Utils.xor(reference.getDirection() == 1, after) else "down", reference.getName()))
 | 
| 
 | 
    88         self.outputWriter.addTranscript(transcript)
 | 
| 
 | 
    89         
 | 
| 
 | 
    90     def _getFlanking(self, chromosome):
 | 
| 
 | 
    91         progress    = Progress(self.nbElementsPerChromosome[chromosome], "Analyzing chromosome %s" % (chromosome), self.verbosity)
 | 
| 
 | 
    92         parser      = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
 | 
| 
 | 
    93         previous    = None
 | 
| 
 | 
    94         for transcript in parser.getIterator():
 | 
| 
 | 
    95             progress.inc()
 | 
| 
 | 
    96             transcript.removeExons()
 | 
| 
 | 
    97             if previous == None:
 | 
| 
 | 
    98                 distance = self.upDistance if transcript.getDirection() == 1 else self.downDistance
 | 
| 
 | 
    99                 start    = max(1, transcript.getStart() - distance)
 | 
| 
 | 
   100                 self._write(start, transcript.getStart()-1, transcript, False)
 | 
| 
 | 
   101                 previous = transcript
 | 
| 
 | 
   102                 continue
 | 
| 
 | 
   103             if previous.include(transcript):
 | 
| 
 | 
   104                 continue
 | 
| 
 | 
   105             if transcript.overlapWith(previous):
 | 
| 
 | 
   106                 previous = transcript
 | 
| 
 | 
   107                 continue
 | 
| 
 | 
   108             distancePrevious = self.downDistance if previous.getDirection()   == 1 else self.upDistance
 | 
| 
 | 
   109             distanceCurrent  = self.upDistance   if transcript.getDirection() == 1 else self.downDistance
 | 
| 
 | 
   110             distance = transcript.getDistance(previous)
 | 
| 
 | 
   111             if distancePrevious + distanceCurrent == 0:
 | 
| 
 | 
   112                 previous = transcript
 | 
| 
 | 
   113                 continue
 | 
| 
 | 
   114             if distance >= distancePrevious + distanceCurrent:
 | 
| 
 | 
   115                 endPrevious  = previous.getEnd() + distancePrevious
 | 
| 
 | 
   116                 startCurrent = transcript.getStart() - distanceCurrent
 | 
| 
 | 
   117             else:
 | 
| 
 | 
   118                 middle       = previous.getEnd() + int((distance-1) * float(distancePrevious) / (distancePrevious + distanceCurrent))
 | 
| 
 | 
   119                 endPrevious  = middle
 | 
| 
 | 
   120                 startCurrent = middle+1
 | 
| 
 | 
   121             self._write(previous.getEnd() + 1, endPrevious, previous, True)
 | 
| 
 | 
   122             self._write(startCurrent, transcript.getStart() - 1, transcript, False)
 | 
| 
 | 
   123             previous = transcript
 | 
| 
 | 
   124         distance = self.downDistance if previous.getDirection() == 1 else self.upDistance
 | 
| 
 | 
   125         self._write(previous.getEnd() + 1, previous.getEnd() + distance, previous, True)
 | 
| 
 | 
   126         progress.done()
 | 
| 
 | 
   127 
 | 
| 
 | 
   128     def run(self):
 | 
| 
 | 
   129         self._sortFile()
 | 
| 
 | 
   130         for chromosome in sorted(self.nbElementsPerChromosome.keys()):
 | 
| 
 | 
   131             self._getFlanking(chromosome)
 | 
| 
 | 
   132         self.outputWriter.close()
 | 
| 
 | 
   133 
 | 
| 
 | 
   134 if __name__ == "__main__":
 | 
| 
 | 
   135     
 | 
| 
 | 
   136     # parse command line
 | 
| 
 | 
   137     description = "Get Up and Down Stream v1.0.0: Get the flanking regions of an annotation. [Category: Data Modification]"
 | 
| 
 | 
   138 
 | 
| 
 | 
   139     parser = OptionParser(description = description)
 | 
| 
 | 
   140     parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")
 | 
| 
 | 
   141     parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the file [compulsory] [format: mapping file format]")
 | 
| 
 | 
   142     parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
 | 
| 
 | 
   143     parser.add_option("-u", "--up",        dest="up",             action="store",      default=0,     type="int",    help="the upstream distance  [format: int]")
 | 
| 
 | 
   144     parser.add_option("-d", "--down",      dest="down",           action="store",      default=0,     type="int",    help="the downstream distance  [format: int]")
 | 
| 
 | 
   145     parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
 | 
| 
 | 
   146     (options, args) = parser.parse_args()
 | 
| 
 | 
   147 
 | 
| 
 | 
   148     guds = GetUpDownStream(options.verbosity)
 | 
| 
 | 
   149     guds.setInputFile(options.inputFileName, options.format)
 | 
| 
 | 
   150     guds.setOutputFile(options.outputFileName)
 | 
| 
 | 
   151     guds.setDistances(options.up, options.down)
 | 
| 
 | 
   152     guds.run()
 |