| 
6
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 # Copyright INRA (Institut National de la Recherche Agronomique)
 | 
| 
 | 
     4 # http://www.inra.fr
 | 
| 
 | 
     5 # http://urgi.versailles.inra.fr
 | 
| 
 | 
     6 #
 | 
| 
 | 
     7 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     8 # abiding by the rules of distribution of free software.  You can  use,
 | 
| 
 | 
     9 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
    10 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
    11 # "http://www.cecill.info".
 | 
| 
 | 
    12 #
 | 
| 
 | 
    13 # As a counterpart to the access to the source code and  rights to copy,
 | 
| 
 | 
    14 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    15 # with a limited warranty  and the software's author,  the holder of the
 | 
| 
 | 
    16 # economic rights,  and the successive licensors  have only  limited
 | 
| 
 | 
    17 # liability.
 | 
| 
 | 
    18 #
 | 
| 
 | 
    19 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    20 # with loading,  using,  modifying and/or developing or reproducing the
 | 
| 
 | 
    21 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    22 # that may mean  that it is complicated to manipulate,  and  that  also
 | 
| 
 | 
    23 # therefore means  that it is reserved for developers  and  experienced
 | 
| 
 | 
    24 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    25 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    26 # requirements in conditions enabling the security of their systems and/or
 | 
| 
 | 
    27 # data to be ensured and,  more generally, to use and operate it in the
 | 
| 
 | 
    28 # same conditions as regards security.
 | 
| 
 | 
    29 #
 | 
| 
 | 
    30 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    31 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    32 
 | 
| 
 | 
    33 import os
 | 
| 
 | 
    34 from commons.core.checker.CheckerUtils import CheckerUtils
 | 
| 
 | 
    35 from commons.core.utils.RepetOptionParser import RepetOptionParser
 | 
| 
 | 
    36 from commons.core.utils.FileUtils import FileUtils
 | 
| 
 | 
    37 import subprocess
 | 
| 
 | 
    38 
 | 
| 
 | 
    39 class Bedtools_closest(object):
 | 
| 
 | 
    40          
 | 
| 
 | 
    41     def __init__(self, input_file_A = "", input_file_B = "", output_file = "", verbosity = 3):
 | 
| 
 | 
    42         self._input_file_A = input_file_A
 | 
| 
 | 
    43         self._input_file_B = input_file_B
 | 
| 
 | 
    44         self._output_file = output_file
 | 
| 
 | 
    45         self._verbosity = verbosity
 | 
| 
 | 
    46         
 | 
| 
 | 
    47     def setAttributesFromCmdLine(self):
 | 
| 
 | 
    48         description = "For each feature in A, finds the closest feature (upstream or downstream) in B.\n" 
 | 
| 
 | 
    49         usage = " Bedtools_closest [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf> -o <output_dir> \n"
 | 
| 
 | 
    50         parser = RepetOptionParser(description = description, usage = usage)
 | 
| 
 | 
    51         parser.add_option( '-a', '--input_file_A', dest='input_file_A', help='bed/gff/vcf' )
 | 
| 
 | 
    52         parser.add_option( '-b', '--input_file_B', dest='input_file_B', help='bed/gff/vcf' )
 | 
| 
 | 
    53         parser.add_option( '-o', '--output_file', dest='output_file', help='write all output in this file/bed/gff/vcf', default = "")
 | 
| 
 | 
    54         options, args = parser.parse_args()
 | 
| 
 | 
    55         self.setAttributesFromOptions(options)
 | 
| 
 | 
    56     
 | 
| 
 | 
    57     def setAttributesFromOptions(self, options):
 | 
| 
 | 
    58         self._input_file_A = options.input_file_A
 | 
| 
 | 
    59         self._input_file_B = options.input_file_B
 | 
| 
 | 
    60         self._output_file = options.output_file
 | 
| 
 | 
    61         
 | 
| 
 | 
    62     def checkExecutables(self):
 | 
| 
 | 
    63         if not CheckerUtils.isExecutableInUserPath("bedtools"):
 | 
| 
 | 
    64             raise Exception("ERROR: bedtools must be in your path")
 | 
| 
 | 
    65     
 | 
| 
 | 
    66     def checkOptions(self):   
 | 
| 
 | 
    67         if self._input_file_A != "":
 | 
| 
 | 
    68             if not FileUtils.isRessourceExists(self._input_file_A ):
 | 
| 
 | 
    69                 raise Exception("ERROR: reference file %s does not exist!" % self._input_file_A )
 | 
| 
 | 
    70         else:
 | 
| 
 | 
    71             raise Exception("ERROR: No specified -a option!")
 | 
| 
 | 
    72         
 | 
| 
 | 
    73         if self._input_file_B != "":
 | 
| 
 | 
    74             if not FileUtils.isRessourceExists(self._input_file_B):
 | 
| 
 | 
    75                 raise Exception("ERROR: reference file %s does not exist!" % self._input_file_B )
 | 
| 
 | 
    76         else:
 | 
| 
 | 
    77             raise Exception("ERROR: No specified -b option!")
 | 
| 
 | 
    78    
 | 
| 
 | 
    79     def getbedtoolsclosestCmd(self, file_A, file_B, output_file):
 | 
| 
 | 
    80         cmd = 'bedtools closest -a %s -b %s -d -D a > %s' % (file_A,file_B, output_file)
 | 
| 
 | 
    81         ##print cmd
 | 
| 
 | 
    82         return cmd
 | 
| 
 | 
    83     
 | 
| 
 | 
    84     def run(self):
 | 
| 
 | 
    85         self.checkExecutables()
 | 
| 
 | 
    86         self.checkOptions()
 | 
| 
 | 
    87         sortfileA = "%s.sorted" % self._input_file_A
 | 
| 
 | 
    88         sortfileB = "%s.sorted" % self._input_file_B
 | 
| 
 | 
    89         os.system("bedtools sort -i %s > %s " % (self._input_file_A, sortfileA))
 | 
| 
 | 
    90         os.system("bedtools sort -i %s > %s " % (self._input_file_B, sortfileB))
 | 
| 
 | 
    91 
 | 
| 
 | 
    92         try:
 | 
| 
 | 
    93             if os.path.exists(self._output_file):
 | 
| 
 | 
    94                 raise Exception("ERROR: %s already exists." % self._output_file)
 | 
| 
 | 
    95            
 | 
| 
 | 
    96             cmd_bedtoolsclosest = self.getbedtoolsclosestCmd(sortfileA, sortfileB, self._output_file)
 | 
| 
 | 
    97             ## hide output of subprocess: stdout = index_dir_stderr
 | 
| 
 | 
    98             fstdout = open( "bedtools_closest.log" , 'w' )
 | 
| 
 | 
    99             process = subprocess.Popen(cmd_bedtoolsclosest, shell = True, stdout = fstdout, stderr=subprocess.STDOUT)
 | 
| 
 | 
   100             returncode = process.wait()
 | 
| 
 | 
   101             fstdout.close()
 | 
| 
 | 
   102             # get stderr, allowing for case where it's very large
 | 
| 
 | 
   103             fstdout = open("bedtools_closest.log", 'rb' )
 | 
| 
 | 
   104             stderr = ''
 | 
| 
 | 
   105             buffsize = 1048576
 | 
| 
 | 
   106             try:
 | 
| 
 | 
   107                 while True:
 | 
| 
 | 
   108                     stderr += fstdout.read( buffsize )
 | 
| 
 | 
   109                     if not stderr or len( stderr ) % buffsize != 0:
 | 
| 
 | 
   110                         break
 | 
| 
 | 
   111             except OverflowError:
 | 
| 
 | 
   112                     pass
 | 
| 
 | 
   113             fstdout.close()
 | 
| 
 | 
   114             if returncode != 0:
 | 
| 
 | 
   115                     raise Exception, stderr
 | 
| 
 | 
   116             #os.system("mv cufflinks.log  %s/cufflinks.log " % workingDir)
 | 
| 
 | 
   117         except Exception:
 | 
| 
 | 
   118             raise Exception("ERROR in %s " % cmd_bedtoolsclosest)
 | 
| 
 | 
   119         
 | 
| 
 | 
   120                 
 | 
| 
 | 
   121 if __name__ == "__main__":
 | 
| 
 | 
   122     iLaunch = Bedtools_closest()
 | 
| 
 | 
   123     iLaunch.setAttributesFromCmdLine()
 | 
| 
 | 
   124     iLaunch.run()
 | 
| 
 | 
   125 
 |