#! /usr/bin/env python
"""Compare overlap of two transcript lists"""

import os
from optparse import OptionParser
import misc.utils
from parsing.sequenceListParser import *
from structure.transcriptContainer import *
from writer.transcriptWriter import *
from mySql.mySqlConnection import *
from structure.transcriptListsComparator import *
from misc.rPlotter import *
from misc.progress import *


if __name__ == "__main__":
  # Command-line driver: loads two transcript lists, compares them with a
  # TranscriptListsComparator (input 1 is the QUERY, input 2 the REFERENCE),
  # then optionally writes the resulting transcripts, a plot of the odds,
  # and a CSV summary.

  # parse command line
  description = "Compare Overlapping: Get the data which overlap with a reference set. [Category: Data Comparison]"

  parser = OptionParser(description = description)
  parser.add_option("-i", "--input1",           dest="inputFileName1", action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
  parser.add_option("-f", "--format1",          dest="format1",        action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")
  parser.add_option("-j", "--input2",           dest="inputFileName2", action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
  parser.add_option("-g", "--format2",          dest="format2",        action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")
  parser.add_option("-o", "--output",           dest="output",         action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
  parser.add_option("-s", "--start2",           dest="start2",         action="store",      default=None,  type="int",    help="only consider start overlaps in file 2 [format: int]")
  parser.add_option("-S", "--start1",           dest="start1",         action="store",      default=None,  type="int",    help="only consider start overlaps in file 1 [format: int]")
  parser.add_option("-t", "--intron",           dest="introns",        action="store_true", default=False,                help="also report introns [format: bool] [default: false]")
  parser.add_option("-e", "--5primeExtension2", dest="fivePrime2",     action="store",      default=None,  type="int",    help="extension towards 5' in file 2 [format: int]")
  parser.add_option("-E", "--5primeExtension1", dest="fivePrime1",     action="store",      default=None,  type="int",    help="extension towards 5' in file 1 [format: int]")
  parser.add_option("-n", "--3primeExtension2", dest="threePrime2",    action="store",      default=None,  type="int",    help="extension towards 3' in file 2 [format: int]")
  parser.add_option("-N", "--3primeExtension1", dest="threePrime1",    action="store",      default=None,  type="int",    help="extension towards 3' in file 1 [format: int]")
  parser.add_option("-c", "--colinear",         dest="colinear",       action="store_true", default=False,                help="colinear only [format: bool] [default: false]")
  parser.add_option("-a", "--antisense",        dest="antisense",      action="store_true", default=False,                help="antisense only [format: bool] [default: false]")
  parser.add_option("-d", "--distance",         dest="distance",       action="store",      default=None,  type="int",    help="accept some distance between query and reference [format: int]")
  parser.add_option("-r", "--csv",              dest="csv",            action="store_true", default=False,                help="Excel output [format: bool] [default: false]")
  parser.add_option("-y", "--mysql",            dest="mysql",          action="store_true", default=False,                help="mySQL output [format: bool] [default: false]")
  parser.add_option("-x", "--exclude",          dest="exclude",        action="store_true", default=False,                help="invert the match [format: bool] [default: false]")
  parser.add_option("-v", "--verbosity",        dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
  parser.add_option("-l", "--log",              dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
  (options, args) = parser.parse_args()

  # NOTE: the log, plot and CSV file names below are all derived from
  # options.output; when -o is omitted they are built from the string "None".
  logHandle = None
  if options.log:
    logHandle = open("%s.log" % options.output, "w")

  # Everything that may use the log handle runs inside try/finally so that
  # the log file is closed even if loading, comparing or writing fails.
  try:
    transcriptContainer1 = TranscriptContainer(options.inputFileName1, options.format1, options.verbosity)
    transcriptContainer2 = TranscriptContainer(options.inputFileName2, options.format2, options.verbosity)

    # configure the comparator from the command-line options
    transcriptListComparator = TranscriptListsComparator(logHandle, options.verbosity)
    transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, options.start1)
    transcriptListComparator.restrictToStart(transcriptListComparator.REFERENCE, options.start2)
    transcriptListComparator.extendFivePrime(transcriptListComparator.QUERY, options.fivePrime1)
    transcriptListComparator.extendFivePrime(transcriptListComparator.REFERENCE, options.fivePrime2)
    transcriptListComparator.extendThreePrime(transcriptListComparator.QUERY, options.threePrime1)
    transcriptListComparator.extendThreePrime(transcriptListComparator.REFERENCE, options.threePrime2)
    transcriptListComparator.acceptIntrons(transcriptListComparator.QUERY, options.introns)
    transcriptListComparator.acceptIntrons(transcriptListComparator.REFERENCE, options.introns)
    transcriptListComparator.getAntisenseOnly(options.antisense)
    transcriptListComparator.getColinearOnly(options.colinear)
    transcriptListComparator.getInvert(options.exclude)
    transcriptListComparator.setMaxDistance(options.distance)
    transcriptListComparator.computeOdds(True)
    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer1)
    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, transcriptContainer2)
    transcriptListComparator.compareTranscriptList()
    outputTranscripts = transcriptListComparator.getOutputTranscripts()

    if options.output is not None:
      # the GFF3 file is only written when there is at least one transcript
      if outputTranscripts.getNbTranscripts() > 0:
        writer = TranscriptWriter(options.output, "gff3", options.verbosity)
        writer.setContainer(outputTranscripts)
        writer.write()

      # the SQL output is written regardless of the number of transcripts
      if options.mysql:
        writer = TranscriptWriter(options.output, "sql", options.verbosity)
        writer.setContainer(outputTranscripts)
        writer.write()
  finally:
    if logHandle is not None:
      logHandle.close()

  # the odds are only plotted and summarised when the match is not inverted
  if not options.exclude:
    odds = transcriptListComparator.getOdds()
    if odds:
      plotter = RPlotter("%s.png" % (options.output), options.verbosity)
      plotter.setFill(0)
      plotter.addLine(odds)
      plotter.plot()
      # Use the fully qualified name: "import misc.utils" binds "misc", not
      # "utils". The single parenthesised argument prints identically whether
      # print is a statement or a function.
      print("min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % misc.utils.getMinAvgMedMax(odds))

  if options.csv:
    csvResults = transcriptListComparator.getOddsPerTranscript()

    # Group the names by their number in a single pass; names keep the
    # iteration order of csvResults within each group, so the output matches
    # a per-number rescan of the whole mapping.
    namesPerNumber = {}
    for name in csvResults:
      namesPerNumber.setdefault(csvResults[name], []).append(name)

    csvFile = open("%s.csv" % (options.output), "w")
    try:
      csvFile.write("Number,Transcript\n")
      # one row per distinct number, followed by its space-separated names
      for number in sorted(namesPerNumber):
        csvFile.write("%d," % (number))
        for name in namesPerNumber[number]:
          csvFile.write("%s " % (name))
        csvFile.write("\n")
    finally:
      csvFile.close()
