#! /usr/bin/env python
"""Get the distance between the transcripts of two lists"""

import os
from optparse import OptionParser
from parsing.sequenceListParser import *
from structure.transcriptListsComparator import *
from writer.transcriptWriter import *
from misc.rPlotter import *
from misc.progress import *


if __name__ == "__main__":
  # Script entry point. Steps, in order:
  #   1. parse the command line,
  #   2. load the two transcript files,
  #   3. configure a TranscriptListsComparator (file 1 is the query, file 2
  #      the reference) and compute the distances,
  #   4. if any distance was found, plot the distribution to
  #      "<outputFileName>.png" and, when -O is given, write the output
  #      transcripts to a GFF3 file.

  # parse command line
  description = "Get Distance: Compute the distance of a set of transcript with respect to a reference set. [Category: Visualization]"

  parser = OptionParser(description = description)
  parser.add_option("-i", "--input1",           dest="inputFileName1",  action="store",                    type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
  parser.add_option("-f", "--format1",          dest="format1",         action="store",                    type="string", help="format of file 1 [compulsory] [format: transcript file format]")
  parser.add_option("-j", "--input2",           dest="inputFileName2",  action="store",                    type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
  parser.add_option("-g", "--format2",          dest="format2",         action="store",                    type="string", help="format of file 2 [compulsory] [format: transcript file format]")
  parser.add_option("-o", "--output",           dest="outputFileName",  action="store",                    type="string", help="plot output file [format: output file in PNG format]")
  parser.add_option("-O", "--outputDistances",  dest="outputDistances", action="store",      default=None, type="string", help="output file containing the distance for each element of the query [format: output file in GFF3 format] [default: None]")
  parser.add_option("-c", "--colinear",         dest="colinear",        action="store_true", default=False,               help="only consider features on the same strand [format: bool] [default: false]")
  parser.add_option("-a", "--antisense",        dest="antisense",       action="store_true", default=False,               help="only consider features on the opposite strand [format: bool] [default: false]")
  parser.add_option("-b", "--absolute",         dest="absolute",        action="store_true", default=False,               help="give the absolute value of the distance [format: bool] [default: false]")
  parser.add_option("-p", "--proportion",       dest="proportion",      action="store_true", default=False,               help="gives the proportion on the y-axis instead of the number of distances [format: bool] [default: false]")
  parser.add_option("-s", "--start1",           dest="start1",          action="store",      default=None, type="int",    help="only consider the n first 5' nucleotides for list 1 [format: int]")
  parser.add_option("-S", "--start2",           dest="start2",          action="store",      default=None, type="int",    help="only consider the n first 5' nucleotides for list 2 [format: int]")
  parser.add_option("-e", "--end1",             dest="end1",            action="store",      default=None, type="int",    help="only consider the n last 3' nucleotides for list 1 [format: int]")
  parser.add_option("-E", "--end2",             dest="end2",            action="store",      default=None, type="int",    help="only consider the n last 3' nucleotides for list 2 [format: int]")
  parser.add_option("-m", "--minDistance",      dest="minDistance",     action="store",      default=None, type="int",    help="minimum distance considered between two transcripts [format: int] [default: None]")
  parser.add_option("-M", "--maxDistance",      dest="maxDistance",     action="store",      default=1000, type="int",    help="maximum distance considered between two transcripts [format: int] [default: 1000]")
  parser.add_option("-5", "--fivePrime",        dest="fivePrime",       action="store_true", default=False,               help="consider the elements from list 1 which are upstream of elements of list 2 [format: bool] [default: False]")
  parser.add_option("-3", "--threePrime",       dest="threePrime",      action="store_true", default=False,               help="consider the elements from list 1 which are downstream of elements of list 2 [format: bool] [default: False]")
  parser.add_option("-u", "--buckets",          dest="buckets",         action="store",      default=None, type="int",    help="plot histogram instead of line plot with given interval size [format: int] [default: None]")
  parser.add_option("-r", "--spearman",         dest="spearman",        action="store_true", default=False,               help="compute Spearman rho [format: bool] [default: False]")
  parser.add_option("-x", "--xMin",             dest="xMin",            action="store",      default=None, type="int",    help="minimum value on the x-axis to plot [format: int] [default: None]")
  parser.add_option("-X", "--xMax",             dest="xMax",            action="store",      default=None, type="int",    help="maximum value on the x-axis to plot [format: int] [default: None]")
  parser.add_option("-t", "--title",            dest="title",           action="store",      default=None, type="string", help="title for the graph [format: string] [default: None]")
  parser.add_option("-v", "--verbosity",        dest="verbosity",       action="store",      default=1,    type="int",    help="trace level [format: int]")
  # NOTE(review): -l takes a file name (type="string", passed to open() below)
  # although its help tag says "bool"; the tag is left as is because it may be
  # read by external tooling -- confirm before changing.
  parser.add_option("-l", "--log",              dest="log",             action="store",      default=None, type="string", help="write a log file [format: bool]")
  parser.add_option("-k", "--keep",             dest="keep",            action="store_true", default=False,               help="keep temporary files [format: bool]")
  (options, _) = parser.parse_args()

  # The help marks these four options as compulsory: reject the command line
  # with a usage message instead of handing None to TranscriptContainer.
  for dest, flag in (("inputFileName1", "-i/--input1"),
                     ("format1",        "-f/--format1"),
                     ("inputFileName2", "-j/--input2"),
                     ("format2",        "-g/--format2")):
    if getattr(options, dest) is None:
      parser.error("option %s is compulsory" % (flag))

  logHandle = None
  if options.log is not None:
    logHandle = open(options.log, "w")

  # Everything below may raise; the finally clause guarantees that the log
  # file is closed whatever happens.
  try:
    # store input files
    transcriptContainer1 = TranscriptContainer(options.inputFileName1, options.format1, options.verbosity)
    transcriptContainer2 = TranscriptContainer(options.inputFileName2, options.format2, options.verbosity)

    # compare data (list 1 is the query, list 2 the reference)
    transcriptListComparator = TranscriptListsComparator(logHandle, options.verbosity)
    transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, options.start1)
    transcriptListComparator.restrictToStart(transcriptListComparator.REFERENCE, options.start2)
    transcriptListComparator.restrictToEnd(transcriptListComparator.QUERY, options.end1)
    transcriptListComparator.restrictToEnd(transcriptListComparator.REFERENCE, options.end2)
    transcriptListComparator.setAbsolute(options.absolute)
    transcriptListComparator.getColinearOnly(options.colinear)
    transcriptListComparator.getAntisenseOnly(options.antisense)
    transcriptListComparator.setMinDistance(options.minDistance)
    transcriptListComparator.setMaxDistance(options.maxDistance)
    transcriptListComparator.setUpstream(transcriptListComparator.REFERENCE, options.fivePrime)
    transcriptListComparator.setDownstream(transcriptListComparator.REFERENCE, options.threePrime)
    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer1)
    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, transcriptContainer2)
    distances = transcriptListComparator.compareTranscriptListDistance()

    # plot distribution (nothing is plotted nor written when no distance was found)
    if len(distances) != 0:
      # NOTE(review): -o is not checked, so the plot goes to "None.png" when
      # the option is omitted -- confirm whether -o should be compulsory.
      plotter = RPlotter("%s.png" % (options.outputFileName), options.verbosity, options.keep)
      if options.buckets is not None:
        plotter.setBarplot(True)
      plotter.setFill(0)
      plotter.setXLabel("distance")
      plotter.setYLabel("# elements")
      plotter.setBuckets(options.buckets)
      plotter.setMinimumX(options.xMin)
      plotter.setMaximumX(options.xMax)
      plotter.setTitle(options.title)
      if options.proportion:
        # convert the counts to percentages of the total number of distances;
        # size is a float so that the division is never an integer division
        size      = float(sum(distances.values()))
        distances = dict((distance, nb / size * 100) for distance, nb in distances.items())
        plotter.setYLabel("%% elements (%d in toto)" % (size))
      plotter.addLine(distances)
      plotter.plot()
      if options.spearman:
        # single parenthesized argument: prints the same string under Python 2
        print("Spearman's rho: %.5f" % plotter.getSpearmanRho())

      # write distances into output transcript file
      if options.outputDistances is not None:
        outputTranscripts = transcriptListComparator.getOutputTranscripts()
        writer = TranscriptWriter(options.outputDistances, "gff3", options.verbosity)
        writer.setContainer(outputTranscripts)
        writer.write()
  finally:
    if logHandle is not None:
      logHandle.close()
