#! /usr/bin/env python
"""Get the repartition of some elements (# exons per transcripts, # of repetitions of a mapping or # of transcripts in a cluster)"""

import os
from optparse import OptionParser
from structure.transcriptContainer import *
from misc.rPlotter import *
from misc.progress import *
from misc import utils
from math import *

if __name__ == "__main__":
  
  # parse command line
  description = "Get Nb: Get the distribution of exons per transcripts, or mapping per read, or transcript per cluster. [Category: Visualization]"

  parser = OptionParser(description = description)
  parser.add_option("-i", "--input",      dest="inputFileName",     action="store",                        type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
  parser.add_option("-f", "--format",     dest="format",            action="store",                        type="string", help="format of the input file [compulsory] [format: transcript file format]")
  parser.add_option("-o", "--output",     dest="outputFileName",    action="store",                        type="string", help="output file [compulsory] [format: output file in GFF3 format]")
  parser.add_option("-q", "--query",      dest="query",             action="store",                        type="string", help="query (# exons, # transcripts) [format: choice (exon, transcript, cluster)]")  
  parser.add_option("-b", "--barplot",    dest="barplot",           action="store_true", default=False,                   help="use barplot representation [format: bool] [default: false]")
  parser.add_option("-x", "--xMax",       dest="xMax",              action="store",      default=None,     type="int",    help="maximum value on the x-axis to plot [format: int]")
  parser.add_option("-v", "--verbosity",  dest="verbosity",         action="store",      default=1,        type="int",    help="trace level [default: 1] [format: int]")
  parser.add_option("-l", "--log",        dest="log",               action="store_true", default=False,                   help="write a log file [format: bool] [default: false]")
  (options, args) = parser.parse_args()

  if options.query != "exon" and options.query != "transcript" and options.query != "cluster":
    sys.exit("Do not understand query %s" % (options.query))

  exonDistribution       = {}
  transcriptDistribution = {}
  clusterDistribution    = {}
  
  transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
    
  progress = Progress(transcriptContainer.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
  # count the number of reads
  for element in transcriptContainer.getIterator():
    if options.query == "exon":
      nbExons = len(element.getExons())
      if nbExons not in exonDistribution:
        exonDistribution[nbExons] = 1
      else:
        exonDistribution[nbExons] += 1
    elif options.query == "transcript":
      if element.queryInterval.name not in transcriptDistribution:
        transcriptDistribution[element.queryInterval.name] = 1
      else:
        transcriptDistribution[element.queryInterval.name] += 1
    elif options.query == "cluster":
      nbElements = 1 if "nbElements" not in element.getTagNames() else element.getTagValue("nbElements")
      if nbElements not in clusterDistribution:
        clusterDistribution[nbElements] = 1
      else:
        clusterDistribution[nbElements] += 1
    progress.inc()
  progress.done()
  
  if options.query == "exon":
    distribution = exonDistribution
  elif options.query == "transcript":
    distribution = {}
    for name in transcriptDistribution:
      if transcriptDistribution[name] not in distribution:
        distribution[transcriptDistribution[name]] = 1
      else:
        distribution[transcriptDistribution[name]] += 1
  elif options.query == "cluster":
    distribution = clusterDistribution
  
  outputFileName = "%s.png" % (options.outputFileName)
  plotter = RPlotter(outputFileName, options.verbosity)
  plotter.setImageSize(1000, 300)
  plotter.setFill(0)
  plotter.setMaximumX(options.xMax)
  plotter.setBarplot(options.barplot)
  plotter.addLine(distribution)
  plotter.plot()
       
  print "min/avg/med/max: %d/%.2f/%.1f/%d" % (utils.getMinAvgMedMax(distribution))
      
