#! /usr/bin/env python
"""Find TSS from short reads"""

import os
from optparse import OptionParser
from parsing.sequenceListParser import *
from writer.transcriptWriter import *
from structure.transcriptListsComparator import *
from misc.rPlotter import *
import misc.utils
from misc.progress import *


def formatOddsCsv(oddsPerTranscript):
  """Render the per-transcript counts as CSV text.

  oddsPerTranscript: mapping from a transcript name to an integer count.

  Returns a string made of the header line "Number,Transcript" followed by
  one line per distinct count, in increasing order of count.  Each line is
  the count, a comma, then every name having that count, each name being
  followed by a single space.  Names appear in the iteration order of the
  mapping.
  """
  # Group the names by count in one pass instead of rescanning the whole
  # mapping for every distinct count.
  namesPerNumber = {}
  for name in oddsPerTranscript:
    namesPerNumber.setdefault(oddsPerTranscript[name], []).append(name)
  lines = ["Number,Transcript\n"]
  for number in sorted(namesPerNumber):
    names = "".join(["%s " % (name) for name in namesPerNumber[number]])
    lines.append("%d,%s\n" % (number, names))
  return "".join(lines)


if __name__ == "__main__":

  # parse command line
  description = "Find TSS: Find the transcription start site of a list of transcripts. [Category: Merge]"

  parser = OptionParser(description = description)
  parser.add_option("-i", "--input",           dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
  parser.add_option("-f", "--format",          dest="format",         action="store",                     type="string", help="format of file [compulsory] [format: transcript file format]")
  parser.add_option("-o", "--output",          dest="output",         action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
  parser.add_option("-d", "--distance",        dest="distance",       action="store",      default=10,    type="int",    help="distance between two reads to mark the same TSS [format: int] [default: 10]")
  parser.add_option("-s", "--odds",            dest="odds",           action="store_true", default=False,                help="give odds [format: bool] [default: false]")
  parser.add_option("-m", "--max",             dest="max",            action="store",      default=None,  type="int",    help="maximum value to display on the x-axis [format: int]")
  parser.add_option("-c", "--csv",             dest="csv",            action="store_true", default=False,                help="output a CSV file [format: bool] [default: false]")
  parser.add_option("-y", "--mysql",           dest="mysql",          action="store_true", default=False,                help="mySQL output [format: bool] [default: false]")
  parser.add_option("-v", "--verbosity",       dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
  # NOTE(review): the help text below says "bool" but the option takes a file
  # name; left untouched in case the help strings are parsed by other tools.
  parser.add_option("-l", "--log",             dest="log",            action="store",      default=None,  type="string", help="write a log file [format: bool]")
  (options, args) = parser.parse_args()

  # Fail early, with a usage message, rather than deep inside the comparison.
  if options.inputFileName is None or options.format is None:
    parser.error("options -i/--input and -f/--format are compulsory")
  # The plot and the CSV file are both named after the output prefix; without
  # it they would be written to "None.png" / "None.csv".
  if (options.odds or options.csv) and options.output is None:
    parser.error("option -o/--output is required with -s/--odds and -c/--csv")

  logHandle = None
  if options.log is not None:
    logHandle = open(options.log, "w")

  try:
    # Compare the input transcripts with themselves, restricted to their
    # start, and merge those lying within the requested distance.
    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
    transcriptListComparator = TranscriptListsComparator(logHandle, options.verbosity)
    transcriptListComparator.restrictToStart(transcriptListComparator.REFERENCE, 1)
    transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, 1)
    transcriptListComparator.setDistance(options.distance)
    transcriptListComparator.aggregate(True)
    transcriptListComparator.computeOdds(True)
    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer)
    transcriptListComparator.compareTranscriptListSelfMerge()
    outputTranscriptContainer = transcriptListComparator.getOutputTranscripts()

    # Write the merged transcripts (without exons) as GFF3, and optionally
    # through the "sql" writer as well.
    if options.output is not None:
      sqlWriter = None
      if options.mysql:
        sqlWriter = TranscriptWriter(options.output, "sql", options.verbosity)
      writer = Gff3Writer("%s.gff3" % (options.output), options.verbosity)
      for transcript in outputTranscriptContainer.getIterator():
        transcript.removeExons()
        writer.addTranscript(transcript)
        if sqlWriter is not None:
          sqlWriter.addTranscript(transcript)
      if sqlWriter is not None:
        sqlWriter.write()

    # Plot the odds and print their summary statistics.
    if options.odds:
      odds    = transcriptListComparator.getOdds()
      plotter = RPlotter("%s.png" % (options.output), options.verbosity)
      plotter.setFill(0)
      plotter.setMaximumX(options.max)
      plotter.addLine(odds)
      plotter.plot()
      # "import misc.utils" only binds the name "misc", so the helper has to
      # be reached through its fully qualified name.
      print("min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % misc.utils.getMinAvgMedMax(odds))

    # Dump the per-transcript counts, grouped by count.
    if options.csv:
      csvText = formatOddsCsv(transcriptListComparator.getOddsPerTranscript())
      csvFile = open("%s.csv" % (options.output), "w")
      try:
        csvFile.write(csvText)
      finally:
        csvFile.close()
  finally:
    # Close the log even when one of the steps above fails.
    if logHandle is not None:
      logHandle.close()
