#! /usr/bin/env python
"""Select the transcript that have not more that a given number of occurrences"""

import os
from optparse import OptionParser
from structure.transcriptContainer import *
from writer.gff3Writer import *
from misc.progress import *
from misc.rPlotter import *


def countOccurrences(transcripts, progress = None):
  """Count how many times each transcript name appears.

  transcripts: iterable of objects exposing a `name` attribute.
  progress:    optional object whose `inc()` method is called once per
               transcript read (used for progress display).

  Returns a dict mapping each name to its number of occurrences.
  """
  counts = dict()
  for transcript in transcripts:
    counts[transcript.name] = counts.get(transcript.name, 0) + 1
    if progress is not None:
      progress.inc()
  return counts


def computePercentage(part, total):
  """Return `part` as a percentage of `total`.

  Returns 0.0 when `total` is zero, so that an empty input does not raise
  a ZeroDivisionError in the final report.
  """
  if total == 0:
    return 0.0
  return float(part) / total * 100


if __name__ == "__main__":

  # parse command line
  description = "Select by # of Occurrences: Keep the reads which have mapped less than a given number of times. [Category: Personnal]"

  optionParser = OptionParser(description = description)
  optionParser.add_option("-i", "--input",       dest="inputFileName",  action="store",                        type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
  optionParser.add_option("-f", "--format",      dest="format",         action="store",                        type="string", help="format of the input [compulsory] [format: transcript file format]")
  optionParser.add_option("-n", "--occurrences", dest="occurrences",    action="store",      default=1,        type="int",    help="maximum number of occurrences allowed [format: int] [default: 1]")
  optionParser.add_option("-o", "--output",      dest="outputFileName", action="store",                        type="string", help="output file [format: output file in GFF3 format]")
  optionParser.add_option("-y", "--mysql",       dest="mysql",          action="store_true", default=False,                   help="mySQL output [format: bool] [default: false]")
  optionParser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,        type="int",    help="trace level [format: int] [default: 1]")
  optionParser.add_option("-l", "--log",         dest="log",            action="store_true", default=False,                   help="write a log file [format: bool] [default: false]")
  (options, args) = optionParser.parse_args()

  # the help texts mark these two options as compulsory: fail early with a
  # usage message instead of handing None to the transcript container
  if options.inputFileName is None or options.format is None:
    optionParser.error("options -i/--input and -f/--format are compulsory")

  container     = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
  # the count is needed for both progress bars and for the report: ask once
  nbTranscripts = container.getNbTranscripts()

  # first pass: get occurrences of the transcripts
  progress = Progress(nbTranscripts, "Reading names of %s" % (options.inputFileName), options.verbosity)
  names    = countOccurrences(container.getIterator(), progress)
  progress.done()

  # second pass: write the transcripts whose name is not seen too often
  nbWritten   = 0
  writer      = Gff3Writer(options.outputFileName, options.verbosity)
  mysqlWriter = None
  if options.mysql:
    # NOTE(review): MySqlTranscriptWriter is not imported by name in this
    # file; presumably it comes from one of the star imports -- confirm.
    mysqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
  progress = Progress(nbTranscripts, "Writing transcripts", options.verbosity)
  for transcript in container.getIterator():
    if names[transcript.name] <= options.occurrences:
      nbWritten += 1
      writer.addTranscript(transcript)
      if mysqlWriter is not None:
        mysqlWriter.addTranscript(transcript)
    progress.inc()
  progress.done()

  # NOTE(review): only the mySQL writer is explicitly written out; confirm
  # that Gff3Writer does not need an explicit write()/close() call as well.
  if mysqlWriter is not None:
    mysqlWriter.write()

  # single-argument print(...) gives the same output with the Python 2
  # print statement and with the Python 3 print function
  print("%d input" % (nbTranscripts))
  print("%d output (%.2f%%)" % (nbWritten, computePercentage(nbWritten, nbTranscripts)))
