#! /usr/bin/env python
"""Select the sequences or transcripts of a file whose size lies in a given range"""

import os
from optparse import OptionParser
from parsing.fastaParser import *
from parsing.fastqParser import *
from structure.transcriptContainer import *
from writer.transcriptWriter import *
from writer.fastaWriter import *
from writer.fastqWriter import *
from misc.progress import *
from misc.rPlotter import *


def isSizeInRange(size, minSize = None, maxSize = None):
  """Tell whether a size lies in the closed interval [minSize, maxSize].

  A bound set to None is not checked, so with both bounds set to None every
  size is accepted.
  """
  if minSize is not None and size < minSize:
    return False
  if maxSize is not None and size > maxSize:
    return False
  return True


def computeKeptPercentage(nbKept, nbItems):
  """Return the percentage of kept items among all items.

  Returns 0.0 when there is no item at all, instead of dividing by zero.
  """
  if nbItems == 0:
    return 0.0
  return float(nbKept) / nbItems * 100


if __name__ == "__main__":

  # parse command line
  description = "Restrict from Size: Select the elements of a list of sequences or transcripts with a given size. [Category: Data Selection]"

  optionParser = OptionParser(description = description)
  optionParser.add_option("-i", "--input",     dest="inputFileName",  action="store",                        type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]")
  optionParser.add_option("-f", "--format",    dest="format",         action="store",                        type="string", help="format of the input [compulsory] [format: sequence or transcript file format]")
  optionParser.add_option("-o", "--output",    dest="outputFileName", action="store",                        type="string", help="output file [compulsory] [format: output file in transcript or sequence format given by -f]")
  optionParser.add_option("-m", "--minSize",   dest="minSize",        action="store",      default=None,     type="int",    help="minimum size [format: int]")
  optionParser.add_option("-M", "--maxSize",   dest="maxSize",        action="store",      default=None,     type="int",    help="maximum size [format: int]")
  optionParser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,        type="int",    help="trace level [format: int]")
  optionParser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                   help="write a log file [format: bool] [default: false]")
  (options, args) = optionParser.parse_args()

  # the options documented as compulsory must be present: report a clean
  # usage error rather than failing later inside a parser or a writer
  for (flag, value) in (("-i/--input", options.inputFileName), ("-f/--format", options.format), ("-o/--output", options.outputFileName)):
    if value is None:
      optionParser.error("option %s is compulsory" % (flag))

  # choose the reader and the writer matching the input format; sequence
  # formats get a dedicated extension appended to the output file name
  if options.format == "fasta":
    itemParser = FastaParser(options.inputFileName, options.verbosity)
    writer     = FastaWriter("%s.mfa" % (options.outputFileName), options.verbosity)
  elif options.format == "fastq":
    itemParser = FastqParser(options.inputFileName, options.verbosity)
    writer     = FastqWriter("%s.mfq" % (options.outputFileName), options.verbosity)
  else:
    itemParser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
    writer     = TranscriptWriter(options.outputFileName, options.format, options.verbosity)


  # treat items: keep those whose size lies within the requested bounds
  nbItems    = itemParser.getNbItems()
  progress   = Progress(nbItems, "Analyzing sequences of %s" % (options.inputFileName), options.verbosity)
  nbKept     = 0
  for item in itemParser.getIterator():
    if isSizeInRange(item.getSize(), options.minSize, options.maxSize):
      writer.addElement(item)
      nbKept += 1
    progress.inc()
  progress.done()

  writer.write()

  # single-argument parenthesised print: same output on Python 2, and still
  # valid syntax on Python 3
  print("%d items, %d kept (%.2f%%)" % (nbItems, nbKept, computeKeptPercentage(nbKept, nbItems)))
