#! /usr/bin/env python
"""
Remove empty sequences from a FASTA or FASTQ file
"""

import os
import random
import sys
from optparse import OptionParser

from parsing.fastaParser import *
from parsing.fastqParser import *
from writer.fastaWriter import *
from writer.fastqWriter import *
from misc.progress import *


if __name__ == "__main__":

  # Script entry point: copy every non-empty sequence of the input file to
  # "<output>.mfa" (FASTA) or "<output>.mfq" (FASTQ), optionally log the names
  # of the empty ones to "<output>.log", and print how many were empty.

  # parse command line
  description = "Remove Empty Sequences: Remove empty sequences from a FASTA or FASTQ file. [Category: Personnal]"

  # Kept under its own name so it is not confused with (or overwritten by)
  # the sequence parser created below.
  optionParser = OptionParser(description = description)
  optionParser.add_option("-i", "--input",        dest="inputFileName",     action="store",                      type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
  optionParser.add_option("-f", "--format",       dest="format",            action="store",                      type="string", help="format of the input file [compulsory] [format: sequence file format]")
  optionParser.add_option("-o", "--output",       dest="outputFileName",    action="store",                      type="string", help="output file [compulsory] [format: output file in format given by -f]")
  optionParser.add_option("-v", "--verbosity",    dest="verbosity",         action="store",      default=1,      type="int",    help="trace level [format: int] [default: 1]")
  optionParser.add_option("-l", "--log",          dest="log",               action="store_true", default=False,                 help="write a log file [format: bool] [default: false]")
  (options, args) = optionParser.parse_args()

  # The file options are documented as compulsory: fail early with a usage
  # message instead of silently producing files named "None.*".
  if options.inputFileName is None:
    optionParser.error("no input file given (option -i)")
  if options.outputFileName is None:
    optionParser.error("no output file given (option -o)")

  # pick the parser/writer pair matching the requested format
  if options.format == "fasta":
    sequenceParser = FastaParser(options.inputFileName, options.verbosity)
    writer = FastaWriter("%s.mfa" % (options.outputFileName), options.verbosity)
  elif options.format == "fastq":
    sequenceParser = FastqParser(options.inputFileName, options.verbosity)
    writer = FastqWriter("%s.mfq" % (options.outputFileName), options.verbosity)
  else:
    sys.exit("Do not understand '%s' file format." % (options.format))

  # Open the log exactly once, and only after the format has been validated,
  # so that an invalid format does not leave an empty log file behind.
  logHandle = None
  if options.log:
    logHandle = open("%s.log" % (options.outputFileName), "w")

  # process sequences
  nbEmpty = 0
  try:
    # Sequence count reported by the parser; reused for the final percentage.
    nbSequences = sequenceParser.getNbSequences()
    progress = Progress(nbSequences, "Reading sequences in %s" % (options.inputFileName), options.verbosity)
    for sequence in sequenceParser.getIterator():
      if sequence.sequence != "":
        writer.addSequence(sequence)
      else:
        if logHandle is not None:
          logHandle.write("%s is empty.\n" % (sequence.name))
        nbEmpty += 1
      progress.inc()
    progress.done()

    writer.write()
  finally:
    # Make sure the log is flushed and closed even if processing fails.
    if logHandle is not None:
      logHandle.close()

  # Guard against an input without any sequence (would divide by zero).
  if nbSequences:
    percentEmpty = float(nbEmpty) / nbSequences * 100
  else:
    percentEmpty = 0.0
  # Single-argument call form: same output under Python 2 and Python 3.
  print("%d sequences are empty (%.2f%%)." % (nbEmpty, percentEmpty))
