#! /usr/bin/env python
"""Update a .qual file given a .fasta file"""

from optparse import OptionParser
from parsing.fastaParser import *
from misc.progress import *


def readQualRecords(handle):
  """Yield (name, qualityLines) for each record read from a QUAL file.

  Internal helper of this script.

  handle: an iterable of text lines (typically an open file).

  A record starts at a line beginning with ">" and runs until the next such
  line or the end of the input. The name is the header line without its
  leading ">". Every line is stripped of surrounding whitespace; blank lines
  are skipped, and quality lines found before the first header are ignored.
  Records are produced lazily, so the file is read in a single forward pass.
  """
  name         = None
  qualityLines = []
  for line in handle:
    line = line.strip()
    if not line:
      # A blank line carries no data (and has no first character to test).
      continue
    if line.startswith(">"):
      if name is not None:
        yield (name, qualityLines)
      name         = line[1:]
      qualityLines = []
    elif name is not None:
      qualityLines.append(line)
  if name is not None:
    yield (name, qualityLines)


if __name__ == "__main__":

  # parse command line
  description = "Update Qual: Remove the sequence in a Qual file which are not in the corresponding Fasta file. [Category: Personnal]"

  optionParser = OptionParser(description = description)
  optionParser.add_option("-f", "--fasta",     dest="fastaFile",  action="store",                     type="string", help="fasta file [compulsory] [format: file in FASTA format]")
  optionParser.add_option("-q", "--qual",      dest="qualFile",   action="store",                     type="string", help="qual file [compulsory] [format: file in QUAL format]")
  optionParser.add_option("-o", "--output",    dest="outputFile", action="store",                     type="string", help="output file [compulsory] [format: output file in QUAL format]")
  optionParser.add_option("-v", "--verbosity", dest="verbosity",  action="store",      default=1,     type="int",    help="trace level [format: int]")
  (options, args) = optionParser.parse_args()

  # Fail with a usage message rather than a traceback when a compulsory
  # option is missing (error() prints the message and exits).
  for (value, flag) in ((options.fastaFile, "--fasta"), (options.qualFile, "--qual"), (options.outputFile, "--output")):
    if value is None:
      optionParser.error("option %s is compulsory" % (flag))

  sequenceParser = SequenceListParser(options.fastaFile, options.verbosity)
  nbSequences    = sequenceParser.getNbSequences()
  progress       = Progress(nbSequences, "Parsing file %s" % (options.fastaFile), options.verbosity)
  nbRefused      = 0
  nbTotal        = 0

  # Both files are walked once, in parallel: for each FASTA sequence, QUAL
  # records are consumed until one with the same name is found. This relies on
  # the two files listing their common sequences in the same order.
  with open(options.qualFile) as qualHandle:
    with open(options.outputFile, "w") as outputHandle:
      qualRecords = readQualRecords(qualHandle)
      while sequenceParser.getNextSequence():
        sequence = sequenceParser.getCurrentSequence()
        nbTotal += 1

        # The generator keeps its position, so each search resumes right after
        # the record matched for the previous sequence.
        for (name, qualityLines) in qualRecords:
          if name == sequence.name:
            # Copy the whole record, not only its first quality line.
            outputHandle.write(">%s\n" % (name))
            for qualityLine in qualityLines:
              outputHandle.write("%s\n" % (qualityLine))
            break
          nbRefused += 1
        progress.inc()
      progress.done()

  # NOTE: nbRefused counts skipped QUAL records whereas nbTotal counts FASTA
  # sequences, so the ratio may exceed 100%. An empty FASTA file reports 0%.
  if nbTotal:
    percentage = float(nbRefused) / nbTotal * 100
  else:
    percentage = 0.0
  print("%d out of %d are refused (%f%%)" % (nbRefused, nbTotal, percentage))
