#! /usr/bin/env python
"""Read a mapping file (MUMmer) and select some of them"""

import os
from optparse import OptionParser
from mummerParser import *
from progress import *


if __name__ == "__main__":
  # Filter the mappings of a MUMmer file and write the surviving ones, one
  # per line, to the output file.  A mapping is dropped when it is too short
  # (see -s) or, with -u, when its query name has already been kept once.
  nbSequences = 0
  nbRemaining = 0

  # parse command line
  parser = OptionParser()
  parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file")
  parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file")
  parser.add_option("-u", "--unique",    dest="unique",         action="store_true", default=False,                help="unique matches only")
  parser.add_option("-s", "--size",      dest="size",           action="store",      default=-1,    type="int",    help="minimum size of the matches")
  parser.add_option("-p", "--pcId",      dest="pcIdentity",     action="store",      default=100,   type="int",    help="minimum pourcentage of identity")
  parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level")
  parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file")
  (options, args) = parser.parse_args()

  # Both file names are used unconditionally below; report a usage error
  # instead of failing later with a TypeError on None.
  if options.inputFileName is None:
    parser.error("an input file is required (-i)")
  if options.outputFileName is None:
    parser.error("an output file is required (-o)")

  # NOTE(review): options.pcIdentity is parsed but no filter in this script
  # uses it -- confirm whether an identity filter is still expected.

  logHandle = None
  if options.log:
    logHandle = open(options.outputFileName + ".log", "w")

  try:
    # remove possible existing output file, so that a failure while parsing
    # does not leave a stale result behind
    if os.path.exists(options.outputFileName):
      os.unlink(options.outputFileName)

    mummerParser = MummerParser(options.inputFileName, options.verbosity)
    if options.verbosity > 0:
      mummerParser.computeData()
      nbSequences = mummerParser.getNbSequences()
      print("%i lines parsed" % mummerParser.getNbMappings())
      progress = Progress(mummerParser.getNbMappings(), "Analyzing " + options.inputFileName, options.verbosity)

    # treat sequences
    # With -u the mappings are indexed by query name, and a name seen more
    # than once is flagged with None; otherwise they are kept in input order.
    if options.unique:
      mappings = {}
    else:
      mappings = []
    while mummerParser.getNextMapping():
      mapping   = mummerParser.getCurrentMapping()
      queryName = mapping.queryName

      # remove short mappings: below the requested size, or (when no size is
      # given) shorter than the query itself
      if options.size > -1 and mapping.size <= options.size:
        if logHandle is not None:
          logHandle.write("size of mapping " + str(mapping) + " is too short\n")
      elif options.size == -1 and mapping.size < mapping.querySize:
        if logHandle is not None:
          logHandle.write("size of mapping " + str(mapping) + " is too short\n")

      # remove not unique mappings: the None marker also discards the
      # occurrence that was stored earlier for this query
      elif options.unique and queryName in mappings:
        mappings[queryName] = None
        if logHandle is not None:
          logHandle.write("mapping " + str(mapping) + " is not unique\n")

      else:
        # store a copy rather than the object handed out by the parser
        storedMapping = Mapping()
        storedMapping.copy(mapping)
        if options.unique:
          mappings[queryName] = storedMapping
        else:
          mappings.append(storedMapping)

      if options.verbosity > 0:
        progress.inc()

    if options.verbosity > 0:
      progress.done()
  finally:
    # close the log even if parsing fails half-way
    if logHandle is not None:
      logHandle.close()

  # write the surviving mappings
  outputHandle = open(options.outputFileName, "w")
  try:
    if options.unique:
      for queryName in mappings:
        value = mappings[queryName]
        # None marks a query name that was seen more than once
        if value is not None:
          outputHandle.write(str(value) + "\n")
          nbRemaining += 1
    else:
      for mapping in mappings:
        outputHandle.write(str(mapping) + "\n")
        nbRemaining += 1
  finally:
    outputHandle.close()

  if options.verbosity > 0:
    nbMappings = mummerParser.nbMappings
    # an empty input must not trigger a division by zero
    if nbMappings > 0:
      pcRemaining = float(nbRemaining) / nbMappings * 100
    else:
      pcRemaining = 0.0
    print("kept %i over %i (%f%%)" % (nbRemaining, nbMappings, pcRemaining))

