#! /usr/bin/env python
"""Read a mapping file (Nucmer) and select some of them"""

import os
from optparse import OptionParser
from nucmerParser import *
from progress import *


def percentage(part, total):
  """Return part / total expressed as a percentage (float).

  A zero (or otherwise false) total yields 0.0 instead of raising
  ZeroDivisionError, so that an empty mapping file or a mapping with a
  null query size does not abort the whole run.
  """
  if not total:
    return 0.0
  return float(part) / total * 100


def main():
  """Filter the mappings of a Nucmer file and write the kept ones.

  The input file is read twice: a first pass counts how many mappings each
  query sequence has, a second pass writes to the output file every mapping
  which
    - covers at least --size percent of its query,
    - has an identity of at least --identity,
    - belongs to a query that maps at most --number times.
  With --log, the reason for each rejected mapping is written to
  "<output file>.log".
  """
  # parse command line
  parser = OptionParser()
  parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file")
  parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file")
  parser.add_option("-n", "--number",    dest="number",         action="store",      default=1,     type="int",    help="max. number of occurrences of a sequence")
  parser.add_option("-s", "--size",      dest="size",           action="store",      default=100,   type="int",    help="minimum pourcentage of size")
  parser.add_option("-d", "--identity",  dest="identity",       action="store",      default=100,   type="int",    help="minimum pourcentage of identity")
  parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level")
  parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file")
  (options, args) = parser.parse_args()

  # both file names are concatenated to strings below; report a missing one
  # as a usage error rather than letting it surface as a TypeError
  if options.inputFileName is None:
    parser.error("an input file is required (-i)")
  if options.outputFileName is None:
    parser.error("an output file is required (-o)")

  logHandle = None
  if options.log:
    logHandle = open(options.outputFileName + ".log", "w")

  try:
    # remove possible existing output file
    if os.path.exists(options.outputFileName):
      os.unlink(options.outputFileName)

    nucmer     = NucmerParser(options.inputFileName, options.verbosity)
    nbMappings = 0
    # the statistics are only computed when they are going to be displayed;
    # otherwise the progress bars are created with a size of 0
    if options.verbosity > 0:
      nucmer.computeData()
      nbMappings = nucmer.getNbMappings()
      print("%i matches found, with %i different sequences" % (nbMappings, nucmer.getNbSequences()))

    # first pass: count the number of mappings of each query sequence
    progress      = Progress(nbMappings, "Analyzing sequences of " + options.inputFileName, options.verbosity)
    nbOccurrences = {}
    while nucmer.getNextMapping():
      queryName = nucmer.getCurrentMapping().queryName
      nbOccurrences[queryName] = nbOccurrences.get(queryName, 0) + 1
      progress.inc()
    progress.done()

    # second pass: filter the mappings
    # (reset() presumably rewinds the parser to the first mapping)
    nucmer.reset()
    progress     = Progress(nbMappings, "Analyzing reads of " + options.inputFileName, options.verbosity)
    nbRemaining  = 0
    outputHandle = open(options.outputFileName, "w")
    try:
      while nucmer.getNextMapping():
        mapping   = nucmer.getCurrentMapping()
        queryName = mapping.queryName

        # remove short mappings
        if percentage(mapping.size, mapping.querySize) < options.size:
          if logHandle is not None:
            logHandle.write("size of mapping " + str(mapping) + " is too short\n")
        # remove mappings with low identity
        elif mapping.identity < options.identity:
          if logHandle is not None:
            logHandle.write("mapping " + str(mapping) + " has a low identity rate\n")
        # remove too frequent mappings
        elif nbOccurrences[queryName] > options.number:
          if logHandle is not None:
            logHandle.write("sequence %s maps %i times\n" % (queryName, nbOccurrences[queryName]))
        else:
          # write results
          outputHandle.write(str(mapping) + "\n")
          nbRemaining += 1

        progress.inc()

      progress.done()
    finally:
      # close the output even if the parser fails half-way through
      outputHandle.close()
  finally:
    if logHandle is not None:
      logHandle.close()

  if options.verbosity > 0:
    nbTotal = nucmer.getNbMappings()
    print("kept %i over %i (%f%%)" % (nbRemaining, nbTotal, percentage(nbRemaining, nbTotal)))


if __name__ == "__main__":
  main()

