#! /usr/bin/env python
"""Read a mapping file (SeqMap) and select some of them"""

import os
from optparse import OptionParser
from seqmapParser import *
from progress import *


if __name__ == "__main__":
  nbSequences = 0
  nbRemaining = 0

  # parse command line
  parser = OptionParser()
  parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file")
  parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file")
  parser.add_option("-m", "--multiple",  dest="multiple",       action="store_true", default=False,                help="multiple matches allowed")
  parser.add_option("-n", "--mismatch",  dest="mismatch",       action="store",      default=0,     type="int",    help="maximum number of mismatches")
  parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level")
  parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file")
  (options, args) = parser.parse_args()

  if options.log:
    logHandle = open(options.outputFileName + ".log", "w")

  # remove possible existing output file
  if os.path.exists(options.outputFileName):
    os.unlink(options.outputFileName)

  seqmap      = SeqmapParser(options.inputFileName, options.verbosity)
  nbSequences = 0
  if options.verbosity > 0:
    seqmap.computeData()
    nbSequences = seqmap.getNbSequences()
    print "%i sequences found" %  seqmap.getNbSequences()


  # treat mappings
  progress     = Progress(nbSequences, "Analyzing reads of " + options.inputFileName, options.verbosity)
  outputHandle = open(options.outputFileName, "w")
  mappings     = []
  while seqmap.getNextMapping():
    mapping   = seqmap.getCurrentMapping()
    queryName = mapping.queryName

    # remove no match
    if not mapping.match:
      if options.log:
        logHandle.write(mapping.__str__() + " did not match\n")
    # remove mappings with many mismatches
    elif mapping.nbMismatches > options.mismatch:
      if options.log:
        logHandle.write("mapping " + mapping.__str__() + " has too many mismatches\n")
    # remove multiple matches
    elif not options.multiple and mapping.multiple:
      if options.log:
        logHandle.write("sequence %s maps several times\n" % (queryName))
    else:
      # write results
      outputHandle.write(str(mapping) + "\n")
      nbRemaining += 1

    progress.inc()

  progress.done()

  outputHandle.close()

  if options.log:
    logHandle.close()

  if options.verbosity > 0:
    print "kept %i over %i (%f%%)" % (nbRemaining, seqmap.getNbSequences(), float(nbRemaining) / seqmap.getNbSequences() * 100)


