import re
import sys
from structure.mapping import *
from parsing.mapperParser import *


class SoapParser(MapperParser):
  """A class that parses the output of SOAP.

  Each line of the input is matched against a ten-column,
  whitespace-separated layout and converted into a Mapping object.
  """

  # Captured groups, in order (the usage of each group is shown in parseLine):
  #   1: read name              6: integer column (unused)
  #   2: read sequence          7: strand, '+' or '-'
  #   3: quality string         8: target chromosome name
  #   4: integer column         9: start position on the target
  #      (unused)              10: leading integer of the last column, stored
  #   5: literal flag 'a'          as the number of mismatches
  # The quality string is never used, so any run of non-blank characters is
  # accepted for it rather than a fixed letter.
  # NOTE(review): per the SOAP output documentation, column 5 is the a/b
  # paired-end flag; lines flagged 'b' are rejected here -- confirm intended.
  _LINE_PATTERN = re.compile(r"^\s*(\S+)\s+(\w+)\s+(\S+)\s+(\d+)\s+(a)\s+(\d+)\s+([+-])\s+(\w+)\s+(\d+)\s+(\d+)")

  def __init__(self, fileName, verbosity = 0):
    """Forward the file name and the verbosity level to the base parser."""
    super(SoapParser, self).__init__(fileName, verbosity)


  def __del__(self):
    """Delegate the clean-up to the base parser."""
    super(SoapParser, self).__del__()


  @staticmethod
  def getFileFormats():
    """Return the list of format names handled by this parser."""
    return ["soap"]


  def skipFirstLines(self):
    """Skip nothing: no leading line is discarded for this format."""
    pass


  def parseLine(self, line):
    """Convert one line of SOAP output into a Mapping.

    Arguments:
      line -- one line of text read from the SOAP file

    Returns a Mapping whose query interval starts at 1 and whose query,
    target and overall sizes are all the length of the read sequence.

    Terminates the program through sys.exit (SystemExit) when the line does
    not follow the expected layout.
    """
    m = self._LINE_PATTERN.search(line)
    if m is None:
      # self.currentLineNb is not set in this class; presumably it is
      # maintained by the base parser -- confirm.
      sys.exit("\nLine %d '%s' does not have a SOAP format" % (self.currentLineNb, line))

    # The read length is taken from the sequence itself (group 2), not from
    # the integer column of group 6.
    size = len(m.group(2))

    mapping = Mapping()

    mapping.queryInterval.setName(m.group(1))
    mapping.queryInterval.setStart(1)
    mapping.queryInterval.setSize(size)

    mapping.targetInterval.setChromosome(m.group(8))
    mapping.targetInterval.setStart(int(m.group(9)))
    mapping.targetInterval.setSize(size)

    mapping.setDirection(m.group(7))
    mapping.setSize(size)
    # NOTE(review): the SOAP documentation describes the last column as a hit
    # "type" where large values encode indels -- confirm that using it
    # directly as a mismatch count is intended.
    mapping.setNbMismatches(int(m.group(10)))

    return mapping
