from parser.sequenceListParser import *

class FastqParser(SequenceListParser):
  """A class that reads a list of sequences in FASTQ format

  A record is made of a header line starting with "@", one or more sequence
  lines, a separator line starting with "+" (optionally repeating the read
  name), and one or more quality lines.  Quality strings may themselves start
  with "@" or "+", so quality lines are recognised by comparing the length of
  the quality read so far with the length of the sequence, not by their first
  character.
  """

  def __init__(self, fileName, verbosity = 0):
    """
    Constructor
    @param fileName:  name of the FASTQ file, forwarded to the parent class
    @param verbosity: verbosity level (progress is printed when >= 10)
    """
    super(FastqParser, self).__init__(fileName, verbosity)


  def getInfos(self):
    """
    Get some generic information about the sequences

    Sets self.nbSequences to the number of records and self.size to the
    cumulated length of their sequence lines.  self.reset() is called before
    and after the scan (presumably it rewinds the handle -- it is defined in
    the parent class).
    """
    self.nbSequences = 0
    self.size        = 0
    self.reset()
    if self.verbosity >= 10:
      print("Getting information on %s." % (self.fileName))

    inSequence     = False
    inQuality      = False
    sequenceLength = 0
    qualityLength  = 0
    for line in self.handle:
      line = line.strip()
      if line == "":
        continue
      # While the quality is shorter than the sequence, the line must be a
      # quality line, even if it starts with "@" or "+" (both are valid
      # quality symbols).
      if inQuality and qualityLength < sequenceLength:
        qualityLength += len(line)
        continue
      if line[0] == "@":
        self.nbSequences += 1
        inSequence        = True
        inQuality         = False
        sequenceLength    = 0
        qualityLength     = 0
        # Report progress once per counted header, not once per input line.
        if self.verbosity >= 10 and self.nbSequences % 100000 == 0:
          sys.stdout.write("  %d sequences read\r" % (self.nbSequences))
          sys.stdout.flush()
      elif line[0] == "+":
        inSequence = False
        inQuality  = True
      elif inSequence:
        sequenceLength += len(line)
        self.size      += len(line)
    self.reset()
    if self.verbosity >= 10:
      print("  %d sequences read" % (self.nbSequences))
      print("Done.")


  def _buildSequence(self, name, string, quality):
    """
    Build a Sequence from a name and a nucleotide string, and attach its quality
    """
    sequence         = Sequence(name, string)
    sequence.quality = quality
    return sequence


  def parseOne(self):
    """
    Parse only one element in the file

    The header of the next record is needed to detect the end of the current
    one; it is stored in self.currentLine and consumed by the next call.
    @return: a Sequence with its "quality" attribute set, or None when no
             record is left
    Exits the program (sys.exit) if the stored first line does not start with
    "@", if a "+" line is found before any header, or if a "+" line repeats a
    name that differs from the header name.
    """
    name       = None
    string     = ""
    quality    = ""
    inSequence = False
    inQuality  = False

    # Resume from the header read ahead by the previous call, if any.
    if self.currentLine is not None:
      if self.currentLine[0] != "@":
        sys.exit("First line is weird: %s" % (self.currentLine))
      name             = self.currentLine[1:]
      self.currentLine = None
      inSequence       = True

    for line in self.handle:
      line = line.strip()
      if line == "":
        continue
      # While the quality is shorter than the sequence, the line must be a
      # quality line, even if it starts with "@" or "+" (both are valid
      # quality symbols).
      if inQuality and len(quality) < len(string):
        quality += line
        continue
      if line[0] == "@":
        if name is not None:
          # Header of the next record: keep it for the next call.
          self.currentLine = line
          return self._buildSequence(name, string, quality)
        name       = line[1:]
        inSequence = True
        inQuality  = False
      elif line[0] == "+":
        repeatedName = line[1:]
        # Repeating the read name after "+" is optional: a bare "+" is valid.
        if name is None or (repeatedName != "" and repeatedName != name):
          sys.exit("Weird difference in sequence and quality names (%s and %s) while parsing FASTQ file %s." % (name, repeatedName, self.fileName))
        inQuality  = True
        inSequence = False
      elif inSequence:
        string += line
      elif inQuality:
        quality += line

    # End of file: emit the last record, if there is one.
    if name is None:
      return None
    return self._buildSequence(name, string, quality)
