import re
import sys
import os.path
import struct
from structure.interval import *
from parsing.transcriptListParser import *


class WigParser():
  """
  Parse big WIG files through a per-chromosome index, so that the values of a range can be retrieved quickly.

  The data of chromosome "X" is read from "<directoryName>/X.wig". Its index is a binary file made of
  fixed-size slots (struct format "Q"): slot number p holds the offset, in the WIG file, of the line which
  gives the value of position p, or the filler value when the WIG file has no value for this position.
  Slot 0 is always a filler, since positions start at 1.

  Only WIG files with one value per position are supported (no span, fixedStep with step=1 only).
  """

  # Line patterns, compiled once since they are applied to every line of possibly huge files
  _VALUE_LINE          = re.compile(r"^\s*(\d+\.?\d*)\s*$")
  _POSITION_VALUE_LINE = re.compile(r"^\s*(\d+)\s+(\d+\.?\d*)\s*$")
  _FIXED_STEP_UNIT     = re.compile(r"^\s*fixedStep\s+chrom=\S+\s+start=(\d+)\s+step=1\s*$")
  _FIXED_STEP_ANY      = re.compile(r"^\s*fixedStep\s+chrom=\S+\s+start=(\d+)\s+step=\d+\s*$")
  _FIXED_STEP_SPAN     = re.compile(r"^\s*fixedStep\s+chrom=\S+\s+start=\d+\s+step=\d+\s+span=\d+\s*$")
  _VARIABLE_STEP       = re.compile(r"^\s*variableStep\s+chrom=\S+\s*$")
  _VARIABLE_STEP_SPAN  = re.compile(r"^\s*variableStep\s+chrom=\S+\s+span=(\d+)\s*$")

  def __init__(self, directoryName, organism = None):
    """
    @param directoryName: directory which contains the "<chromosome>.wig" files (index files are stored there too)
    @param organism:      optional suffix appended to the index file names
    """
    self.directoryName = directoryName
    self.organism      = organism
    # One index slot with all bits set (i.e. "\xFF" * 8), which marks a position without value.
    # Built with struct.pack so that it is a byte string whatever the version of Python.
    self.filler        = struct.pack("Q", 2 ** (8 * struct.calcsize("Q")) - 1)


  @staticmethod
  def getFileFormats():
    """
    Get the list of the file formats handled by this parser
    """
    return ["wig"]


  def _makeWigName(self, chromosome):
    """
    Create the name of the WIG file of a chromosome
    """
    return os.path.join(self.directoryName, "%s.wig" % (chromosome))


  def makeIndexName(self, chromosome):
    """
    Create an index name for a file
    @return: "<directoryName>/.<chromosome>.index", followed by ".<organism>" if an organism was given
    """
    indexName = os.path.join(self.directoryName, ".%s.index" % (chromosome))
    if self.organism is not None:
      indexName = "%s.%s" % (indexName, self.organism)
    return indexName
  
  
  def findIndexFile(self, chromosome):
    """
    Check if the index of a file exists
    @return: the name of the index file if it exists, False otherwise
    """ 
    indexName = self.makeIndexName(chromosome)
    if os.path.exists(indexName):
      return indexName
    return False
  
  
  def makeIndexFile(self, chromosome):
    """
    Create the index for a file
    Exit (through sys.exit) if the WIG file uses a span, a fixed step different from 1, or contains a
    line which cannot be understood. Both files are closed in every case.
    NOTE(review): positions are presumed to be given in increasing order; a decreasing position would
    shift the following slots. Confirm whether unsorted files should be rejected.
    """
    outputFileName = self.makeIndexName(chromosome)
    
    inputFile = open(self._makeWigName(chromosome))
    try:
      outputFile = open(outputFileName, "wb")
      try:
        # Next slot to be written; slot 0 does not match any position and is filled right away
        index = 1
        outputFile.write(self.filler)
      
        while True:
          # The offset must be taken *before* reading the line, since the slot points to its start
          mark = inputFile.tell()
          line = inputFile.readline()
          if line == "":
            break
          line = line.strip()
          
          # Value alone: it belongs to the current position
          if self._VALUE_LINE.search(line) is not None:
            outputFile.write(struct.pack("Q", mark))
            index += 1
            continue
          
          # Position and value: pad the gap, then store the line for this position
          match = self._POSITION_VALUE_LINE.search(line)
          if match is not None:
            nextIndex = int(match.group(1))
            outputFile.write(self.filler * (nextIndex - index))
            outputFile.write(struct.pack("Q", mark))
            # The slot of nextIndex has just been written, so the next free slot is the following one
            index = nextIndex + 1
            continue
          
          # fixedStep header: pad up to the start position, the values come in the next lines
          match = self._FIXED_STEP_UNIT.search(line)
          if match is not None:
            nextIndex = int(match.group(1))
            outputFile.write(self.filler * (nextIndex - index))
            index = nextIndex
            continue
          
          if self._FIXED_STEP_SPAN.search(line) is not None:
            sys.exit("Error! Cannot parse fixed step WIG files with step > 1 or span > 1")
          if self._VARIABLE_STEP.search(line) is not None:
            continue
          if self._VARIABLE_STEP_SPAN.search(line) is not None:
            sys.exit("Error! Cannot parse variable step WIG files with step > 1 or span > 1")
          if (len(line) == 0) or (line[0] == "#"):
            continue
          sys.exit("Error! Cannot understand line '%s' of Conservation while creating index file! Aborting." % (line))
      finally:
        outputFile.close()
    finally:
      inputFile.close()
  
  
  def findIndex(self, chromosome, start):
    """
    Find the point where to start reading file
    The index is created if it does not exist, and rebuilt once if it is too short for the query.
    @return: [offset, position] where offset is the offset, in the WIG file, of the line of the first
             position >= start which has a value, and position is this position. If no such position
             exists, offset is the end of the WIG file, so that nothing can be read from there.
    """
    indexFileName = self.makeIndexName(chromosome)
    if not self.findIndexFile(chromosome):
      self.makeIndexFile(chromosome)
  
    sizeOfLong = struct.calcsize("Q")
    empty      = struct.unpack("Q", self.filler)[0]
    offset     = empty
  
    indexFile = open(indexFileName, "rb")
    try:
      # Skip the positions without value
      while offset == empty:
        address = start * sizeOfLong
        indexFile.seek(address, os.SEEK_SET)
        
        packed = indexFile.read(sizeOfLong)
        if len(packed) != sizeOfLong:
          # The index may be truncated or older than the WIG file: rebuild it and try again
          sys.stdout.write("Warning! Index %s seems wrong. Rebuilding it...\n" % (indexFileName))
          indexFile.close()
          self.makeIndexFile(chromosome)
          indexFile = open(indexFileName, "rb")
          indexFile.seek(address, os.SEEK_SET)
          packed = indexFile.read(sizeOfLong)
          if len(packed) != sizeOfLong:
            # A fresh index stops at the last position with a value: there is nothing left to read
            return [self._findEndOfWigFile(chromosome), start]
        
        offset = struct.unpack("Q", packed)[0]
        start += 1
    finally:
      indexFile.close()
  
    # The loop has stepped one position past the slot which was found
    return [offset, start - 1]
  
  
  def _findEndOfWigFile(self, chromosome):
    """
    Get the offset of the end of the WIG file of a chromosome
    """
    wigFile = open(self._makeWigName(chromosome))
    try:
      wigFile.seek(0, os.SEEK_END)
      return wigFile.tell()
    finally:
      wigFile.close()
  
  

  def getRange(self, chromosome, start, end):
    """
    Parse a wig file and output a range
    @return: the list of the values of the positions start to end (both included); the positions
             without value in the WIG file are set to 0.0
    Exit (through sys.exit) if a line cannot be understood.
    """
    array = [0.0] * (end - start + 1)

    wigFile = open(self._makeWigName(chromosome))
    try:
      offset, index = self.findIndex(chromosome, start)
      wigFile.seek(offset, os.SEEK_SET)

      for line in wigFile:
        line = line.strip()
        
        # Value alone: it belongs to the current position
        match = self._VALUE_LINE.search(line)
        if match is not None:
          if index > end:
            break
          if index >= start:
            array[index - start] = float(match.group(1))
          index += 1
          continue

        # Position and value
        match = self._POSITION_VALUE_LINE.search(line)
        if match is not None:
          index = int(match.group(1))
          if index > end:
            break
          if index >= start:
            array[index - start] = float(match.group(2))
          index += 1
          continue

        # fixedStep header: the next value belongs to the start position
        match = self._FIXED_STEP_ANY.search(line)
        if match is not None:
          index = int(match.group(1))
          continue

        if self._VARIABLE_STEP.search(line) is not None:
          continue
        if (len(line) == 0) or (line[0] == "#"):
          continue
        sys.exit("Error! Cannot read line '%s' of wig file" % (line))
    finally:
      wigFile.close()
  
    return array
  
