"""Some useful functions"""

import sys, os
import random
import subprocess


def sumOfLists(list1, list2):
  """
  Add two lists position by position
  @param list1: first operand
  @type  list1: list
  @param list2: second operand, which must be as long as list1
  @type  list2: list
  @return: a new list whose i-th item is list1[i] + list2[i]
  """
  if len(list1) == len(list2):
    return [left + right for left, right in zip(list1, list2)]
  # Lists of different sizes cannot be summed: stop the program
  sys.exit("Cannot sum list whose sizes are different!")
  

def getHammingDistance(string1, string2):
  """
  Count the positions at which two strings of the same size differ
  @param string1: first string
  @type  string1: string
  @param string2: second string, which must be as long as string1
  @type  string2: string
  @return: the number of mismatching positions
  """
  sameSize = len(string1) == len(string2)
  if not sameSize:
    sys.exit("Error, size of %s and %s differ" % (string1, string2))
  return sum(1 for char1, char2 in zip(string1, string2) if char1 != char2)


def getMinAvgMedMax(values):
  """
  Get some stats about a distribution
  @param values: a distribution (the value being the number of occurrences of the key)
  @type  values: dict int to int
  @return: a tuple (minimum, average, median, maximum) of the keys, each key
           being counted as many times as its number of occurrences
  """
  minValues = min(values)
  maxValues = max(values)
  sumValues = sum(key * count for key, count in values.items())
  nbValues  = sum(values.values())
  # Expand the distribution in increasing order: each key is repeated "count" times
  sortedValues = [key for key in sorted(values) for _ in range(values[key])]
  # Floor division keeps the indices integral (plain "/" gives a float in Python 3)
  middle = nbValues // 2
  if nbValues % 2 == 0:
    # Even number of observations: average of the two central ones
    medValues = (sortedValues[middle - 1] + sortedValues[middle]) / 2.0
  else:
    # Odd number of observations: the central one, returned as is
    medValues = sortedValues[middle]
  return (minValues, float(sumValues) / nbValues, medValues, maxValues)


def xor(value1, value2):
  """
  Exclusive or on the truth values of two objects
  @param value1: a value
  @type  value1: anything
  @param value2: a value
  @type  value2: anything
  @return: True if exactly one of the two values is true, False otherwise
  """
  truth1 = bool(value1)
  truth2 = bool(value2)
  return truth1 ^ truth2


def binomialCoefficient(a, b):
  """
  Compute the binomial coefficient "a among b", i.e. b! / (a! * (b-a)!)
  @param a: the number of chosen elements
  @type  a: int
  @param b: the total number of elements
  @type  b: int
  @return: the coefficient, as a float
  """
  # Use the smaller of a and b-a, which gives the same coefficient with fewer factors
  chosen = b - a if a > b / 2 else a
  numerator = 1.0
  for factor in range(b - chosen + 1, b + 1):
    numerator *= factor
  denominator = 1.0
  for factor in range(1, chosen + 1):
    denominator *= factor
  return numerator / denominator


# Cache of the p-values already computed by fisherExactPValue, keyed by (a, b, c, d)
memory = {}

# Disabled variant relying on binomialCoefficient instead of R:
# def fisherExactPValue(a, b, c, d):
#   """
#   P-value of Fisher exact test for 2x2 contingency table
#   """
#   if (a, b, c, d) in memory:
#     return memory[(a, b, c, d)]

#   n = a + b + c + d
#   i1 = binomialCoefficient(a, a+b)
#   i2 = binomialCoefficient(c, a+c)
#   i3 = binomialCoefficient(c+d, n)
#   pValue = i1 * i2 / i3

#   memory[(a, b, c, d)] = pValue

#   return pValue


def fisherExactPValue(a, b, c, d):
  """
  P-value of Fisher exact test for a 2x2 contingency table, computed by R
  The 4 counts are written to a temporary R script as "matrix(c(a, b, c, d), nr=2)",
  the script is run with "R CMD BATCH" (the R executable can be overridden with the
  SMARTRPATH environment variable) and the p-value is read from the R output file.
  Results are cached in the module-level "memory" dict.
  The program exits (sys.exit) if R fails or if no p-value is found; the temporary
  files are then left in place.
  @param a: first count of the table
  @type  a: int
  @param b: second count of the table
  @type  b: int
  @param c: third count of the table
  @type  c: int
  @param d: fourth count of the table
  @type  d: int
  @return: the p-value
  @rtype:  float
  """
  key = (a, b, c, d)
  if key in memory:
    return memory[key]

  # NOTE: the name is drawn among 10001 candidates in the current directory, so
  # two concurrent runs may pick the same file
  scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))
  with open(scriptFileName, "w") as rScript:
    rScript.write("data = matrix(c(%d, %d, %d, %d), nr=2)\n" % (a, b, c, d))
    rScript.write("fisher.test(data)\n")
    #rScript.write("chisq.test(data)\n")

  rCommand = "R"
  if "SMARTRPATH" in os.environ:
    rCommand = os.environ["SMARTRPATH"]
  command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
  status  = subprocess.call(command, shell=True)

  if status != 0:
    sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))

  # The output is expected in "<script>.Rout", i.e. the script name followed by "out"
  outputRFileName = "%sout" % (scriptFileName)
  pValue          = None
  pValueTag       = "p-value "
  # The file is closed before it is removed below
  with open(outputRFileName) as outputRFile:
    for line in outputRFile:
      # Fields are comma-separated; the p-value is the last token of the field
      # which starts with the tag
      for splittedLine in line.strip().split(","):
        splittedLine = splittedLine.strip()
        if splittedLine.startswith(pValueTag):
          pValue = float(splittedLine.split()[-1])
          break

  if pValue is None:
    sys.exit("Problem with the cannot find p-value! File %s, values are: %d, %d, %d, %d" % (scriptFileName, a, b, c, d))

  os.remove(scriptFileName)
  os.remove(outputRFileName)

  memory[key] = pValue

  return pValue


def fisherExactPValueBulk(list):
  """
  P-values of Fisher exact tests for several 2x2 contingency tables, computed in one R run
  One "fisher.test(matrix(c(...), nr=2))$p.value" line is written per table to a
  temporary R script, which is run with "R CMD BATCH" (the R executable can be
  overridden with the SMARTRPATH environment variable). The lines of the R output
  starting with "[1] " are read as the p-values, in the order of the tables.
  The program exits (sys.exit) if R fails or if the number of p-values found differs
  from the number of tables; the temporary files are then left in place.
  @param list: the tables, each one given as 4 counts; the elements are used as keys
               of the result, so they must be hashable (e.g. tuples)
  @type  list: list of tuples of 4 int
  @return: the p-value of each table (an empty dict if there is no table)
  @rtype:  dict tuple to float
  """
  # Nothing to compute: do not run R, whose output would contain no p-value
  if not list:
    return {}

  # NOTE: the name is drawn among 10001 candidates in the current directory, so
  # two concurrent runs may pick the same file
  scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))
  with open(scriptFileName, "w") as rScript:
    for element in list:
      rScript.write("fisher.test(matrix(c(%d, %d, %d, %d), nr=2))$p.value\n" % (element[0], element[1], element[2], element[3]))

  rCommand = "R"
  if "SMARTRPATH" in os.environ:
    rCommand = os.environ["SMARTRPATH"]
  command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
  status  = subprocess.call(command, shell=True)

  if status != 0:
    sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))

  # The output is expected in "<script>.Rout", i.e. the script name followed by "out"
  outputRFileName = "%sout" % (scriptFileName)
  pValueTag       = "[1] "
  pValues         = []
  # The file is closed before it is removed below
  with open(outputRFileName) as outputRFile:
    for line in outputRFile:
      line = line.strip()
      if line.startswith(pValueTag):
        pValues.append(float(line.split()[-1]))

  if not pValues:
    sys.exit("Problem with the cannot find p-value!")
  # All the values are collected before they are matched with the tables, so that
  # too many values are reported here too
  if len(pValues) != len(list):
    sys.exit("Error in the number of p-values computed by R in file '%s'!" % (scriptFileName))

  os.remove(scriptFileName)
  os.remove(outputRFileName)

  # The i-th p-value belongs to the i-th table
  return dict(zip(list, pValues))

