Mercurial > repos > davidvanzessen > mutation_analysis

#########################################################################################
# License Agreement
#
# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
# OR COPYRIGHT LAW IS PROHIBITED.
#
# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
#
# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
# Coded by: Mohamed Uduman & Gur Yaari
# Copyright 2012 Kleinstein Lab
# Version: 1.3 (01/23/2014)
#########################################################################################

# Global variables

  FILTER_BY_MUTATIONS = 1000

  # Nucleotides
  NUCLEOTIDES = c("A","C","G","T")

  # Amino Acids
  AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")
  names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")
  names(AMINO_ACIDS) <- names(AMINO_ACIDS)

  #Amino Acid Traits
  #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
  #B = "Hydrophobic/Burried"  N = "Intermediate/Neutral"  S="Hydrophilic/Surface")
  TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")
  names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
  TRAITS_AMINO_ACIDS <- array(NA,21)

  # Codon Table
  CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))

  # Substitution Model: Smith DS et al. 1996
  substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
  substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
  substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
  load("FiveS_Substitution.RData")

  # Mutability Models: Shapiro GS et al. 2002
  triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T)
  triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T)
  triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT")
  load("FiveS_Mutability.RData")

# Functions

  # Translate codon to amino acid
  translateCodonToAminoAcid<-function(Codon){
     return(AMINO_ACIDS[Codon])
  }

  # Translate amino acid to trait change
  translateAminoAcidToTraitChange<-function(AminoAcid){
     return(TRAITS_AMINO_ACIDS[AminoAcid])
  }

  # Initialize Amino Acid Trait Changes
  initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){
    if(!is.null(traitChangeFileName)){
      tryCatch(
          traitChange <- read.delim(traitChangeFileName,sep="\t",header=T)
          , error = function(ex){
            cat("Error|Error reading trait changes. Please check file name/path and format.\n")
            q()
          }
        )
    }else{
      traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
    }
    TRAITS_AMINO_ACIDS <<- traitChange
 }

  # Read in formatted nucleotide substitution matrix
  initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){
    if(!is.null(subsMatFileName)){
      tryCatch(
          subsMat <- read.delim(subsMatFileName,sep="\t",header=T)
          , error = function(ex){
            cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
            q()
          }
        )
      if(sum(apply(subsMat,1,sum)==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x)))
    }else{
      if(substitutionModel==1)subsMat <- substitution_Literature_Mouse
      if(substitutionModel==2)subsMat <- substitution_Flu_Human
      if(substitutionModel==3)subsMat <- substitution_Flu25_Human

    }

    if(substitutionModel==0){
      subsMat <- matrix(1,4,4)
      subsMat[,] = 1/3
      subsMat[1,1] = 0
      subsMat[2,2] = 0
      subsMat[3,3] = 0
      subsMat[4,4] = 0
    }


    NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-")
    if(substitutionModel==5){
      subsMat <- FiveS_Substitution
      return(subsMat)
    }else{
      subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4))
      return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) )
    }
  }


  # Read in formatted Mutability file
  initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){
    if(!is.null(mutabilityMatFileName)){
        tryCatch(
            mutabilityMat <- read.delim(mutabilityMatFileName,sep="\t",header=T)
            , error = function(ex){
              cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
              q()
            }
          )
    }else{
      mutabilityMat <- triMutability_Literature_Human
      if(species==2) mutabilityMat <- triMutability_Literature_Mouse
    }

  if(mutabilityModel==0){ mutabilityMat <- matrix(1,64,3)}

    if(mutabilityModel==5){
      mutabilityMat <- FiveS_Mutability
      return(mutabilityMat)
    }else{
      return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) )
    }
  }

  # Read FASTA file formats
  # Modified from read.fasta from the seqinR package
  baseline.read.fasta <-
  function (file = system.file("sequences/sample.fasta", package = "seqinr"),
      seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE,
      set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE,
      strip.desc = FALSE,  sizeof.longlong = .Machine$sizeof.longlong,
      endian = .Platform$endian, apply.mask = TRUE)
  {
      seqtype <- match.arg(seqtype)

          lines <- readLines(file)

          if (legacy.mode) {
              comments <- grep("^;", lines)
              if (length(comments) > 0)
                  lines <- lines[-comments]
          }


          ind_groups<-which(substr(lines, 1L, 3L) == ">>>")
          lines_mod<-lines

          if(!length(ind_groups)){
              lines_mod<-c(">>>All sequences combined",lines)
          }

          ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>")

          lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod)))
          id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1
          lines[id] <- "THIS IS A FAKE SEQUENCE"
          lines[-id] <- lines_mod
          rm(lines_mod)

  		ind <- which(substr(lines, 1L, 1L) == ">")
          nseq <- length(ind)
          if (nseq == 0) {
               stop("no line starting with a > character found")
          }
          start <- ind + 1
          end <- ind - 1

          while( any(which(ind%in%end)) ){
            ind=ind[-which(ind%in%end)]
            nseq <- length(ind)
            if (nseq == 0) {
                stop("no line starting with a > character found")
            }
            start <- ind + 1
            end <- ind - 1
          }

          end <- c(end[-1], length(lines))
          sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
          if (seqonly)
              return(sequences)
          nomseq <- lapply(seq_len(nseq), function(i) {

              #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
              substr(lines[ind[i]], 2, nchar(lines[ind[i]]))

          })
          if (seqtype == "DNA") {
              if (forceDNAtolower) {
                  sequences <- as.list(tolower(chartr(".","-",sequences)))
              }else{
                  sequences <- as.list(toupper(chartr(".","-",sequences)))
              }
          }
          if (as.string == FALSE)
              sequences <- lapply(sequences, s2c)
          if (set.attributes) {
              for (i in seq_len(nseq)) {
                  Annot <- lines[ind[i]]
                  if (strip.desc)
                    Annot <- substr(Annot, 2L, nchar(Annot))
                  attributes(sequences[[i]]) <- list(name = nomseq[[i]],
                    Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA",
                      DNA = "SeqFastadna"))
              }
          }
          names(sequences) <- nomseq
          return(sequences)
  }


  # Replaces non FASTA characters in input files with N
  replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){
    gsub('[^ACGTNacgt[:punct:]-[:punct:].]','N',inSeq,perl=TRUE)
  }

  # Find the germlines in the FASTA list
  germlinesInFile <- function(seqIDs){
    firstChar = sapply(seqIDs,function(x){substr(x,1,1)})
    secondChar = sapply(seqIDs,function(x){substr(x,2,2)})
    return(firstChar==">" & secondChar!=">")
  }

  # Find the groups in the FASTA list
  groupsInFile <- function(seqIDs){
    sapply(seqIDs,function(x){substr(x,1,2)})==">>"
  }

  # In the process of finding germlines/groups, expand from the start to end of the group
  expandTillNext <- function(vecPosToID){
    IDs = names(vecPosToID)
    posOfInterests =  which(vecPosToID)

    expandedID = rep(NA,length(IDs))
    expandedIDNames = gsub(">","",IDs[posOfInterests])
    startIndexes = c(1,posOfInterests[-1])
    stopIndexes = c(posOfInterests[-1]-1,length(IDs))
    expandedID  = unlist(sapply(1:length(startIndexes),function(i){
                                    rep(i,stopIndexes[i]-startIndexes[i]+1)
                                  }))
    names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){
                                    rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1)
                                  }))
    return(expandedID)
  }

  # Process FASTA (list) to return a matrix[input, germline)
  processInputAdvanced <- function(inputFASTA){

    seqIDs = names(inputFASTA)
    numbSeqs = length(seqIDs)
    posGermlines1 = germlinesInFile(seqIDs)
    numbGermlines = sum(posGermlines1)
    posGroups1 = groupsInFile(seqIDs)
    numbGroups = sum(posGroups1)
    consDef = NA

    if(numbGermlines==0){
      posGermlines = 2
      numbGermlines = 1
    }

      glPositionsSum = cumsum(posGermlines1)
      glPositions = table(glPositionsSum)
      #Find the position of the conservation row
      consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1
    if( length(consDefPos)> 0 ){
      consDefID =  match(consDefPos, glPositionsSum)
      #The coservation rows need to be pulled out and stores seperately
      consDef =  inputFASTA[consDefID]
      inputFASTA =  inputFASTA[-consDefID]

      seqIDs = names(inputFASTA)
      numbSeqs = length(seqIDs)
      posGermlines1 = germlinesInFile(seqIDs)
      numbGermlines = sum(posGermlines1)
      posGroups1 = groupsInFile(seqIDs)
      numbGroups = sum(posGroups1)
      if(numbGermlines==0){
        posGermlines = 2
        numbGermlines = 1
      }
    }

    posGroups <- expandTillNext(posGroups1)
    posGermlines <- expandTillNext(posGermlines1)
    posGermlines[posGroups1] = 0
    names(posGermlines)[posGroups1] = names(posGroups)[posGroups1]
    posInput = rep(TRUE,numbSeqs)
    posInput[posGroups1 | posGermlines1] = FALSE

    matInput = matrix(NA, nrow=sum(posInput), ncol=2)
    rownames(matInput) = seqIDs[posInput]
    colnames(matInput) = c("Input","Germline")

    vecInputFASTA = unlist(inputFASTA)
    matInput[,1] = vecInputFASTA[posInput]
    matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ]

    germlines = posGermlines[posInput]
    groups = posGroups[posInput]

    return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef ))
  }


  # Replace leading and trailing dashes in the sequence
  replaceLeadingTrailingDashes <- function(x,readEnd){
    iiGap = unlist(gregexpr("-",x[1]))
    ggGap = unlist(gregexpr("-",x[2]))
    #posToChange = intersect(iiGap,ggGap)


    seqIn = replaceLeadingTrailingDashesHelper(x[1])
    seqGL = replaceLeadingTrailingDashesHelper(x[2])
    seqTemplate = rep('N',readEnd)
    seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd])
    seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd])
#    if(posToChange!=-1){
#      seqIn[posToChange] = "-"
#      seqGL[posToChange] = "-"
#    }

    seqIn = c2s(seqIn[1:readEnd])
    seqGL = c2s(seqGL[1:readEnd])

    lenGL = nchar(seqGL)
    if(lenGL<readEnd){
      seqGL = paste(seqGL,c2s(rep("N",readEnd-lenGL)),sep="")
    }

    lenInput = nchar(seqIn)
    if(lenInput<readEnd){
      seqIn = paste(seqIn,c2s(rep("N",readEnd-lenInput)),sep="")
    }
    return( c(seqIn,seqGL) )
  }

  replaceLeadingTrailingDashesHelper <- function(x){
    grepResults = gregexpr("-*",x)
    grepResultsPos = unlist(grepResults)
    grepResultsLen =  attr(grepResults[[1]],"match.length")
    #print(paste("x = '", x, "'", sep=""))
    x = s2c(x)
    if(x[1]=="-"){
      x[1:grepResultsLen[1]] = "N"
    }
    if(x[length(x)]=="-"){
      x[(length(x)-grepResultsLen[length(grepResultsLen)]+1):length(x)] = "N"
    }
    return(x)
  }


  # Check sequences for indels
  checkForInDels <- function(matInputP){
    insPos <- checkInsertion(matInputP)
    delPos <- checkDeletions(matInputP)
    return(list("Insertions"=insPos, "Deletions"=delPos))
  }

  # Check sequences for insertions
  checkInsertion <- function(matInputP){
    insertionCheck = apply( matInputP,1, function(x){
                                          inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
                                          glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
                                          return( is.finite( match(FALSE, glGaps%in%inputGaps ) ) )
                                        })
    return(as.vector(insertionCheck))
  }
  # Fix inserstions
  fixInsertions <- function(matInputP){
    insPos <- checkInsertion(matInputP)
    sapply((1:nrow(matInputP))[insPos],function(rowIndex){
                                                x <- matInputP[rowIndex,]
                                                inputGaps <- gregexpr("-",x[1])[[1]]
                                                glGaps <- gregexpr("-",x[2])[[1]]
                                                posInsertions <- glGaps[!(glGaps%in%inputGaps)]
                                                inputInsertionToN <- s2c(x[2])
                                                inputInsertionToN[posInsertions]!="-"
                                                inputInsertionToN[posInsertions] <- "N"
                                                inputInsertionToN <- c2s(inputInsertionToN)
                                                matInput[rowIndex,2] <<- inputInsertionToN
                                              })
    return(insPos)
  }

  # Check sequences for deletions
  checkDeletions <-function(matInputP){
    deletionCheck = apply( matInputP,1, function(x){
                                          inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
                                          glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
                                          return( is.finite( match(FALSE, inputGaps%in%glGaps ) ) )
                                      })
    return(as.vector(deletionCheck))
  }
  # Fix sequences with deletions
  fixDeletions <- function(matInputP){
    delPos <- checkDeletions(matInputP)
    sapply((1:nrow(matInputP))[delPos],function(rowIndex){
                                                x <- matInputP[rowIndex,]
                                                inputGaps <- gregexpr("-",x[1])[[1]]
                                                glGaps <- gregexpr("-",x[2])[[1]]
                                                posDeletions <- inputGaps[!(inputGaps%in%glGaps)]
                                                inputDeletionToN <- s2c(x[1])
                                                inputDeletionToN[posDeletions] <- "N"
                                                inputDeletionToN <- c2s(inputDeletionToN)
                                                matInput[rowIndex,1] <<- inputDeletionToN
                                              })
    return(delPos)
  }


  # Trim DNA sequence to the last codon
  trimToLastCodon <- function(seqToTrim){
    seqLen = nchar(seqToTrim)
    trimmedSeq = s2c(seqToTrim)
    poi = seqLen
    tailLen = 0

    while(trimmedSeq[poi]=="-" || trimmedSeq[poi]=="."){
      tailLen = tailLen + 1
      poi = poi - 1
    }

    trimmedSeq = c2s(trimmedSeq[1:(seqLen-tailLen)])
    seqLen = nchar(trimmedSeq)
    # Trim sequence to last codon
  	if( getCodonPos(seqLen)[3] > seqLen )
  	  trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) )

    return(trimmedSeq)
  }

  # Given a nuclotide position, returns the pos of the 3 nucs that made the codon
  # e.g. nuc 86 is part of nucs 85,86,87
  getCodonPos <- function(nucPos){
    codonNum =  (ceiling(nucPos/3))*3
    return( (codonNum-2):codonNum)
  }

  # Given a nuclotide position, returns the codon number
  # e.g. nuc 86  = codon 29
  getCodonNumb <- function(nucPos){
    return( ceiling(nucPos/3) )
  }

  # Given a codon, returns all the nuc positions that make the codon
  getCodonNucs <- function(codonNumb){
    getCodonPos(codonNumb*3)
  }

  computeCodonTable <- function(testID=1){

    if(testID<=4){
      # Pre-compute every codons
      intCounter = 1
      for(pOne in NUCLEOTIDES){
        for(pTwo in NUCLEOTIDES){
          for(pThree in NUCLEOTIDES){
            codon = paste(pOne,pTwo,pThree,sep="")
            colnames(CODON_TABLE)[intCounter] =  codon
            intCounter = intCounter + 1
            CODON_TABLE[,codon] = mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12)))
          }
        }
      }
      chars = c("N","A","C","G","T", "-")
      for(a in chars){
        for(b in chars){
          for(c in chars){
            if(a=="N" | b=="N" | c=="N"){
              #cat(paste(a,b,c),sep="","\n")
              CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
            }
          }
        }
      }

      chars = c("-","A","C","G","T")
      for(a in chars){
        for(b in chars){
          for(c in chars){
            if(a=="-" | b=="-" | c=="-"){
              #cat(paste(a,b,c),sep="","\n")
              CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
            }
          }
        }
      }
      CODON_TABLE <<- as.matrix(CODON_TABLE)
    }
  }

  collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){
  #print(length(vecInputSeqs))
    vecInputSeqs = unique(vecInputSeqs)
    if(length(vecInputSeqs)==1){
      return( list( c(vecInputSeqs,glSeq), F) )
    }else{
      charInputSeqs <- sapply(vecInputSeqs, function(x){
                                              s2c(x)[1:readEnd]
                                            })
      charGLSeq <- s2c(glSeq)
      matClone <- sapply(1:readEnd, function(i){
                                            posNucs = unique(charInputSeqs[i,])
                                            posGL = charGLSeq[i]
                                            error = FALSE
                                            if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){
                                              return(c("-",error))
                                            }
                                            if(length(posNucs)==1)
                                              return(c(posNucs[1],error))
                                            else{
                                              if("N"%in%posNucs){
                                                error=TRUE
                                              }
                                              if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){
                                                return( c(posGL,error) )
                                              }else{
                                                #return( c(sample(posNucs[posNucs!="N"],1),error) )
                                                if(nonTerminalOnly==0){
                                                  return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) )
                                                }else{
                                                  posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL]
                                                  posNucsTable = table(posNucs)
                                                  if(sum(posNucsTable>1)==0){
                                                    return( c(posGL,error) )
                                                  }else{
                                                    return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) )
                                                  }
                                                }

                                              }
                                            }
                                          })


      #print(length(vecInputSeqs))
      return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,]))
    }
  }

  # Compute the expected for each sequence-germline pair
  getExpectedIndividual <- function(matInput){
  if( any(grep("multicore",search())) ){
    facGL <- factor(matInput[,2])
    facLevels = levels(facGL)
    LisGLs_MutabilityU = mclapply(1:length(facLevels),  function(x){
                                                      computeMutabilities(facLevels[x])
                                                    })
    facIndex = match(facGL,facLevels)

    LisGLs_Mutability = mclapply(1:nrow(matInput),  function(x){
                                                      cInput = rep(NA,nchar(matInput[x,1]))
                                                      cInput[s2c(matInput[x,1])!="N"] = 1
                                                      LisGLs_MutabilityU[[facIndex[x]]] * cInput
                                                    })

    LisGLs_Targeting =  mclapply(1:dim(matInput)[1],  function(x){
                                                      computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
                                                    })

    LisGLs_MutationTypes  = mclapply(1:length(matInput[,2]),function(x){
                                                    #print(x)
                                                    computeMutationTypes(matInput[x,2])
                                                })

    LisGLs_Exp = mclapply(1:dim(matInput)[1],  function(x){
                                                  computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
                                                })

    ul_LisGLs_Exp =  unlist(LisGLs_Exp)
    return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
  }else{
    facGL <- factor(matInput[,2])
    facLevels = levels(facGL)
    LisGLs_MutabilityU = lapply(1:length(facLevels),  function(x){
      computeMutabilities(facLevels[x])
    })
    facIndex = match(facGL,facLevels)

    LisGLs_Mutability = lapply(1:nrow(matInput),  function(x){
      cInput = rep(NA,nchar(matInput[x,1]))
      cInput[s2c(matInput[x,1])!="N"] = 1
      LisGLs_MutabilityU[[facIndex[x]]] * cInput
    })

    LisGLs_Targeting =  lapply(1:dim(matInput)[1],  function(x){
      computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
    })

    LisGLs_MutationTypes  = lapply(1:length(matInput[,2]),function(x){
      #print(x)
      computeMutationTypes(matInput[x,2])
    })

    LisGLs_Exp = lapply(1:dim(matInput)[1],  function(x){
      computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
    })

    ul_LisGLs_Exp =  unlist(LisGLs_Exp)
    return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))

  }
  }

  # Compute mutabilities of sequence based on the tri-nucleotide model
  computeMutabilities <- function(paramSeq){
    seqLen = nchar(paramSeq)
    seqMutabilites = rep(NA,seqLen)

    gaplessSeq = gsub("-", "", paramSeq)
    gaplessSeqLen = nchar(gaplessSeq)
    gaplessSeqMutabilites = rep(NA,gaplessSeqLen)

    if(mutabilityModel!=5){
      pos<- 3:(gaplessSeqLen)
      subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
      gaplessSeqMutabilites[pos] =
        tapply( c(
                                        getMutability( substr(subSeq,1,3), 3) ,
                                        getMutability( substr(subSeq,2,4), 2),
                                        getMutability( substr(subSeq,3,5), 1)
                                        ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE
                                      )
      #Pos 1
      subSeq =  substr(gaplessSeq,1,3)
      gaplessSeqMutabilites[1] =  getMutability(subSeq , 1)
      #Pos 2
      subSeq =  substr(gaplessSeq,1,4)
      gaplessSeqMutabilites[2] =  mean( c(
                                            getMutability( substr(subSeq,1,3), 2) ,
                                            getMutability( substr(subSeq,2,4), 1)
                                          ),na.rm=T
                                      )
      seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
      return(seqMutabilites)
    }else{

      pos<- 3:(gaplessSeqLen)
      subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
      gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T)
      seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
      return(seqMutabilites)
    }

  }

  # Returns the mutability of a triplet at a given position
  getMutability <- function(codon, pos=1:3){
    triplets <- rownames(mutability)
    mutability[  match(codon,triplets) ,pos]
  }

  getMutability5 <- function(fivemer){
    return(mutability[fivemer])
  }

  # Returns the substitution probabilty
  getTransistionProb <- function(nuc){
    substitution[nuc,]
  }

  getTransistionProb5 <- function(fivemer){
    if(any(which(fivemer==colnames(substitution)))){
      return(substitution[,fivemer])
    }else{
      return(array(NA,4))
    }
  }

  # Given a nuc, returns the other 3 nucs it can mutate to
  canMutateTo <- function(nuc){
    NUCLEOTIDES[- which(NUCLEOTIDES==nuc)]
  }

  # Given a nucleotide, returns the probabilty of other nucleotide it can mutate to
  canMutateToProb <- function(nuc){
    substitution[nuc,canMutateTo(nuc)]
  }

  # Compute targeting, based on precomputed mutatbility & substitution
  computeTargeting <- function(param_strSeq,param_vecMutabilities){

    if(substitutionModel!=5){
      vecSeq = s2c(param_strSeq)
      matTargeting = sapply( 1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } )
      #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } )
      dimnames( matTargeting ) =  list(NUCLEOTIDES,1:(length(vecSeq)))
      return (matTargeting)
    }else{

      seqLen = nchar(param_strSeq)
      seqsubstitution = matrix(NA,ncol=seqLen,nrow=4)
      paramSeq <- param_strSeq
      gaplessSeq = gsub("-", "", paramSeq)
      gaplessSeqLen = nchar(gaplessSeq)
      gaplessSeqSubstitution  = matrix(NA,ncol=gaplessSeqLen,nrow=4)

      pos<- 3:(gaplessSeqLen)
      subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
      gaplessSeqSubstitution[,pos] = sapply(subSeq,function(x){ getTransistionProb5(x) }, simplify=T)
      seqsubstitution[,which(s2c(paramSeq)!="-")]<- gaplessSeqSubstitution
      #matTargeting <- param_vecMutabilities  %*% seqsubstitution
      matTargeting <- sweep(seqsubstitution,2,param_vecMutabilities,`*`)
      dimnames( matTargeting ) =  list(NUCLEOTIDES,1:(seqLen))
      return (matTargeting)
    }
  }

  # Compute the mutations types
  computeMutationTypes <- function(param_strSeq){
  #cat(param_strSeq,"\n")
    #vecSeq = trimToLastCodon(param_strSeq)
    lenSeq = nchar(param_strSeq)
    vecCodons = sapply({1:(lenSeq/3)}*3-2,function(x){substr(param_strSeq,x,x+2)})
    matMutationTypes = matrix( unlist(CODON_TABLE[,vecCodons]) ,ncol=lenSeq,nrow=4, byrow=F)
    dimnames( matMutationTypes ) =  list(NUCLEOTIDES,1:(ncol(matMutationTypes)))
    return(matMutationTypes)
  }
  computeMutationTypesFast <- function(param_strSeq){
    matMutationTypes = matrix( CODON_TABLE[,param_strSeq] ,ncol=3,nrow=4, byrow=F)
    #dimnames( matMutationTypes ) =  list(NUCLEOTIDES,1:(length(vecSeq)))
    return(matMutationTypes)
  }
  mutationTypeOptimized <- function( matOfCodons ){
   apply( matOfCodons,1,function(x){ mutationType(x[2],x[1]) } )
  }

  # Returns a vector of codons 1 mutation away from the given codon
  permutateAllCodon <- function(codon){
    cCodon = s2c(codon)
    matCodons = t(array(cCodon,dim=c(3,12)))
    matCodons[1:4,1] = NUCLEOTIDES
    matCodons[5:8,2] = NUCLEOTIDES
    matCodons[9:12,3] = NUCLEOTIDES
    apply(matCodons,1,c2s)
  }

  # Given two codons, tells you if the mutation is R or S (based on your definition)
  mutationType <- function(codonFrom,codonTo){
    if(testID==4){
      if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
        return(NA)
      }else{
        mutationType = "S"
        if( translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom)) != translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo)) ){
          mutationType = "R"
        }
        if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
          mutationType = "Stop"
        }
        return(mutationType)
      }
    }else if(testID==5){
      if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
        return(NA)
      }else{
        if(codonFrom==codonTo){
          mutationType = "S"
        }else{
          codonFrom = s2c(codonFrom)
          codonTo = s2c(codonTo)
          mutationType = "Stop"
          nucOfI = codonFrom[which(codonTo!=codonFrom)]
          if(nucOfI=="C"){
            mutationType = "R"
          }else if(nucOfI=="G"){
            mutationType = "S"
          }
        }
        return(mutationType)
      }
    }else{
      if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
        return(NA)
      }else{
        mutationType = "S"
        if( translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo) ){
          mutationType = "R"
        }
        if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
          mutationType = "Stop"
        }
        return(mutationType)
      }
    }
  }


  #given a mat of targeting & it's corresponding mutationtypes returns
  #a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_RFWR
  computeExpected <- function(paramTargeting,paramMutationTypes){
    # Replacements
    RPos = which(paramMutationTypes=="R")
      #FWR
      Exp_R_FWR = sum(paramTargeting[ RPos[which(FWR_Nuc_Mat[RPos]==T)] ],na.rm=T)
      #CDR
      Exp_R_CDR = sum(paramTargeting[ RPos[which(CDR_Nuc_Mat[RPos]==T)] ],na.rm=T)
    # Silents
    SPos = which(paramMutationTypes=="S")
      #FWR
      Exp_S_FWR = sum(paramTargeting[ SPos[which(FWR_Nuc_Mat[SPos]==T)] ],na.rm=T)
      #CDR
      Exp_S_CDR = sum(paramTargeting[ SPos[which(CDR_Nuc_Mat[SPos]==T)] ],na.rm=T)

      return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR))
  }

  # Count the mutations in a sequence
  # each mutation is treated independently
  analyzeMutations2NucUri_website <- function( rev_in_matrix ){
    paramGL = rev_in_matrix[2,]
    paramSeq = rev_in_matrix[1,]

    #Fill seq with GL seq if gapped
    #if( any(paramSeq=="-") ){
    #  gapPos_Seq =  which(paramSeq=="-")
    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"]
    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
    #}


    #if( any(paramSeq=="N") ){
    #  gapPos_Seq =  which(paramSeq=="N")
    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
    #}

    analyzeMutations2NucUri(  matrix(c( paramGL, paramSeq  ),2,length(paramGL),byrow=T)  )

  }

  #1 = GL
  #2 = Seq
  analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){
    paramGL = in_matrix[2,]
    paramSeq = in_matrix[1,]
    paramSeqUri = paramGL
    #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]})
    mutations_val = paramGL != paramSeq
    if(any(mutations_val)){
      mutationPos = {1:length(mutations_val)}[mutations_val]
      mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})]
      length_mutations =length(mutationPos)
      mutationInfo = rep(NA,length_mutations)
      if(any(mutationPos)){

        pos<- mutationPos
        pos_array<-array(sapply(pos,getCodonPos))
        codonGL =  paramGL[pos_array]

        codonSeq = sapply(pos,function(x){
                                  seqP = paramGL[getCodonPos(x)]
                                  muCodonPos = {x-1}%%3+1
                                  seqP[muCodonPos] = paramSeq[x]
                                  return(seqP)
                                })
        GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
        Seqcodons =   apply(codonSeq,2,c2s)
        mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
        names(mutationInfo) = mutationPos
    }
    if(any(!is.na(mutationInfo))){
      return(mutationInfo[!is.na(mutationInfo)])
    }else{
      return(NA)
    }


    }else{
      return (NA)
    }
  }

  processNucMutations2 <- function(mu){
    if(!is.na(mu)){
      #R
      if(any(mu=="R")){
        Rs = mu[mu=="R"]
        nucNumbs = as.numeric(names(Rs))
        R_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
        R_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)
      }else{
        R_CDR = 0
        R_FWR = 0
      }

      #S
      if(any(mu=="S")){
        Ss = mu[mu=="S"]
        nucNumbs = as.numeric(names(Ss))
        S_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
        S_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)
      }else{
        S_CDR = 0
        S_FWR = 0
      }


      retVec = c(R_CDR,S_CDR,R_FWR,S_FWR)
      retVec[is.na(retVec)]=0
      return(retVec)
    }else{
      return(rep(0,4))
    }
  }


  ## Z-score Test
  computeZScore <- function(mat, test="Focused"){
    matRes <- matrix(NA,ncol=2,nrow=(nrow(mat)))
    if(test=="Focused"){
      #Z_Focused_CDR
      #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
      P = apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))})
      R_mean = apply(cbind(mat[,c(1,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
      R_sd=sqrt(R_mean*(1-P))
      matRes[,1] = (mat[,1]-R_mean)/R_sd

      #Z_Focused_FWR
      #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
      P = apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))})
      R_mean = apply(cbind(mat[,c(3,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
      R_sd=sqrt(R_mean*(1-P))
      matRes[,2] = (mat[,3]-R_mean)/R_sd
    }

    if(test=="Local"){
      #Z_Focused_CDR
      #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
      P = apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))})
      R_mean = apply(cbind(mat[,c(1,2)],P),1,function(x){x[3]*(sum(x[1:2]))})
      R_sd=sqrt(R_mean*(1-P))
      matRes[,1] = (mat[,1]-R_mean)/R_sd

      #Z_Focused_FWR
      #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
      P = apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))})
      R_mean = apply(cbind(mat[,c(3,4)],P),1,function(x){x[3]*(sum(x[1:2]))})
      R_sd=sqrt(R_mean*(1-P))
      matRes[,2] = (mat[,3]-R_mean)/R_sd
    }

    if(test=="Imbalanced"){
      #Z_Focused_CDR
      #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
      P = apply(mat[,5:8],1,function(x){((x[1]+x[2])/sum(x))})
      R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
      R_sd=sqrt(R_mean*(1-P))
      matRes[,1] = (mat[,1]-R_mean)/R_sd

      #Z_Focused_FWR
      #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
      P = apply(mat[,5:8],1,function(x){((x[3]+x[4])/sum(x))})
      R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
      R_sd=sqrt(R_mean*(1-P))
      matRes[,2] = (mat[,3]-R_mean)/R_sd
    }

    matRes[is.nan(matRes)] = NA
    return(matRes)
  }

  # Return a p-value for a z-score
  z2p <- function(z){
    p=NA
    if( !is.nan(z) && !is.na(z)){
      if(z>0){
        p = (1 - pnorm(z,0,1))
      } else if(z<0){
        p = (-1 * pnorm(z,0,1))
      } else{
        p = 0.5
      }
    }else{
      p = NA
    }
    return(p)
  }


  ## Bayesian  Test

  # Fitted parameter for the bayesian framework
BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
  CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))

  # Given x, M & p, returns a pdf
  calculate_bayes <- function ( x=3, N=10, p=0.33,
                                i=CONST_i,
                                max_sigma=20,length_sigma=4001
                              ){
    if(!0%in%N){
      G <- max(length(x),length(N),length(p))
      x=array(x,dim=G)
      N=array(N,dim=G)
      p=array(p,dim=G)
      sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma)
      sigma_1<-log({i/{1-i}}/{p/{1-p}})
      index<-min(N,60)
      y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1))
      if(!sum(is.na(y))){
        tmp<-approx(sigma_1,y,sigma_s)$y
        tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}}
      }else{
        return(NA)
      }
    }else{
      return(NA)
    }
  }
  # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection
  computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){
    flagOneSeq = F
    if(nrow(mat)==1){
      mat=rbind(mat,mat)
      flagOneSeq = T
    }
    if(test=="Focused"){
      #CDR
      P = c(apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}),0.5)
      N = c(apply(mat[,c(1,2,4)],1,function(x){(sum(x))}),0)
      X = c(mat[,1],0)
      bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesCDR = bayesCDR[-length(bayesCDR)]

      #FWR
      P = c(apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}),0.5)
      N = c(apply(mat[,c(3,2,4)],1,function(x){(sum(x))}),0)
      X = c(mat[,3],0)
      bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesFWR = bayesFWR[-length(bayesFWR)]
    }

    if(test=="Local"){
      #CDR
      P = c(apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}),0.5)
      N = c(apply(mat[,c(1,2)],1,function(x){(sum(x))}),0)
      X = c(mat[,1],0)
      bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesCDR = bayesCDR[-length(bayesCDR)]

      #FWR
      P = c(apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}),0.5)
      N = c(apply(mat[,c(3,4)],1,function(x){(sum(x))}),0)
      X = c(mat[,3],0)
      bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesFWR = bayesFWR[-length(bayesFWR)]
    }

    if(test=="Imbalanced"){
      #CDR
      P = c(apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}),0.5)
      N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0)
      X = c(apply(mat[,c(1:2)],1,function(x){(sum(x))}),0)
      bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesCDR = bayesCDR[-length(bayesCDR)]

      #FWR
      P = c(apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}),0.5)
      N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0)
      X = c(apply(mat[,c(3:4)],1,function(x){(sum(x))}),0)
      bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesFWR = bayesFWR[-length(bayesFWR)]
    }

    if(test=="ImbalancedSilent"){
      #CDR
      P = c(apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}),0.5)
      N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0)
      X = c(apply(mat[,c(2,4)],1,function(x){(x[1])}),0)
      bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesCDR = bayesCDR[-length(bayesCDR)]

      #FWR
      P = c(apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}),0.5)
      N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0)
      X = c(apply(mat[,c(2,4)],1,function(x){(x[2])}),0)
      bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
      bayesFWR = bayesFWR[-length(bayesFWR)]
    }

    if(flagOneSeq==T){
      bayesCDR = bayesCDR[1]
      bayesFWR = bayesFWR[1]
    }
    return( list("CDR"=bayesCDR, "FWR"=bayesFWR) )
  }

  ##Covolution
  break2chunks<-function(G=1000){
  base<-2^round(log(sqrt(G),2),0)
  return(c(rep(base,floor(G/base)-1),base+G-(floor(G/base)*base)))
  }

  PowersOfTwo <- function(G=100){
    exponents <- array()
    i = 0
    while(G > 0){
      i=i+1
      exponents[i] <- floor( log2(G) )
      G <- G-2^exponents[i]
    }
    return(exponents)
  }

  convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){
    G = ncol(cons)
    if(G>1){
      for(gen in log(G,2):1){
        ll<-seq(from=2,to=2^gen,by=2)
        sapply(ll,function(l){cons[,l/2]<<-weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)})
      }
    }
    return( cons[,1] )
  }

  convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){
    if(length(ncol(cons))) G<-ncol(cons)
    groups <- PowersOfTwo(G)
    matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G )
    startIndex = 1
    for( i in 1:length(groups) ){
      stopIndex <- 2^groups[i] + startIndex - 1
      if(stopIndex!=startIndex){
        matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma )
        startIndex = stopIndex + 1
      }
      else {
        if(G>1) matG[,i] <- cons[,startIndex:stopIndex]
        else matG[,i] <- cons
        #startIndex = stopIndex + 1
      }
    }
    return( list( matG, groups ) )
  }

  weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
    lx<-length(x)
    ly<-length(y)
    if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){
      if(w<1){
        y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y
        x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y
        lx<-length(x1)
        ly<-length(y1)
      }
      else {
        y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y
        x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y
        lx<-length(x1)
        ly<-length(y1)
      }
    }
    else{
      x1<-x
      y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y
      ly<-length(y1)
    }
    tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y
    tmp[tmp<=0] = 0
    return(tmp/sum(tmp))
  }

  calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
    matG <- listMatG[[1]]
    groups <- listMatG[[2]]
    i = 1
    resConv <- matG[,i]
    denom <- 2^groups[i]
    if(length(groups)>1){
      while( i<length(groups) ){
        i = i + 1
        resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma)
        #cat({{2^groups[i]}/denom},"\n")
        denom <- denom + 2^groups[i]
      }
    }
    return(resConv)
  }

  # Given a list of PDFs, returns a convoluted PDF
  groupPosteriors <- function( listPosteriors, max_sigma=20, length_sigma=4001 ,Threshold=2 ){
    listPosteriors = listPosteriors[ !is.na(listPosteriors) ]
    Length_Postrior<-length(listPosteriors)
    if(Length_Postrior>1 & Length_Postrior<=Threshold){
      cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
      listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
      y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
      return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
    }else if(Length_Postrior==1) return(listPosteriors[[1]])
    else  if(Length_Postrior==0) return(NA)
    else {
      cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
      y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma )
      return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
    }
  }

  fastConv<-function(cons, max_sigma=20, length_sigma=4001){
    chunks<-break2chunks(G=ncol(cons))
    if(ncol(cons)==3) chunks<-2:1
    index_chunks_end <- cumsum(chunks)
    index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1)
    index_chunks <- cbind(index_chunks_start,index_chunks_end)

    case <- sum(chunks!=chunks[1])
    if(case==1) End <- max(1,((length(index_chunks)/2)-1))
    else End <- max(1,((length(index_chunks)/2)))

    firsts <- sapply(1:End,function(i){
          	    indexes<-index_chunks[i,1]:index_chunks[i,2]
          	    convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]]
          	  })
    if(case==0){
    	result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) )
    }else if(case==1){
      last<-list(calculate_bayesGHelper(
      convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] )
                                      ),0)
      result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts))
      result<-calculate_bayesGHelper(
        list(
          cbind(
          result_first,last[[1]]),
          c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2))
        )
      )
    }
    return(as.vector(result))
  }

  # Computes the 95% CI for a pdf
  calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
    if(length(Pdf)!=length_sigma) return(NA)
    sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma)
    cdf = cumsum(Pdf)
    cdf = cdf/cdf[length(cdf)]
    return( c(sigma_s[findInterval(low,cdf)-1] , sigma_s[findInterval(up,cdf)]) )
  }

  # Computes a mean for a pdf
  calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
    if(length(Pdf)!=length_sigma) return(NA)
    sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma)
    norm = {length_sigma-1}/2/max_sigma
    return( (Pdf%*%sigma_s/norm)  )
  }

  # Returns the mean, and the 95% CI for a pdf
  calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
    if(is.na(Pdf))
     return(rep(NA,3))
    bCI = calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
    bMean = calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
    return(c(bMean, bCI))
  }

  # Computes the p-value of a pdf
  computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
    if(length(Pdf)>1){
      norm = {length_sigma-1}/2/max_sigma
      pVal = {sum(Pdf[1:{{length_sigma-1}/2}]) + Pdf[{{length_sigma+1}/2}]/2}/norm
      if(pVal>0.5){
        pVal = pVal-1
      }
      return(pVal)
    }else{
      return(NA)
    }
  }

  # Compute p-value of two distributions
  compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
  #print(c(length(dens1),length(dens2)))
  if(length(dens1)>1 & length(dens2)>1 ){
    dens1<-dens1/sum(dens1)
    dens2<-dens2/sum(dens2)
    cum2 <- cumsum(dens2)-dens2/2
    tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i])))
    #print(tmp)
    if(tmp>0.5)tmp<-tmp-1
    return( tmp )
    }
    else {
    return(NA)
    }
    #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)
  }

  # get number of seqeunces contributing to the sigma (i.e. seqeunces with mutations)
  numberOfSeqsWithMutations <- function(matMutations,test=1){
    if(test==4)test=2
    cdrSeqs <- 0
    fwrSeqs <- 0
    if(test==1){#focused
      cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2,4)]) })
      fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4,2)]) })
      if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0)
      if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0)
    }
    if(test==2){#local
      cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2)]) })
      fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4)]) })
      if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0)
      if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0)
    }
  return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
}


shadeColor <- function(sigmaVal=NA,pVal=NA){
  if(is.na(sigmaVal) & is.na(pVal)) return(NA)
  if(is.na(sigmaVal) & !is.na(pVal)) sigmaVal=sign(pVal)
  if(is.na(pVal) || pVal==1 || pVal==0){
    returnColor = "#FFFFFF";
  }else{
    colVal=abs(pVal);

    if(sigmaVal<0){
        if(colVal>0.1)
          returnColor = "#CCFFCC";
        if(colVal<=0.1)
          returnColor = "#99FF99";
        if(colVal<=0.050)
          returnColor = "#66FF66";
        if(colVal<=0.010)
          returnColor = "#33FF33";
        if(colVal<=0.005)
          returnColor = "#00FF00";

    }else{
      if(colVal>0.1)
        returnColor = "#FFCCCC";
      if(colVal<=0.1)
        returnColor = "#FF9999";
      if(colVal<=0.05)
        returnColor = "#FF6666";
      if(colVal<=0.01)
        returnColor = "#FF3333";
      if(colVal<0.005)
        returnColor = "#FF0000";
    }
  }

  return(returnColor)
}


plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
  if(!log){
    x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac
    y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac
  }else {
    if(log==2){
      x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac
      y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac)
    }
    if(log==1){
      x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac)
      y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac
    }
    if(log==3){
      x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac)
      y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac)
    }
  }
  return(c("x"=x,"y"=y))
}

# SHMulation

  # Based on targeting, introduce a single mutation & then update the targeting
  oneMutation <- function(){
    # Pick a postion + mutation
    posMutation = sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting))
    posNucNumb = ceiling(posMutation/4)                    # Nucleotide number
    posNucKind = 4 - ( (posNucNumb*4) - posMutation )   # Nuc the position mutates to

    #mutate the simulation sequence
    seqSimVec <-  s2c(seqSim)
    seqSimVec[posNucNumb] <- NUCLEOTIDES[posNucKind]
    seqSim <<-  c2s(seqSimVec)

    #update Mutability, Targeting & MutationsTypes
    updateMutabilityNTargeting(posNucNumb)

    #return(c(posNucNumb,NUCLEOTIDES[posNucKind]))
    return(posNucNumb)
  }

  updateMutabilityNTargeting <- function(position){
    min_i<-max((position-2),1)
    max_i<-min((position+2),nchar(seqSim))
    min_ii<-min(min_i,3)

    #mutability - update locally
    seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)]


    #targeting - compute locally
    seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i])
    seqTargeting[is.na(seqTargeting)] <<- 0
    #mutCodonPos = getCodonPos(position)
    mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3])
    #cat(mutCodonPos,"\n")
    mutTypeCodon = getCodonPos(position)
    seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) )
    # Stop = 0
    if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){
      seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0
    }


    #Selection
    selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R"))
    # CDR
    selectedCDR = selectedPos[which(matCDR[selectedPos]==T)]
    seqTargeting[selectedCDR] <<-  seqTargeting[selectedCDR] *  exp(selCDR)
    seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K

    # FWR
    selectedFWR = selectedPos[which(matFWR[selectedPos]==T)]
    seqTargeting[selectedFWR] <<-  seqTargeting[selectedFWR] *  exp(selFWR)
    seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K

  }


  # Validate the mutation: if the mutation has not been sampled before validate it, else discard it.
  validateMutation <- function(){
    if( !(mutatedPos%in%mutatedPositions) ){ # if it's a new mutation
      uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
      mutatedPositions[uniqueMutationsIntroduced] <<-  mutatedPos
    }else{
      if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){ # back to germline mutation
        mutatedPositions <<-  mutatedPositions[-which(mutatedPositions==mutatedPos)]
        uniqueMutationsIntroduced <<-  uniqueMutationsIntroduced - 1
      }
    }
  }


  # Places text (labels) at normalized coordinates
  myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
    par(xpd=TRUE)
    if(!log)
      text(par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,w,cex=cex,adj=adj,col=thecol)
    else {
    if(log==2)
    text(
      par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,
      10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac),
      w,cex=cex,adj=adj,col=thecol)
    if(log==1)
      text(
      10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac),
      par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,
      w,cex=cex,adj=adj,col=thecol)
    if(log==3)
      text(
      10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac),
      10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac),
      w,cex=cex,adj=adj,col=thecol)
    }
    par(xpd=FALSE)
  }


  # Count the mutations in a sequence
  analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){

    paramGL = s2c(matInput[inputMatrixIndex,2])
    paramSeq = s2c(matInput[inputMatrixIndex,1])

    #if( any(paramSeq=="N") ){
    #  gapPos_Seq =  which(paramSeq=="N")
    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
    #}
    mutations_val = paramGL != paramSeq

    if(any(mutations_val)){
      mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
      length_mutations =length(mutationPos)
      mutationInfo = rep(NA,length_mutations)

      pos<- mutationPos
      pos_array<-array(sapply(pos,getCodonPos))
      codonGL =  paramGL[pos_array]
      codonSeqWhole =  paramSeq[pos_array]
      codonSeq = sapply(pos,function(x){
                                seqP = paramGL[getCodonPos(x)]
                                muCodonPos = {x-1}%%3+1
                                seqP[muCodonPos] = paramSeq[x]
                                return(seqP)
                              })
      GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
      SeqcodonsWhole =  apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
      Seqcodons =   apply(codonSeq,2,c2s)

      mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
      names(mutationInfo) = mutationPos

      mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
      names(mutationInfoWhole) = mutationPos

      mutationInfo <- mutationInfo[!is.na(mutationInfo)]
      mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]

      if(any(!is.na(mutationInfo))){

        #Filter based on Stop (at the codon level)
        if(seqWithStops==1){
          nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
          mutationInfo = mutationInfo[nucleotidesAtStopCodons]
          mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
        }else{
          countStops = sum(mutationInfoWhole=="Stop")
          if(seqWithStops==2 & countStops==0) mutationInfo = NA
          if(seqWithStops==3 & countStops>0) mutationInfo = NA
        }

        if(any(!is.na(mutationInfo))){
          #Filter mutations based on multipleMutation
          if(multipleMutation==1 & !is.na(mutationInfo)){
            mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
            tableMutationCodons <- table(mutationCodons)
            codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
            if(any(codonsWithMultipleMutations)){
              #remove the nucleotide mutations in the codons with multiple mutations
              mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
              #replace those codons with Ns in the input sequence
              paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
              matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
            }
          }

          #Filter mutations based on the model
          if(any(mutationInfo)==T | is.na(any(mutationInfo))){

            if(model==1 & !is.na(mutationInfo)){
              mutationInfo <- mutationInfo[mutationInfo=="S"]
            }
            if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo)
            else return(NA)
          }else{
            return(NA)
          }
        }else{
          return(NA)
        }


      }else{
        return(NA)
      }


    }else{
      return (NA)
    }
  }

   analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){

    paramGL = s2c(inputArray[2])
    paramSeq = s2c(inputArray[1])
    inputSeq <- inputArray[1]
    #if( any(paramSeq=="N") ){
    #  gapPos_Seq =  which(paramSeq=="N")
    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
    #}
    mutations_val = paramGL != paramSeq

    if(any(mutations_val)){
      mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
      length_mutations =length(mutationPos)
      mutationInfo = rep(NA,length_mutations)

      pos<- mutationPos
      pos_array<-array(sapply(pos,getCodonPos))
      codonGL =  paramGL[pos_array]
      codonSeqWhole =  paramSeq[pos_array]
      codonSeq = sapply(pos,function(x){
                                seqP = paramGL[getCodonPos(x)]
                                muCodonPos = {x-1}%%3+1
                                seqP[muCodonPos] = paramSeq[x]
                                return(seqP)
                              })
      GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
      SeqcodonsWhole =  apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
      Seqcodons =   apply(codonSeq,2,c2s)

      mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
      names(mutationInfo) = mutationPos

      mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
      names(mutationInfoWhole) = mutationPos

      mutationInfo <- mutationInfo[!is.na(mutationInfo)]
      mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]

      if(any(!is.na(mutationInfo))){

        #Filter based on Stop (at the codon level)
        if(seqWithStops==1){
          nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
          mutationInfo = mutationInfo[nucleotidesAtStopCodons]
          mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
        }else{
          countStops = sum(mutationInfoWhole=="Stop")
          if(seqWithStops==2 & countStops==0) mutationInfo = NA
          if(seqWithStops==3 & countStops>0) mutationInfo = NA
        }

        if(any(!is.na(mutationInfo))){
          #Filter mutations based on multipleMutation
          if(multipleMutation==1 & !is.na(mutationInfo)){
            mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
            tableMutationCodons <- table(mutationCodons)
            codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
            if(any(codonsWithMultipleMutations)){
              #remove the nucleotide mutations in the codons with multiple mutations
              mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
              #replace those codons with Ns in the input sequence
              paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
              #matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
              inputSeq <- c2s(paramSeq)
            }
          }

          #Filter mutations based on the model
          if(any(mutationInfo)==T | is.na(any(mutationInfo))){

            if(model==1 & !is.na(mutationInfo)){
              mutationInfo <- mutationInfo[mutationInfo=="S"]
            }
            if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq))
            else return(list(NA,inputSeq))
          }else{
            return(list(NA,inputSeq))
          }
        }else{
          return(list(NA,inputSeq))
        }


      }else{
        return(list(NA,inputSeq))
      }


    }else{
      return (list(NA,inputSeq))
    }
  }

  # triMutability Background Count
  buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){

    #rowOrigMatInput = matInput[inputMatrixIndex,]
    seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
    seqInput = gsub("-", "", matInput[inputMatrixIndex,1])
    #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
    tempInput <- cbind(seqInput,seqGL)
    seqLength = nchar(seqGL)
    list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
    mutationCount <- list_analyzeMutationsFixed[[1]]
    seqInput <- list_analyzeMutationsFixed[[2]]
    BackgroundMatrix = mutabilityMatrix
    MutationMatrix = mutabilityMatrix
    MutationCountMatrix = mutabilityMatrix
    if(!is.na(mutationCount)){
      if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){

        fivermerStartPos = 1:(seqLength-4)
        fivemerLength <- length(fivermerStartPos)
        fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
        fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))

        #Background
        for(fivemerIndex in 1:fivemerLength){
          fivemer = fivemerGL[fivemerIndex]
          if(!any(grep("N",fivemer))){
            fivemerCodonPos = fivemerCodon(fivemerIndex)
            fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
            fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])

            # All mutations model
            #if(!any(grep("N",fivemerReadingFrameCodon))){
              if(model==0){
                if(stopMutations==0){
                  if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
                    BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)
                }else{
                  if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
                    positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
                    BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
                  }
                }
              }else{ # Only silent mutations
                if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
                  positionWithinCodon = which(fivemerCodonPos==3)
                  BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
                }
              }
            #}
          }
        }

        #Mutations
        if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
        if(model==1) mutationCount = mutationCount[mutationCount=="S"]
        mutationPositions = as.numeric(names(mutationCount))
        mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
        mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
        countMutations = 0
        for(mutationPosition in mutationPositions){
          fivemerIndex = mutationPosition-2
          fivemer = fivemerSeq[fivemerIndex]
          GLfivemer = fivemerGL[fivemerIndex]
          fivemerCodonPos = fivemerCodon(fivemerIndex)
          fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
          fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
          if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
            if(model==0){
                countMutations = countMutations + 1
                MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
                MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
            }else{
              if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
                  countMutations = countMutations + 1
                  positionWithinCodon = which(fivemerCodonPos==3)
                  glNuc =  substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
                  inputNuc =  substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
                  MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
                  MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
              }
            }
          }
        }

        seqMutability = MutationMatrix/BackgroundMatrix
        seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
        #cat(inputMatrixIndex,"\t",countMutations,"\n")
        return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))

      }
    }

  }

  #Returns the codon position containing the middle nucleotide
  fivemerCodon <- function(fivemerIndex){
    codonPos = list(2:4,1:3,3:5)
    fivemerType = fivemerIndex%%3
    return(codonPos[[fivemerType+1]])
  }

  #returns probability values for one mutation in codons resulting in R, S or Stop
  probMutations <- function(typeOfMutation){
    matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(words(alphabet = c(NUCLEOTIDES,"N"), length=3),c(1:3)))
    for(codon in rownames(matMutationProb)){
        if( !any(grep("N",codon)) ){
        for(muPos in 1:3){
          matCodon = matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T)
          glNuc = matCodon[1,muPos]
          matCodon[,muPos] = canMutateTo(glNuc)
          substitutionRate = substitution[glNuc,matCodon[,muPos]]
          typeOfMutations = apply(rbind(rep(codon,3),apply(matCodon,1,c2s)),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
          matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation])
        }
      }
    }

    return(matMutationProb)
  }


#Mapping Trinucleotides to fivemers
mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
  rownames(triMutability) <- triMutability_Names
  Fivemer<-rep(NA,1024)
  names(Fivemer)<-words(alphabet=NUCLEOTIDES,length=5)
  Fivemer<-sapply(names(Fivemer),function(Word)return(sum( c(triMutability[substring(Word,3,5),1],triMutability[substring(Word,2,4),2],triMutability[substring(Word,1,3),3]),na.rm=TRUE)))
  Fivemer<-Fivemer/sum(Fivemer)
  return(Fivemer)
}

collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
  Indices<-substring(names(Fivemer),3,3)==NUC
  Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
  tapply(which(Indices),Factors,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE))
}


CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
  Indices<-substring(names(Fivemer),3,3)==NUC
  Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
  tapply(which(Indices),Factors,function(i)sum(Weights[i],na.rm=TRUE))
}

#Uses the real counts of the mutated fivemers
CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
  Indices<-substring(names(Fivemer),3,3)==NUC
  Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
  tapply(which(Indices),Factors,function(i)sum(Counts[i],na.rm=TRUE))
}

bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
N<-sum(x)
if(N){
p<-x/N
k<-length(x)-1
tmp<-rmultinom(M, size = N, prob=p)
tmp_p<-apply(tmp,2,function(y)y/N)
(apply(tmp_p,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k))))
}
else return(matrix(0,2,length(x)))
}


bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){

N<-sum(x)
k<-length(x)
y<-rep(1:k,x)
tmp<-sapply(1:M,function(i)sample(y,n))
if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))/n
if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))/n
(apply(tmp_p,1,function(z)quantile(z,c(alpha/2/(k-1),1-alpha/2/(k-1)))))
}


p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
n=sum(x_obs)
N<-sum(x)
k<-length(x)
y<-rep(1:k,x)
tmp<-sapply(1:M,function(i)sample(y,n))
if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))
if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))
tmp<-rbind(sapply(1:3,function(i)sum(tmp_p[i,]>=x_obs[i])/M),
sapply(1:3,function(i)sum(tmp_p[i,]<=x_obs[i])/M))
sapply(1:3,function(i){if(tmp[1,i]>=tmp[2,i])return(-tmp[2,i])else return(tmp[1,i])})
}

#"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
# Remove SNPs from IMGT germline segment alleles
generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
  repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
  alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2])
  SNPs <- tapply(repertoireIn,sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1]),function(x){
    Indices<-NULL
    for(i in 1:length(x)){
      firstSeq = s2c(x[[1]])
      iSeq = s2c(x[[i]])
      Indices<-c(Indices,which(firstSeq[1:320]!=iSeq[1:320] & firstSeq[1:320]!="." & iSeq[1:320]!="."  ))
    }
    return(sort(unique(Indices)))
  })
 repertoireOut <- repertoireIn
 repertoireOut <- lapply(names(repertoireOut), function(repertoireName){
                                        alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2]
                                        geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1]
                                        alleleSeq <- s2c(repertoireOut[[repertoireName]])
                                        alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
                                        alleleSeq <- c2s(alleleSeq)
                                        repertoireOut[[repertoireName]] <- alleleSeq
                                      })
  names(repertoireOut) <- names(repertoireIn)
  write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)

}


############
groupBayes2 = function(indexes, param_resultMat){

  BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[4])}))
  BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[2]+x[4])}))
  #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])}))
  #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])}))
  #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])}))
  #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])}))
  return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR,
                "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) )
                #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR,
                #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR))
#                "BayesGDist_Global_CDR" = BayesGDist_Global_CDR,
#                "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) )


}


calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){
  G <- max(length(x),length(N),length(p))
  x=array(x,dim=G)
  N=array(N,dim=G)
  p=array(p,dim=G)

  indexOfZero = N>0 & p>0
  N = N[indexOfZero]
  x = x[indexOfZero]
  p = p[indexOfZero]
  G <- length(x)

  if(G){

    cons<-array( dim=c(length_sigma,G) )
    if(G==1) {
    return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma))
    }
    else {
      for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma)
      listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
      y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
      return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
    }
  }else{
    return(NA)
  }
}


calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
  matG <- listMatG[[1]]
  groups <- listMatG[[2]]
  i = 1
  resConv <- matG[,i]
  denom <- 2^groups[i]
  if(length(groups)>1){
    while( i<length(groups) ){
      i = i + 1
      resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma)
      #cat({{2^groups[i]}/denom},"\n")
      denom <- denom + 2^groups[i]
    }
  }
  return(resConv)
}

weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
lx<-length(x)
ly<-length(y)
if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){
if(w<1){
y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y
x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y
lx<-length(x1)
ly<-length(y1)
}
else {
y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y
x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y
lx<-length(x1)
ly<-length(y1)
}
}
else{
x1<-x
y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y
ly<-length(y1)
}
tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y
tmp[tmp<=0] = 0
return(tmp/sum(tmp))
}

########################


mutabilityMatrixONE<-rep(0,4)
names(mutabilityMatrixONE)<-NUCLEOTIDES

  # triMutability Background Count
  buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){

    #rowOrigMatInput = matInput[inputMatrixIndex,]
    seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
    seqInput = gsub("-", "", matInput[inputMatrixIndex,1])
    matInput[inputMatrixIndex,] <<- c(seqInput,seqGL)
    seqLength = nchar(seqGL)
    mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops)
    BackgroundMatrix = mutabilityMatrixONE
    MutationMatrix = mutabilityMatrixONE
    MutationCountMatrix = mutabilityMatrixONE
    if(!is.na(mutationCount)){
      if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){

#         ONEmerStartPos = 1:(seqLength)
#         ONEmerLength <- length(ONEmerStartPos)
        ONEmerGL <- s2c(seqGL)
        ONEmerSeq <- s2c(seqInput)

        #Background
        for(ONEmerIndex in 1:seqLength){
          ONEmer = ONEmerGL[ONEmerIndex]
          if(ONEmer!="N"){
            ONEmerCodonPos = getCodonPos(ONEmerIndex)
            ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos])
            ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] )

            # All mutations model
            #if(!any(grep("N",ONEmerReadingFrameCodon))){
              if(model==0){
                if(stopMutations==0){
                  if(!any(grep("N",ONEmerReadingFrameCodonInputSeq)))
                    BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1)
                }else{
                  if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){
                    positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1]
                    BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon])
                  }
                }
              }else{ # Only silent mutations
                if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){
                  positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
                  BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon])
                }
              }
            }
          }
        }

        #Mutations
        if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
        if(model==1) mutationCount = mutationCount[mutationCount=="S"]
        mutationPositions = as.numeric(names(mutationCount))
        mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
        mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
        countMutations = 0
        for(mutationPosition in mutationPositions){
          ONEmerIndex = mutationPosition
          ONEmer = ONEmerSeq[ONEmerIndex]
          GLONEmer = ONEmerGL[ONEmerIndex]
          ONEmerCodonPos = getCodonPos(ONEmerIndex)
          ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos])
          ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos])
          if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){
            if(model==0){
                countMutations = countMutations + 1
                MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1)
                MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
            }else{
              if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){
                  countMutations = countMutations + 1
                  positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
                  glNuc =  substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
                  inputNuc =  substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
                  MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc])
                  MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
              }
            }
          }
        }

        seqMutability = MutationMatrix/BackgroundMatrix
        seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
        #cat(inputMatrixIndex,"\t",countMutations,"\n")
        return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))
#         tmp<-list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix)
      }
    }

################
# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $

trim <- function(s, recode.factor=TRUE, ...)
  UseMethod("trim", s)

trim.default <- function(s, recode.factor=TRUE, ...)
  s

trim.character <- function(s, recode.factor=TRUE, ...)
{
  s <- sub(pattern="^ +", replacement="", x=s)
  s <- sub(pattern=" +$", replacement="", x=s)
  s
}

trim.factor <- function(s, recode.factor=TRUE, ...)
{
  levels(s) <- trim(levels(s))
  if(recode.factor) {
    dots <- list(x=s, ...)
    if(is.null(dots$sort)) dots$sort <- sort
    s <- do.call(what=reorder.factor, args=dots)
  }
  s
}

trim.list <- function(s, recode.factor=TRUE, ...)
  lapply(s, trim, recode.factor=recode.factor, ...)

trim.data.frame <- function(s, recode.factor=TRUE, ...)
{
  s[] <- trim.list(s, recode.factor=recode.factor, ...)
  s
}
#######################################
# Compute the expected for each sequence-germline pair by codon
getExpectedIndividualByCodon <- function(matInput){
if( any(grep("multicore",search())) ){
  facGL <- factor(matInput[,2])
  facLevels = levels(facGL)
  LisGLs_MutabilityU = mclapply(1:length(facLevels),  function(x){
    computeMutabilities(facLevels[x])
  })
  facIndex = match(facGL,facLevels)

  LisGLs_Mutability = mclapply(1:nrow(matInput),  function(x){
    cInput = rep(NA,nchar(matInput[x,1]))
    cInput[s2c(matInput[x,1])!="N"] = 1
    LisGLs_MutabilityU[[facIndex[x]]] * cInput
  })

  LisGLs_Targeting =  mclapply(1:dim(matInput)[1],  function(x){
    computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
  })

  LisGLs_MutationTypes  = mclapply(1:length(matInput[,2]),function(x){
    #print(x)
    computeMutationTypes(matInput[x,2])
  })

  LisGLs_R_Exp = mclapply(1:nrow(matInput),  function(x){
    Exp_R <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
                        function(codonNucs){
                          RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R")
                          sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T )
                        }
    )
  })

  LisGLs_S_Exp = mclapply(1:nrow(matInput),  function(x){
    Exp_S <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
                        function(codonNucs){
                          SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")
                          sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
                        }
    )
  })

  Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
  Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
  return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
  }else{
    facGL <- factor(matInput[,2])
    facLevels = levels(facGL)
    LisGLs_MutabilityU = lapply(1:length(facLevels),  function(x){
      computeMutabilities(facLevels[x])
    })
    facIndex = match(facGL,facLevels)

    LisGLs_Mutability = lapply(1:nrow(matInput),  function(x){
      cInput = rep(NA,nchar(matInput[x,1]))
      cInput[s2c(matInput[x,1])!="N"] = 1
      LisGLs_MutabilityU[[facIndex[x]]] * cInput
    })

    LisGLs_Targeting =  lapply(1:dim(matInput)[1],  function(x){
      computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
    })

    LisGLs_MutationTypes  = lapply(1:length(matInput[,2]),function(x){
      #print(x)
      computeMutationTypes(matInput[x,2])
    })

    LisGLs_R_Exp = lapply(1:nrow(matInput),  function(x){
      Exp_R <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
                          function(codonNucs){
                            RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R")
                            sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T )
                          }
      )
    })

    LisGLs_S_Exp = lapply(1:nrow(matInput),  function(x){
      Exp_S <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
                          function(codonNucs){
                            SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")
                            sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
                          }
      )
    })

    Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
    Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
    return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
  }
}

# getObservedMutationsByCodon <- function(listMutations){
#   numbSeqs <- length(listMutations)
#   obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
#   obsMu_S <- obsMu_R
#   temp <- mclapply(1:length(listMutations), function(i){
#     arrMutations = listMutations[[i]]
#     RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
#     RPos <- sapply(RPos,getCodonNumb)
#     if(any(RPos)){
#       tabR <- table(RPos)
#       obsMu_R[i,as.numeric(names(tabR))] <<- tabR
#     }
#
#     SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
#     SPos <- sapply(SPos,getCodonNumb)
#     if(any(SPos)){
#       tabS <- table(SPos)
#       obsMu_S[i,names(tabS)] <<- tabS
#     }
#   }
#   )
#   return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
# }

getObservedMutationsByCodon <- function(listMutations){
  numbSeqs <- length(listMutations)
  obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
  obsMu_S <- obsMu_R
  temp <- lapply(1:length(listMutations), function(i){
    arrMutations = listMutations[[i]]
    RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
    RPos <- sapply(RPos,getCodonNumb)
    if(any(RPos)){
      tabR <- table(RPos)
      obsMu_R[i,as.numeric(names(tabR))] <<- tabR
    }

    SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
    SPos <- sapply(SPos,getCodonNumb)
    if(any(SPos)){
      tabS <- table(SPos)
      obsMu_S[i,names(tabS)] <<- tabS
    }
  }
  )
  return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
}
author	davidvanzessen
date	Mon, 29 Aug 2016 05:36:10 -0400
parents
children