| 
0
 | 
     1 #########################################################################################
 | 
| 
 | 
     2 # License Agreement
 | 
| 
 | 
     3 # 
 | 
| 
 | 
     4 # THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE 
 | 
| 
 | 
     5 # ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER 
 | 
| 
 | 
     6 # APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE 
 | 
| 
 | 
     7 # OR COPYRIGHT LAW IS PROHIBITED.
 | 
| 
 | 
     8 # 
 | 
| 
 | 
     9 # BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE 
 | 
| 
 | 
    10 # BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED 
 | 
| 
 | 
    11 # TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN 
 | 
| 
 | 
    12 # CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
 | 
| 
 | 
    13 #
 | 
| 
 | 
    14 # BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
 | 
| 
 | 
    15 # Coded by: Mohamed Uduman & Gur Yaari
 | 
| 
 | 
    16 # Copyright 2012 Kleinstein Lab
 | 
| 
 | 
    17 # Version: 1.3 (01/23/2014)
 | 
| 
 | 
    18 #########################################################################################
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 # Global variables  
 | 
| 
 | 
    21   
 | 
| 
 | 
    22   FILTER_BY_MUTATIONS = 1000
 | 
| 
 | 
    23 
 | 
| 
 | 
    24   # Nucleotides
 | 
| 
 | 
    25   NUCLEOTIDES = c("A","C","G","T")
 | 
| 
 | 
    26   
 | 
| 
 | 
    27   # Amino Acids
 | 
| 
 | 
    28   AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")
 | 
| 
 | 
    29   names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")
 | 
| 
 | 
    30   names(AMINO_ACIDS) <- names(AMINO_ACIDS)
 | 
| 
 | 
    31 
 | 
| 
 | 
    32   #Amino Acid Traits
 | 
| 
 | 
    33   #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
 | 
| 
 | 
    34   #B = "Hydrophobic/Burried"  N = "Intermediate/Neutral"  S="Hydrophilic/Surface") 
 | 
| 
 | 
    35   TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")
 | 
| 
 | 
    36   names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
 | 
| 
 | 
    37   TRAITS_AMINO_ACIDS <- array(NA,21)
 | 
| 
 | 
    38   
 | 
| 
 | 
    39   # Codon Table
 | 
| 
 | 
    40   CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))
 | 
| 
 | 
    41 
 | 
| 
 | 
    42   # Substitution Model: Smith DS et al. 1996
 | 
| 
 | 
    43   substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
 | 
| 
 | 
    44   substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
 | 
| 
 | 
    45   substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
 | 
| 
 | 
    46   load("FiveS_Substitution.RData")
 | 
| 
 | 
    47 
 | 
| 
 | 
    48   # Mutability Models: Shapiro GS et al. 2002
 | 
| 
 | 
    49   triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T)
 | 
| 
 | 
    50   triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T)
 | 
| 
 | 
    51   triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT")
 | 
| 
 | 
    52   load("FiveS_Mutability.RData")
 | 
| 
 | 
    53 
 | 
| 
 | 
    54 # Functions
 | 
| 
 | 
    55   
 | 
| 
 | 
    56   # Translate codon to amino acid
 | 
| 
 | 
    57   translateCodonToAminoAcid<-function(Codon){
 | 
| 
 | 
    58      return(AMINO_ACIDS[Codon])
 | 
| 
 | 
    59   }
 | 
| 
 | 
    60 
 | 
| 
 | 
    61   # Translate amino acid to trait change
 | 
| 
 | 
    62   translateAminoAcidToTraitChange<-function(AminoAcid){
 | 
| 
 | 
    63      return(TRAITS_AMINO_ACIDS[AminoAcid])
 | 
| 
 | 
    64   }
 | 
| 
 | 
    65     
 | 
| 
 | 
    66   # Initialize Amino Acid Trait Changes
 | 
| 
 | 
    67   initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){
 | 
| 
 | 
    68     if(!is.null(traitChangeFileName)){
 | 
| 
 | 
    69       tryCatch(
 | 
| 
 | 
    70           traitChange <- read.delim(traitChangeFileName,sep="\t",header=T)
 | 
| 
 | 
    71           , error = function(ex){
 | 
| 
 | 
    72             cat("Error|Error reading trait changes. Please check file name/path and format.\n")
 | 
| 
 | 
    73             q()
 | 
| 
 | 
    74           }
 | 
| 
 | 
    75         )
 | 
| 
 | 
    76     }else{
 | 
| 
 | 
    77       traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
 | 
| 
 | 
    78     }
 | 
| 
 | 
    79     TRAITS_AMINO_ACIDS <<- traitChange
 | 
| 
 | 
    80  } 
 | 
| 
 | 
    81   
 | 
| 
 | 
    82   # Read in formatted nucleotide substitution matrix
 | 
| 
 | 
    83   initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){
 | 
| 
 | 
    84     if(!is.null(subsMatFileName)){
 | 
| 
 | 
    85       tryCatch(
 | 
| 
 | 
    86           subsMat <- read.delim(subsMatFileName,sep="\t",header=T)
 | 
| 
 | 
    87           , error = function(ex){
 | 
| 
 | 
    88             cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
 | 
| 
 | 
    89             q()
 | 
| 
 | 
    90           }
 | 
| 
 | 
    91         )
 | 
| 
 | 
    92       if(sum(apply(subsMat,1,sum)==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x)))
 | 
| 
 | 
    93     }else{
 | 
| 
 | 
    94       if(substitutionModel==1)subsMat <- substitution_Literature_Mouse
 | 
| 
 | 
    95       if(substitutionModel==2)subsMat <- substitution_Flu_Human      
 | 
| 
 | 
    96       if(substitutionModel==3)subsMat <- substitution_Flu25_Human      
 | 
| 
 | 
    97        
 | 
| 
 | 
    98     }
 | 
| 
 | 
    99 
 | 
| 
 | 
   100     if(substitutionModel==0){
 | 
| 
 | 
   101       subsMat <- matrix(1,4,4)
 | 
| 
 | 
   102       subsMat[,] = 1/3
 | 
| 
 | 
   103       subsMat[1,1] = 0
 | 
| 
 | 
   104       subsMat[2,2] = 0
 | 
| 
 | 
   105       subsMat[3,3] = 0
 | 
| 
 | 
   106       subsMat[4,4] = 0
 | 
| 
 | 
   107     }
 | 
| 
 | 
   108     
 | 
| 
 | 
   109     
 | 
| 
 | 
   110     NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-")
 | 
| 
 | 
   111     if(substitutionModel==5){
 | 
| 
 | 
   112       subsMat <- FiveS_Substitution
 | 
| 
 | 
   113       return(subsMat)
 | 
| 
 | 
   114     }else{
 | 
| 
 | 
   115       subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4))
 | 
| 
 | 
   116       return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) )
 | 
| 
 | 
   117     }
 | 
| 
 | 
   118   }
 | 
| 
 | 
   119 
 | 
| 
 | 
   120    
 | 
| 
 | 
   121   # Read in formatted Mutability file
 | 
| 
 | 
   122   initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){
 | 
| 
 | 
   123     if(!is.null(mutabilityMatFileName)){
 | 
| 
 | 
   124         tryCatch(
 | 
| 
 | 
   125             mutabilityMat <- read.delim(mutabilityMatFileName,sep="\t",header=T)
 | 
| 
 | 
   126             , error = function(ex){
 | 
| 
 | 
   127               cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
 | 
| 
 | 
   128               q()
 | 
| 
 | 
   129             }
 | 
| 
 | 
   130           )
 | 
| 
 | 
   131     }else{
 | 
| 
 | 
   132       mutabilityMat <- triMutability_Literature_Human
 | 
| 
 | 
   133       if(species==2) mutabilityMat <- triMutability_Literature_Mouse
 | 
| 
 | 
   134     }
 | 
| 
 | 
   135 
 | 
| 
 | 
   136   if(mutabilityModel==0){ mutabilityMat <- matrix(1,64,3)}
 | 
| 
 | 
   137   
 | 
| 
 | 
   138     if(mutabilityModel==5){
 | 
| 
 | 
   139       mutabilityMat <- FiveS_Mutability
 | 
| 
 | 
   140       return(mutabilityMat)
 | 
| 
 | 
   141     }else{
 | 
| 
 | 
   142       return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) )
 | 
| 
 | 
   143     }
 | 
| 
 | 
   144   }
 | 
| 
 | 
   145 
 | 
| 
 | 
   146   # Read FASTA file formats
 | 
| 
 | 
   147   # Modified from read.fasta from the seqinR package
 | 
| 
 | 
   148   baseline.read.fasta <-
 | 
| 
 | 
   149   function (file = system.file("sequences/sample.fasta", package = "seqinr"), 
 | 
| 
 | 
   150       seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE, 
 | 
| 
 | 
   151       set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, 
 | 
| 
 | 
   152       strip.desc = FALSE,  sizeof.longlong = .Machine$sizeof.longlong, 
 | 
| 
 | 
   153       endian = .Platform$endian, apply.mask = TRUE) 
 | 
| 
 | 
   154   {
 | 
| 
 | 
   155       seqtype <- match.arg(seqtype)
 | 
| 
 | 
   156   
 | 
| 
 | 
   157           lines <- readLines(file)
 | 
| 
 | 
   158           
 | 
| 
 | 
   159           if (legacy.mode) {
 | 
| 
 | 
   160               comments <- grep("^;", lines)
 | 
| 
 | 
   161               if (length(comments) > 0) 
 | 
| 
 | 
   162                   lines <- lines[-comments]
 | 
| 
 | 
   163           }
 | 
| 
 | 
   164           
 | 
| 
 | 
   165           
 | 
| 
 | 
   166           ind_groups<-which(substr(lines, 1L, 3L) == ">>>")
 | 
| 
 | 
   167           lines_mod<-lines
 | 
| 
 | 
   168   
 | 
| 
 | 
   169           if(!length(ind_groups)){
 | 
| 
 | 
   170               lines_mod<-c(">>>All sequences combined",lines)            
 | 
| 
 | 
   171           }
 | 
| 
 | 
   172           
 | 
| 
 | 
   173           ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>")
 | 
| 
 | 
   174   
 | 
| 
 | 
   175           lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod)))
 | 
| 
 | 
   176           id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1
 | 
| 
 | 
   177           lines[id] <- "THIS IS A FAKE SEQUENCE"
 | 
| 
 | 
   178           lines[-id] <- lines_mod
 | 
| 
 | 
   179           rm(lines_mod)
 | 
| 
 | 
   180   
 | 
| 
 | 
   181   		ind <- which(substr(lines, 1L, 1L) == ">")
 | 
| 
 | 
   182           nseq <- length(ind)
 | 
| 
 | 
   183           if (nseq == 0) {
 | 
| 
 | 
   184                stop("no line starting with a > character found")
 | 
| 
 | 
   185           }        
 | 
| 
 | 
   186           start <- ind + 1
 | 
| 
 | 
   187           end <- ind - 1
 | 
| 
 | 
   188   
 | 
| 
 | 
   189           while( any(which(ind%in%end)) ){
 | 
| 
 | 
   190             ind=ind[-which(ind%in%end)]
 | 
| 
 | 
   191             nseq <- length(ind)
 | 
| 
 | 
   192             if (nseq == 0) {
 | 
| 
 | 
   193                 stop("no line starting with a > character found")
 | 
| 
 | 
   194             }        
 | 
| 
 | 
   195             start <- ind + 1
 | 
| 
 | 
   196             end <- ind - 1        
 | 
| 
 | 
   197           }
 | 
| 
 | 
   198           
 | 
| 
 | 
   199           end <- c(end[-1], length(lines))
 | 
| 
 | 
   200           sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
 | 
| 
 | 
   201           if (seqonly) 
 | 
| 
 | 
   202               return(sequences)
 | 
| 
 | 
   203           nomseq <- lapply(seq_len(nseq), function(i) {
 | 
| 
 | 
   204           
 | 
| 
 | 
   205               #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
 | 
| 
 | 
   206               substr(lines[ind[i]], 2, nchar(lines[ind[i]]))
 | 
| 
 | 
   207           
 | 
| 
 | 
   208           })
 | 
| 
 | 
   209           if (seqtype == "DNA") {
 | 
| 
 | 
   210               if (forceDNAtolower) {
 | 
| 
 | 
   211                   sequences <- as.list(tolower(chartr(".","-",sequences)))
 | 
| 
 | 
   212               }else{
 | 
| 
 | 
   213                   sequences <- as.list(toupper(chartr(".","-",sequences)))
 | 
| 
 | 
   214               }
 | 
| 
 | 
   215           }
 | 
| 
 | 
   216           if (as.string == FALSE) 
 | 
| 
 | 
   217               sequences <- lapply(sequences, s2c)
 | 
| 
 | 
   218           if (set.attributes) {
 | 
| 
 | 
   219               for (i in seq_len(nseq)) {
 | 
| 
 | 
   220                   Annot <- lines[ind[i]]
 | 
| 
 | 
   221                   if (strip.desc) 
 | 
| 
 | 
   222                     Annot <- substr(Annot, 2L, nchar(Annot))
 | 
| 
 | 
   223                   attributes(sequences[[i]]) <- list(name = nomseq[[i]], 
 | 
| 
 | 
   224                     Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA", 
 | 
| 
 | 
   225                       DNA = "SeqFastadna"))
 | 
| 
 | 
   226               }
 | 
| 
 | 
   227           }
 | 
| 
 | 
   228           names(sequences) <- nomseq
 | 
| 
 | 
   229           return(sequences)
 | 
| 
 | 
   230   }
 | 
| 
 | 
   231 
 | 
| 
 | 
   232   
 | 
| 
 | 
   233   # Replaces non FASTA characters in input files with N  
 | 
| 
 | 
   234   replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){
 | 
| 
 | 
   235     gsub('[^ACGTNacgt[:punct:]-[:punct:].]','N',inSeq,perl=TRUE)
 | 
| 
 | 
   236   }    
 | 
| 
 | 
   237   
 | 
| 
 | 
   238   # Find the germlines in the FASTA list
 | 
| 
 | 
   239   germlinesInFile <- function(seqIDs){
 | 
| 
 | 
   240     firstChar = sapply(seqIDs,function(x){substr(x,1,1)})
 | 
| 
 | 
   241     secondChar = sapply(seqIDs,function(x){substr(x,2,2)})
 | 
| 
 | 
   242     return(firstChar==">" & secondChar!=">")
 | 
| 
 | 
   243   }
 | 
| 
 | 
   244   
 | 
| 
 | 
   245   # Find the groups in the FASTA list
 | 
| 
 | 
   246   groupsInFile <- function(seqIDs){
 | 
| 
 | 
   247     sapply(seqIDs,function(x){substr(x,1,2)})==">>"
 | 
| 
 | 
   248   }
 | 
| 
 | 
   249 
 | 
| 
 | 
   250   # In the process of finding germlines/groups, expand from the start to end of the group
 | 
| 
 | 
   251   expandTillNext <- function(vecPosToID){    
 | 
| 
 | 
   252     IDs = names(vecPosToID)
 | 
| 
 | 
   253     posOfInterests =  which(vecPosToID)
 | 
| 
 | 
   254   
 | 
| 
 | 
   255     expandedID = rep(NA,length(IDs))
 | 
| 
 | 
   256     expandedIDNames = gsub(">","",IDs[posOfInterests])
 | 
| 
 | 
   257     startIndexes = c(1,posOfInterests[-1])
 | 
| 
 | 
   258     stopIndexes = c(posOfInterests[-1]-1,length(IDs))
 | 
| 
 | 
   259     expandedID  = unlist(sapply(1:length(startIndexes),function(i){
 | 
| 
 | 
   260                                     rep(i,stopIndexes[i]-startIndexes[i]+1)
 | 
| 
 | 
   261                                   }))
 | 
| 
 | 
   262     names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){
 | 
| 
 | 
   263                                     rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1)
 | 
| 
 | 
   264                                   }))  
 | 
| 
 | 
   265     return(expandedID)                                                                                                  
 | 
| 
 | 
   266   }
 | 
| 
 | 
   267     
 | 
| 
 | 
   268   # Process FASTA (list) to return a matrix[input, germline)
 | 
| 
 | 
   269   processInputAdvanced <- function(inputFASTA){
 | 
| 
 | 
   270   
 | 
| 
 | 
   271     seqIDs = names(inputFASTA)
 | 
| 
 | 
   272     numbSeqs = length(seqIDs)
 | 
| 
 | 
   273     posGermlines1 = germlinesInFile(seqIDs)
 | 
| 
 | 
   274     numbGermlines = sum(posGermlines1)
 | 
| 
 | 
   275     posGroups1 = groupsInFile(seqIDs)
 | 
| 
 | 
   276     numbGroups = sum(posGroups1)
 | 
| 
 | 
   277     consDef = NA
 | 
| 
 | 
   278     
 | 
| 
 | 
   279     if(numbGermlines==0){
 | 
| 
 | 
   280       posGermlines = 2
 | 
| 
 | 
   281       numbGermlines = 1  
 | 
| 
 | 
   282     }
 | 
| 
 | 
   283   
 | 
| 
 | 
   284       glPositionsSum = cumsum(posGermlines1)
 | 
| 
 | 
   285       glPositions = table(glPositionsSum)
 | 
| 
 | 
   286       #Find the position of the conservation row
 | 
| 
 | 
   287       consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1  
 | 
| 
 | 
   288     if( length(consDefPos)> 0 ){
 | 
| 
 | 
   289       consDefID =  match(consDefPos, glPositionsSum) 
 | 
| 
 | 
   290       #The coservation rows need to be pulled out and stores seperately 
 | 
| 
 | 
   291       consDef =  inputFASTA[consDefID]
 | 
| 
 | 
   292       inputFASTA =  inputFASTA[-consDefID]
 | 
| 
 | 
   293   
 | 
| 
 | 
   294       seqIDs = names(inputFASTA)
 | 
| 
 | 
   295       numbSeqs = length(seqIDs)
 | 
| 
 | 
   296       posGermlines1 = germlinesInFile(seqIDs)
 | 
| 
 | 
   297       numbGermlines = sum(posGermlines1)
 | 
| 
 | 
   298       posGroups1 = groupsInFile(seqIDs)
 | 
| 
 | 
   299       numbGroups = sum(posGroups1)
 | 
| 
 | 
   300       if(numbGermlines==0){
 | 
| 
 | 
   301         posGermlines = 2
 | 
| 
 | 
   302         numbGermlines = 1  
 | 
| 
 | 
   303       }    
 | 
| 
 | 
   304     }
 | 
| 
 | 
   305     
 | 
| 
 | 
   306     posGroups <- expandTillNext(posGroups1)
 | 
| 
 | 
   307     posGermlines <- expandTillNext(posGermlines1)
 | 
| 
 | 
   308     posGermlines[posGroups1] = 0
 | 
| 
 | 
   309     names(posGermlines)[posGroups1] = names(posGroups)[posGroups1]
 | 
| 
 | 
   310     posInput = rep(TRUE,numbSeqs)
 | 
| 
 | 
   311     posInput[posGroups1 | posGermlines1] = FALSE
 | 
| 
 | 
   312     
 | 
| 
 | 
   313     matInput = matrix(NA, nrow=sum(posInput), ncol=2)
 | 
| 
 | 
   314     rownames(matInput) = seqIDs[posInput]
 | 
| 
 | 
   315     colnames(matInput) = c("Input","Germline")
 | 
| 
 | 
   316     
 | 
| 
 | 
   317     vecInputFASTA = unlist(inputFASTA)  
 | 
| 
 | 
   318     matInput[,1] = vecInputFASTA[posInput]
 | 
| 
 | 
   319     matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ]
 | 
| 
 | 
   320     
 | 
| 
 | 
   321     germlines = posGermlines[posInput]
 | 
| 
 | 
   322     groups = posGroups[posInput]
 | 
| 
 | 
   323     
 | 
| 
 | 
   324     return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef ))      
 | 
| 
 | 
   325   }
 | 
| 
 | 
   326 
 | 
| 
 | 
   327 
 | 
| 
 | 
   328   # Replace leading and trailing dashes in the sequence
 | 
| 
 | 
   329   replaceLeadingTrailingDashes <- function(x,readEnd){
 | 
| 
 | 
   330     iiGap = unlist(gregexpr("-",x[1]))
 | 
| 
 | 
   331     ggGap = unlist(gregexpr("-",x[2]))  
 | 
| 
 | 
   332     #posToChange = intersect(iiGap,ggGap)
 | 
| 
 | 
   333     
 | 
| 
 | 
   334     
 | 
| 
 | 
   335     seqIn = replaceLeadingTrailingDashesHelper(x[1])
 | 
| 
 | 
   336     seqGL = replaceLeadingTrailingDashesHelper(x[2])
 | 
| 
 | 
   337     seqTemplate = rep('N',readEnd)
 | 
| 
 | 
   338     seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd])
 | 
| 
 | 
   339     seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd])
 | 
| 
 | 
   340 #    if(posToChange!=-1){
 | 
| 
 | 
   341 #      seqIn[posToChange] = "-"
 | 
| 
 | 
   342 #      seqGL[posToChange] = "-"
 | 
| 
 | 
   343 #    }
 | 
| 
 | 
   344   
 | 
| 
 | 
   345     seqIn = c2s(seqIn[1:readEnd])
 | 
| 
 | 
   346     seqGL = c2s(seqGL[1:readEnd])
 | 
| 
 | 
   347   
 | 
| 
 | 
   348     lenGL = nchar(seqGL)
 | 
| 
 | 
   349     if(lenGL<readEnd){
 | 
| 
 | 
   350       seqGL = paste(seqGL,c2s(rep("N",readEnd-lenGL)),sep="")
 | 
| 
 | 
   351     }
 | 
| 
 | 
   352   
 | 
| 
 | 
   353     lenInput = nchar(seqIn)
 | 
| 
 | 
   354     if(lenInput<readEnd){
 | 
| 
 | 
   355       seqIn = paste(seqIn,c2s(rep("N",readEnd-lenInput)),sep="")
 | 
| 
 | 
   356     }    
 | 
| 
 | 
   357     return( c(seqIn,seqGL) )
 | 
| 
 | 
   358   }  
 | 
| 
 | 
   359 
 | 
| 
 | 
   360   replaceLeadingTrailingDashesHelper <- function(x){
 | 
| 
 | 
   361     grepResults = gregexpr("-*",x)
 | 
| 
 | 
   362     grepResultsPos = unlist(grepResults)
 | 
| 
 | 
   363     grepResultsLen =  attr(grepResults[[1]],"match.length")   
 | 
| 
 | 
   364     #print(paste("x = '", x, "'", sep=""))
 | 
| 
 | 
   365     x = s2c(x)
 | 
| 
 | 
   366     if(x[1]=="-"){
 | 
| 
 | 
   367       x[1:grepResultsLen[1]] = "N"      
 | 
| 
 | 
   368     }
 | 
| 
 | 
   369     if(x[length(x)]=="-"){
 | 
| 
 | 
   370       x[(length(x)-grepResultsLen[length(grepResultsLen)]+1):length(x)] = "N"      
 | 
| 
 | 
   371     }
 | 
| 
 | 
   372     return(x)
 | 
| 
 | 
   373   }
 | 
| 
 | 
   374 
 | 
| 
 | 
   375 
 | 
| 
 | 
   376 
 | 
| 
 | 
   377   
 | 
| 
 | 
   378   # Check sequences for indels
 | 
| 
 | 
   379   checkForInDels <- function(matInputP){
 | 
| 
 | 
   380     insPos <- checkInsertion(matInputP)
 | 
| 
 | 
   381     delPos <- checkDeletions(matInputP)
 | 
| 
 | 
   382     return(list("Insertions"=insPos, "Deletions"=delPos))
 | 
| 
 | 
   383   }
 | 
| 
 | 
   384 
 | 
| 
 | 
   385   # Check sequences for insertions
 | 
| 
 | 
   386   checkInsertion <- function(matInputP){
 | 
| 
 | 
   387     insertionCheck = apply( matInputP,1, function(x){
 | 
| 
 | 
   388                                           inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
 | 
| 
 | 
   389                                           glGaps <- as.vector( gregexpr("-",x[2])[[1]] )                                          
 | 
| 
 | 
   390                                           return( is.finite( match(FALSE, glGaps%in%inputGaps ) ) )
 | 
| 
 | 
   391                                         })   
 | 
| 
 | 
   392     return(as.vector(insertionCheck))
 | 
| 
 | 
   393   }
 | 
| 
 | 
   394   # Fix inserstions
 | 
| 
 | 
   395   fixInsertions <- function(matInputP){
 | 
| 
 | 
   396     insPos <- checkInsertion(matInputP)
 | 
| 
 | 
   397     sapply((1:nrow(matInputP))[insPos],function(rowIndex){
 | 
| 
 | 
   398                                                 x <- matInputP[rowIndex,]
 | 
| 
 | 
   399                                                 inputGaps <- gregexpr("-",x[1])[[1]]
 | 
| 
 | 
   400                                                 glGaps <- gregexpr("-",x[2])[[1]]
 | 
| 
 | 
   401                                                 posInsertions <- glGaps[!(glGaps%in%inputGaps)]
 | 
| 
 | 
   402                                                 inputInsertionToN <- s2c(x[2])
 | 
| 
 | 
   403                                                 inputInsertionToN[posInsertions]!="-"
 | 
| 
 | 
   404                                                 inputInsertionToN[posInsertions] <- "N"
 | 
| 
 | 
   405                                                 inputInsertionToN <- c2s(inputInsertionToN)
 | 
| 
 | 
   406                                                 matInput[rowIndex,2] <<- inputInsertionToN 
 | 
| 
 | 
   407                                               })                                                               
 | 
| 
 | 
   408     return(insPos)
 | 
| 
 | 
   409   } 
 | 
| 
 | 
   410     
 | 
| 
 | 
   411   # Check sequences for deletions
 | 
| 
 | 
   412   checkDeletions <-function(matInputP){
 | 
| 
 | 
   413     deletionCheck = apply( matInputP,1, function(x){
 | 
| 
 | 
   414                                           inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
 | 
| 
 | 
   415                                           glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
 | 
| 
 | 
   416                                           return( is.finite( match(FALSE, inputGaps%in%glGaps ) ) )
 | 
| 
 | 
   417                                       })
 | 
| 
 | 
   418     return(as.vector(deletionCheck))                                      
 | 
| 
 | 
   419   }
 | 
| 
 | 
   420   # Fix sequences with deletions
 | 
| 
 | 
   421   fixDeletions <- function(matInputP){
 | 
| 
 | 
   422     delPos <- checkDeletions(matInputP)    
 | 
| 
 | 
   423     sapply((1:nrow(matInputP))[delPos],function(rowIndex){
 | 
| 
 | 
   424                                                 x <- matInputP[rowIndex,]
 | 
| 
 | 
   425                                                 inputGaps <- gregexpr("-",x[1])[[1]]
 | 
| 
 | 
   426                                                 glGaps <- gregexpr("-",x[2])[[1]]
 | 
| 
 | 
   427                                                 posDeletions <- inputGaps[!(inputGaps%in%glGaps)]
 | 
| 
 | 
   428                                                 inputDeletionToN <- s2c(x[1])
 | 
| 
 | 
   429                                                 inputDeletionToN[posDeletions] <- "N"
 | 
| 
 | 
   430                                                 inputDeletionToN <- c2s(inputDeletionToN)
 | 
| 
 | 
   431                                                 matInput[rowIndex,1] <<- inputDeletionToN 
 | 
| 
 | 
   432                                               })                                                                   
 | 
| 
 | 
   433     return(delPos)
 | 
| 
 | 
   434   }  
 | 
| 
 | 
   435     
 | 
| 
 | 
   436 
 | 
| 
 | 
   437   # Trim DNA sequence to the last codon
 | 
| 
 | 
   438   trimToLastCodon <- function(seqToTrim){
 | 
| 
 | 
   439     seqLen = nchar(seqToTrim)  
 | 
| 
 | 
   440     trimmedSeq = s2c(seqToTrim)
 | 
| 
 | 
   441     poi = seqLen
 | 
| 
 | 
   442     tailLen = 0
 | 
| 
 | 
   443     
 | 
| 
 | 
   444     while(trimmedSeq[poi]=="-" || trimmedSeq[poi]=="."){
 | 
| 
 | 
   445       tailLen = tailLen + 1
 | 
| 
 | 
   446       poi = poi - 1   
 | 
| 
 | 
   447     }
 | 
| 
 | 
   448     
 | 
| 
 | 
   449     trimmedSeq = c2s(trimmedSeq[1:(seqLen-tailLen)])
 | 
| 
 | 
   450     seqLen = nchar(trimmedSeq)
 | 
| 
 | 
   451     # Trim sequence to last codon
 | 
| 
 | 
   452   	if( getCodonPos(seqLen)[3] > seqLen )
 | 
| 
 | 
   453   	  trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) )
 | 
| 
 | 
   454     
 | 
| 
 | 
   455     return(trimmedSeq)
 | 
| 
 | 
   456   }
 | 
| 
 | 
   457   
 | 
| 
 | 
   458   # Given a nuclotide position, returns the pos of the 3 nucs that made the codon
 | 
| 
 | 
   459   # e.g. nuc 86 is part of nucs 85,86,87
 | 
| 
 | 
   460   getCodonPos <- function(nucPos){
 | 
| 
 | 
   461     codonNum =  (ceiling(nucPos/3))*3
 | 
| 
 | 
   462     return( (codonNum-2):codonNum)
 | 
| 
 | 
   463   }
 | 
| 
 | 
   464   
 | 
| 
 | 
   465   # Given a nuclotide position, returns the codon number
 | 
| 
 | 
   466   # e.g. nuc 86  = codon 29
 | 
| 
 | 
   467   getCodonNumb <- function(nucPos){
 | 
| 
 | 
   468     return( ceiling(nucPos/3) )
 | 
| 
 | 
   469   }
 | 
| 
 | 
   470   
 | 
| 
 | 
   471   # Given a codon, returns all the nuc positions that make the codon
 | 
| 
 | 
   472   getCodonNucs <- function(codonNumb){
 | 
| 
 | 
   473     getCodonPos(codonNumb*3)
 | 
| 
 | 
   474   }  
 | 
| 
 | 
   475 
 | 
| 
 | 
   476   computeCodonTable <- function(testID=1){
 | 
| 
 | 
   477                   
 | 
| 
 | 
   478     if(testID<=4){    
 | 
| 
 | 
   479       # Pre-compute every codons
 | 
| 
 | 
   480       intCounter = 1
 | 
| 
 | 
   481       for(pOne in NUCLEOTIDES){
 | 
| 
 | 
   482         for(pTwo in NUCLEOTIDES){
 | 
| 
 | 
   483           for(pThree in NUCLEOTIDES){
 | 
| 
 | 
   484             codon = paste(pOne,pTwo,pThree,sep="")
 | 
| 
 | 
   485             colnames(CODON_TABLE)[intCounter] =  codon
 | 
| 
 | 
   486             intCounter = intCounter + 1
 | 
| 
 | 
   487             CODON_TABLE[,codon] = mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12)))
 | 
| 
 | 
   488           }  
 | 
| 
 | 
   489         }
 | 
| 
 | 
   490       }
 | 
| 
 | 
   491       chars = c("N","A","C","G","T", "-")
 | 
| 
 | 
   492       for(a in chars){
 | 
| 
 | 
   493         for(b in chars){
 | 
| 
 | 
   494           for(c in chars){
 | 
| 
 | 
   495             if(a=="N" | b=="N" | c=="N"){ 
 | 
| 
 | 
   496               #cat(paste(a,b,c),sep="","\n") 
 | 
| 
 | 
   497               CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
 | 
| 
 | 
   498             }
 | 
| 
 | 
   499           }  
 | 
| 
 | 
   500         }
 | 
| 
 | 
   501       }
 | 
| 
 | 
   502       
 | 
| 
 | 
   503       chars = c("-","A","C","G","T")
 | 
| 
 | 
   504       for(a in chars){
 | 
| 
 | 
   505         for(b in chars){
 | 
| 
 | 
   506           for(c in chars){
 | 
| 
 | 
   507             if(a=="-" | b=="-" | c=="-"){ 
 | 
| 
 | 
   508               #cat(paste(a,b,c),sep="","\n") 
 | 
| 
 | 
   509               CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
 | 
| 
 | 
   510             }
 | 
| 
 | 
   511           }  
 | 
| 
 | 
   512         }
 | 
| 
 | 
   513       }
 | 
| 
 | 
   514       CODON_TABLE <<- as.matrix(CODON_TABLE)
 | 
| 
 | 
   515     }
 | 
| 
 | 
   516   }
 | 
| 
 | 
   517   
 | 
| 
 | 
   518   collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){
 | 
| 
 | 
   519   #print(length(vecInputSeqs))
 | 
| 
 | 
   520     vecInputSeqs = unique(vecInputSeqs) 
 | 
| 
 | 
   521     if(length(vecInputSeqs)==1){
 | 
| 
 | 
   522       return( list( c(vecInputSeqs,glSeq), F) )
 | 
| 
 | 
   523     }else{
 | 
| 
 | 
   524       charInputSeqs <- sapply(vecInputSeqs, function(x){
 | 
| 
 | 
   525                                               s2c(x)[1:readEnd]
 | 
| 
 | 
   526                                             })
 | 
| 
 | 
   527       charGLSeq <- s2c(glSeq)
 | 
| 
 | 
   528       matClone <- sapply(1:readEnd, function(i){
 | 
| 
 | 
   529                                             posNucs = unique(charInputSeqs[i,])
 | 
| 
 | 
   530                                             posGL = charGLSeq[i]
 | 
| 
 | 
   531                                             error = FALSE                                            
 | 
| 
 | 
   532                                             if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){
 | 
| 
 | 
   533                                               return(c("-",error))
 | 
| 
 | 
   534                                             }
 | 
| 
 | 
   535                                             if(length(posNucs)==1)
 | 
| 
 | 
   536                                               return(c(posNucs[1],error))
 | 
| 
 | 
   537                                             else{
 | 
| 
 | 
   538                                               if("N"%in%posNucs){
 | 
| 
 | 
   539                                                 error=TRUE
 | 
| 
 | 
   540                                               }
 | 
| 
 | 
   541                                               if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){
 | 
| 
 | 
   542                                                 return( c(posGL,error) )  
 | 
| 
 | 
   543                                               }else{
 | 
| 
 | 
   544                                                 #return( c(sample(posNucs[posNucs!="N"],1),error) )  
 | 
| 
 | 
   545                                                 if(nonTerminalOnly==0){
 | 
| 
 | 
   546                                                   return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) )  
 | 
| 
 | 
   547                                                 }else{
 | 
| 
 | 
   548                                                   posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL]
 | 
| 
 | 
   549                                                   posNucsTable = table(posNucs)
 | 
| 
 | 
   550                                                   if(sum(posNucsTable>1)==0){
 | 
| 
 | 
   551                                                     return( c(posGL,error) )
 | 
| 
 | 
   552                                                   }else{
 | 
| 
 | 
   553                                                     return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) )
 | 
| 
 | 
   554                                                   }
 | 
| 
 | 
   555                                                 }
 | 
| 
 | 
   556                                                 
 | 
| 
 | 
   557                                               }
 | 
| 
 | 
   558                                             } 
 | 
| 
 | 
   559                                           })
 | 
| 
 | 
   560       
 | 
| 
 | 
   561                                           
 | 
| 
 | 
   562       #print(length(vecInputSeqs))                                        
 | 
| 
 | 
   563       return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,]))
 | 
| 
 | 
   564     }
 | 
| 
 | 
   565   }
 | 
| 
 | 
   566 
 | 
| 
 | 
   567   # Compute the expected for each sequence-germline pair
 | 
| 
 | 
   568   getExpectedIndividual <- function(matInput){
 | 
| 
 | 
   569   if( any(grep("multicore",search())) ){ 
 | 
| 
 | 
   570     facGL <- factor(matInput[,2])
 | 
| 
 | 
   571     facLevels = levels(facGL)
 | 
| 
 | 
   572     LisGLs_MutabilityU = mclapply(1:length(facLevels),  function(x){
 | 
| 
 | 
   573                                                       computeMutabilities(facLevels[x])
 | 
| 
 | 
   574                                                     })
 | 
| 
 | 
   575     facIndex = match(facGL,facLevels)
 | 
| 
 | 
   576     
 | 
| 
 | 
   577     LisGLs_Mutability = mclapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
   578                                                       cInput = rep(NA,nchar(matInput[x,1]))
 | 
| 
 | 
   579                                                       cInput[s2c(matInput[x,1])!="N"] = 1
 | 
| 
 | 
   580                                                       LisGLs_MutabilityU[[facIndex[x]]] * cInput                                                   
 | 
| 
 | 
   581                                                     })
 | 
| 
 | 
   582                                                     
 | 
| 
 | 
   583     LisGLs_Targeting =  mclapply(1:dim(matInput)[1],  function(x){
 | 
| 
 | 
   584                                                       computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
 | 
| 
 | 
   585                                                     })
 | 
| 
 | 
   586                                                     
 | 
| 
 | 
   587     LisGLs_MutationTypes  = mclapply(1:length(matInput[,2]),function(x){
 | 
| 
 | 
   588                                                     #print(x)
 | 
| 
 | 
   589                                                     computeMutationTypes(matInput[x,2])
 | 
| 
 | 
   590                                                 })
 | 
| 
 | 
   591     
 | 
| 
 | 
   592     LisGLs_Exp = mclapply(1:dim(matInput)[1],  function(x){
 | 
| 
 | 
   593                                                   computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
 | 
| 
 | 
   594                                                 })
 | 
| 
 | 
   595     
 | 
| 
 | 
   596     ul_LisGLs_Exp =  unlist(LisGLs_Exp)                                            
 | 
| 
 | 
   597     return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
 | 
| 
 | 
   598   }else{
 | 
| 
 | 
   599     facGL <- factor(matInput[,2])
 | 
| 
 | 
   600     facLevels = levels(facGL)
 | 
| 
 | 
   601     LisGLs_MutabilityU = lapply(1:length(facLevels),  function(x){
 | 
| 
 | 
   602       computeMutabilities(facLevels[x])
 | 
| 
 | 
   603     })
 | 
| 
 | 
   604     facIndex = match(facGL,facLevels)
 | 
| 
 | 
   605     
 | 
| 
 | 
   606     LisGLs_Mutability = lapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
   607       cInput = rep(NA,nchar(matInput[x,1]))
 | 
| 
 | 
   608       cInput[s2c(matInput[x,1])!="N"] = 1
 | 
| 
 | 
   609       LisGLs_MutabilityU[[facIndex[x]]] * cInput                                                   
 | 
| 
 | 
   610     })
 | 
| 
 | 
   611     
 | 
| 
 | 
   612     LisGLs_Targeting =  lapply(1:dim(matInput)[1],  function(x){
 | 
| 
 | 
   613       computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
 | 
| 
 | 
   614     })
 | 
| 
 | 
   615     
 | 
| 
 | 
   616     LisGLs_MutationTypes  = lapply(1:length(matInput[,2]),function(x){
 | 
| 
 | 
   617       #print(x)
 | 
| 
 | 
   618       computeMutationTypes(matInput[x,2])
 | 
| 
 | 
   619     })
 | 
| 
 | 
   620     
 | 
| 
 | 
   621     LisGLs_Exp = lapply(1:dim(matInput)[1],  function(x){
 | 
| 
 | 
   622       computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
 | 
| 
 | 
   623     })
 | 
| 
 | 
   624     
 | 
| 
 | 
   625     ul_LisGLs_Exp =  unlist(LisGLs_Exp)                                            
 | 
| 
 | 
   626     return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
 | 
| 
 | 
   627     
 | 
| 
 | 
   628   }
 | 
| 
 | 
   629   }
 | 
| 
 | 
   630 
 | 
| 
 | 
   631   # Compute mutabilities of sequence based on the tri-nucleotide model
 | 
| 
 | 
   632   computeMutabilities <- function(paramSeq){
 | 
| 
 | 
   633     seqLen = nchar(paramSeq)
 | 
| 
 | 
   634     seqMutabilites = rep(NA,seqLen)
 | 
| 
 | 
   635   
 | 
| 
 | 
   636     gaplessSeq = gsub("-", "", paramSeq)
 | 
| 
 | 
   637     gaplessSeqLen = nchar(gaplessSeq)
 | 
| 
 | 
   638     gaplessSeqMutabilites = rep(NA,gaplessSeqLen)
 | 
| 
 | 
   639     
 | 
| 
 | 
   640     if(mutabilityModel!=5){
 | 
| 
 | 
   641       pos<- 3:(gaplessSeqLen)
 | 
| 
 | 
   642       subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))    
 | 
| 
 | 
   643       gaplessSeqMutabilites[pos] =      
 | 
| 
 | 
   644         tapply( c(
 | 
| 
 | 
   645                                         getMutability( substr(subSeq,1,3), 3) , 
 | 
| 
 | 
   646                                         getMutability( substr(subSeq,2,4), 2), 
 | 
| 
 | 
   647                                         getMutability( substr(subSeq,3,5), 1) 
 | 
| 
 | 
   648                                         ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE
 | 
| 
 | 
   649                                       )
 | 
| 
 | 
   650       #Pos 1
 | 
| 
 | 
   651       subSeq =  substr(gaplessSeq,1,3)
 | 
| 
 | 
   652       gaplessSeqMutabilites[1] =  getMutability(subSeq , 1)
 | 
| 
 | 
   653       #Pos 2
 | 
| 
 | 
   654       subSeq =  substr(gaplessSeq,1,4)
 | 
| 
 | 
   655       gaplessSeqMutabilites[2] =  mean( c(
 | 
| 
 | 
   656                                             getMutability( substr(subSeq,1,3), 2) , 
 | 
| 
 | 
   657                                             getMutability( substr(subSeq,2,4), 1) 
 | 
| 
 | 
   658                                           ),na.rm=T
 | 
| 
 | 
   659                                       ) 
 | 
| 
 | 
   660       seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
 | 
| 
 | 
   661       return(seqMutabilites)
 | 
| 
 | 
   662     }else{
 | 
| 
 | 
   663       
 | 
| 
 | 
   664       pos<- 3:(gaplessSeqLen)
 | 
| 
 | 
   665       subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))    
 | 
| 
 | 
   666       gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T)
 | 
| 
 | 
   667       seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
 | 
| 
 | 
   668       return(seqMutabilites)
 | 
| 
 | 
   669     }
 | 
| 
 | 
   670 
 | 
| 
 | 
   671   }
 | 
| 
 | 
   672 
 | 
| 
 | 
   673   # Returns the mutability of a triplet at a given position
 | 
| 
 | 
   674   getMutability <- function(codon, pos=1:3){
 | 
| 
 | 
   675     triplets <- rownames(mutability)
 | 
| 
 | 
   676     mutability[  match(codon,triplets) ,pos]
 | 
| 
 | 
   677   }
 | 
| 
 | 
   678 
 | 
| 
 | 
   679   getMutability5 <- function(fivemer){
 | 
| 
 | 
   680     return(mutability[fivemer])
 | 
| 
 | 
   681   }
 | 
| 
 | 
   682 
 | 
| 
 | 
   683   # Returns the substitution probabilty
 | 
| 
 | 
   684   getTransistionProb <- function(nuc){
 | 
| 
 | 
   685     substitution[nuc,]
 | 
| 
 | 
   686   }
 | 
| 
 | 
   687 
 | 
| 
 | 
   688   getTransistionProb5 <- function(fivemer){    
 | 
| 
 | 
   689     if(any(which(fivemer==colnames(substitution)))){
 | 
| 
 | 
   690       return(substitution[,fivemer])
 | 
| 
 | 
   691     }else{
 | 
| 
 | 
   692       return(array(NA,4))
 | 
| 
 | 
   693     }
 | 
| 
 | 
   694   }
 | 
| 
 | 
   695 
 | 
| 
 | 
   696   # Given a nuc, returns the other 3 nucs it can mutate to
 | 
| 
 | 
   697   canMutateTo <- function(nuc){
 | 
| 
 | 
   698     NUCLEOTIDES[- which(NUCLEOTIDES==nuc)]
 | 
| 
 | 
   699   }
 | 
| 
 | 
   700   
 | 
| 
 | 
   701   # Given a nucleotide, returns the probabilty of other nucleotide it can mutate to 
 | 
| 
 | 
   702   canMutateToProb <- function(nuc){
 | 
| 
 | 
   703     substitution[nuc,canMutateTo(nuc)]
 | 
| 
 | 
   704   }
 | 
| 
 | 
   705 
 | 
| 
 | 
   706   # Compute targeting, based on precomputed mutatbility & substitution  
 | 
| 
 | 
   707   computeTargeting <- function(param_strSeq,param_vecMutabilities){
 | 
| 
 | 
   708 
 | 
| 
 | 
   709     if(substitutionModel!=5){
 | 
| 
 | 
   710       vecSeq = s2c(param_strSeq)
 | 
| 
 | 
   711       matTargeting = sapply( 1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } )  
 | 
| 
 | 
   712       #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } )
 | 
| 
 | 
   713       dimnames( matTargeting ) =  list(NUCLEOTIDES,1:(length(vecSeq))) 
 | 
| 
 | 
   714       return (matTargeting)
 | 
| 
 | 
   715     }else{
 | 
| 
 | 
   716       
 | 
| 
 | 
   717       seqLen = nchar(param_strSeq)
 | 
| 
 | 
   718       seqsubstitution = matrix(NA,ncol=seqLen,nrow=4)
 | 
| 
 | 
   719       paramSeq <- param_strSeq
 | 
| 
 | 
   720       gaplessSeq = gsub("-", "", paramSeq)
 | 
| 
 | 
   721       gaplessSeqLen = nchar(gaplessSeq)
 | 
| 
 | 
   722       gaplessSeqSubstitution  = matrix(NA,ncol=gaplessSeqLen,nrow=4) 
 | 
| 
 | 
   723       
 | 
| 
 | 
   724       pos<- 3:(gaplessSeqLen)
 | 
| 
 | 
   725       subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))    
 | 
| 
 | 
   726       gaplessSeqSubstitution[,pos] = sapply(subSeq,function(x){ getTransistionProb5(x) }, simplify=T)
 | 
| 
 | 
   727       seqsubstitution[,which(s2c(paramSeq)!="-")]<- gaplessSeqSubstitution
 | 
| 
 | 
   728       #matTargeting <- param_vecMutabilities  %*% seqsubstitution
 | 
| 
 | 
   729       matTargeting <- sweep(seqsubstitution,2,param_vecMutabilities,`*`)
 | 
| 
 | 
   730       dimnames( matTargeting ) =  list(NUCLEOTIDES,1:(seqLen)) 
 | 
| 
 | 
   731       return (matTargeting)      
 | 
| 
 | 
   732     }
 | 
| 
 | 
   733   }  
 | 
| 
 | 
   734 
 | 
| 
 | 
   735   # Compute the mutations types   
 | 
| 
 | 
   736   computeMutationTypes <- function(param_strSeq){
 | 
| 
 | 
   737   #cat(param_strSeq,"\n")
 | 
| 
 | 
   738     #vecSeq = trimToLastCodon(param_strSeq)
 | 
| 
 | 
   739     lenSeq = nchar(param_strSeq)
 | 
| 
 | 
   740     vecCodons = sapply({1:(lenSeq/3)}*3-2,function(x){substr(param_strSeq,x,x+2)})
 | 
| 
 | 
   741     matMutationTypes = matrix( unlist(CODON_TABLE[,vecCodons]) ,ncol=lenSeq,nrow=4, byrow=F)
 | 
| 
 | 
   742     dimnames( matMutationTypes ) =  list(NUCLEOTIDES,1:(ncol(matMutationTypes)))
 | 
| 
 | 
   743     return(matMutationTypes)   
 | 
| 
 | 
   744   }  
 | 
| 
 | 
   745   computeMutationTypesFast <- function(param_strSeq){
 | 
| 
 | 
   746     matMutationTypes = matrix( CODON_TABLE[,param_strSeq] ,ncol=3,nrow=4, byrow=F)
 | 
| 
 | 
   747     #dimnames( matMutationTypes ) =  list(NUCLEOTIDES,1:(length(vecSeq)))
 | 
| 
 | 
   748     return(matMutationTypes)   
 | 
| 
 | 
   749   }  
 | 
| 
 | 
   750   mutationTypeOptimized <- function( matOfCodons ){
 | 
| 
 | 
   751    apply( matOfCodons,1,function(x){ mutationType(x[2],x[1]) } ) 
 | 
| 
 | 
   752   }  
 | 
| 
 | 
   753 
 | 
| 
 | 
   754   # Returns a vector of codons 1 mutation away from the given codon
 | 
| 
 | 
   755   permutateAllCodon <- function(codon){
 | 
| 
 | 
   756     cCodon = s2c(codon)
 | 
| 
 | 
   757     matCodons = t(array(cCodon,dim=c(3,12)))
 | 
| 
 | 
   758     matCodons[1:4,1] = NUCLEOTIDES
 | 
| 
 | 
   759     matCodons[5:8,2] = NUCLEOTIDES
 | 
| 
 | 
   760     matCodons[9:12,3] = NUCLEOTIDES
 | 
| 
 | 
   761     apply(matCodons,1,c2s)
 | 
| 
 | 
   762   }
 | 
| 
 | 
   763 
 | 
| 
 | 
   764   # Given two codons, tells you if the mutation is R or S (based on your definition)
 | 
| 
 | 
   765   mutationType <- function(codonFrom,codonTo){
 | 
| 
 | 
   766     if(testID==4){
 | 
| 
 | 
   767       if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
 | 
| 
 | 
   768         return(NA)
 | 
| 
 | 
   769       }else{
 | 
| 
 | 
   770         mutationType = "S"
 | 
| 
 | 
   771         if( translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom)) != translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo)) ){
 | 
| 
 | 
   772           mutationType = "R"                                                              
 | 
| 
 | 
   773         }
 | 
| 
 | 
   774         if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
 | 
| 
 | 
   775           mutationType = "Stop"
 | 
| 
 | 
   776         }
 | 
| 
 | 
   777         return(mutationType)
 | 
| 
 | 
   778       }  
 | 
| 
 | 
   779     }else if(testID==5){  
 | 
| 
 | 
   780       if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
 | 
| 
 | 
   781         return(NA)
 | 
| 
 | 
   782       }else{
 | 
| 
 | 
   783         if(codonFrom==codonTo){
 | 
| 
 | 
   784           mutationType = "S"
 | 
| 
 | 
   785         }else{
 | 
| 
 | 
   786           codonFrom = s2c(codonFrom)
 | 
| 
 | 
   787           codonTo = s2c(codonTo)  
 | 
| 
 | 
   788           mutationType = "Stop"
 | 
| 
 | 
   789           nucOfI = codonFrom[which(codonTo!=codonFrom)]
 | 
| 
 | 
   790           if(nucOfI=="C"){
 | 
| 
 | 
   791             mutationType = "R"  
 | 
| 
 | 
   792           }else if(nucOfI=="G"){
 | 
| 
 | 
   793             mutationType = "S"
 | 
| 
 | 
   794           }
 | 
| 
 | 
   795         }
 | 
| 
 | 
   796         return(mutationType)
 | 
| 
 | 
   797       }
 | 
| 
 | 
   798     }else{
 | 
| 
 | 
   799       if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
 | 
| 
 | 
   800         return(NA)
 | 
| 
 | 
   801       }else{
 | 
| 
 | 
   802         mutationType = "S"
 | 
| 
 | 
   803         if( translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo) ){
 | 
| 
 | 
   804           mutationType = "R"                                                              
 | 
| 
 | 
   805         }
 | 
| 
 | 
   806         if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
 | 
| 
 | 
   807           mutationType = "Stop"
 | 
| 
 | 
   808         }
 | 
| 
 | 
   809         return(mutationType)
 | 
| 
 | 
   810       }  
 | 
| 
 | 
   811     }    
 | 
| 
 | 
   812   }
 | 
| 
 | 
   813 
 | 
| 
 | 
   814   
 | 
| 
 | 
   815   #given a mat of targeting & it's corresponding mutationtypes returns 
 | 
| 
 | 
   816   #a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_RFWR
 | 
| 
 | 
   817   computeExpected <- function(paramTargeting,paramMutationTypes){
 | 
| 
 | 
   818     # Replacements
 | 
| 
 | 
   819     RPos = which(paramMutationTypes=="R")  
 | 
| 
 | 
   820       #FWR
 | 
| 
 | 
   821       Exp_R_FWR = sum(paramTargeting[ RPos[which(FWR_Nuc_Mat[RPos]==T)] ],na.rm=T)
 | 
| 
 | 
   822       #CDR
 | 
| 
 | 
   823       Exp_R_CDR = sum(paramTargeting[ RPos[which(CDR_Nuc_Mat[RPos]==T)] ],na.rm=T)
 | 
| 
 | 
   824     # Silents
 | 
| 
 | 
   825     SPos = which(paramMutationTypes=="S")  
 | 
| 
 | 
   826       #FWR
 | 
| 
 | 
   827       Exp_S_FWR = sum(paramTargeting[ SPos[which(FWR_Nuc_Mat[SPos]==T)] ],na.rm=T)
 | 
| 
 | 
   828       #CDR
 | 
| 
 | 
   829       Exp_S_CDR = sum(paramTargeting[ SPos[which(CDR_Nuc_Mat[SPos]==T)] ],na.rm=T)
 | 
| 
 | 
   830   
 | 
| 
 | 
   831       return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR))
 | 
| 
 | 
   832   }
 | 
| 
 | 
   833   
 | 
| 
 | 
   834   # Count the mutations in a sequence
 | 
| 
 | 
   835   # each mutation is treated independently 
 | 
| 
 | 
   836   analyzeMutations2NucUri_website <- function( rev_in_matrix ){
 | 
| 
 | 
   837     paramGL = rev_in_matrix[2,]
 | 
| 
 | 
   838     paramSeq = rev_in_matrix[1,]  
 | 
| 
 | 
   839     
 | 
| 
 | 
   840     #Fill seq with GL seq if gapped
 | 
| 
 | 
   841     #if( any(paramSeq=="-") ){
 | 
| 
 | 
   842     #  gapPos_Seq =  which(paramSeq=="-")
 | 
| 
 | 
   843     #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"]
 | 
| 
 | 
   844     #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
 | 
| 
 | 
   845     #}
 | 
| 
 | 
   846   
 | 
| 
 | 
   847   
 | 
| 
 | 
   848     #if( any(paramSeq=="N") ){
 | 
| 
 | 
   849     #  gapPos_Seq =  which(paramSeq=="N")
 | 
| 
 | 
   850     #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
 | 
| 
 | 
   851     #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
 | 
| 
 | 
   852     #}  
 | 
| 
 | 
   853       
 | 
| 
 | 
   854     analyzeMutations2NucUri(  matrix(c( paramGL, paramSeq  ),2,length(paramGL),byrow=T)  )
 | 
| 
 | 
   855     
 | 
| 
 | 
   856   }
 | 
| 
 | 
   857 
 | 
| 
 | 
   858   #1 = GL 
 | 
| 
 | 
   859   #2 = Seq
 | 
| 
 | 
   860   analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){
 | 
| 
 | 
   861     paramGL = in_matrix[2,]
 | 
| 
 | 
   862     paramSeq = in_matrix[1,]
 | 
| 
 | 
   863     paramSeqUri = paramGL
 | 
| 
 | 
   864     #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]})
 | 
| 
 | 
   865     mutations_val = paramGL != paramSeq   
 | 
| 
 | 
   866     if(any(mutations_val)){
 | 
| 
 | 
   867       mutationPos = {1:length(mutations_val)}[mutations_val]  
 | 
| 
 | 
   868       mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})]
 | 
| 
 | 
   869       length_mutations =length(mutationPos)
 | 
| 
 | 
   870       mutationInfo = rep(NA,length_mutations)
 | 
| 
 | 
   871       if(any(mutationPos)){  
 | 
| 
 | 
   872 
 | 
| 
 | 
   873         pos<- mutationPos
 | 
| 
 | 
   874         pos_array<-array(sapply(pos,getCodonPos))
 | 
| 
 | 
   875         codonGL =  paramGL[pos_array]
 | 
| 
 | 
   876         
 | 
| 
 | 
   877         codonSeq = sapply(pos,function(x){
 | 
| 
 | 
   878                                   seqP = paramGL[getCodonPos(x)]
 | 
| 
 | 
   879                                   muCodonPos = {x-1}%%3+1 
 | 
| 
 | 
   880                                   seqP[muCodonPos] = paramSeq[x]
 | 
| 
 | 
   881                                   return(seqP)
 | 
| 
 | 
   882                                 })      
 | 
| 
 | 
   883         GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
 | 
| 
 | 
   884         Seqcodons =   apply(codonSeq,2,c2s)
 | 
| 
 | 
   885         mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})     
 | 
| 
 | 
   886         names(mutationInfo) = mutationPos
 | 
| 
 | 
   887     }
 | 
| 
 | 
   888     if(any(!is.na(mutationInfo))){
 | 
| 
 | 
   889       return(mutationInfo[!is.na(mutationInfo)])    
 | 
| 
 | 
   890     }else{
 | 
| 
 | 
   891       return(NA)
 | 
| 
 | 
   892     }
 | 
| 
 | 
   893     
 | 
| 
 | 
   894     
 | 
| 
 | 
   895     }else{
 | 
| 
 | 
   896       return (NA)
 | 
| 
 | 
   897     }
 | 
| 
 | 
   898   }
 | 
| 
 | 
   899   
 | 
| 
 | 
   900   processNucMutations2 <- function(mu){
 | 
| 
 | 
   901     if(!is.na(mu)){
 | 
| 
 | 
   902       #R
 | 
| 
 | 
   903       if(any(mu=="R")){
 | 
| 
 | 
   904         Rs = mu[mu=="R"]
 | 
| 
 | 
   905         nucNumbs = as.numeric(names(Rs))
 | 
| 
 | 
   906         R_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
 | 
| 
 | 
   907         R_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)      
 | 
| 
 | 
   908       }else{
 | 
| 
 | 
   909         R_CDR = 0
 | 
| 
 | 
   910         R_FWR = 0
 | 
| 
 | 
   911       }    
 | 
| 
 | 
   912       
 | 
| 
 | 
   913       #S
 | 
| 
 | 
   914       if(any(mu=="S")){
 | 
| 
 | 
   915         Ss = mu[mu=="S"]
 | 
| 
 | 
   916         nucNumbs = as.numeric(names(Ss))
 | 
| 
 | 
   917         S_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
 | 
| 
 | 
   918         S_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)      
 | 
| 
 | 
   919       }else{
 | 
| 
 | 
   920         S_CDR = 0
 | 
| 
 | 
   921         S_FWR = 0
 | 
| 
 | 
   922       }    
 | 
| 
 | 
   923       
 | 
| 
 | 
   924       
 | 
| 
 | 
   925       retVec = c(R_CDR,S_CDR,R_FWR,S_FWR)
 | 
| 
 | 
   926       retVec[is.na(retVec)]=0
 | 
| 
 | 
   927       return(retVec)
 | 
| 
 | 
   928     }else{
 | 
| 
 | 
   929       return(rep(0,4))
 | 
| 
 | 
   930     }
 | 
| 
 | 
   931   }        
 | 
| 
 | 
   932   
 | 
| 
 | 
   933   
 | 
| 
 | 
   934   ## Z-score Test
 | 
| 
 | 
   935   computeZScore <- function(mat, test="Focused"){
 | 
| 
 | 
   936     matRes <- matrix(NA,ncol=2,nrow=(nrow(mat)))
 | 
| 
 | 
   937     if(test=="Focused"){
 | 
| 
 | 
   938       #Z_Focused_CDR
 | 
| 
 | 
   939       #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
 | 
| 
 | 
   940       P = apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))})
 | 
| 
 | 
   941       R_mean = apply(cbind(mat[,c(1,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
 | 
| 
 | 
   942       R_sd=sqrt(R_mean*(1-P))
 | 
| 
 | 
   943       matRes[,1] = (mat[,1]-R_mean)/R_sd
 | 
| 
 | 
   944     
 | 
| 
 | 
   945       #Z_Focused_FWR
 | 
| 
 | 
   946       #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
 | 
| 
 | 
   947       P = apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))})
 | 
| 
 | 
   948       R_mean = apply(cbind(mat[,c(3,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
 | 
| 
 | 
   949       R_sd=sqrt(R_mean*(1-P))
 | 
| 
 | 
   950       matRes[,2] = (mat[,3]-R_mean)/R_sd
 | 
| 
 | 
   951     }
 | 
| 
 | 
   952   
 | 
| 
 | 
   953     if(test=="Local"){
 | 
| 
 | 
   954       #Z_Focused_CDR
 | 
| 
 | 
   955       #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
 | 
| 
 | 
   956       P = apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))})
 | 
| 
 | 
   957       R_mean = apply(cbind(mat[,c(1,2)],P),1,function(x){x[3]*(sum(x[1:2]))})
 | 
| 
 | 
   958       R_sd=sqrt(R_mean*(1-P))
 | 
| 
 | 
   959       matRes[,1] = (mat[,1]-R_mean)/R_sd
 | 
| 
 | 
   960     
 | 
| 
 | 
   961       #Z_Focused_FWR
 | 
| 
 | 
   962       #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
 | 
| 
 | 
   963       P = apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))})
 | 
| 
 | 
   964       R_mean = apply(cbind(mat[,c(3,4)],P),1,function(x){x[3]*(sum(x[1:2]))})
 | 
| 
 | 
   965       R_sd=sqrt(R_mean*(1-P))
 | 
| 
 | 
   966       matRes[,2] = (mat[,3]-R_mean)/R_sd
 | 
| 
 | 
   967     }
 | 
| 
 | 
   968     
 | 
| 
 | 
   969     if(test=="Imbalanced"){
 | 
| 
 | 
   970       #Z_Focused_CDR
 | 
| 
 | 
   971       #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
 | 
| 
 | 
   972       P = apply(mat[,5:8],1,function(x){((x[1]+x[2])/sum(x))})
 | 
| 
 | 
   973       R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
 | 
| 
 | 
   974       R_sd=sqrt(R_mean*(1-P))
 | 
| 
 | 
   975       matRes[,1] = (mat[,1]-R_mean)/R_sd
 | 
| 
 | 
   976     
 | 
| 
 | 
   977       #Z_Focused_FWR
 | 
| 
 | 
   978       #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
 | 
| 
 | 
   979       P = apply(mat[,5:8],1,function(x){((x[3]+x[4])/sum(x))})
 | 
| 
 | 
   980       R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
 | 
| 
 | 
   981       R_sd=sqrt(R_mean*(1-P))
 | 
| 
 | 
   982       matRes[,2] = (mat[,3]-R_mean)/R_sd
 | 
| 
 | 
   983     }    
 | 
| 
 | 
   984       
 | 
| 
 | 
   985     matRes[is.nan(matRes)] = NA
 | 
| 
 | 
   986     return(matRes)
 | 
| 
 | 
   987   }
 | 
| 
 | 
   988 
 | 
| 
 | 
   989   # Return a p-value for a z-score
 | 
| 
 | 
   990   z2p <- function(z){
 | 
| 
 | 
   991     p=NA
 | 
| 
 | 
   992     if( !is.nan(z) && !is.na(z)){   
 | 
| 
 | 
   993       if(z>0){
 | 
| 
 | 
   994         p = (1 - pnorm(z,0,1))
 | 
| 
 | 
   995       } else if(z<0){
 | 
| 
 | 
   996         p = (-1 * pnorm(z,0,1))
 | 
| 
 | 
   997       } else{
 | 
| 
 | 
   998         p = 0.5
 | 
| 
 | 
   999       }
 | 
| 
 | 
  1000     }else{
 | 
| 
 | 
  1001       p = NA
 | 
| 
 | 
  1002     }
 | 
| 
 | 
  1003     return(p)
 | 
| 
 | 
  1004   }    
 | 
| 
 | 
  1005   
 | 
| 
 | 
  1006   
 | 
| 
 | 
  1007   ## Bayesian  Test
 | 
| 
 | 
  1008 
 | 
| 
 | 
  1009   # Fitted parameter for the bayesian framework
 | 
| 
 | 
  1010 BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
 | 
| 
 | 
  1011   CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))
 | 
| 
 | 
  1012   
 | 
| 
 | 
  1013   # Given x, M & p, returns a pdf 
 | 
| 
 | 
  1014   calculate_bayes <- function ( x=3, N=10, p=0.33,
 | 
| 
 | 
  1015                                 i=CONST_i,
 | 
| 
 | 
  1016                                 max_sigma=20,length_sigma=4001
 | 
| 
 | 
  1017                               ){
 | 
| 
 | 
  1018     if(!0%in%N){
 | 
| 
 | 
  1019       G <- max(length(x),length(N),length(p))
 | 
| 
 | 
  1020       x=array(x,dim=G)
 | 
| 
 | 
  1021       N=array(N,dim=G)
 | 
| 
 | 
  1022       p=array(p,dim=G)
 | 
| 
 | 
  1023       sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma)
 | 
| 
 | 
  1024       sigma_1<-log({i/{1-i}}/{p/{1-p}})
 | 
| 
 | 
  1025       index<-min(N,60)
 | 
| 
 | 
  1026       y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1))
 | 
| 
 | 
  1027       if(!sum(is.na(y))){
 | 
| 
 | 
  1028         tmp<-approx(sigma_1,y,sigma_s)$y
 | 
| 
 | 
  1029         tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}}
 | 
| 
 | 
  1030       }else{
 | 
| 
 | 
  1031         return(NA)
 | 
| 
 | 
  1032       }
 | 
| 
 | 
  1033     }else{
 | 
| 
 | 
  1034       return(NA)
 | 
| 
 | 
  1035     }
 | 
| 
 | 
  1036   }  
 | 
| 
 | 
  1037   # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection
 | 
| 
 | 
  1038   computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){
 | 
| 
 | 
  1039     flagOneSeq = F
 | 
| 
 | 
  1040     if(nrow(mat)==1){
 | 
| 
 | 
  1041       mat=rbind(mat,mat)
 | 
| 
 | 
  1042       flagOneSeq = T
 | 
| 
 | 
  1043     }
 | 
| 
 | 
  1044     if(test=="Focused"){
 | 
| 
 | 
  1045       #CDR
 | 
| 
 | 
  1046       P = c(apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}),0.5)
 | 
| 
 | 
  1047       N = c(apply(mat[,c(1,2,4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1048       X = c(mat[,1],0)
 | 
| 
 | 
  1049       bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1050       bayesCDR = bayesCDR[-length(bayesCDR)]
 | 
| 
 | 
  1051   
 | 
| 
 | 
  1052       #FWR
 | 
| 
 | 
  1053       P = c(apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}),0.5)
 | 
| 
 | 
  1054       N = c(apply(mat[,c(3,2,4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1055       X = c(mat[,3],0)
 | 
| 
 | 
  1056       bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1057       bayesFWR = bayesFWR[-length(bayesFWR)]     
 | 
| 
 | 
  1058     }
 | 
| 
 | 
  1059     
 | 
| 
 | 
  1060     if(test=="Local"){
 | 
| 
 | 
  1061       #CDR
 | 
| 
 | 
  1062       P = c(apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}),0.5)
 | 
| 
 | 
  1063       N = c(apply(mat[,c(1,2)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1064       X = c(mat[,1],0)
 | 
| 
 | 
  1065       bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1066       bayesCDR = bayesCDR[-length(bayesCDR)]
 | 
| 
 | 
  1067   
 | 
| 
 | 
  1068       #FWR
 | 
| 
 | 
  1069       P = c(apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}),0.5)
 | 
| 
 | 
  1070       N = c(apply(mat[,c(3,4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1071       X = c(mat[,3],0)
 | 
| 
 | 
  1072       bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1073       bayesFWR = bayesFWR[-length(bayesFWR)]     
 | 
| 
 | 
  1074     } 
 | 
| 
 | 
  1075      
 | 
| 
 | 
  1076     if(test=="Imbalanced"){
 | 
| 
 | 
  1077       #CDR
 | 
| 
 | 
  1078       P = c(apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}),0.5)
 | 
| 
 | 
  1079       N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1080       X = c(apply(mat[,c(1:2)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1081       bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1082       bayesCDR = bayesCDR[-length(bayesCDR)]
 | 
| 
 | 
  1083   
 | 
| 
 | 
  1084       #FWR
 | 
| 
 | 
  1085       P = c(apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}),0.5)
 | 
| 
 | 
  1086       N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1087       X = c(apply(mat[,c(3:4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1088       bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1089       bayesFWR = bayesFWR[-length(bayesFWR)]     
 | 
| 
 | 
  1090     }
 | 
| 
 | 
  1091 
 | 
| 
 | 
  1092     if(test=="ImbalancedSilent"){
 | 
| 
 | 
  1093       #CDR
 | 
| 
 | 
  1094       P = c(apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}),0.5)
 | 
| 
 | 
  1095       N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1096       X = c(apply(mat[,c(2,4)],1,function(x){(x[1])}),0)
 | 
| 
 | 
  1097       bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1098       bayesCDR = bayesCDR[-length(bayesCDR)]
 | 
| 
 | 
  1099   
 | 
| 
 | 
  1100       #FWR
 | 
| 
 | 
  1101       P = c(apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}),0.5)
 | 
| 
 | 
  1102       N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0)
 | 
| 
 | 
  1103       X = c(apply(mat[,c(2,4)],1,function(x){(x[2])}),0)
 | 
| 
 | 
  1104       bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})    
 | 
| 
 | 
  1105       bayesFWR = bayesFWR[-length(bayesFWR)]     
 | 
| 
 | 
  1106     }
 | 
| 
 | 
  1107         
 | 
| 
 | 
  1108     if(flagOneSeq==T){
 | 
| 
 | 
  1109       bayesCDR = bayesCDR[1]  
 | 
| 
 | 
  1110       bayesFWR = bayesFWR[1]
 | 
| 
 | 
  1111     }
 | 
| 
 | 
  1112     return( list("CDR"=bayesCDR, "FWR"=bayesFWR) )
 | 
| 
 | 
  1113   }
 | 
| 
 | 
  1114   
 | 
| 
 | 
  1115   ##Covolution
 | 
| 
 | 
  1116   break2chunks<-function(G=1000){
 | 
| 
 | 
  1117   base<-2^round(log(sqrt(G),2),0)
 | 
| 
 | 
  1118   return(c(rep(base,floor(G/base)-1),base+G-(floor(G/base)*base)))
 | 
| 
 | 
  1119   }  
 | 
| 
 | 
  1120   
 | 
| 
 | 
  1121   PowersOfTwo <- function(G=100){
 | 
| 
 | 
  1122     exponents <- array()
 | 
| 
 | 
  1123     i = 0
 | 
| 
 | 
  1124     while(G > 0){
 | 
| 
 | 
  1125       i=i+1
 | 
| 
 | 
  1126       exponents[i] <- floor( log2(G) )
 | 
| 
 | 
  1127       G <- G-2^exponents[i]
 | 
| 
 | 
  1128     }
 | 
| 
 | 
  1129     return(exponents)
 | 
| 
 | 
  1130   }
 | 
| 
 | 
  1131   
 | 
| 
 | 
  1132   convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){
 | 
| 
 | 
  1133     G = ncol(cons)
 | 
| 
 | 
  1134     if(G>1){
 | 
| 
 | 
  1135       for(gen in log(G,2):1){
 | 
| 
 | 
  1136         ll<-seq(from=2,to=2^gen,by=2)
 | 
| 
 | 
  1137         sapply(ll,function(l){cons[,l/2]<<-weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)})
 | 
| 
 | 
  1138       }
 | 
| 
 | 
  1139     }
 | 
| 
 | 
  1140     return( cons[,1] )
 | 
| 
 | 
  1141   }
 | 
| 
 | 
  1142   
 | 
| 
 | 
  1143   convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){
 | 
| 
 | 
  1144     if(length(ncol(cons))) G<-ncol(cons)
 | 
| 
 | 
  1145     groups <- PowersOfTwo(G)
 | 
| 
 | 
  1146     matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G )
 | 
| 
 | 
  1147     startIndex = 1
 | 
| 
 | 
  1148     for( i in 1:length(groups) ){
 | 
| 
 | 
  1149       stopIndex <- 2^groups[i] + startIndex - 1
 | 
| 
 | 
  1150       if(stopIndex!=startIndex){
 | 
| 
 | 
  1151         matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma )
 | 
| 
 | 
  1152         startIndex = stopIndex + 1
 | 
| 
 | 
  1153       }
 | 
| 
 | 
  1154       else {
 | 
| 
 | 
  1155         if(G>1) matG[,i] <- cons[,startIndex:stopIndex]
 | 
| 
 | 
  1156         else matG[,i] <- cons
 | 
| 
 | 
  1157         #startIndex = stopIndex + 1
 | 
| 
 | 
  1158       }
 | 
| 
 | 
  1159     }
 | 
| 
 | 
  1160     return( list( matG, groups ) )
 | 
| 
 | 
  1161   }
 | 
| 
 | 
  1162   
 | 
| 
 | 
  1163   weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
 | 
| 
 | 
  1164     lx<-length(x)
 | 
| 
 | 
  1165     ly<-length(y)
 | 
| 
 | 
  1166     if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){
 | 
| 
 | 
  1167       if(w<1){
 | 
| 
 | 
  1168         y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y
 | 
| 
 | 
  1169         x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y
 | 
| 
 | 
  1170         lx<-length(x1)
 | 
| 
 | 
  1171         ly<-length(y1)
 | 
| 
 | 
  1172       }
 | 
| 
 | 
  1173       else {
 | 
| 
 | 
  1174         y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y
 | 
| 
 | 
  1175         x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y
 | 
| 
 | 
  1176         lx<-length(x1)
 | 
| 
 | 
  1177         ly<-length(y1)
 | 
| 
 | 
  1178       }
 | 
| 
 | 
  1179     }
 | 
| 
 | 
  1180     else{
 | 
| 
 | 
  1181       x1<-x
 | 
| 
 | 
  1182       y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y
 | 
| 
 | 
  1183       ly<-length(y1)
 | 
| 
 | 
  1184     }
 | 
| 
 | 
  1185     tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y
 | 
| 
 | 
  1186     tmp[tmp<=0] = 0
 | 
| 
 | 
  1187     return(tmp/sum(tmp))
 | 
| 
 | 
  1188   }
 | 
| 
 | 
  1189   
 | 
| 
 | 
  1190   calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
 | 
| 
 | 
  1191     matG <- listMatG[[1]]
 | 
| 
 | 
  1192     groups <- listMatG[[2]]
 | 
| 
 | 
  1193     i = 1
 | 
| 
 | 
  1194     resConv <- matG[,i]
 | 
| 
 | 
  1195     denom <- 2^groups[i]
 | 
| 
 | 
  1196     if(length(groups)>1){
 | 
| 
 | 
  1197       while( i<length(groups) ){
 | 
| 
 | 
  1198         i = i + 1
 | 
| 
 | 
  1199         resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma)
 | 
| 
 | 
  1200         #cat({{2^groups[i]}/denom},"\n")
 | 
| 
 | 
  1201         denom <- denom + 2^groups[i]
 | 
| 
 | 
  1202       }
 | 
| 
 | 
  1203     }
 | 
| 
 | 
  1204     return(resConv)
 | 
| 
 | 
  1205   }
 | 
| 
 | 
  1206   
 | 
| 
 | 
  1207   # Given a list of PDFs, returns a convoluted PDF    
 | 
| 
 | 
  1208   groupPosteriors <- function( listPosteriors, max_sigma=20, length_sigma=4001 ,Threshold=2 ){  
 | 
| 
 | 
  1209     listPosteriors = listPosteriors[ !is.na(listPosteriors) ]
 | 
| 
 | 
  1210     Length_Postrior<-length(listPosteriors)
 | 
| 
 | 
  1211     if(Length_Postrior>1 & Length_Postrior<=Threshold){
 | 
| 
 | 
  1212       cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
 | 
| 
 | 
  1213       listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
 | 
| 
 | 
  1214       y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
 | 
| 
 | 
  1215       return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
 | 
| 
 | 
  1216     }else if(Length_Postrior==1) return(listPosteriors[[1]])
 | 
| 
 | 
  1217     else  if(Length_Postrior==0) return(NA)
 | 
| 
 | 
  1218     else {
 | 
| 
 | 
  1219       cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
 | 
| 
 | 
  1220       y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma )
 | 
| 
 | 
  1221       return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
 | 
| 
 | 
  1222     }
 | 
| 
 | 
  1223   }
 | 
| 
 | 
  1224 
 | 
| 
 | 
  1225   fastConv<-function(cons, max_sigma=20, length_sigma=4001){
 | 
| 
 | 
  1226     chunks<-break2chunks(G=ncol(cons))
 | 
| 
 | 
  1227     if(ncol(cons)==3) chunks<-2:1
 | 
| 
 | 
  1228     index_chunks_end <- cumsum(chunks)
 | 
| 
 | 
  1229     index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1)
 | 
| 
 | 
  1230     index_chunks <- cbind(index_chunks_start,index_chunks_end)
 | 
| 
 | 
  1231     
 | 
| 
 | 
  1232     case <- sum(chunks!=chunks[1])
 | 
| 
 | 
  1233     if(case==1) End <- max(1,((length(index_chunks)/2)-1))
 | 
| 
 | 
  1234     else End <- max(1,((length(index_chunks)/2)))
 | 
| 
 | 
  1235     
 | 
| 
 | 
  1236     firsts <- sapply(1:End,function(i){
 | 
| 
 | 
  1237           	    indexes<-index_chunks[i,1]:index_chunks[i,2]
 | 
| 
 | 
  1238           	    convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]]
 | 
| 
 | 
  1239           	  })
 | 
| 
 | 
  1240     if(case==0){
 | 
| 
 | 
  1241     	result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) )
 | 
| 
 | 
  1242     }else if(case==1){
 | 
| 
 | 
  1243       last<-list(calculate_bayesGHelper(
 | 
| 
 | 
  1244       convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] )
 | 
| 
 | 
  1245                                       ),0)
 | 
| 
 | 
  1246       result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts))
 | 
| 
 | 
  1247       result<-calculate_bayesGHelper(
 | 
| 
 | 
  1248         list(
 | 
| 
 | 
  1249           cbind(
 | 
| 
 | 
  1250           result_first,last[[1]]),
 | 
| 
 | 
  1251           c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2))
 | 
| 
 | 
  1252         )
 | 
| 
 | 
  1253       )
 | 
| 
 | 
  1254     }
 | 
| 
 | 
  1255     return(as.vector(result))
 | 
| 
 | 
  1256   }
 | 
| 
 | 
  1257     
 | 
| 
 | 
  1258   # Computes the 95% CI for a pdf
 | 
| 
 | 
  1259   calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
 | 
| 
 | 
  1260     if(length(Pdf)!=length_sigma) return(NA)
 | 
| 
 | 
  1261     sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma)
 | 
| 
 | 
  1262     cdf = cumsum(Pdf)
 | 
| 
 | 
  1263     cdf = cdf/cdf[length(cdf)]  
 | 
| 
 | 
  1264     return( c(sigma_s[findInterval(low,cdf)-1] , sigma_s[findInterval(up,cdf)]) ) 
 | 
| 
 | 
  1265   }
 | 
| 
 | 
  1266   
 | 
| 
 | 
  1267   # Computes a mean for a pdf
 | 
| 
 | 
  1268   calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
 | 
| 
 | 
  1269     if(length(Pdf)!=length_sigma) return(NA)
 | 
| 
 | 
  1270     sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma)
 | 
| 
 | 
  1271     norm = {length_sigma-1}/2/max_sigma
 | 
| 
 | 
  1272     return( (Pdf%*%sigma_s/norm)  ) 
 | 
| 
 | 
  1273   }
 | 
| 
 | 
  1274   
 | 
| 
 | 
  1275   # Returns the mean, and the 95% CI for a pdf
 | 
| 
 | 
  1276   calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
 | 
| 
 | 
  1277     if(is.na(Pdf)) 
 | 
| 
 | 
  1278      return(rep(NA,3))  
 | 
| 
 | 
  1279     bCI = calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
 | 
| 
 | 
  1280     bMean = calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
 | 
| 
 | 
  1281     return(c(bMean, bCI))
 | 
| 
 | 
  1282   }   
 | 
| 
 | 
  1283 
 | 
| 
 | 
  1284   # Computes the p-value of a pdf
 | 
| 
 | 
  1285   computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
 | 
| 
 | 
  1286     if(length(Pdf)>1){
 | 
| 
 | 
  1287       norm = {length_sigma-1}/2/max_sigma
 | 
| 
 | 
  1288       pVal = {sum(Pdf[1:{{length_sigma-1}/2}]) + Pdf[{{length_sigma+1}/2}]/2}/norm
 | 
| 
 | 
  1289       if(pVal>0.5){
 | 
| 
 | 
  1290         pVal = pVal-1
 | 
| 
 | 
  1291       }
 | 
| 
 | 
  1292       return(pVal)
 | 
| 
 | 
  1293     }else{
 | 
| 
 | 
  1294       return(NA)
 | 
| 
 | 
  1295     }
 | 
| 
 | 
  1296   }    
 | 
| 
 | 
  1297   
 | 
| 
 | 
  1298   # Compute p-value of two distributions
 | 
| 
 | 
  1299   compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
 | 
| 
 | 
  1300   #print(c(length(dens1),length(dens2)))
 | 
| 
 | 
  1301   if(length(dens1)>1 & length(dens2)>1 ){
 | 
| 
 | 
  1302     dens1<-dens1/sum(dens1)
 | 
| 
 | 
  1303     dens2<-dens2/sum(dens2)
 | 
| 
 | 
  1304     cum2 <- cumsum(dens2)-dens2/2
 | 
| 
 | 
  1305     tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i])))
 | 
| 
 | 
  1306     #print(tmp)
 | 
| 
 | 
  1307     if(tmp>0.5)tmp<-tmp-1
 | 
| 
 | 
  1308     return( tmp )
 | 
| 
 | 
  1309     }
 | 
| 
 | 
  1310     else {
 | 
| 
 | 
  1311     return(NA)
 | 
| 
 | 
  1312     }
 | 
| 
 | 
  1313     #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)
 | 
| 
 | 
  1314   }  
 | 
| 
 | 
  1315   
 | 
| 
 | 
  1316   # get number of seqeunces contributing to the sigma (i.e. seqeunces with mutations)
 | 
| 
 | 
  1317   numberOfSeqsWithMutations <- function(matMutations,test=1){
 | 
| 
 | 
  1318     if(test==4)test=2
 | 
| 
 | 
  1319     cdrSeqs <- 0
 | 
| 
 | 
  1320     fwrSeqs <- 0    
 | 
| 
 | 
  1321     if(test==1){#focused
 | 
| 
 | 
  1322       cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2,4)]) })
 | 
| 
 | 
  1323       fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4,2)]) })
 | 
| 
 | 
  1324       if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0)
 | 
| 
 | 
  1325       if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0) 
 | 
| 
 | 
  1326     }
 | 
| 
 | 
  1327     if(test==2){#local
 | 
| 
 | 
  1328       cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2)]) })
 | 
| 
 | 
  1329       fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4)]) })
 | 
| 
 | 
  1330       if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0)
 | 
| 
 | 
  1331       if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0) 
 | 
| 
 | 
  1332     }
 | 
| 
 | 
  1333   return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
 | 
| 
 | 
  1334 }  
 | 
| 
 | 
  1335 
 | 
| 
 | 
  1336 
 | 
| 
 | 
  1337 
 | 
| 
 | 
  1338 shadeColor <- function(sigmaVal=NA,pVal=NA){
 | 
| 
 | 
  1339   if(is.na(sigmaVal) & is.na(pVal)) return(NA)
 | 
| 
 | 
  1340   if(is.na(sigmaVal) & !is.na(pVal)) sigmaVal=sign(pVal)
 | 
| 
 | 
  1341   if(is.na(pVal) || pVal==1 || pVal==0){
 | 
| 
 | 
  1342     returnColor = "#FFFFFF";
 | 
| 
 | 
  1343   }else{
 | 
| 
 | 
  1344     colVal=abs(pVal);
 | 
| 
 | 
  1345     
 | 
| 
 | 
  1346     if(sigmaVal<0){      
 | 
| 
 | 
  1347         if(colVal>0.1)
 | 
| 
 | 
  1348           returnColor = "#CCFFCC";
 | 
| 
 | 
  1349         if(colVal<=0.1)
 | 
| 
 | 
  1350           returnColor = "#99FF99";
 | 
| 
 | 
  1351         if(colVal<=0.050)
 | 
| 
 | 
  1352           returnColor = "#66FF66";
 | 
| 
 | 
  1353         if(colVal<=0.010)
 | 
| 
 | 
  1354           returnColor = "#33FF33";
 | 
| 
 | 
  1355         if(colVal<=0.005)
 | 
| 
 | 
  1356           returnColor = "#00FF00";
 | 
| 
 | 
  1357       
 | 
| 
 | 
  1358     }else{
 | 
| 
 | 
  1359       if(colVal>0.1)
 | 
| 
 | 
  1360         returnColor = "#FFCCCC";
 | 
| 
 | 
  1361       if(colVal<=0.1)
 | 
| 
 | 
  1362         returnColor = "#FF9999";
 | 
| 
 | 
  1363       if(colVal<=0.05)
 | 
| 
 | 
  1364         returnColor = "#FF6666";
 | 
| 
 | 
  1365       if(colVal<=0.01)
 | 
| 
 | 
  1366         returnColor = "#FF3333";
 | 
| 
 | 
  1367       if(colVal<0.005)
 | 
| 
 | 
  1368         returnColor = "#FF0000";
 | 
| 
 | 
  1369     }
 | 
| 
 | 
  1370   }
 | 
| 
 | 
  1371   
 | 
| 
 | 
  1372   return(returnColor)
 | 
| 
 | 
  1373 }
 | 
| 
 | 
  1374 
 | 
| 
 | 
  1375 
 | 
| 
 | 
  1376 
 | 
| 
 | 
  1377 plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
 | 
| 
 | 
  1378   if(!log){
 | 
| 
 | 
  1379     x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac
 | 
| 
 | 
  1380     y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac
 | 
| 
 | 
  1381   }else {
 | 
| 
 | 
  1382     if(log==2){
 | 
| 
 | 
  1383       x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac
 | 
| 
 | 
  1384       y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac)
 | 
| 
 | 
  1385     }
 | 
| 
 | 
  1386     if(log==1){
 | 
| 
 | 
  1387       x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac)
 | 
| 
 | 
  1388       y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac
 | 
| 
 | 
  1389     }
 | 
| 
 | 
  1390     if(log==3){
 | 
| 
 | 
  1391       x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac)
 | 
| 
 | 
  1392       y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac)
 | 
| 
 | 
  1393     }
 | 
| 
 | 
  1394   }
 | 
| 
 | 
  1395   return(c("x"=x,"y"=y))
 | 
| 
 | 
  1396 }
 | 
| 
 | 
  1397 
 | 
| 
 | 
  1398 # SHMulation
 | 
| 
 | 
  1399 
 | 
| 
 | 
  1400   # Based on targeting, introduce a single mutation & then update the targeting 
 | 
| 
 | 
  1401   oneMutation <- function(){
 | 
| 
 | 
  1402     # Pick a postion + mutation
 | 
| 
 | 
  1403     posMutation = sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting))
 | 
| 
 | 
  1404     posNucNumb = ceiling(posMutation/4)                    # Nucleotide number
 | 
| 
 | 
  1405     posNucKind = 4 - ( (posNucNumb*4) - posMutation )   # Nuc the position mutates to
 | 
| 
 | 
  1406   
 | 
| 
 | 
  1407     #mutate the simulation sequence
 | 
| 
 | 
  1408     seqSimVec <-  s2c(seqSim)
 | 
| 
 | 
  1409     seqSimVec[posNucNumb] <- NUCLEOTIDES[posNucKind]
 | 
| 
 | 
  1410     seqSim <<-  c2s(seqSimVec)
 | 
| 
 | 
  1411     
 | 
| 
 | 
  1412     #update Mutability, Targeting & MutationsTypes
 | 
| 
 | 
  1413     updateMutabilityNTargeting(posNucNumb)
 | 
| 
 | 
  1414   
 | 
| 
 | 
  1415     #return(c(posNucNumb,NUCLEOTIDES[posNucKind])) 
 | 
| 
 | 
  1416     return(posNucNumb)
 | 
| 
 | 
  1417   }  
 | 
| 
 | 
  1418   
 | 
| 
 | 
  1419   updateMutabilityNTargeting <- function(position){
 | 
| 
 | 
  1420     min_i<-max((position-2),1)
 | 
| 
 | 
  1421     max_i<-min((position+2),nchar(seqSim))
 | 
| 
 | 
  1422     min_ii<-min(min_i,3)
 | 
| 
 | 
  1423     
 | 
| 
 | 
  1424     #mutability - update locally
 | 
| 
 | 
  1425     seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)]
 | 
| 
 | 
  1426     
 | 
| 
 | 
  1427     
 | 
| 
 | 
  1428     #targeting - compute locally
 | 
| 
 | 
  1429     seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i])                 
 | 
| 
 | 
  1430     seqTargeting[is.na(seqTargeting)] <<- 0
 | 
| 
 | 
  1431     #mutCodonPos = getCodonPos(position) 
 | 
| 
 | 
  1432     mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3])
 | 
| 
 | 
  1433     #cat(mutCodonPos,"\n")                                                  
 | 
| 
 | 
  1434     mutTypeCodon = getCodonPos(position)
 | 
| 
 | 
  1435     seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) ) 
 | 
| 
 | 
  1436     # Stop = 0
 | 
| 
 | 
  1437     if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){
 | 
| 
 | 
  1438       seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0
 | 
| 
 | 
  1439     }
 | 
| 
 | 
  1440     
 | 
| 
 | 
  1441   
 | 
| 
 | 
  1442     #Selection
 | 
| 
 | 
  1443     selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R"))  
 | 
| 
 | 
  1444     # CDR
 | 
| 
 | 
  1445     selectedCDR = selectedPos[which(matCDR[selectedPos]==T)]
 | 
| 
 | 
  1446     seqTargeting[selectedCDR] <<-  seqTargeting[selectedCDR] *  exp(selCDR)
 | 
| 
 | 
  1447     seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K
 | 
| 
 | 
  1448         
 | 
| 
 | 
  1449     # FWR
 | 
| 
 | 
  1450     selectedFWR = selectedPos[which(matFWR[selectedPos]==T)]
 | 
| 
 | 
  1451     seqTargeting[selectedFWR] <<-  seqTargeting[selectedFWR] *  exp(selFWR)
 | 
| 
 | 
  1452     seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K      
 | 
| 
 | 
  1453     
 | 
| 
 | 
  1454   }  
 | 
| 
 | 
  1455   
 | 
| 
 | 
  1456 
 | 
| 
 | 
  1457 
 | 
| 
 | 
  1458   # Validate the mutation: if the mutation has not been sampled before validate it, else discard it.   
 | 
| 
 | 
  1459   validateMutation <- function(){  
 | 
| 
 | 
  1460     if( !(mutatedPos%in%mutatedPositions) ){ # if it's a new mutation
 | 
| 
 | 
  1461       uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
 | 
| 
 | 
  1462       mutatedPositions[uniqueMutationsIntroduced] <<-  mutatedPos  
 | 
| 
 | 
  1463     }else{
 | 
| 
 | 
  1464       if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){ # back to germline mutation
 | 
| 
 | 
  1465         mutatedPositions <<-  mutatedPositions[-which(mutatedPositions==mutatedPos)]
 | 
| 
 | 
  1466         uniqueMutationsIntroduced <<-  uniqueMutationsIntroduced - 1
 | 
| 
 | 
  1467       }      
 | 
| 
 | 
  1468     }
 | 
| 
 | 
  1469   }  
 | 
| 
 | 
  1470   
 | 
| 
 | 
  1471   
 | 
| 
 | 
  1472   
 | 
| 
 | 
  1473   # Places text (labels) at normalized coordinates 
 | 
| 
 | 
  1474   myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
 | 
| 
 | 
  1475     par(xpd=TRUE)
 | 
| 
 | 
  1476     if(!log)
 | 
| 
 | 
  1477       text(par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,w,cex=cex,adj=adj,col=thecol)
 | 
| 
 | 
  1478     else {
 | 
| 
 | 
  1479     if(log==2)
 | 
| 
 | 
  1480     text(
 | 
| 
 | 
  1481       par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,
 | 
| 
 | 
  1482       10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac),
 | 
| 
 | 
  1483       w,cex=cex,adj=adj,col=thecol)
 | 
| 
 | 
  1484     if(log==1)
 | 
| 
 | 
  1485       text(
 | 
| 
 | 
  1486       10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac),
 | 
| 
 | 
  1487       par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,
 | 
| 
 | 
  1488       w,cex=cex,adj=adj,col=thecol)
 | 
| 
 | 
  1489     if(log==3)
 | 
| 
 | 
  1490       text(
 | 
| 
 | 
  1491       10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac),
 | 
| 
 | 
  1492       10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac),
 | 
| 
 | 
  1493       w,cex=cex,adj=adj,col=thecol)
 | 
| 
 | 
  1494     }
 | 
| 
 | 
  1495     par(xpd=FALSE)
 | 
| 
 | 
  1496   }
 | 
| 
 | 
  1497   
 | 
| 
 | 
  1498   
 | 
| 
 | 
  1499   
 | 
| 
 | 
  1500   # Count the mutations in a sequence
 | 
| 
 | 
  1501   analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){
 | 
| 
 | 
  1502 
 | 
| 
 | 
  1503     paramGL = s2c(matInput[inputMatrixIndex,2])
 | 
| 
 | 
  1504     paramSeq = s2c(matInput[inputMatrixIndex,1])            
 | 
| 
 | 
  1505     
 | 
| 
 | 
  1506     #if( any(paramSeq=="N") ){
 | 
| 
 | 
  1507     #  gapPos_Seq =  which(paramSeq=="N")
 | 
| 
 | 
  1508     #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
 | 
| 
 | 
  1509     #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
 | 
| 
 | 
  1510     #}        
 | 
| 
 | 
  1511     mutations_val = paramGL != paramSeq   
 | 
| 
 | 
  1512     
 | 
| 
 | 
  1513     if(any(mutations_val)){
 | 
| 
 | 
  1514       mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]  
 | 
| 
 | 
  1515       length_mutations =length(mutationPos)
 | 
| 
 | 
  1516       mutationInfo = rep(NA,length_mutations)
 | 
| 
 | 
  1517                           
 | 
| 
 | 
  1518       pos<- mutationPos
 | 
| 
 | 
  1519       pos_array<-array(sapply(pos,getCodonPos))
 | 
| 
 | 
  1520       codonGL =  paramGL[pos_array]
 | 
| 
 | 
  1521       codonSeqWhole =  paramSeq[pos_array]
 | 
| 
 | 
  1522       codonSeq = sapply(pos,function(x){
 | 
| 
 | 
  1523                                 seqP = paramGL[getCodonPos(x)]
 | 
| 
 | 
  1524                                 muCodonPos = {x-1}%%3+1 
 | 
| 
 | 
  1525                                 seqP[muCodonPos] = paramSeq[x]
 | 
| 
 | 
  1526                                 return(seqP)
 | 
| 
 | 
  1527                               })
 | 
| 
 | 
  1528       GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
 | 
| 
 | 
  1529       SeqcodonsWhole =  apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)      
 | 
| 
 | 
  1530       Seqcodons =   apply(codonSeq,2,c2s)
 | 
| 
 | 
  1531       
 | 
| 
 | 
  1532       mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})     
 | 
| 
 | 
  1533       names(mutationInfo) = mutationPos     
 | 
| 
 | 
  1534       
 | 
| 
 | 
  1535       mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})           
 | 
| 
 | 
  1536       names(mutationInfoWhole) = mutationPos
 | 
| 
 | 
  1537 
 | 
| 
 | 
  1538       mutationInfo <- mutationInfo[!is.na(mutationInfo)]
 | 
| 
 | 
  1539       mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
 | 
| 
 | 
  1540       
 | 
| 
 | 
  1541       if(any(!is.na(mutationInfo))){       
 | 
| 
 | 
  1542   
 | 
| 
 | 
  1543         #Filter based on Stop (at the codon level)
 | 
| 
 | 
  1544         if(seqWithStops==1){
 | 
| 
 | 
  1545           nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
 | 
| 
 | 
  1546           mutationInfo = mutationInfo[nucleotidesAtStopCodons]
 | 
| 
 | 
  1547           mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
 | 
| 
 | 
  1548         }else{
 | 
| 
 | 
  1549           countStops = sum(mutationInfoWhole=="Stop")
 | 
| 
 | 
  1550           if(seqWithStops==2 & countStops==0) mutationInfo = NA
 | 
| 
 | 
  1551           if(seqWithStops==3 & countStops>0) mutationInfo = NA
 | 
| 
 | 
  1552         }         
 | 
| 
 | 
  1553         
 | 
| 
 | 
  1554         if(any(!is.na(mutationInfo))){
 | 
| 
 | 
  1555           #Filter mutations based on multipleMutation
 | 
| 
 | 
  1556           if(multipleMutation==1 & !is.na(mutationInfo)){
 | 
| 
 | 
  1557             mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
 | 
| 
 | 
  1558             tableMutationCodons <- table(mutationCodons)
 | 
| 
 | 
  1559             codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
 | 
| 
 | 
  1560             if(any(codonsWithMultipleMutations)){
 | 
| 
 | 
  1561               #remove the nucleotide mutations in the codons with multiple mutations
 | 
| 
 | 
  1562               mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
 | 
| 
 | 
  1563               #replace those codons with Ns in the input sequence
 | 
| 
 | 
  1564               paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
 | 
| 
 | 
  1565               matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
 | 
| 
 | 
  1566             }
 | 
| 
 | 
  1567           }
 | 
| 
 | 
  1568 
 | 
| 
 | 
  1569           #Filter mutations based on the model
 | 
| 
 | 
  1570           if(any(mutationInfo)==T | is.na(any(mutationInfo))){        
 | 
| 
 | 
  1571             
 | 
| 
 | 
  1572             if(model==1 & !is.na(mutationInfo)){
 | 
| 
 | 
  1573               mutationInfo <- mutationInfo[mutationInfo=="S"]
 | 
| 
 | 
  1574             }  
 | 
| 
 | 
  1575             if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo)
 | 
| 
 | 
  1576             else return(NA)
 | 
| 
 | 
  1577           }else{
 | 
| 
 | 
  1578             return(NA)
 | 
| 
 | 
  1579           }
 | 
| 
 | 
  1580         }else{
 | 
| 
 | 
  1581           return(NA)
 | 
| 
 | 
  1582         }
 | 
| 
 | 
  1583         
 | 
| 
 | 
  1584         
 | 
| 
 | 
  1585       }else{
 | 
| 
 | 
  1586         return(NA)
 | 
| 
 | 
  1587       }
 | 
| 
 | 
  1588     
 | 
| 
 | 
  1589     
 | 
| 
 | 
  1590     }else{
 | 
| 
 | 
  1591       return (NA)
 | 
| 
 | 
  1592     }    
 | 
| 
 | 
  1593   }  
 | 
| 
 | 
  1594 
 | 
| 
 | 
  1595    analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){
 | 
| 
 | 
  1596 
 | 
| 
 | 
  1597     paramGL = s2c(inputArray[2])
 | 
| 
 | 
  1598     paramSeq = s2c(inputArray[1])            
 | 
| 
 | 
  1599     inputSeq <- inputArray[1]
 | 
| 
 | 
  1600     #if( any(paramSeq=="N") ){
 | 
| 
 | 
  1601     #  gapPos_Seq =  which(paramSeq=="N")
 | 
| 
 | 
  1602     #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
 | 
| 
 | 
  1603     #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
 | 
| 
 | 
  1604     #}        
 | 
| 
 | 
  1605     mutations_val = paramGL != paramSeq   
 | 
| 
 | 
  1606     
 | 
| 
 | 
  1607     if(any(mutations_val)){
 | 
| 
 | 
  1608       mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]  
 | 
| 
 | 
  1609       length_mutations =length(mutationPos)
 | 
| 
 | 
  1610       mutationInfo = rep(NA,length_mutations)
 | 
| 
 | 
  1611                           
 | 
| 
 | 
  1612       pos<- mutationPos
 | 
| 
 | 
  1613       pos_array<-array(sapply(pos,getCodonPos))
 | 
| 
 | 
  1614       codonGL =  paramGL[pos_array]
 | 
| 
 | 
  1615       codonSeqWhole =  paramSeq[pos_array]
 | 
| 
 | 
  1616       codonSeq = sapply(pos,function(x){
 | 
| 
 | 
  1617                                 seqP = paramGL[getCodonPos(x)]
 | 
| 
 | 
  1618                                 muCodonPos = {x-1}%%3+1 
 | 
| 
 | 
  1619                                 seqP[muCodonPos] = paramSeq[x]
 | 
| 
 | 
  1620                                 return(seqP)
 | 
| 
 | 
  1621                               })
 | 
| 
 | 
  1622       GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
 | 
| 
 | 
  1623       SeqcodonsWhole =  apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)      
 | 
| 
 | 
  1624       Seqcodons =   apply(codonSeq,2,c2s)
 | 
| 
 | 
  1625       
 | 
| 
 | 
  1626       mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})     
 | 
| 
 | 
  1627       names(mutationInfo) = mutationPos     
 | 
| 
 | 
  1628       
 | 
| 
 | 
  1629       mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})           
 | 
| 
 | 
  1630       names(mutationInfoWhole) = mutationPos
 | 
| 
 | 
  1631 
 | 
| 
 | 
  1632       mutationInfo <- mutationInfo[!is.na(mutationInfo)]
 | 
| 
 | 
  1633       mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
 | 
| 
 | 
  1634       
 | 
| 
 | 
  1635       if(any(!is.na(mutationInfo))){       
 | 
| 
 | 
  1636   
 | 
| 
 | 
  1637         #Filter based on Stop (at the codon level)
 | 
| 
 | 
  1638         if(seqWithStops==1){
 | 
| 
 | 
  1639           nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
 | 
| 
 | 
  1640           mutationInfo = mutationInfo[nucleotidesAtStopCodons]
 | 
| 
 | 
  1641           mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
 | 
| 
 | 
  1642         }else{
 | 
| 
 | 
  1643           countStops = sum(mutationInfoWhole=="Stop")
 | 
| 
 | 
  1644           if(seqWithStops==2 & countStops==0) mutationInfo = NA
 | 
| 
 | 
  1645           if(seqWithStops==3 & countStops>0) mutationInfo = NA
 | 
| 
 | 
  1646         }         
 | 
| 
 | 
  1647         
 | 
| 
 | 
  1648         if(any(!is.na(mutationInfo))){
 | 
| 
 | 
  1649           #Filter mutations based on multipleMutation
 | 
| 
 | 
  1650           if(multipleMutation==1 & !is.na(mutationInfo)){
 | 
| 
 | 
  1651             mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
 | 
| 
 | 
  1652             tableMutationCodons <- table(mutationCodons)
 | 
| 
 | 
  1653             codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
 | 
| 
 | 
  1654             if(any(codonsWithMultipleMutations)){
 | 
| 
 | 
  1655               #remove the nucleotide mutations in the codons with multiple mutations
 | 
| 
 | 
  1656               mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
 | 
| 
 | 
  1657               #replace those codons with Ns in the input sequence
 | 
| 
 | 
  1658               paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
 | 
| 
 | 
  1659               #matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
 | 
| 
 | 
  1660               inputSeq <- c2s(paramSeq)
 | 
| 
 | 
  1661             }
 | 
| 
 | 
  1662           }
 | 
| 
 | 
  1663           
 | 
| 
 | 
  1664           #Filter mutations based on the model
 | 
| 
 | 
  1665           if(any(mutationInfo)==T | is.na(any(mutationInfo))){        
 | 
| 
 | 
  1666             
 | 
| 
 | 
  1667             if(model==1 & !is.na(mutationInfo)){
 | 
| 
 | 
  1668               mutationInfo <- mutationInfo[mutationInfo=="S"]
 | 
| 
 | 
  1669             }  
 | 
| 
 | 
  1670             if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq))
 | 
| 
 | 
  1671             else return(list(NA,inputSeq))
 | 
| 
 | 
  1672           }else{
 | 
| 
 | 
  1673             return(list(NA,inputSeq))
 | 
| 
 | 
  1674           }
 | 
| 
 | 
  1675         }else{
 | 
| 
 | 
  1676           return(list(NA,inputSeq))
 | 
| 
 | 
  1677         }
 | 
| 
 | 
  1678         
 | 
| 
 | 
  1679         
 | 
| 
 | 
  1680       }else{
 | 
| 
 | 
  1681         return(list(NA,inputSeq))
 | 
| 
 | 
  1682       }
 | 
| 
 | 
  1683     
 | 
| 
 | 
  1684     
 | 
| 
 | 
  1685     }else{
 | 
| 
 | 
  1686       return (list(NA,inputSeq))
 | 
| 
 | 
  1687     }    
 | 
| 
 | 
  1688   }  
 | 
| 
 | 
  1689  
 | 
| 
 | 
  1690   # triMutability Background Count
 | 
| 
 | 
  1691   buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
 | 
| 
 | 
  1692     
 | 
| 
 | 
  1693     #rowOrigMatInput = matInput[inputMatrixIndex,]    
 | 
| 
 | 
  1694     seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
 | 
| 
 | 
  1695     seqInput = gsub("-", "", matInput[inputMatrixIndex,1])    
 | 
| 
 | 
  1696     #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
 | 
| 
 | 
  1697     tempInput <- cbind(seqInput,seqGL)
 | 
| 
 | 
  1698     seqLength = nchar(seqGL)      
 | 
| 
 | 
  1699     list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
 | 
| 
 | 
  1700     mutationCount <- list_analyzeMutationsFixed[[1]]
 | 
| 
 | 
  1701     seqInput <- list_analyzeMutationsFixed[[2]]
 | 
| 
 | 
  1702     BackgroundMatrix = mutabilityMatrix
 | 
| 
 | 
  1703     MutationMatrix = mutabilityMatrix    
 | 
| 
 | 
  1704     MutationCountMatrix = mutabilityMatrix    
 | 
| 
 | 
  1705     if(!is.na(mutationCount)){
 | 
| 
 | 
  1706       if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ 
 | 
| 
 | 
  1707                   
 | 
| 
 | 
  1708         fivermerStartPos = 1:(seqLength-4)
 | 
| 
 | 
  1709         fivemerLength <- length(fivermerStartPos)
 | 
| 
 | 
  1710         fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
 | 
| 
 | 
  1711         fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
 | 
| 
 | 
  1712     
 | 
| 
 | 
  1713         #Background
 | 
| 
 | 
  1714         for(fivemerIndex in 1:fivemerLength){
 | 
| 
 | 
  1715           fivemer = fivemerGL[fivemerIndex]
 | 
| 
 | 
  1716           if(!any(grep("N",fivemer))){
 | 
| 
 | 
  1717             fivemerCodonPos = fivemerCodon(fivemerIndex)
 | 
| 
 | 
  1718             fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) 
 | 
| 
 | 
  1719             fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])          
 | 
| 
 | 
  1720             
 | 
| 
 | 
  1721             # All mutations model
 | 
| 
 | 
  1722             #if(!any(grep("N",fivemerReadingFrameCodon))){
 | 
| 
 | 
  1723               if(model==0){
 | 
| 
 | 
  1724                 if(stopMutations==0){
 | 
| 
 | 
  1725                   if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
 | 
| 
 | 
  1726                     BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)              
 | 
| 
 | 
  1727                 }else{
 | 
| 
 | 
  1728                   if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
 | 
| 
 | 
  1729                     positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
 | 
| 
 | 
  1730                     BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
 | 
| 
 | 
  1731                   }
 | 
| 
 | 
  1732                 }
 | 
| 
 | 
  1733               }else{ # Only silent mutations
 | 
| 
 | 
  1734                 if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
 | 
| 
 | 
  1735                   positionWithinCodon = which(fivemerCodonPos==3)
 | 
| 
 | 
  1736                   BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
 | 
| 
 | 
  1737                 }
 | 
| 
 | 
  1738               }
 | 
| 
 | 
  1739             #}
 | 
| 
 | 
  1740           }
 | 
| 
 | 
  1741         }
 | 
| 
 | 
  1742         
 | 
| 
 | 
  1743         #Mutations
 | 
| 
 | 
  1744         if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
 | 
| 
 | 
  1745         if(model==1) mutationCount = mutationCount[mutationCount=="S"]  
 | 
| 
 | 
  1746         mutationPositions = as.numeric(names(mutationCount))
 | 
| 
 | 
  1747         mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
 | 
| 
 | 
  1748         mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
 | 
| 
 | 
  1749         countMutations = 0 
 | 
| 
 | 
  1750         for(mutationPosition in mutationPositions){
 | 
| 
 | 
  1751           fivemerIndex = mutationPosition-2
 | 
| 
 | 
  1752           fivemer = fivemerSeq[fivemerIndex]
 | 
| 
 | 
  1753           GLfivemer = fivemerGL[fivemerIndex]
 | 
| 
 | 
  1754           fivemerCodonPos = fivemerCodon(fivemerIndex)
 | 
| 
 | 
  1755           fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) 
 | 
| 
 | 
  1756           fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
 | 
| 
 | 
  1757           if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
 | 
| 
 | 
  1758             if(model==0){
 | 
| 
 | 
  1759                 countMutations = countMutations + 1              
 | 
| 
 | 
  1760                 MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
 | 
| 
 | 
  1761                 MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)             
 | 
| 
 | 
  1762             }else{
 | 
| 
 | 
  1763               if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
 | 
| 
 | 
  1764                   countMutations = countMutations + 1
 | 
| 
 | 
  1765                   positionWithinCodon = which(fivemerCodonPos==3)
 | 
| 
 | 
  1766                   glNuc =  substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
 | 
| 
 | 
  1767                   inputNuc =  substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
 | 
| 
 | 
  1768                   MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
 | 
| 
 | 
  1769                   MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)                                    
 | 
| 
 | 
  1770               }                
 | 
| 
 | 
  1771             }                  
 | 
| 
 | 
  1772           }              
 | 
| 
 | 
  1773         }
 | 
| 
 | 
  1774         
 | 
| 
 | 
  1775         seqMutability = MutationMatrix/BackgroundMatrix
 | 
| 
 | 
  1776         seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
 | 
| 
 | 
  1777         #cat(inputMatrixIndex,"\t",countMutations,"\n")
 | 
| 
 | 
  1778         return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))      
 | 
| 
 | 
  1779         
 | 
| 
 | 
  1780       }        
 | 
| 
 | 
  1781     }
 | 
| 
 | 
  1782   
 | 
| 
 | 
  1783   }  
 | 
| 
 | 
  1784   
 | 
| 
 | 
  1785   #Returns the codon position containing the middle nucleotide
 | 
| 
 | 
  1786   fivemerCodon <- function(fivemerIndex){
 | 
| 
 | 
  1787     codonPos = list(2:4,1:3,3:5)
 | 
| 
 | 
  1788     fivemerType = fivemerIndex%%3
 | 
| 
 | 
  1789     return(codonPos[[fivemerType+1]])
 | 
| 
 | 
  1790   }
 | 
| 
 | 
  1791 
 | 
| 
 | 
  1792   #returns probability values for one mutation in codons resulting in R, S or Stop
 | 
| 
 | 
  1793   probMutations <- function(typeOfMutation){    
 | 
| 
 | 
  1794     matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(words(alphabet = c(NUCLEOTIDES,"N"), length=3),c(1:3)))   
 | 
| 
 | 
  1795     for(codon in rownames(matMutationProb)){
 | 
| 
 | 
  1796         if( !any(grep("N",codon)) ){
 | 
| 
 | 
  1797         for(muPos in 1:3){
 | 
| 
 | 
  1798           matCodon = matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T)
 | 
| 
 | 
  1799           glNuc = matCodon[1,muPos]
 | 
| 
 | 
  1800           matCodon[,muPos] = canMutateTo(glNuc) 
 | 
| 
 | 
  1801           substitutionRate = substitution[glNuc,matCodon[,muPos]]
 | 
| 
 | 
  1802           typeOfMutations = apply(rbind(rep(codon,3),apply(matCodon,1,c2s)),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})        
 | 
| 
 | 
  1803           matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation])
 | 
| 
 | 
  1804         }
 | 
| 
 | 
  1805       }
 | 
| 
 | 
  1806     }
 | 
| 
 | 
  1807     
 | 
| 
 | 
  1808     return(matMutationProb) 
 | 
| 
 | 
  1809   }
 | 
| 
 | 
  1810   
 | 
| 
 | 
  1811   
 | 
| 
 | 
  1812   
 | 
| 
 | 
  1813   
 | 
| 
 | 
  1814 #Mapping Trinucleotides to fivemers
 | 
| 
 | 
  1815 mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
 | 
| 
 | 
  1816   rownames(triMutability) <- triMutability_Names
 | 
| 
 | 
  1817   Fivemer<-rep(NA,1024)
 | 
| 
 | 
  1818   names(Fivemer)<-words(alphabet=NUCLEOTIDES,length=5)
 | 
| 
 | 
  1819   Fivemer<-sapply(names(Fivemer),function(Word)return(sum( c(triMutability[substring(Word,3,5),1],triMutability[substring(Word,2,4),2],triMutability[substring(Word,1,3),3]),na.rm=TRUE)))
 | 
| 
 | 
  1820   Fivemer<-Fivemer/sum(Fivemer)
 | 
| 
 | 
  1821   return(Fivemer)
 | 
| 
 | 
  1822 }
 | 
| 
 | 
  1823 
 | 
| 
 | 
  1824 collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
 | 
| 
 | 
  1825   Indices<-substring(names(Fivemer),3,3)==NUC
 | 
| 
 | 
  1826   Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
 | 
| 
 | 
  1827   tapply(which(Indices),Factors,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE))
 | 
| 
 | 
  1828 }
 | 
| 
 | 
  1829 
 | 
| 
 | 
  1830 
 | 
| 
 | 
  1831 
 | 
| 
 | 
  1832 CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
 | 
| 
 | 
  1833   Indices<-substring(names(Fivemer),3,3)==NUC
 | 
| 
 | 
  1834   Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
 | 
| 
 | 
  1835   tapply(which(Indices),Factors,function(i)sum(Weights[i],na.rm=TRUE))
 | 
| 
 | 
  1836 }
 | 
| 
 | 
  1837 
 | 
| 
 | 
  1838 #Uses the real counts of the mutated fivemers
 | 
| 
 | 
  1839 CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
 | 
| 
 | 
  1840   Indices<-substring(names(Fivemer),3,3)==NUC
 | 
| 
 | 
  1841   Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
 | 
| 
 | 
  1842   tapply(which(Indices),Factors,function(i)sum(Counts[i],na.rm=TRUE))
 | 
| 
 | 
  1843 }
 | 
| 
 | 
  1844 
 | 
| 
 | 
  1845 bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
 | 
| 
 | 
  1846 N<-sum(x)
 | 
| 
 | 
  1847 if(N){
 | 
| 
 | 
  1848 p<-x/N
 | 
| 
 | 
  1849 k<-length(x)-1
 | 
| 
 | 
  1850 tmp<-rmultinom(M, size = N, prob=p)
 | 
| 
 | 
  1851 tmp_p<-apply(tmp,2,function(y)y/N)
 | 
| 
 | 
  1852 (apply(tmp_p,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k))))
 | 
| 
 | 
  1853 }
 | 
| 
 | 
  1854 else return(matrix(0,2,length(x)))
 | 
| 
 | 
  1855 }
 | 
| 
 | 
  1856 
 | 
| 
 | 
  1857 
 | 
| 
 | 
  1858 
 | 
| 
 | 
  1859 
 | 
| 
 | 
  1860 bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){
 | 
| 
 | 
  1861 
 | 
| 
 | 
  1862 N<-sum(x)
 | 
| 
 | 
  1863 k<-length(x)
 | 
| 
 | 
  1864 y<-rep(1:k,x)
 | 
| 
 | 
  1865 tmp<-sapply(1:M,function(i)sample(y,n))
 | 
| 
 | 
  1866 if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))/n
 | 
| 
 | 
  1867 if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))/n
 | 
| 
 | 
  1868 (apply(tmp_p,1,function(z)quantile(z,c(alpha/2/(k-1),1-alpha/2/(k-1)))))
 | 
| 
 | 
  1869 }
 | 
| 
 | 
  1870 
 | 
| 
 | 
  1871 
 | 
| 
 | 
  1872 
 | 
| 
 | 
  1873 p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
 | 
| 
 | 
  1874 n=sum(x_obs)
 | 
| 
 | 
  1875 N<-sum(x)
 | 
| 
 | 
  1876 k<-length(x)
 | 
| 
 | 
  1877 y<-rep(1:k,x)
 | 
| 
 | 
  1878 tmp<-sapply(1:M,function(i)sample(y,n))
 | 
| 
 | 
  1879 if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))
 | 
| 
 | 
  1880 if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))
 | 
| 
 | 
  1881 tmp<-rbind(sapply(1:3,function(i)sum(tmp_p[i,]>=x_obs[i])/M),
 | 
| 
 | 
  1882 sapply(1:3,function(i)sum(tmp_p[i,]<=x_obs[i])/M))
 | 
| 
 | 
  1883 sapply(1:3,function(i){if(tmp[1,i]>=tmp[2,i])return(-tmp[2,i])else return(tmp[1,i])})
 | 
| 
 | 
  1884 }
 | 
| 
 | 
  1885 
 | 
| 
 | 
  1886 #"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
 | 
| 
 | 
  1887 # Remove SNPs from IMGT germline segment alleles
 | 
| 
 | 
  1888 generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
 | 
| 
 | 
  1889   repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
 | 
| 
 | 
  1890   alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2])
 | 
| 
 | 
  1891   SNPs <- tapply(repertoireIn,sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1]),function(x){
 | 
| 
 | 
  1892     Indices<-NULL
 | 
| 
 | 
  1893     for(i in 1:length(x)){
 | 
| 
 | 
  1894       firstSeq = s2c(x[[1]])
 | 
| 
 | 
  1895       iSeq = s2c(x[[i]])
 | 
| 
 | 
  1896       Indices<-c(Indices,which(firstSeq[1:320]!=iSeq[1:320] & firstSeq[1:320]!="." & iSeq[1:320]!="."  ))
 | 
| 
 | 
  1897     }
 | 
| 
 | 
  1898     return(sort(unique(Indices)))
 | 
| 
 | 
  1899   })
 | 
| 
 | 
  1900  repertoireOut <- repertoireIn
 | 
| 
 | 
  1901  repertoireOut <- lapply(names(repertoireOut), function(repertoireName){
 | 
| 
 | 
  1902                                         alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2]
 | 
| 
 | 
  1903                                         geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1]
 | 
| 
 | 
  1904                                         alleleSeq <- s2c(repertoireOut[[repertoireName]])
 | 
| 
 | 
  1905                                         alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
 | 
| 
 | 
  1906                                         alleleSeq <- c2s(alleleSeq)
 | 
| 
 | 
  1907                                         repertoireOut[[repertoireName]] <- alleleSeq
 | 
| 
 | 
  1908                                       })
 | 
| 
 | 
  1909   names(repertoireOut) <- names(repertoireIn)
 | 
| 
 | 
  1910   write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)                                               
 | 
| 
 | 
  1911                                       
 | 
| 
 | 
  1912 }
 | 
| 
 | 
  1913 
 | 
| 
 | 
  1914 
 | 
| 
 | 
  1915 
 | 
| 
 | 
  1916 
 | 
| 
 | 
  1917 
 | 
| 
 | 
  1918 
 | 
| 
 | 
  1919 ############
 | 
| 
 | 
  1920 groupBayes2 = function(indexes, param_resultMat){
 | 
| 
 | 
  1921   
 | 
| 
 | 
  1922   BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[4])}))
 | 
| 
 | 
  1923   BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[2]+x[4])}))
 | 
| 
 | 
  1924   #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])}))
 | 
| 
 | 
  1925   #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])}))
 | 
| 
 | 
  1926   #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])}))
 | 
| 
 | 
  1927   #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])}))
 | 
| 
 | 
  1928   return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR,
 | 
| 
 | 
  1929                 "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) )
 | 
| 
 | 
  1930                 #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR,
 | 
| 
 | 
  1931                 #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR))
 | 
| 
 | 
  1932 #                "BayesGDist_Global_CDR" = BayesGDist_Global_CDR,
 | 
| 
 | 
  1933 #                "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) )
 | 
| 
 | 
  1934 
 | 
| 
 | 
  1935 
 | 
| 
 | 
  1936 }
 | 
| 
 | 
  1937 
 | 
| 
 | 
  1938 
 | 
| 
 | 
  1939 calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){
 | 
| 
 | 
  1940   G <- max(length(x),length(N),length(p))
 | 
| 
 | 
  1941   x=array(x,dim=G)
 | 
| 
 | 
  1942   N=array(N,dim=G)
 | 
| 
 | 
  1943   p=array(p,dim=G)
 | 
| 
 | 
  1944 
 | 
| 
 | 
  1945   indexOfZero = N>0 & p>0
 | 
| 
 | 
  1946   N = N[indexOfZero]
 | 
| 
 | 
  1947   x = x[indexOfZero]
 | 
| 
 | 
  1948   p = p[indexOfZero]  
 | 
| 
 | 
  1949   G <- length(x)
 | 
| 
 | 
  1950   
 | 
| 
 | 
  1951   if(G){
 | 
| 
 | 
  1952     
 | 
| 
 | 
  1953     cons<-array( dim=c(length_sigma,G) )
 | 
| 
 | 
  1954     if(G==1) {
 | 
| 
 | 
  1955     return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma))
 | 
| 
 | 
  1956     }
 | 
| 
 | 
  1957     else {
 | 
| 
 | 
  1958       for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma)
 | 
| 
 | 
  1959       listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
 | 
| 
 | 
  1960       y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
 | 
| 
 | 
  1961       return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
 | 
| 
 | 
  1962     }
 | 
| 
 | 
  1963   }else{
 | 
| 
 | 
  1964     return(NA)
 | 
| 
 | 
  1965   }
 | 
| 
 | 
  1966 }
 | 
| 
 | 
  1967 
 | 
| 
 | 
  1968 
 | 
| 
 | 
  1969 calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
 | 
| 
 | 
  1970   matG <- listMatG[[1]]  
 | 
| 
 | 
  1971   groups <- listMatG[[2]]
 | 
| 
 | 
  1972   i = 1  
 | 
| 
 | 
  1973   resConv <- matG[,i]
 | 
| 
 | 
  1974   denom <- 2^groups[i]
 | 
| 
 | 
  1975   if(length(groups)>1){
 | 
| 
 | 
  1976     while( i<length(groups) ){
 | 
| 
 | 
  1977       i = i + 1
 | 
| 
 | 
  1978       resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma)
 | 
| 
 | 
  1979       #cat({{2^groups[i]}/denom},"\n")
 | 
| 
 | 
  1980       denom <- denom + 2^groups[i]
 | 
| 
 | 
  1981     }
 | 
| 
 | 
  1982   }
 | 
| 
 | 
  1983   return(resConv)  
 | 
| 
 | 
  1984 }
 | 
| 
 | 
  1985 
 | 
| 
 | 
  1986 weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
 | 
| 
 | 
  1987 lx<-length(x)
 | 
| 
 | 
  1988 ly<-length(y)
 | 
| 
 | 
  1989 if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){
 | 
| 
 | 
  1990 if(w<1){
 | 
| 
 | 
  1991 y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y
 | 
| 
 | 
  1992 x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y
 | 
| 
 | 
  1993 lx<-length(x1)
 | 
| 
 | 
  1994 ly<-length(y1)
 | 
| 
 | 
  1995 }
 | 
| 
 | 
  1996 else {
 | 
| 
 | 
  1997 y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y
 | 
| 
 | 
  1998 x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y
 | 
| 
 | 
  1999 lx<-length(x1)
 | 
| 
 | 
  2000 ly<-length(y1)
 | 
| 
 | 
  2001 }
 | 
| 
 | 
  2002 }
 | 
| 
 | 
  2003 else{
 | 
| 
 | 
  2004 x1<-x
 | 
| 
 | 
  2005 y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y
 | 
| 
 | 
  2006 ly<-length(y1)
 | 
| 
 | 
  2007 }
 | 
| 
 | 
  2008 tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y
 | 
| 
 | 
  2009 tmp[tmp<=0] = 0 
 | 
| 
 | 
  2010 return(tmp/sum(tmp))
 | 
| 
 | 
  2011 }
 | 
| 
 | 
  2012 
 | 
| 
 | 
  2013 ########################
 | 
| 
 | 
  2014 
 | 
| 
 | 
  2015 
 | 
| 
 | 
  2016 
 | 
| 
 | 
  2017 
 | 
| 
 | 
  2018 mutabilityMatrixONE<-rep(0,4)
 | 
| 
 | 
  2019 names(mutabilityMatrixONE)<-NUCLEOTIDES
 | 
| 
 | 
  2020 
 | 
| 
 | 
  2021   # triMutability Background Count
 | 
| 
 | 
  2022   buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
 | 
| 
 | 
  2023     
 | 
| 
 | 
  2024     #rowOrigMatInput = matInput[inputMatrixIndex,]    
 | 
| 
 | 
  2025     seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
 | 
| 
 | 
  2026     seqInput = gsub("-", "", matInput[inputMatrixIndex,1])    
 | 
| 
 | 
  2027     matInput[inputMatrixIndex,] <<- c(seqInput,seqGL)
 | 
| 
 | 
  2028     seqLength = nchar(seqGL)      
 | 
| 
 | 
  2029     mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops)
 | 
| 
 | 
  2030     BackgroundMatrix = mutabilityMatrixONE
 | 
| 
 | 
  2031     MutationMatrix = mutabilityMatrixONE    
 | 
| 
 | 
  2032     MutationCountMatrix = mutabilityMatrixONE    
 | 
| 
 | 
  2033     if(!is.na(mutationCount)){
 | 
| 
 | 
  2034       if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ 
 | 
| 
 | 
  2035                   
 | 
| 
 | 
  2036 #         ONEmerStartPos = 1:(seqLength)
 | 
| 
 | 
  2037 #         ONEmerLength <- length(ONEmerStartPos)
 | 
| 
 | 
  2038         ONEmerGL <- s2c(seqGL)
 | 
| 
 | 
  2039         ONEmerSeq <- s2c(seqInput)
 | 
| 
 | 
  2040     
 | 
| 
 | 
  2041         #Background
 | 
| 
 | 
  2042         for(ONEmerIndex in 1:seqLength){
 | 
| 
 | 
  2043           ONEmer = ONEmerGL[ONEmerIndex]
 | 
| 
 | 
  2044           if(ONEmer!="N"){
 | 
| 
 | 
  2045             ONEmerCodonPos = getCodonPos(ONEmerIndex)
 | 
| 
 | 
  2046             ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos]) 
 | 
| 
 | 
  2047             ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] )         
 | 
| 
 | 
  2048             
 | 
| 
 | 
  2049             # All mutations model
 | 
| 
 | 
  2050             #if(!any(grep("N",ONEmerReadingFrameCodon))){
 | 
| 
 | 
  2051               if(model==0){
 | 
| 
 | 
  2052                 if(stopMutations==0){
 | 
| 
 | 
  2053                   if(!any(grep("N",ONEmerReadingFrameCodonInputSeq)))
 | 
| 
 | 
  2054                     BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1)              
 | 
| 
 | 
  2055                 }else{
 | 
| 
 | 
  2056                   if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){
 | 
| 
 | 
  2057                     positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1]
 | 
| 
 | 
  2058                     BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon])
 | 
| 
 | 
  2059                   }
 | 
| 
 | 
  2060                 }
 | 
| 
 | 
  2061               }else{ # Only silent mutations
 | 
| 
 | 
  2062                 if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){
 | 
| 
 | 
  2063                   positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
 | 
| 
 | 
  2064                   BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon])
 | 
| 
 | 
  2065                 }
 | 
| 
 | 
  2066               }
 | 
| 
 | 
  2067             }
 | 
| 
 | 
  2068           }
 | 
| 
 | 
  2069         }
 | 
| 
 | 
  2070         
 | 
| 
 | 
  2071         #Mutations
 | 
| 
 | 
  2072         if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
 | 
| 
 | 
  2073         if(model==1) mutationCount = mutationCount[mutationCount=="S"]  
 | 
| 
 | 
  2074         mutationPositions = as.numeric(names(mutationCount))
 | 
| 
 | 
  2075         mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
 | 
| 
 | 
  2076         mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
 | 
| 
 | 
  2077         countMutations = 0 
 | 
| 
 | 
  2078         for(mutationPosition in mutationPositions){
 | 
| 
 | 
  2079           ONEmerIndex = mutationPosition
 | 
| 
 | 
  2080           ONEmer = ONEmerSeq[ONEmerIndex]
 | 
| 
 | 
  2081           GLONEmer = ONEmerGL[ONEmerIndex]
 | 
| 
 | 
  2082           ONEmerCodonPos = getCodonPos(ONEmerIndex)
 | 
| 
 | 
  2083           ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos])  
 | 
| 
 | 
  2084           ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos])  
 | 
| 
 | 
  2085           if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){
 | 
| 
 | 
  2086             if(model==0){
 | 
| 
 | 
  2087                 countMutations = countMutations + 1              
 | 
| 
 | 
  2088                 MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1)
 | 
| 
 | 
  2089                 MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)             
 | 
| 
 | 
  2090             }else{
 | 
| 
 | 
  2091               if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){
 | 
| 
 | 
  2092                   countMutations = countMutations + 1
 | 
| 
 | 
  2093                   positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
 | 
| 
 | 
  2094                   glNuc =  substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
 | 
| 
 | 
  2095                   inputNuc =  substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
 | 
| 
 | 
  2096                   MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc])
 | 
| 
 | 
  2097                   MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)                                    
 | 
| 
 | 
  2098               }                
 | 
| 
 | 
  2099             }                  
 | 
| 
 | 
  2100           }              
 | 
| 
 | 
  2101         }
 | 
| 
 | 
  2102         
 | 
| 
 | 
  2103         seqMutability = MutationMatrix/BackgroundMatrix
 | 
| 
 | 
  2104         seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
 | 
| 
 | 
  2105         #cat(inputMatrixIndex,"\t",countMutations,"\n")
 | 
| 
 | 
  2106         return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))      
 | 
| 
 | 
  2107 #         tmp<-list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix)
 | 
| 
 | 
  2108       }        
 | 
| 
 | 
  2109     }
 | 
| 
 | 
  2110   
 | 
| 
 | 
  2111 ################
 | 
| 
 | 
  2112 # $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $
 | 
| 
 | 
  2113 
 | 
| 
 | 
  2114 trim <- function(s, recode.factor=TRUE, ...)
 | 
| 
 | 
  2115   UseMethod("trim", s)
 | 
| 
 | 
  2116 
 | 
| 
 | 
  2117 trim.default <- function(s, recode.factor=TRUE, ...)
 | 
| 
 | 
  2118   s
 | 
| 
 | 
  2119 
 | 
| 
 | 
  2120 trim.character <- function(s, recode.factor=TRUE, ...)
 | 
| 
 | 
  2121 {
 | 
| 
 | 
  2122   s <- sub(pattern="^ +", replacement="", x=s)
 | 
| 
 | 
  2123   s <- sub(pattern=" +$", replacement="", x=s)
 | 
| 
 | 
  2124   s
 | 
| 
 | 
  2125 }
 | 
| 
 | 
  2126 
 | 
| 
 | 
  2127 trim.factor <- function(s, recode.factor=TRUE, ...)
 | 
| 
 | 
  2128 {
 | 
| 
 | 
  2129   levels(s) <- trim(levels(s))
 | 
| 
 | 
  2130   if(recode.factor) {
 | 
| 
 | 
  2131     dots <- list(x=s, ...)
 | 
| 
 | 
  2132     if(is.null(dots$sort)) dots$sort <- sort
 | 
| 
 | 
  2133     s <- do.call(what=reorder.factor, args=dots)
 | 
| 
 | 
  2134   }
 | 
| 
 | 
  2135   s
 | 
| 
 | 
  2136 }
 | 
| 
 | 
  2137 
 | 
| 
 | 
  2138 trim.list <- function(s, recode.factor=TRUE, ...)
 | 
| 
 | 
  2139   lapply(s, trim, recode.factor=recode.factor, ...)
 | 
| 
 | 
  2140 
 | 
| 
 | 
  2141 trim.data.frame <- function(s, recode.factor=TRUE, ...)
 | 
| 
 | 
  2142 {
 | 
| 
 | 
  2143   s[] <- trim.list(s, recode.factor=recode.factor, ...)
 | 
| 
 | 
  2144   s
 | 
| 
 | 
  2145 }
 | 
| 
 | 
  2146 #######################################
 | 
| 
 | 
  2147 # Compute the expected for each sequence-germline pair by codon 
 | 
| 
 | 
  2148 getExpectedIndividualByCodon <- function(matInput){    
 | 
| 
 | 
  2149 if( any(grep("multicore",search())) ){  
 | 
| 
 | 
  2150   facGL <- factor(matInput[,2])
 | 
| 
 | 
  2151   facLevels = levels(facGL)
 | 
| 
 | 
  2152   LisGLs_MutabilityU = mclapply(1:length(facLevels),  function(x){
 | 
| 
 | 
  2153     computeMutabilities(facLevels[x])
 | 
| 
 | 
  2154   })
 | 
| 
 | 
  2155   facIndex = match(facGL,facLevels)
 | 
| 
 | 
  2156   
 | 
| 
 | 
  2157   LisGLs_Mutability = mclapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
  2158     cInput = rep(NA,nchar(matInput[x,1]))
 | 
| 
 | 
  2159     cInput[s2c(matInput[x,1])!="N"] = 1
 | 
| 
 | 
  2160     LisGLs_MutabilityU[[facIndex[x]]] * cInput                                                   
 | 
| 
 | 
  2161   })
 | 
| 
 | 
  2162   
 | 
| 
 | 
  2163   LisGLs_Targeting =  mclapply(1:dim(matInput)[1],  function(x){
 | 
| 
 | 
  2164     computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
 | 
| 
 | 
  2165   })
 | 
| 
 | 
  2166   
 | 
| 
 | 
  2167   LisGLs_MutationTypes  = mclapply(1:length(matInput[,2]),function(x){
 | 
| 
 | 
  2168     #print(x)
 | 
| 
 | 
  2169     computeMutationTypes(matInput[x,2])
 | 
| 
 | 
  2170   })
 | 
| 
 | 
  2171   
 | 
| 
 | 
  2172   LisGLs_R_Exp = mclapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
  2173     Exp_R <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
 | 
| 
 | 
  2174                         function(codonNucs){                                                      
 | 
| 
 | 
  2175                           RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") 
 | 
| 
 | 
  2176                           sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) 
 | 
| 
 | 
  2177                         }
 | 
| 
 | 
  2178     )                                                   
 | 
| 
 | 
  2179   })
 | 
| 
 | 
  2180   
 | 
| 
 | 
  2181   LisGLs_S_Exp = mclapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
  2182     Exp_S <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
 | 
| 
 | 
  2183                         function(codonNucs){                                                      
 | 
| 
 | 
  2184                           SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")   
 | 
| 
 | 
  2185                           sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
 | 
| 
 | 
  2186                         }
 | 
| 
 | 
  2187     )                                                 
 | 
| 
 | 
  2188   })                                                
 | 
| 
 | 
  2189   
 | 
| 
 | 
  2190   Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
 | 
| 
 | 
  2191   Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
 | 
| 
 | 
  2192   return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
 | 
| 
 | 
  2193   }else{
 | 
| 
 | 
  2194     facGL <- factor(matInput[,2])
 | 
| 
 | 
  2195     facLevels = levels(facGL)
 | 
| 
 | 
  2196     LisGLs_MutabilityU = lapply(1:length(facLevels),  function(x){
 | 
| 
 | 
  2197       computeMutabilities(facLevels[x])
 | 
| 
 | 
  2198     })
 | 
| 
 | 
  2199     facIndex = match(facGL,facLevels)
 | 
| 
 | 
  2200     
 | 
| 
 | 
  2201     LisGLs_Mutability = lapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
  2202       cInput = rep(NA,nchar(matInput[x,1]))
 | 
| 
 | 
  2203       cInput[s2c(matInput[x,1])!="N"] = 1
 | 
| 
 | 
  2204       LisGLs_MutabilityU[[facIndex[x]]] * cInput                                                   
 | 
| 
 | 
  2205     })
 | 
| 
 | 
  2206     
 | 
| 
 | 
  2207     LisGLs_Targeting =  lapply(1:dim(matInput)[1],  function(x){
 | 
| 
 | 
  2208       computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
 | 
| 
 | 
  2209     })
 | 
| 
 | 
  2210     
 | 
| 
 | 
  2211     LisGLs_MutationTypes  = lapply(1:length(matInput[,2]),function(x){
 | 
| 
 | 
  2212       #print(x)
 | 
| 
 | 
  2213       computeMutationTypes(matInput[x,2])
 | 
| 
 | 
  2214     })
 | 
| 
 | 
  2215     
 | 
| 
 | 
  2216     LisGLs_R_Exp = lapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
  2217       Exp_R <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
 | 
| 
 | 
  2218                           function(codonNucs){                                                      
 | 
| 
 | 
  2219                             RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") 
 | 
| 
 | 
  2220                             sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) 
 | 
| 
 | 
  2221                           }
 | 
| 
 | 
  2222       )                                                   
 | 
| 
 | 
  2223     })
 | 
| 
 | 
  2224     
 | 
| 
 | 
  2225     LisGLs_S_Exp = lapply(1:nrow(matInput),  function(x){
 | 
| 
 | 
  2226       Exp_S <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
 | 
| 
 | 
  2227                           function(codonNucs){                                                      
 | 
| 
 | 
  2228                             SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")   
 | 
| 
 | 
  2229                             sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
 | 
| 
 | 
  2230                           }
 | 
| 
 | 
  2231       )                                                 
 | 
| 
 | 
  2232     })                                                
 | 
| 
 | 
  2233     
 | 
| 
 | 
  2234     Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
 | 
| 
 | 
  2235     Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
 | 
| 
 | 
  2236     return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )    
 | 
| 
 | 
  2237   }
 | 
| 
 | 
  2238 }
 | 
| 
 | 
  2239 
 | 
| 
 | 
  2240 # getObservedMutationsByCodon <- function(listMutations){
 | 
| 
 | 
  2241 #   numbSeqs <- length(listMutations) 
 | 
| 
 | 
  2242 #   obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
 | 
| 
 | 
  2243 #   obsMu_S <- obsMu_R
 | 
| 
 | 
  2244 #   temp <- mclapply(1:length(listMutations), function(i){
 | 
| 
 | 
  2245 #     arrMutations = listMutations[[i]]
 | 
| 
 | 
  2246 #     RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
 | 
| 
 | 
  2247 #     RPos <- sapply(RPos,getCodonNumb)                                                                    
 | 
| 
 | 
  2248 #     if(any(RPos)){
 | 
| 
 | 
  2249 #       tabR <- table(RPos)
 | 
| 
 | 
  2250 #       obsMu_R[i,as.numeric(names(tabR))] <<- tabR
 | 
| 
 | 
  2251 #     }                                    
 | 
| 
 | 
  2252 #     
 | 
| 
 | 
  2253 #     SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
 | 
| 
 | 
  2254 #     SPos <- sapply(SPos,getCodonNumb)
 | 
| 
 | 
  2255 #     if(any(SPos)){
 | 
| 
 | 
  2256 #       tabS <- table(SPos)
 | 
| 
 | 
  2257 #       obsMu_S[i,names(tabS)] <<- tabS
 | 
| 
 | 
  2258 #     }                                          
 | 
| 
 | 
  2259 #   }
 | 
| 
 | 
  2260 #   )
 | 
| 
 | 
  2261 #   return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) 
 | 
| 
 | 
  2262 # }
 | 
| 
 | 
  2263 
 | 
| 
 | 
  2264 getObservedMutationsByCodon <- function(listMutations){
 | 
| 
 | 
  2265   numbSeqs <- length(listMutations) 
 | 
| 
 | 
  2266   obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
 | 
| 
 | 
  2267   obsMu_S <- obsMu_R
 | 
| 
 | 
  2268   temp <- lapply(1:length(listMutations), function(i){
 | 
| 
 | 
  2269     arrMutations = listMutations[[i]]
 | 
| 
 | 
  2270     RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
 | 
| 
 | 
  2271     RPos <- sapply(RPos,getCodonNumb)                                                                    
 | 
| 
 | 
  2272     if(any(RPos)){
 | 
| 
 | 
  2273       tabR <- table(RPos)
 | 
| 
 | 
  2274       obsMu_R[i,as.numeric(names(tabR))] <<- tabR
 | 
| 
 | 
  2275     }                                    
 | 
| 
 | 
  2276     
 | 
| 
 | 
  2277     SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
 | 
| 
 | 
  2278     SPos <- sapply(SPos,getCodonNumb)
 | 
| 
 | 
  2279     if(any(SPos)){
 | 
| 
 | 
  2280       tabS <- table(SPos)
 | 
| 
 | 
  2281       obsMu_S[i,names(tabS)] <<- tabS
 | 
| 
 | 
  2282     }                                          
 | 
| 
 | 
  2283   }
 | 
| 
 | 
  2284   )
 | 
| 
 | 
  2285   return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) 
 | 
| 
 | 
  2286 }
 | 
| 
 | 
  2287 
 |