Repository 'shm_csr'
hg clone https://toolshed.g2.bx.psu.edu/repos/davidvanzessen/shm_csr

Changeset 67:ba33b94637ca (2019-01-29)
Previous changeset 66:43a1aa648537 (2017-12-07) Next changeset 68:7b9481fa4a70 (2019-01-29)
Commit message:
Uploaded
modified:
aa_histogram.r
baseline/Baseline_Functions.r
baseline/Baseline_Main.r
baseline/FiveS_Mutability.RData
baseline/FiveS_Substitution.RData
baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa
baseline/IMGTVHreferencedataset20161215.fa
baseline/comparePDFs.r
baseline/script_imgt.py
baseline/script_xlsx.py
baseline/wrapper.sh
change_o/DefineClones.py
change_o/MakeDb.py
change_o/define_clones.sh
change_o/makedb.sh
merge_and_filter.r
shm_clonality.htm
shm_csr.htm
shm_csr.r
shm_csr.xml
shm_downloads.htm
shm_first.htm
shm_frequency.htm
shm_overview.htm
shm_selection.htm
shm_transition.htm
wrapper.sh
added:
LICENSE
README.md
baseline/baseline_url.txt
change_o/change_o_url.txt
b
diff -r 43a1aa648537 -r ba33b94637ca LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE Tue Jan 29 03:54:09 2019 -0500
b
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 david
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
b
diff -r 43a1aa648537 -r ba33b94637ca README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Tue Jan 29 03:54:09 2019 -0500
[
@@ -0,0 +1,12 @@
+# SHM CSR
+
+Somatic hypermutation (SHM) and class switch recombination (CSR) pipeline.
+
+# Dependencies
+--------------------
+[Python 2.7](https://www.python.org/)  
+[Change-O](https://changeo.readthedocs.io/en/version-0.4.4/)  
+[Baseline](http://selection.med.yale.edu/baseline/)  
+[R data.table](https://cran.r-project.org/web/packages/data.table/data.table.pdf)  
+[R ggplot2](https://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf)  
+[R reshape2](https://cran.r-project.org/web/packages/reshape2/reshape2.pdf)
\ No newline at end of file
b
diff -r 43a1aa648537 -r ba33b94637ca aa_histogram.r
--- a/aa_histogram.r Thu Dec 07 03:44:38 2017 -0500
+++ b/aa_histogram.r Tue Jan 29 03:54:09 2019 -0500
[
@@ -1,69 +1,69 @@
-library(ggplot2)
-
-args <- commandArgs(trailingOnly = TRUE)
-
-mutations.by.id.file = args[1]
-absent.aa.by.id.file = args[2]
-genes = strsplit(args[3], ",")[[1]]
-genes = c(genes, "")
-outdir = args[4]
-
-
-print("---------------- read input ----------------")
-
-mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="")
-absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="")
-
-for(gene in genes){
- graph.title = paste(gene, "AA mutation frequency")
- if(gene == ""){
- mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),]
- absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),]
-
- graph.title = "AA mutation frequency all"
- } else {
- mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),]
- absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),]
- }
- print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))
- if(nrow(mutations.by.id.gene) == 0){
- next
- }
-
- mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)])
- aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)])
-
- dat_freq = mutations.at.position / aa.at.position
- dat_freq[is.na(dat_freq)] = 0
- dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq)
-
-
- print("---------------- plot ----------------")
-
- m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black"))
- m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i)
- m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1")
- m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1")
- m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2")
- m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2")
- m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3")
- m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) 
- m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
- #m = m + scale_colour_manual(values=c("black"))
-
- print("---------------- write/print ----------------")
-
-
- dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position)
-
- write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
- write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
- write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
- write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-
- png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720)
- print(m)
- dev.off()
-
- ggsave(paste(outdir, "/aa_histogram_", gene, ".pdf", sep=""), m, width=14, height=7)
-}
+library(ggplot2)
+
+args <- commandArgs(trailingOnly = TRUE)
+
+mutations.by.id.file = args[1]
+absent.aa.by.id.file = args[2]
+genes = strsplit(args[3], ",")[[1]]
+genes = c(genes, "")
+outdir = args[4]
+
+
+print("---------------- read input ----------------")
+
+mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="")
+absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="")
+
+for(gene in genes){
+ graph.title = paste(gene, "AA mutation frequency")
+ if(gene == ""){
+ mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),]
+ absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),]
+
+ graph.title = "AA mutation frequency all"
+ } else {
+ mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),]
+ absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),]
+ }
+ print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))
+ if(nrow(mutations.by.id.gene) == 0){
+ next
+ }
+
+ mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)])
+ aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)])
+
+ dat_freq = mutations.at.position / aa.at.position
+ dat_freq[is.na(dat_freq)] = 0
+ dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq)
+
+
+ print("---------------- plot ----------------")
+
+ m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black"))
+ m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i)
+ m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1")
+ m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1")
+ m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2")
+ m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2")
+ m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3")
+ m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) 
+ m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+ #m = m + scale_colour_manual(values=c("black"))
+
+ print("---------------- write/print ----------------")
+
+
+ dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position)
+
+ write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+ write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+ write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+ write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+
+ png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720)
+ print(m)
+ dev.off()
+
+ ggsave(paste(outdir, "/aa_histogram_", gene, ".pdf", sep=""), m, width=14, height=7)
+}
b
diff -r 43a1aa648537 -r ba33b94637ca baseline/Baseline_Functions.r
--- a/baseline/Baseline_Functions.r Thu Dec 07 03:44:38 2017 -0500
+++ b/baseline/Baseline_Functions.r Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,2287 +1,2287 @@\n-#########################################################################################\r\n-# License Agreement\r\n-# \r\n-# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE \r\n-# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER \r\n-# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE \r\n-# OR COPYRIGHT LAW IS PROHIBITED.\r\n-# \r\n-# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE \r\n-# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED \r\n-# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN \r\n-# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.\r\n-#\r\n-# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences\r\n-# Coded by: Mohamed Uduman & Gur Yaari\r\n-# Copyright 2012 Kleinstein Lab\r\n-# Version: 1.3 (01/23/2014)\r\n-#########################################################################################\r\n-\r\n-# Global variables  \r\n-  \r\n-  FILTER_BY_MUTATIONS = 1000\r\n-\r\n-  # Nucleotides\r\n-  NUCLEOTIDES = c("A","C","G","T")\r\n-  \r\n-  # Amino Acids\r\n-  AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")\r\n-  names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", 
"GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")\r\n-  names(AMINO_ACIDS) <- names(AMINO_ACIDS)\r\n-\r\n-  #Amino Acid Traits\r\n-  #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"\r\n-  #B = "Hydrophobic/Burried"  N = "Intermediate/Neutral"  S="Hydrophilic/Surface") \r\n-  TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")\r\n-  names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))\r\n-  TRAITS_AMINO_ACIDS <- array(NA,21)\r\n-  \r\n-  # Codon Table\r\n-  CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))\r\n-\r\n-  # Substitution Model: Smith DS et al. 1996\r\n-  substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))\r\n-  substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))\r\n-  substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))\r\n-  load("FiveS_Substitution.RData")\r\n-\r\n-  # Mutability Models: Shapiro GS et al. 
2002\r\n-  triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.'..b'se{\n+    facGL <- factor(matInput[,2])\n+    facLevels = levels(facGL)\n+    LisGLs_MutabilityU = lapply(1:length(facLevels),  function(x){\n+      computeMutabilities(facLevels[x])\n+    })\n+    facIndex = match(facGL,facLevels)\n+    \n+    LisGLs_Mutability = lapply(1:nrow(matInput),  function(x){\n+      cInput = rep(NA,nchar(matInput[x,1]))\n+      cInput[s2c(matInput[x,1])!="N"] = 1\n+      LisGLs_MutabilityU[[facIndex[x]]] * cInput                                                   \n+    })\n+    \n+    LisGLs_Targeting =  lapply(1:dim(matInput)[1],  function(x){\n+      computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])\n+    })\n+    \n+    LisGLs_MutationTypes  = lapply(1:length(matInput[,2]),function(x){\n+      #print(x)\n+      computeMutationTypes(matInput[x,2])\n+    })\n+    \n+    LisGLs_R_Exp = lapply(1:nrow(matInput),  function(x){\n+      Exp_R <-  rollapply(as.zoo(1:readEnd),width=3,by=3,\n+                          function(codonNucs){                                                      \n+                            RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") \n+                            sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) \n+                          }\n+      )                                                   \n+    })\n+    \n+    LisGLs_S_Exp = lapply(1:nrow(matInput),  function(x){\n+      
Exp_S <-  rollapply(as.zoo(1:readEnd),width=3,by=3,\n+                          function(codonNucs){                                                      \n+                            SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")   \n+                            sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )\n+                          }\n+      )                                                 \n+    })                                                \n+    \n+    Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  \n+    Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  \n+    return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )    \n+  }\n+}\n+\n+# getObservedMutationsByCodon <- function(listMutations){\n+#   numbSeqs <- length(listMutations) \n+#   obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))\n+#   obsMu_S <- obsMu_R\n+#   temp <- mclapply(1:length(listMutations), function(i){\n+#     arrMutations = listMutations[[i]]\n+#     RPos = as.numeric(names(arrMutations)[arrMutations=="R"])\n+#     RPos <- sapply(RPos,getCodonNumb)                                                                    \n+#     if(any(RPos)){\n+#       tabR <- table(RPos)\n+#       obsMu_R[i,as.numeric(names(tabR))] <<- tabR\n+#     }                                    \n+#     \n+#     SPos = as.numeric(names(arrMutations)[arrMutations=="S"])\n+#     SPos <- sapply(SPos,getCodonNumb)\n+#     if(any(SPos)){\n+#       tabS <- table(SPos)\n+#       obsMu_S[i,names(tabS)] <<- tabS\n+#     }                                          \n+#   }\n+#   )\n+#   return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) \n+# }\n+\n+getObservedMutationsByCodon <- function(listMutations){\n+  numbSeqs <- length(listMutations) \n+  obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))\n+  obsMu_S <- obsMu_R\n+  temp <- 
lapply(1:length(listMutations), function(i){\n+    arrMutations = listMutations[[i]]\n+    RPos = as.numeric(names(arrMutations)[arrMutations=="R"])\n+    RPos <- sapply(RPos,getCodonNumb)                                                                    \n+    if(any(RPos)){\n+      tabR <- table(RPos)\n+      obsMu_R[i,as.numeric(names(tabR))] <<- tabR\n+    }                                    \n+    \n+    SPos = as.numeric(names(arrMutations)[arrMutations=="S"])\n+    SPos <- sapply(SPos,getCodonNumb)\n+    if(any(SPos)){\n+      tabS <- table(SPos)\n+      obsMu_S[i,names(tabS)] <<- tabS\n+    }                                          \n+  }\n+  )\n+  return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) \n+}\n+\n'
b
diff -r 43a1aa648537 -r ba33b94637ca baseline/Baseline_Main.r
--- a/baseline/Baseline_Main.r Thu Dec 07 03:44:38 2017 -0500
+++ b/baseline/Baseline_Main.r Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,388 +1,388 @@\n-#########################################################################################\r\n-# License Agreement\r\n-# \r\n-# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE \r\n-# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER \r\n-# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE \r\n-# OR COPYRIGHT LAW IS PROHIBITED.\r\n-# \r\n-# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE \r\n-# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED \r\n-# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN \r\n-# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.\r\n-#\r\n-# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences\r\n-# Coded by: Mohamed Uduman & Gur Yaari\r\n-# Copyright 2012 Kleinstein Lab\r\n-# Version: 1.3 (01/23/2014)\r\n-#########################################################################################\r\n-\r\n-op <- options();\r\n-options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1)\r\n-library(\'seqinr\')\r\n-if( F & Sys.info()[1]=="Linux"){\r\n-  library("multicore")\r\n-}\r\n-\r\n-# Load functions and initialize global variables\r\n-source("Baseline_Functions.r")\r\n-\r\n-# Initialize parameters with user provided arguments\r\n-  arg <- commandArgs(TRUE)                       \r\n-  #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample")\r\n-  #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample")\r\n-  #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu")\r\n-  testID <- as.numeric(arg[1])                    # 1 = Focused, 2 = Local\r\n-  species <- as.numeric(arg[2])                   # 1 = Human. 
2 = Mouse\r\n-  substitutionModel <- as.numeric(arg[3])         # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS\r\n-  mutabilityModel <- as.numeric(arg[4])           # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002)  , 5 = FiveS\r\n-  clonal <- as.numeric(arg[5])                    # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations\r\n-  fixIndels <- as.numeric(arg[6])                 # 0 = Do nothing, 1 = Try and fix Indels\r\n-  region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3\r\n-  inputFilePath <- arg[8]                         # Full path to input file\r\n-  outputPath <- arg[9]                            # Full path to location of output files\r\n-  outputID <- arg[10]                             # ID for session output  \r\n-  \r\n-\r\n-  if(testID==5){\r\n-    traitChangeModel <- 1\r\n-    if( !is.na(any(arg[11])) ) traitChangeModel <- as.numeric(arg[11])    # 1 <- Chothia 1998\r\n-    initializeTraitChange(traitChangeModel)    \r\n-  }\r\n-  \r\n-# Initialize other parameters/variables\r\n-    \r\n-  # Initialzie the codon table ( definitions of R/S )\r\n-  computeCodonTable(testID) \r\n-\r\n-  # Initialize   \r\n-  # Test Name\r\n-  testName<-"Focused"\r\n-  if(testID==2) testName<-"Local"\r\n-  if(testID==3) testName<-"Imbalanced"    \r\n-  if(testID==4) testName<-"ImbalancedSilent"    \r\n-    \r\n-  # Indel placeholders initialization\r\n-  indelPos <- NULL\r\n-  delPos <- NULL\r\n-  insPos <- NULL\r\n-\r\n-  # Initialize in Tranistion & Mutability matrixes\r\n-  substitution <- initializeSubstitutionMatrix(substitutionModel,species)\r\n-  mutability <- initializeMutabilityMatrix(mutabilityModel,species)\r\n-  \r\n-  # FWR/CDR boundaries\r\n-  flagTrim <- F\r\n-  if( is.na(region[7])){\r\n-    flagTrim <- T\r\n-    region[7]<-region[6]\r\n-  }\r\n-  readStart = min(region,na.rm=T)\r\n-  readEnd = max(region,na.rm=T)\r\n-  
if(readStart>1){\r\n-    region = region - (readStart - 1)\r\n-  }\r\n-  region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) )\r\n-  region_Cod = region\r\n-  \r\n-  readStart = (readStart*3)-2\r\n-  readEnd = (readEnd*3)\r\n-    \r\n-    FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])),\r\n-                '..b'fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G])\n+      listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]])\n+      names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1]\n+      #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){\n+      gs = unique(germlines[groups==G])\n+      rowNumb = rowNumb+1\n+      if( !is.na(gs) ){\n+        for( g in gs ){\n+          matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g])\n+          listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]])\n+          names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1]\n+          rowNumb = rowNumb+1\n+          indexesOfInterest = which(germlines==g)\n+          numbSeqsOfInterest =  length(indexesOfInterest)\n+          rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1))\n+          matOutput[rowNumb,] = matrix(   c(  rep("Sequence",numbSeqsOfInterest),\n+                                              rownames(matInput)[indexesOfInterest],\n+                                              c(matMutationInfo[indexesOfInterest,1:4]),\n+                                              c(matMutationInfo[indexesOfInterest,5:8]),\n+                                              c(bayes_cdr[indexesOfInterest,]),\n+                                              c(bayes_fwr[indexesOfInterest,]),\n+                                              c(simgaP_cdr[indexesOfInterest]),\n+                                              c(simgaP_fwr[indexesOfInterest])              
                                \n+          ), ncol=18, nrow=numbSeqsOfInterest,byrow=F)\n+          increment=0\n+          for( ioi in indexesOfInterest){\n+            listPDFs[[min(rowNumb)+increment]] =  list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]])\n+            names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi]\n+            increment = increment + 1\n+          }\n+          rowNumb=max(rowNumb)+1\n+\n+        }\n+      }\n+    }\n+    colsToFormat = 11:18\n+    matOutput[,colsToFormat] = formatC(  matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) ,  digits=3)\n+    matOutput[matOutput== " NaN"] = NA\n+    \n+    \n+    \n+    colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S",\n+                            "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S",\n+                            paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"),\n+                            paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_")\n+    )\n+    fileName = paste(outputPath,outputID,".txt",sep="")\n+    write.table(matOutput,file=fileName,quote=F,sep="\\t",row.names=T,col.names=NA)\n+    fileName = paste(outputPath,outputID,".RData",sep="")\n+    save(listPDFs,file=fileName)\n+\n+indelWarning = FALSE\n+if(sum(indelPos)>0){\n+  indelWarning = "<P>Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis.";\n+  indelWarning = paste( indelWarning , "<UL>", sep="" )\n+  for(indels in names(indelPos)[indelPos]){\n+    indelWarning = paste( indelWarning , "<LI>", indels, "</LI>", sep="" )\n+  }\n+  indelWarning = paste( indelWarning , "</UL></P>", sep="" )\n+}\n+\n+cloneWarning = FALSE\n+if(clonal==1){\n+  if(sum(matInputErrors)>0){\n+    cloneWarning = "<P>Warning: The following clones have sequences of unequal 
length.";\n+    cloneWarning = paste( cloneWarning , "<UL>", sep="" )\n+    for(clone in names(matInputErrors)[matInputErrors]){\n+      cloneWarning = paste( cloneWarning , "<LI>", names(germlines)[as.numeric(clone)], "</LI>", sep="" )\n+    }\n+    cloneWarning = paste( cloneWarning , "</UL></P>", sep="" )\n+  }\n+}\n+cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))\n'
b
diff -r 43a1aa648537 -r ba33b94637ca baseline/baseline_url.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/baseline_url.txt Tue Jan 29 03:54:09 2019 -0500
b
@@ -0,0 +1,1 @@
+http://selection.med.yale.edu/baseline/
\ No newline at end of file
b
diff -r 43a1aa648537 -r ba33b94637ca baseline/comparePDFs.r
--- a/baseline/comparePDFs.r Thu Dec 07 03:44:38 2017 -0500
+++ b/baseline/comparePDFs.r Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,225 +1,225 @@\n-options("warn"=-1)\r\n-\r\n-#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r\r\n-# Compute p-value of two distributions\r\n-compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){\r\n-#print(c(length(dens1),length(dens2)))\r\n-if(length(dens1)>1 & length(dens2)>1 ){\r\n-\tdens1<-dens1/sum(dens1)\r\n-\tdens2<-dens2/sum(dens2)\r\n-\tcum2 <- cumsum(dens2)-dens2/2\r\n-\ttmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i])))\r\n-\t#print(tmp)\r\n-\tif(tmp>0.5)tmp<-tmp-1\r\n-\treturn( tmp )\r\n-\t}\r\n-\telse {\r\n-\treturn(NA)\r\n-\t}\r\n-\t#return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)\r\n-}  \r\n-\r\n-\r\n-require("grid")\r\n-arg <- commandArgs(TRUE)\r\n-#arg <- c("300143","4","5")\r\n-arg[!arg=="clonal"]\r\n-input <- arg[1]\r\n-output <- arg[2]\r\n-rowIDs <- as.numeric(  sapply(arg[3:(max(3,length(arg)))],function(x){ gsub("chkbx","",x) } )  )\r\n-\r\n-numbSeqs = length(rowIDs)\r\n-\r\n-if ( is.na(rowIDs[1]) | numbSeqs>10 ) {\r\n-  stop( paste("Error: Please select between one and 10 seqeunces to compare.") )\r\n-}\r\n-\r\n-#load( paste("output/",sessionID,".RData",sep="") )\r\n-load( input )\r\n-#input\r\n-\r\n-xMarks = seq(-20,20,length.out=4001)\r\n-\r\n-plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){\r\n-  yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=T) * 1.1\r\n-\r\n-  if(length(xlim==2)){\r\n-    xMin=xlim[1]\r\n-    xMax=xlim[2]\r\n-  } else {\r\n-    xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1]\r\n-    xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1]\r\n-    xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])]\r\n-    xMax_FWR = 
xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])]\r\n-  \r\n-    xMin_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][1]\r\n-    xMin_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][1]\r\n-    xMax_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001])]\r\n-    xMax_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001])]\r\n-  \r\n-    xMin=min(c(xMin_CDR,xMin_FWR,xMin_CDR2,xMin_FWR2,0),na.rm=TRUE)\r\n-    xMax=max(c(xMax_CDR,xMax_FWR,xMax_CDR2,xMax_FWR2,0),na.rm=TRUE)\r\n-  }\r\n-\r\n-  sigma<-approx(xMarks,xout=seq(xMin,xMax,length.out=Sample))$x\r\n-  grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex))\r\n-  x <- sigma\r\n-  pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc"))\r\n-  #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex))\r\n-  pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05)))\r\n-  grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc")\r\n-  grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc")\r\n-  grid.rect()\r\n-  grid.xaxis(gp = gpar(cex=cex/1.1))\r\n-  yticks = pretty(c(-yMax,yMax),8)\r\n-  yticks = yticks[yticks>(-yMax) & yticks<(yMax)]\r\n-  grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1))\r\n-  if(length(listPDFs[pdf1][[1]][["CDR"]])>1){\r\n-    ycdr<-approx(xMarks,listPDFs[pdf1][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y\r\n-    grid.lines(unit(x,"native"), unit(ycdr,"native"),gp=gpar(col=2,lwd=2))\r\n-  }\r\n-  if(length(listPDFs[pdf1][[1]][["FWR"]])>1){\r\n-    yfwr<-approx(xMarks,listPDFs[pdf1][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y\r\n-    grid.lines(unit(x,"native"), unit(-yfwr,"native"),gp=gpar(col=4,lwd=2))\r\n-   }\r\n-\r\n-  
if(length(listPDFs[pdf2][[1]][["CDR"]])>1){\r\n-    ycdr2<-approx(xMarks,listPDFs[pdf2][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y\r\n-    grid.lines(unit(x,"native"), unit(ycdr2,"native"),gp=gpar(col=2,l'..b'= gpar(cex=cex))\n+    grid.text(formatC(as.numeric(pCDR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex))\n+    grid.text(formatC(as.numeric(pCDR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex))\n+    grid.text(formatC(as.numeric(pFWR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex))\n+    \n+           \n+ #   grid.text(paste("P = ",formatC(pCDRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.98, "npc"),just=c("center", "top"),gp = gpar(cex=cex))\n+ #   grid.text(paste("P = ",formatC(pFWRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.02, "npc"),just=c("center", "bottom"),gp = gpar(cex=cex))\n+  }\n+  else{\n+  }\n+}\n+\n+\n+##################################################################################\n+################## The whole OCD\'s matrix ########################################\n+##################################################################################\n+\n+#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3)\n+pdf( output ,width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) \n+\n+pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),w =0.96,height=0.96,layout = grid.layout(numbSeqs+1,numbSeqs+1,widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null")))))\n+\n+for( seqOne in 1:numbSeqs+1){\n+  pushViewport(viewport(layout.pos.col = seqOne-1, layout.pos.row = 1))\n+  if(seqOne>2){ \n+    grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")\n+    
grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")\n+    grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc")\n+       \n+    grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center")\n+    grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center")\n+  }\n+  grid.rect(gp = gpar(col=grey(0.9)))\n+  grid.text(y=.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),just="center")\n+  popViewport(1)\n+}\n+\n+for( seqOne in 1:numbSeqs+1){\n+  pushViewport(viewport(layout.pos.row = seqOne, layout.pos.col = numbSeqs+1))\n+  if(seqOne<=numbSeqs){   \n+    grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")\n+    grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")\n+    grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc")\n+    grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270)\n+    grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270)\n+  }\n+  grid.rect(gp = gpar(col=grey(0.9)))\n+  grid.text(x=0.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),rot=270,just="center")\n+  popViewport(1)\n+}\n+\n+for( seqOne in 1:numbSeqs+1){\n+  for(seqTwo in 1:numbSeqs+1){\n+    pushViewport(viewport(layout.pos.col = seqTwo-1, layout.pos.row = seqOne))\n+    if(seqTwo>seqOne){\n+      plot_pvals(rowIDs[seqOne-1],rowIDs[seqTwo-1],cex=2)\n+      grid.rect()\n+    }    \n+    popViewport(1)\n+  }\n+}\n+   \n+\n+xMin=0\n+xMax=0.01\n+for(pdf1 in rowIDs){\n+  xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1]\n+  xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1]\n+  xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])]\n+  xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])]\n+  
xMin=min(c(xMin_CDR,xMin_FWR,xMin),na.rm=TRUE)\n+  xMax=max(c(xMax_CDR,xMax_FWR,xMax),na.rm=TRUE)\n+}\n+\n+\n+\n+for(i in 1:numbSeqs+1){\n+  for(j in (i-1):numbSeqs){    \n+    pushViewport(viewport(layout.pos.col = i-1, layout.pos.row = j+1))\n+    grid.rect()\n+    plot_grid_s(rowIDs[i-1],rowIDs[j],cex=1)\n+    popViewport(1)\n+  }\n+}\n+\n+dev.off() \n+\n+cat("Success", paste(rowIDs,collapse="_"),sep=":")\n+\n'
b
diff -r 43a1aa648537 -r ba33b94637ca baseline/script_imgt.py
--- a/baseline/script_imgt.py Thu Dec 07 03:44:38 2017 -0500
+++ b/baseline/script_imgt.py Tue Jan 29 03:54:09 2019 -0500
[
@@ -1,86 +1,86 @@
-#import xlrd #avoid dep
-import argparse
-import re
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
-parser.add_argument("--ref", help="Reference file")
-parser.add_argument("--output", help="Output file")
-parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
-
-args = parser.parse_args()
-
-print "script_imgt.py"
-print "input:", args.input
-print "ref:", args.ref
-print "output:", args.output
-print "id:", args.id
-
-refdic = dict()
-with open(args.ref, 'rU') as ref:
- currentSeq = ""
- currentId = ""
- for line in ref:
- if line.startswith(">"):
- if currentSeq is not "" and currentId is not "":
- refdic[currentId[1:]] = currentSeq
- currentId = line.rstrip()
- currentSeq = ""
- else:
- currentSeq += line.rstrip()
- refdic[currentId[1:]] = currentSeq
-
-print "Have", str(len(refdic)), "reference sequences"
-
-vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
-# r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
-# r"(IGKV[0-3]D?-[0-9]{1,2})",
-# r"(IGLV[0-9]-[0-9]{1,2})",
-# r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
-# r"(TRGV[234589])",
-# r"(TRDV[1-3])"]
-
-#vPattern = re.compile(r"|".join(vPattern))
-vPattern = re.compile("|".join(vPattern))
-
-def filterGene(s, pattern):
-    if type(s) is not str:
-        return None
-    res = pattern.search(s)
-    if res:
-        return res.group(0)
-    return None
-
-
-
-currentSeq = ""
-currentId = ""
-first=True
-with open(args.input, 'r') as i:
- with open(args.output, 'a') as o:
- o.write(">>>" + args.id + "\n")
- outputdic = dict()
- for line in i:
- if first:
- first = False
- continue
- linesplt = line.split("\t")
- ref = filterGene(linesplt[1], vPattern)
- if not ref or not linesplt[2].rstrip():
- continue
- if ref in outputdic:
- outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
- else:
- outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
- #print outputdic
-
- for k in outputdic.keys():
- if k in refdic:
- o.write(">>" + k + "\n")
- o.write(refdic[k] + "\n")
- for seq in outputdic[k]:
- #print seq
- o.write(">" + seq[0] + "\n")
- o.write(seq[1] + "\n")
- else:
- print k + " not in reference, skipping " + k
+#import xlrd #avoid dep
+import argparse
+import re
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
+parser.add_argument("--ref", help="Reference file")
+parser.add_argument("--output", help="Output file")
+parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
+
+args = parser.parse_args()
+
+print "script_imgt.py"
+print "input:", args.input
+print "ref:", args.ref
+print "output:", args.output
+print "id:", args.id
+
+refdic = dict()
+with open(args.ref, 'rU') as ref:
+ currentSeq = ""
+ currentId = ""
+ for line in ref:
+ if line.startswith(">"):
+ if currentSeq is not "" and currentId is not "":
+ refdic[currentId[1:]] = currentSeq
+ currentId = line.rstrip()
+ currentSeq = ""
+ else:
+ currentSeq += line.rstrip()
+ refdic[currentId[1:]] = currentSeq
+
+print "Have", str(len(refdic)), "reference sequences"
+
+vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
+# r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
+# r"(IGKV[0-3]D?-[0-9]{1,2})",
+# r"(IGLV[0-9]-[0-9]{1,2})",
+# r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
+# r"(TRGV[234589])",
+# r"(TRDV[1-3])"]
+
+#vPattern = re.compile(r"|".join(vPattern))
+vPattern = re.compile("|".join(vPattern))
+
+def filterGene(s, pattern):
+    if type(s) is not str:
+        return None
+    res = pattern.search(s)
+    if res:
+        return res.group(0)
+    return None
+
+
+
+currentSeq = ""
+currentId = ""
+first=True
+with open(args.input, 'r') as i:
+ with open(args.output, 'a') as o:
+ o.write(">>>" + args.id + "\n")
+ outputdic = dict()
+ for line in i:
+ if first:
+ first = False
+ continue
+ linesplt = line.split("\t")
+ ref = filterGene(linesplt[1], vPattern)
+ if not ref or not linesplt[2].rstrip():
+ continue
+ if ref in outputdic:
+ outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
+ else:
+ outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
+ #print outputdic
+
+ for k in outputdic.keys():
+ if k in refdic:
+ o.write(">>" + k + "\n")
+ o.write(refdic[k] + "\n")
+ for seq in outputdic[k]:
+ #print seq
+ o.write(">" + seq[0] + "\n")
+ o.write(seq[1] + "\n")
+ else:
+ print k + " not in reference, skipping " + k
b
diff -r 43a1aa648537 -r ba33b94637ca baseline/script_xlsx.py
--- a/baseline/script_xlsx.py Thu Dec 07 03:44:38 2017 -0500
+++ b/baseline/script_xlsx.py Tue Jan 29 03:54:09 2019 -0500
[
@@ -1,58 +1,58 @@
-import xlrd
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
-parser.add_argument("--ref", help="Reference file")
-parser.add_argument("--output", help="Output file")
-
-args = parser.parse_args()
-
-gene_column = 6
-id_column = 7
-seq_column = 8
-LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
-
-
-refdic = dict()
-with open(args.ref, 'r') as ref:
- currentSeq = ""
- currentId = ""
- for line in ref.readlines():
- if line[0] is ">":
- if currentSeq is not "" and currentId is not "":
- refdic[currentId[1:]] = currentSeq
- currentId = line.rstrip()
- currentSeq = ""
- else:
- currentSeq += line.rstrip()
- refdic[currentId[1:]] = currentSeq
-
-currentSeq = ""
-currentId = ""
-with xlrd.open_workbook(args.input, 'r') as wb:
- with open(args.output, 'a') as o:
- for sheet in wb.sheets():
- if sheet.cell(1,gene_column).value.find("IGHV") < 0:
- print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name
- continue
- o.write(">>>" + sheet.name + "\n")
- outputdic = dict()
- for rowindex in range(1, sheet.nrows):
- ref = sheet.cell(rowindex, gene_column).value.replace(">", "")
- if ref in outputdic:
- outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
- else:
- outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
- #print outputdic
-
- for k in outputdic.keys():
- if k in refdic:
- o.write(">>" + k + "\n")
- o.write(refdic[k] + "\n")
- for seq in outputdic[k]:
- #print seq
- o.write(">" + seq[0] + "\n")
- o.write(seq[1] + "\n")
- else:
- print k + " not in reference, skipping " + k
+import xlrd
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
+parser.add_argument("--ref", help="Reference file")
+parser.add_argument("--output", help="Output file")
+
+args = parser.parse_args()
+
+gene_column = 6
+id_column = 7
+seq_column = 8
+LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
+
+
+refdic = dict()
+with open(args.ref, 'r') as ref:
+ currentSeq = ""
+ currentId = ""
+ for line in ref.readlines():
+ if line[0] is ">":
+ if currentSeq is not "" and currentId is not "":
+ refdic[currentId[1:]] = currentSeq
+ currentId = line.rstrip()
+ currentSeq = ""
+ else:
+ currentSeq += line.rstrip()
+ refdic[currentId[1:]] = currentSeq
+
+currentSeq = ""
+currentId = ""
+with xlrd.open_workbook(args.input, 'r') as wb:
+ with open(args.output, 'a') as o:
+ for sheet in wb.sheets():
+ if sheet.cell(1,gene_column).value.find("IGHV") < 0:
+ print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name
+ continue
+ o.write(">>>" + sheet.name + "\n")
+ outputdic = dict()
+ for rowindex in range(1, sheet.nrows):
+ ref = sheet.cell(rowindex, gene_column).value.replace(">", "")
+ if ref in outputdic:
+ outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
+ else:
+ outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
+ #print outputdic
+
+ for k in outputdic.keys():
+ if k in refdic:
+ o.write(">>" + k + "\n")
+ o.write(refdic[k] + "\n")
+ for seq in outputdic[k]:
+ #print seq
+ o.write(">" + seq[0] + "\n")
+ o.write(seq[1] + "\n")
+ else:
+ print k + " not in reference, skipping " + k
b
diff -r 43a1aa648537 -r ba33b94637ca change_o/change_o_url.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/change_o/change_o_url.txt Tue Jan 29 03:54:09 2019 -0500
b
@@ -0,0 +1,1 @@
+https://changeo.readthedocs.io/en/version-0.4.4/
\ No newline at end of file
b
diff -r 43a1aa648537 -r ba33b94637ca merge_and_filter.r
--- a/merge_and_filter.r Thu Dec 07 03:44:38 2017 -0500
+++ b/merge_and_filter.r Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,303 +1,303 @@\n-args <- commandArgs(trailingOnly = TRUE)\r\n-\r\n-\r\n-summaryfile = args[1]\r\n-sequencesfile = args[2]\r\n-mutationanalysisfile = args[3]\r\n-mutationstatsfile = args[4]\r\n-hotspotsfile = args[5]\r\n-aafile = args[6]\r\n-gene_identification_file= args[7]\r\n-output = args[8]\r\n-before.unique.file = args[9]\r\n-unmatchedfile = args[10]\r\n-method=args[11]\r\n-functionality=args[12]\r\n-unique.type=args[13]\r\n-filter.unique=args[14]\r\n-filter.unique.count=as.numeric(args[15])\r\n-class.filter=args[16]\r\n-empty.region.filter=args[17]\r\n-\r\n-print(paste("filter.unique.count:", filter.unique.count))\r\n-\r\n-summ = read.table(summaryfile, header=T, sep="\\t", fill=T, stringsAsFactors=F, quote="")\r\n-sequences = read.table(sequencesfile, header=T, sep="\\t", fill=T, stringsAsFactors=F, quote="")\r\n-mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\\t", fill=T, stringsAsFactors=F, quote="")\r\n-mutationstats = read.table(mutationstatsfile, header=T, sep="\\t", fill=T, stringsAsFactors=F, quote="")\r\n-hotspots = read.table(hotspotsfile, header=T, sep="\\t", fill=T, stringsAsFactors=F, quote="")\r\n-AAs = read.table(aafile, header=T, sep="\\t", fill=T, stringsAsFactors=F, quote="")\r\n-gene_identification = read.table(gene_identification_file, header=T, sep="\\t", fill=T, stringsAsFactors=F, quote="")\r\n-\r\n-fix_column_names = function(df){\r\n-    if("V.DOMAIN.Functionality" %in% names(df)){\r\n-        names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality"\r\n-        print("found V.DOMAIN.Functionality, changed")\r\n-    }\r\n-    if("V.DOMAIN.Functionality.comment" %in% names(df)){\r\n-        names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment"\r\n-        print("found V.DOMAIN.Functionality.comment, changed")\r\n-    }\r\n-    return(df)\r\n-}\r\n-\r\n-fix_non_unique_ids = function(df){\r\n-\tdf$Sequence.ID = paste(df$Sequence.ID, 
1:nrow(df))\r\n-\treturn(df)\r\n-}\r\n-\r\n-summ = fix_column_names(summ)\r\n-sequences = fix_column_names(sequences)\r\n-mutationanalysis = fix_column_names(mutationanalysis)\r\n-mutationstats = fix_column_names(mutationstats)\r\n-hotspots = fix_column_names(hotspots)\r\n-AAs = fix_column_names(AAs)\r\n-\r\n-if(method == "blastn"){\r\n-\t#"qseqid\\tsseqid\\tpident\\tlength\\tmismatch\\tgapopen\\tqstart\\tqend\\tsstart\\tsend\\tevalue\\tbitscore"\r\n-\tgene_identification = gene_identification[!duplicated(gene_identification$qseqid),]\r\n-\tref_length = data.frame(sseqid=c("ca1", "ca2", "cg1", "cg2", "cg3", "cg4", "cm"), ref.length=c(81,81,141,141,141,141,52))\r\n-\tgene_identification = merge(gene_identification, ref_length, by="sseqid", all.x=T)\r\n-\tgene_identification$chunk_hit_percentage = (gene_identification$length / gene_identification$ref.length) * 100\r\n-\tgene_identification = gene_identification[,c("qseqid", "chunk_hit_percentage", "pident", "qstart", "sseqid")]\r\n-\tcolnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")\r\n-}\r\n-\r\n-#print("Summary analysis files columns")\r\n-#print(names(summ))\r\n-\r\n-\r\n-\r\n-input.sequence.count = nrow(summ)\r\n-print(paste("Number of sequences in summary file:", input.sequence.count))\r\n-\r\n-filtering.steps = data.frame(character(0), numeric(0))\r\n-\r\n-filtering.steps = rbind(filtering.steps, c("Input", input.sequence.count))\r\n-\r\n-filtering.steps[,1] = as.character(filtering.steps[,1])\r\n-filtering.steps[,2] = as.character(filtering.steps[,2])\r\n-#filtering.steps[,3] = as.numeric(filtering.steps[,3])\r\n-\r\n-#print("summary files columns")\r\n-#print(names(summ))\r\n-\r\n-summ = merge(summ, gene_identification, by="Sequence.ID")\r\n-\r\n-print(paste("Number of sequences after merging with gene identification:", nrow(summ)))\r\n-\r\n-summ = summ[summ$Functionality != "No results",]\r\n-\r\n-print(paste("Number of sequences 
after \'No results\' filter:", nrow(summ)))\r\n-\r\n-filtering.steps = rbind(filtering.steps, c("After \'No results\' filter", nrow(summ)))\r\n-\r\n-if(functionality == "productive"){\r\n-\tsumm = summ[summ$Functionality == "productive (see comment)" | summ$Functionali'..b'mes=T)\n+\n+if(filter.unique != "no"){\n+\tclmns = names(result)\n+\tif(filter.unique == "remove_vjaa"){\n+\t\tresult$unique.def = paste(result$VGene, result$JGene, result$CDR3.IMGT.AA)\n+\t} else if(empty.region.filter == "leader"){\n+\t\tresult$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)\n+\t} else if(empty.region.filter == "FR1"){\n+\t\tresult$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)\n+\t} else if(empty.region.filter == "CDR1"){\n+\t\tresult$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)\n+\t} else if(empty.region.filter == "FR2"){\n+\t\tresult$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)\n+\t}\n+\t\n+\tif(grepl("remove", filter.unique)){\n+\t\tresult = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]\n+\t\tunique.defs = data.frame(table(result$unique.def))\n+\t\tunique.defs = unique.defs[unique.defs$Freq >= filter.unique.count,]\n+\t\tresult = result[result$unique.def %in% unique.defs$Var1,]\n+\t}\n+\n+\tif(filter.unique != "remove_vjaa"){\n+\t\tresult$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don\'t have a class after it\n+\t}\n+\n+\tresult = result[!duplicated(result$unique.def),]\n+}\n+\n+write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\\t", quote=F,row.names=F,col.names=T)\n+\n+filtering.steps = 
rbind(filtering.steps, c("After filter unique sequences", nrow(result)))\n+\n+print(paste("Number of sequences in result after unique filtering:", nrow(result)))\n+\n+if(nrow(summ) == 0){\n+\tstop("No data remaining after filter")\n+}\n+\n+result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don\'t have a class after it\n+\n+#result$past = ""\n+#cls = unlist(strsplit(unique.type, ","))\n+#for (i in 1:nrow(result)){\n+#\tresult[i,"past"] = paste(result[i,cls], collapse=":")\n+#}\n+\n+\n+\n+result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":"))\n+\n+result.matched = result[!grepl("unmatched", result$best_match),]\n+result.unmatched = result[grepl("unmatched", result$best_match),]\n+\n+result = rbind(result.matched, result.unmatched)\n+\n+result = result[!(duplicated(result$past)), ]\n+\n+result = result[,!(names(result) %in% c("past", "best_match_class"))]\n+\n+print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result)))\n+\n+filtering.steps = rbind(filtering.steps, c("After remove duplicates based on filter", nrow(result)))\n+\n+unmatched = result[grepl("^unmatched", result$best_match),c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]\n+\n+print(paste("Number of rows in result:", nrow(result)))\n+print(paste("Number of rows in unmatched:", nrow(unmatched)))\n+\n+matched.sequences = result[!grepl("^unmatched", result$best_match),]\n+\n+write.table(x=matched.sequences, file=gsub("merged.txt$", "filtered.txt", output), sep="\\t",quote=F,row.names=F,col.names=T)\n+\n+matched.sequences.count = nrow(matched.sequences)\n+unmatched.sequences.count = sum(grepl("^unmatched", result$best_match))\n+\n+filtering.steps = rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count))\n+filtering.steps = rbind(filtering.steps, c("Number of unmatched sequences", unmatched.sequences.count))\n+filtering.steps[,2] = 
as.numeric(filtering.steps[,2])\n+filtering.steps$perc = round(filtering.steps[,2] / input.sequence.count * 100, 2)\n+\n+write.table(x=filtering.steps, file=gsub("unmatched", "filtering_steps", unmatchedfile), sep="\\t",quote=F,row.names=F,col.names=F)\n+\n+write.table(x=result, file=output, sep="\\t",quote=F,row.names=F,col.names=T)\n+write.table(x=unmatched, file=unmatchedfile, sep="\\t",quote=F,row.names=F,col.names=T)\n'
b
diff -r 43a1aa648537 -r ba33b94637ca shm_clonality.htm
--- a/shm_clonality.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_clonality.htm Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,144 +1,144 @@\n-<html>\r\n-\r\n-<head>\r\n-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">\r\n-<meta name=Generator content="Microsoft Word 14 (filtered)">\r\n-<style>\r\n-<!--\r\n- /* Font Definitions */\r\n- @font-face\r\n-\t{font-family:Calibri;\r\n-\tpanose-1:2 15 5 2 2 2 4 3 2 4;}\r\n-@font-face\r\n-\t{font-family:Tahoma;\r\n-\tpanose-1:2 11 6 4 3 5 4 4 2 4;}\r\n- /* Style Definitions */\r\n- p.MsoNormal, li.MsoNormal, div.MsoNormal\r\n-\t{margin-top:0in;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:11.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-a:link, span.MsoHyperlink\r\n-\t{color:blue;\r\n-\ttext-decoration:underline;}\r\n-a:visited, span.MsoHyperlinkFollowed\r\n-\t{color:purple;\r\n-\ttext-decoration:underline;}\r\n-p\r\n-\t{margin-right:0in;\r\n-\tmargin-left:0in;\r\n-\tfont-size:12.0pt;\r\n-\tfont-family:"Times New Roman","serif";}\r\n-p.MsoAcetate, li.MsoAcetate, div.MsoAcetate\r\n-\t{mso-style-link:"Balloon Text Char";\r\n-\tmargin:0in;\r\n-\tmargin-bottom:.0001pt;\r\n-\tfont-size:8.0pt;\r\n-\tfont-family:"Tahoma","sans-serif";}\r\n-p.msochpdefault, li.msochpdefault, div.msochpdefault\r\n-\t{mso-style-name:msochpdefault;\r\n-\tmargin-right:0in;\r\n-\tmargin-left:0in;\r\n-\tfont-size:12.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-p.msopapdefault, li.msopapdefault, div.msopapdefault\r\n-\t{mso-style-name:msopapdefault;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:12.0pt;\r\n-\tfont-family:"Times New Roman","serif";}\r\n-span.apple-converted-space\r\n-\t{mso-style-name:apple-converted-space;}\r\n-span.BalloonTextChar\r\n-\t{mso-style-name:"Balloon Text Char";\r\n-\tmso-style-link:"Balloon 
Text";\r\n-\tfont-family:"Tahoma","sans-serif";}\r\n-.MsoChpDefault\r\n-\t{font-size:10.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-.MsoPapDefault\r\n-\t{margin-bottom:10.0pt;\r\n-\tline-height:115%;}\r\n-@page WordSection1\r\n-\t{size:8.5in 11.0in;\r\n-\tmargin:1.0in 1.0in 1.0in 1.0in;}\r\n-div.WordSection1\r\n-\t{page:WordSection1;}\r\n--->\r\n-</style>\r\n-\r\n-</head>\r\n-\r\n-<body lang=EN-US link=blue vlink=purple>\r\n-\r\n-<div class=WordSection1>\r\n-\r\n-<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\r\n-text-align:justify;background:white\'><b><span lang=EN-GB style=\'color:black\'>References</span></b></p>\r\n-\r\n-<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\r\n-text-align:justify;background:white\'><span lang=EN-GB style=\'color:black\'>Gupta,\r\n-Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria,\r\n-Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). <a name="OLE_LINK106"></a><a\r\n-name="OLE_LINK107"></a>Change-O: a toolkit for analyzing large-scale B cell\r\n-immunoglobulin repertoire sequencing data: Table 1. 
In<span\r\n-class=apple-converted-space>&nbsp;</span><em>Bioinformatics, 31 (20), pp.\r\n-3356\x963358.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><a\r\n-href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span\r\n-lang=EN-GB style=\'color:#303030\'>doi:10.1093/bioinformatics/btv359</span></a><span\r\n-lang=EN-GB style=\'color:black\'>][</span><a\r\n-href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span\r\n-lang=EN-GB style=\'color:#303030\'>Link</span></a><span lang=EN-GB\r\n-style=\'color:black\'>]</span></p>\r\n-\r\n-<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\r\n-text-align:justify;background:white\'><span lang=EN-GB style=\'color:black\'>&nbsp;</span></p>\r\n-\r\n-<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\r\n-text-align:justify;background:white\'><a name="OLE_LINK110"><u><span lang=EN-GB\r\n-style=\'color:black\'>All, IGA, IGG, IGM and IGE tabs</span></u></a></p>\r\n-\r\n-<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\r\n-text-align:justify;background:white\'><span lang=EN-GB style=\'color:black\'>In\r\n-these tabs information on the clonal relation of transcripts can be found. To\r\n-calculate clonal relation Change-O is used (Gupta et'..b'ckground:white\'><span lang=EN-GB style=\'color:black\'>Gupta,\n+Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria,\n+Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). <a name="OLE_LINK106"></a><a\n+name="OLE_LINK107"></a>Change-O: a toolkit for analyzing large-scale B cell\n+immunoglobulin repertoire sequencing data: Table 1. 
In<span\n+class=apple-converted-space>&nbsp;</span><em>Bioinformatics, 31 (20), pp.\n+3356\x963358.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><a\n+href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span\n+lang=EN-GB style=\'color:#303030\'>doi:10.1093/bioinformatics/btv359</span></a><span\n+lang=EN-GB style=\'color:black\'>][</span><a\n+href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span\n+lang=EN-GB style=\'color:#303030\'>Link</span></a><span lang=EN-GB\n+style=\'color:black\'>]</span></p>\n+\n+<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\n+text-align:justify;background:white\'><span lang=EN-GB style=\'color:black\'>&nbsp;</span></p>\n+\n+<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\n+text-align:justify;background:white\'><a name="OLE_LINK110"><u><span lang=EN-GB\n+style=\'color:black\'>All, IGA, IGG, IGM and IGE tabs</span></u></a></p>\n+\n+<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\n+text-align:justify;background:white\'><span lang=EN-GB style=\'color:black\'>In\n+these tabs information on the clonal relation of transcripts can be found. To\n+calculate clonal relation Change-O is used (Gupta et al, PMID: 26069265).\n+Transcripts are considered clonally related if they have maximal three nucleotides\n+difference in their CDR3 sequence and the same first V segment (as assigned by\n+IMGT). Results are represented in a table format showing the clone size and the\n+number of clones or sequences with this clone size. Change-O settings used are\n+the </span><span lang=EN-GB>nucleotide hamming distance substitution model with\n+a complete distance of maximal three. For clonal assignment the first gene\n+segments were used, and the distances were not normalized. 
In case of\n+asymmetric distances, the minimal distance was used.<span style=\'color:black\'> </span></span></p>\n+\n+<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\n+text-align:justify;background:white\'><span lang=EN-GB style=\'color:black\'>&nbsp;</span></p>\n+\n+<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\n+text-align:justify;background:white\'><u><span lang=EN-GB style=\'color:black\'>Overlap\n+tab</span></u><span lang=EN-GB style=\'color:black\'> </span></p>\n+\n+<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\n+text-align:justify;background:white\'><span lang=EN-GB style=\'color:black\'>This\n+tab gives information on with which (sub)classe(s) each unique analyzed region\n+(based on the exact nucleotide sequence of the analyzes region and the CDR3\n+nucleotide sequence) is found with. This gives information if the combination\n+of the exact same nucleotide sequence of the analyzed region and the CDR3\n+sequence can be found in multiple (sub)classes.</span></p>\n+\n+<p style=\'margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;\n+text-align:justify;background:white\'><span style=\'color:black\'><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAPCAYAAAA71pVKAAAAzElEQVQoka2TwQ2CQBBFpwTshw4ImW8ogJMlUIMmhNCDxgasAi50oSXA8XlAjCG7aqKTzGX/vsnM31mzR0gk7tTudO5MEizpzvQ4ryUSe408J3Xn+grE0p1rnpOamVmWsZG4rS+dzzAMsN8Hi9yyjI1JNGtxu4VxBJgLRLpoTKIPiW0LlwtUVRTubW2OBGUJu92cZRmdfbKQMAw8o+vi5v0fLorZ7Y9waGYJjsf38DJz0O1PsEQffOcv4Sa6YYfDDJ5Obzbsp93+5VfdATueO1fdLdI0AAAAAElFTkSuQmCC"> Please note that this tab is based on all\n+sequences before filter unique sequences and the remove duplicates based on\n+filters are applied. In this table only sequences occuring more than once are\n+included. </span></p>\n+\n+</div>\n+\n+</body>\n+\n+</html>\n'
b
diff -r 43a1aa648537 -r ba33b94637ca shm_csr.htm
--- a/shm_csr.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_csr.htm Tue Jan 29 03:54:09 2019 -0500
b
@@ -1,95 +1,95 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
- {font-family:Calibri;
- panose-1:2 15 5 2 2 2 4 3 2 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
- {margin-top:0in;
- margin-right:0in;
- margin-bottom:10.0pt;
- margin-left:0in;
- line-height:115%;
- font-size:11.0pt;
- font-family:"Calibri","sans-serif";}
-a:link, span.MsoHyperlink
- {color:blue;
- text-decoration:underline;}
-a:visited, span.MsoHyperlinkFollowed
- {color:purple;
- text-decoration:underline;}
-span.apple-converted-space
- {mso-style-name:apple-converted-space;}
-.MsoChpDefault
- {font-family:"Calibri","sans-serif";}
-.MsoPapDefault
- {margin-bottom:10.0pt;
- line-height:115%;}
-@page WordSection1
- {size:8.5in 11.0in;
- margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
- {page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US link=blue vlink=purple>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
-graphs in this tab give insight into the subclass distribution of IGG and IGA
-transcripts. </span><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Human C�, C&#945;, C&#947; and C&#949;
-constant genes are assigned using a </span><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>custom script
-specifically designed for human (sub)class assignment in repertoire data as
-described in van Schouwenburg and IJspeert et al, submitted for publication. In
-this script the reference sequences for the subclasses are divided in 8
-nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are
-then individually aligned in the right order to each input sequence. The
-percentage of the chunks identified in each rearrangement is calculated in the
-�chunk hit percentage�. </span><span lang=EN-GB style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'>C&#945; and C&#947;
-subclasses are very homologous and only differ in a few nucleotides. To assign
-subclasses the </span><span lang=EN-GB style='font-size:12.0pt;line-height:
-115%;font-family:"Times New Roman","serif"'>�nt hit percentage� is calculated.
-This percentage indicates how well the chunks covering the subclass specific
-nucleotide match with the different subclasses. </span><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Information
-on normal distribution of subclasses in healthy individuals of different ages
-can be found in IJspeert and van Schouwenburg et al, PMID: 27799928.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK100"></a><a
-name="OLE_LINK99"></a><a name="OLE_LINK25"><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGA
-subclass distribution</span></u></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
-chart showing the relative distribution of IGA1 and IGA2 transcripts in the
-sample.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGG
-subclass distribution</span></u></p>
-
-<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
-chart showing the relative distribution of IGG1, IGG2, IGG3 and IGG4
-transcripts in the sample.</span></p>
-
-</div>
-
-</body>
-
-</html>
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+ {font-family:Calibri;
+ panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+ {margin-top:0in;
+ margin-right:0in;
+ margin-bottom:10.0pt;
+ margin-left:0in;
+ line-height:115%;
+ font-size:11.0pt;
+ font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+ {color:blue;
+ text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+ {color:purple;
+ text-decoration:underline;}
+span.apple-converted-space
+ {mso-style-name:apple-converted-space;}
+.MsoChpDefault
+ {font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+ {margin-bottom:10.0pt;
+ line-height:115%;}
+@page WordSection1
+ {size:8.5in 11.0in;
+ margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+ {page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
+graphs in this tab give insight into the subclass distribution of IGG and IGA
+transcripts. </span><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Human C&#956;, C&#945;, C&#947; and C&#949;
+constant genes are assigned using a </span><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>custom script
+specifically designed for human (sub)class assignment in repertoire data as
+described in van Schouwenburg and IJspeert et al, submitted for publication. In
+this script the reference sequences for the subclasses are divided in 8
+nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are
+then individually aligned in the right order to each input sequence. The
+percentage of the chunks identified in each rearrangement is calculated in the
+&#8220;chunk hit percentage&#8221;. </span><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>C&#945; and C&#947;
+subclasses are very homologous and only differ in a few nucleotides. To assign
+subclasses the </span><span lang=EN-GB style='font-size:12.0pt;line-height:
+115%;font-family:"Times New Roman","serif"'>&#8220;nt hit percentage&#8221; is calculated.
+This percentage indicates how well the chunks covering the subclass specific
+nucleotide match with the different subclasses. </span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Information
+on normal distribution of subclasses in healthy individuals of different ages
+can be found in IJspeert and van Schouwenburg et al, PMID: 27799928.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK100"></a><a
+name="OLE_LINK99"></a><a name="OLE_LINK25"><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGA
+subclass distribution</span></u></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
+chart showing the relative distribution of IGA1 and IGA2 transcripts in the
+sample.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGG
+subclass distribution</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
+chart showing the relative distribution of IGG1, IGG2, IGG3 and IGG4
+transcripts in the sample.</span></p>
+
+</div>
+
+</body>
+
+</html>
b
diff -r 43a1aa648537 -r ba33b94637ca shm_csr.xml
--- a/shm_csr.xml Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_csr.xml Tue Jan 29 03:54:09 2019 -0500
b
@@ -1,14 +1,13 @@
 <tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.0">
  <description></description>
  <requirements>
- <!--
- <requirement type="package" version="3.3.2">r-base</requirement>
- <requirement type="package" version="3.1_3">r-seqinr</requirement>
- <requirement type="package" version="2.2.0">r-ggplot2</requirement>
- <requirement type="package" version="1.4.2">r-reshape2</requirement>
- <requirement type="package" version="0.4.1">r-scales</requirement>
- <requirement type="package" version="1.10.0">r-data.table</requirement>
- -->
+ <requirement type="package" version="1.16.0">numpy</requirement>
+ <requirement type="package" version="1.2.0">xlrd</requirement>
+ <requirement type="package" version="3.0.0">r-ggplot2</requirement>
+ <requirement type="package" version="1.4.3">r-reshape2</requirement>
+ <requirement type="package" version="0.5.0">r-scales</requirement>
+ <requirement type="package" version="3.4_5">r-seqinr</requirement>
+ <requirement type="package" version="1.11.4">r-data.table</requirement>
  </requirements>
  <command interpreter="bash">
  #if str ( $filter_unique.filter_unique_select ) == "remove":
b
diff -r 43a1aa648537 -r ba33b94637ca shm_downloads.htm
--- a/shm_downloads.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_downloads.htm Tue Jan 29 03:54:09 2019 -0500
b
b'@@ -1,538 +1,538 @@\n-<html>\r\n-\r\n-<head>\r\n-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">\r\n-<meta name=Generator content="Microsoft Word 14 (filtered)">\r\n-<style>\r\n-<!--\r\n- /* Font Definitions */\r\n- @font-face\r\n-\t{font-family:Calibri;\r\n-\tpanose-1:2 15 5 2 2 2 4 3 2 4;}\r\n- /* Style Definitions */\r\n- p.MsoNormal, li.MsoNormal, div.MsoNormal\r\n-\t{margin-top:0in;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:11.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-a:link, span.MsoHyperlink\r\n-\t{color:blue;\r\n-\ttext-decoration:underline;}\r\n-a:visited, span.MsoHyperlinkFollowed\r\n-\t{color:purple;\r\n-\ttext-decoration:underline;}\r\n-p.MsoNoSpacing, li.MsoNoSpacing, div.MsoNoSpacing\r\n-\t{margin:0in;\r\n-\tmargin-bottom:.0001pt;\r\n-\tfont-size:11.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-.MsoChpDefault\r\n-\t{font-family:"Calibri","sans-serif";}\r\n-.MsoPapDefault\r\n-\t{margin-bottom:10.0pt;\r\n-\tline-height:115%;}\r\n-@page WordSection1\r\n-\t{size:8.5in 11.0in;\r\n-\tmargin:1.0in 1.0in 1.0in 1.0in;}\r\n-div.WordSection1\r\n-\t{page:WordSection1;}\r\n--->\r\n-</style>\r\n-\r\n-</head>\r\n-\r\n-<body lang=EN-US link=blue vlink=purple>\r\n-\r\n-<div class=WordSection1>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><b><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>Info</span></b></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>The complete\r\n-dataset:</span></u><span lang=EN-GB style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>\r\n-Allows downloading of the complete parsed data set.</span></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>The 
filtered\r\n-dataset:</span></u><span lang=EN-GB style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>\r\n-Allows downloading of all parsed IMGT information of all transcripts that\r\n-passed the chosen filter settings.</span></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>The alignment\r\n-info on the unmatched sequences:</span></u><span lang=EN-GB style=\'font-size:\r\n-12.0pt;font-family:"Times New Roman","serif"\'> Provides information of the subclass\r\n-alignment of all unmatched sequences. For each sequence the chunck hit\r\n-percentage and the nt hit percentage is shown together with the best matched\r\n-subclass.</span></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><b><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>SHM Overview</span></b></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>The SHM Overview\r\n-table as a dataset:</span></u><span lang=EN-GB style=\'font-size:12.0pt;\r\n-font-family:"Times New Roman","serif"\'> Allows downloading of the SHM Overview\r\n-table as a data set.\xa0 </span></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>Motif data per\r\n-sequence ID:</span></u><span lang=EN-GB style=\'font-size:12.0pt;font-family:\r\n-"Times New Roman","serif"\'> Provides a file that contains information for each\r\n-transcript on the number of mutations present in WA/TW and RGYW/WRCY motives.</span></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>Mutation data\r\n-per sequence ID: </span></u><span lang=EN-GB style=\'font-size:12.0pt;\r\n-font-family:"Times New 
Roman","serif"\'>Provides a file containing information\r\n-on the number of sequences bases, the number and location of mutations and the\r\n-type of mutations found in each transcript. </span></p>\r\n-\r\n-<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style='..b'man","serif"\'>An IMGT archive\n+with just the matched and filtered IGA1 sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a\n+.txz file with the same format as downloaded IMGT files that contains all IGA1\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGA2 sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a .txz\n+file with the same format as downloaded IMGT files that contains all IGA2\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGG sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a .txz\n+file with the same format as downloaded IMGT files that contains all IGG\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGG1 sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a\n+.txz file with the same format as downloaded IMGT files that contains all IGG1\n+sequences that have passed 
the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGG2 sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a\n+.txz file with the same format as downloaded IMGT files that contains all IGG2\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGG3 sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a .txz\n+file with the same format as downloaded IMGT files that contains all IGG3\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGG4 sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a\n+.txz file with the same format as downloaded IMGT files that contains all IGG4\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGM sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a .txz\n+file with the same format as downloaded IMGT files that contains all IGM\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+<p class=MsoNoSpacing style=\'text-align:justify\'><u><span 
lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>An IMGT archive\n+with just the matched and filtered IGE sequences:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> Downloads a\n+.txz file with the same format as downloaded IMGT files that contains all IGE\n+sequences that have passed the chosen filter settings.</span></p>\n+\n+</div>\n+\n+</body>\n+\n+</html>\n'
b
diff -r 43a1aa648537 -r ba33b94637ca shm_first.htm
--- a/shm_first.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_first.htm Tue Jan 29 03:54:09 2019 -0500
b
b'@@ -1,127 +1,127 @@\n-<html>\r\n-\r\n-<head>\r\n-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">\r\n-<meta name=Generator content="Microsoft Word 14 (filtered)">\r\n-<style>\r\n-<!--\r\n- /* Font Definitions */\r\n- @font-face\r\n-\t{font-family:Calibri;\r\n-\tpanose-1:2 15 5 2 2 2 4 3 2 4;}\r\n- /* Style Definitions */\r\n- p.MsoNormal, li.MsoNormal, div.MsoNormal\r\n-\t{margin-top:0in;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:11.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-.MsoChpDefault\r\n-\t{font-family:"Calibri","sans-serif";}\r\n-.MsoPapDefault\r\n-\t{margin-bottom:10.0pt;\r\n-\tline-height:115%;}\r\n-@page WordSection1\r\n-\t{size:8.5in 11.0in;\r\n-\tmargin:1.0in 1.0in 1.0in 1.0in;}\r\n-div.WordSection1\r\n-\t{page:WordSection1;}\r\n--->\r\n-</style>\r\n-\r\n-</head>\r\n-\r\n-<body lang=EN-US>\r\n-\r\n-<div class=WordSection1>\r\n-\r\n-<p class=MsoNormalCxSpFirst style=\'margin-bottom:0in;margin-bottom:.0001pt;\r\n-text-align:justify;line-height:normal\'><span lang=EN-GB style=\'font-size:12.0pt;\r\n-font-family:"Times New Roman","serif"\'>Table showing the order of each\r\n-filtering step and the number and percentage of sequences after each filtering\r\n-step. </span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'margin-bottom:0in;margin-bottom:.0001pt;\r\n-text-align:justify;line-height:normal\'><u><span lang=EN-GB style=\'font-size:\r\n-12.0pt;font-family:"Times New Roman","serif"\'>Input:</span></u><span\r\n-lang=EN-GB style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> The\r\n-number of sequences in the original IMGT file. 
This is always 100% of the\r\n-sequences.</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'margin-bottom:0in;margin-bottom:.0001pt;\r\n-text-align:justify;line-height:normal\'><u><span lang=EN-GB style=\'font-size:\r\n-12.0pt;font-family:"Times New Roman","serif"\'>After &quot;no results&quot; filter: </span></u><span\r\n-lang=EN-GB style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'>IMGT\r\n-classifies sequences either as &quot;productive&quot;, &quot;unproductive&quot;, &quot;unknown&quot;, or &quot;no\r\n-results&quot;. Here, the number and percentages of sequences that are not classified\r\n-as &quot;no results&quot; are reported.</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'margin-bottom:0in;margin-bottom:.0001pt;\r\n-text-align:justify;line-height:normal\'><u><span lang=EN-GB style=\'font-size:\r\n-12.0pt;font-family:"Times New Roman","serif"\'>After functionality filter:</span></u><span\r\n-lang=EN-GB style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> The\r\n-number and percentages of sequences that have passed the functionality filter. The\r\n-filtering performed is dependent on the settings of the functionality filter.\r\n-Details on the functionality filter <a name="OLE_LINK12"></a><a\r\n-name="OLE_LINK11"></a><a name="OLE_LINK10">can be found on the start page of\r\n-the SHM&amp;CSR pipeline</a>.</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>After\r\n-removal sequences that are missing a gene region:</span></u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>\r\n-In this step all sequences that are missing a gene region (FR1, CDR1, FR2,\r\n-CDR2, FR3) that should be present are removed from analysis. The sequence\r\n-regions that should be present are dependent on the settings of the sequence\r\n-starts at filter. 
<a name="OLE_LINK9"></a><a name="OLE_LINK8">The number and\r\n-percentage of sequences that pass this filter step are reported.</a> </span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>After\r\n-N filter:</span></u><span lang=EN-GB style=\'font-size:12.0pt;line-height:115%;\r\n-font-family:"Times New Roman","serif"\'> In this step all sequences that contain\r\n-an ambiguous base (n) in the analysed r'..b'><u><span lang=EN-GB style=\'font-size:\n+12.0pt;font-family:"Times New Roman","serif"\'>After functionality filter:</span></u><span\n+lang=EN-GB style=\'font-size:12.0pt;font-family:"Times New Roman","serif"\'> The\n+number and percentages of sequences that have passed the functionality filter. The\n+filtering performed is dependent on the settings of the functionality filter.\n+Details on the functionality filter <a name="OLE_LINK12"></a><a\n+name="OLE_LINK11"></a><a name="OLE_LINK10">can be found on the start page of\n+the SHM&amp;CSR pipeline</a>.</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>After\n+removal sequences that are missing a gene region:</span></u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>\n+In this step all sequences that are missing a gene region (FR1, CDR1, FR2,\n+CDR2, FR3) that should be present are removed from analysis. The sequence\n+regions that should be present are dependent on the settings of the sequence\n+starts at filter. 
<a name="OLE_LINK9"></a><a name="OLE_LINK8">The number and\n+percentage of sequences that pass this filter step are reported.</a> </span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>After\n+N filter:</span></u><span lang=EN-GB style=\'font-size:12.0pt;line-height:115%;\n+font-family:"Times New Roman","serif"\'> In this step all sequences that contain\n+an ambiguous base (n) in the analysed region or the CDR3 are removed from the\n+analysis. The analysed region is determined by the setting of the sequence\n+starts at filter. The number and percentage of sequences that pass this filter\n+step are reported.</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>After\n+filter unique sequences</span></u><span lang=EN-GB style=\'font-size:12.0pt;\n+line-height:115%;font-family:"Times New Roman","serif"\'>: The number and\n+percentage of sequences that pass the &quot;filter unique sequences&quot; filter. Details\n+on this filter </span><span lang=EN-GB style=\'font-size:12.0pt;line-height:\n+115%;font-family:"Times New Roman","serif"\'>can be found on the start page of\n+the SHM&amp;CSR pipeline</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>After\n+remove duplicate based on filter:</span></u><span lang=EN-GB style=\'font-size:\n+12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'> The number and\n+percentage of sequences that passed the remove duplicate filter. 
Details on the\n+&quot;remove duplicate filter based on filter&quot; can be found on the start page of the\n+SHM&amp;CSR pipeline.</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK17"></a><a\n+name="OLE_LINK16"><u><span lang=EN-GB style=\'font-size:12.0pt;line-height:115%;\n+font-family:"Times New Roman","serif"\'>Number of matches sequences:</span></u></a><span\n+lang=EN-GB style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>\n+The number and percentage of sequences that passed all the filters described\n+above and have a (sub)class assigned.</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Number\n+of unmatched sequences</span></u><span lang=EN-GB style=\'font-size:12.0pt;\n+line-height:115%;font-family:"Times New Roman","serif"\'>: The number and percentage\n+of sequences that passed all the filters described above and do not have\n+subclass assigned.</span></p>\n+\n+<p class=MsoNormal><span lang=EN-GB>&nbsp;</span></p>\n+\n+</div>\n+\n+</body>\n+\n+</html>\n'
b
diff -r 43a1aa648537 -r ba33b94637ca shm_frequency.htm
--- a/shm_frequency.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_frequency.htm Tue Jan 29 03:54:09 2019 -0500
[
@@ -1,87 +1,87 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
- {margin-top:0in;
- margin-right:0in;
- margin-bottom:10.0pt;
- margin-left:0in;
- line-height:115%;
- font-size:11.0pt;
- font-family:"Calibri","sans-serif";}
-.MsoChpDefault
- {font-family:"Calibri","sans-serif";}
-.MsoPapDefault
- {margin-bottom:10.0pt;
- line-height:115%;}
-@page WordSection1
- {size:8.5in 11.0in;
- margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
- {page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><b><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>SHM
-frequency tab</span></u></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
-graphs give insight into the level of SHM. The data represented in these graphs
-can be downloaded in the download tab. <a name="OLE_LINK24"></a><a
-name="OLE_LINK23"></a><a name="OLE_LINK90"></a><a name="OLE_LINK89">More
-information on the values found in healthy individuals of different ages can be
-found in IJspeert and van Schouwenburg et al, PMID: 27799928. </a></span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Frequency
-scatter plot</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
-dot plot showing the percentage of SHM in each transcript divided into the
-different (sub)classes. </span><span lang=NL style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'>In the graph each dot
-represents an individual transcript.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
-frequency by class</span></u></p>
-
-<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
-bar graph showing the percentage of transcripts that contain 0%, 0-2%, 2-5%,
-5-10% 10-15%, 15-20% or more than 20% SHM for each subclass. </span></p>
-
-<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
-Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
-Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
-of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
-Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>Link</span></a>]</span></p>
-
-</div>
-
-</body>
-
-</html>
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+ {margin-top:0in;
+ margin-right:0in;
+ margin-bottom:10.0pt;
+ margin-left:0in;
+ line-height:115%;
+ font-size:11.0pt;
+ font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+ {font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+ {margin-bottom:10.0pt;
+ line-height:115%;}
+@page WordSection1
+ {size:8.5in 11.0in;
+ margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+ {page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><b><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>SHM
+frequency tab</span></u></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
+graphs give insight into the level of SHM. The data represented in these graphs
+can be downloaded in the download tab. <a name="OLE_LINK24"></a><a
+name="OLE_LINK23"></a><a name="OLE_LINK90"></a><a name="OLE_LINK89">More
+information on the values found in healthy individuals of different ages can be
+found in IJspeert and van Schouwenburg et al, PMID: 27799928. </a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Frequency
+scatter plot</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
+dot plot showing the percentage of SHM in each transcript divided into the
+different (sub)classes. </span><span lang=NL style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>In the graph each dot
+represents an individual transcript.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
+frequency by class</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
+bar graph showing the percentage of transcripts that contain 0%, 0-2%, 2-5%,
+5-10%, 10-15%, 15-20% or more than 20% SHM for each subclass. </span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
b
diff -r 43a1aa648537 -r ba33b94637ca shm_overview.htm
--- a/shm_overview.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_overview.htm Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,332 +1,332 @@\n-<html>\r\n-\r\n-<head>\r\n-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">\r\n-<meta name=Generator content="Microsoft Word 14 (filtered)">\r\n-<style>\r\n-<!--\r\n- /* Font Definitions */\r\n- @font-face\r\n-\t{font-family:Calibri;\r\n-\tpanose-1:2 15 5 2 2 2 4 3 2 4;}\r\n- /* Style Definitions */\r\n- p.MsoNormal, li.MsoNormal, div.MsoNormal\r\n-\t{margin-top:0in;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:11.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-.MsoChpDefault\r\n-\t{font-family:"Calibri","sans-serif";}\r\n-.MsoPapDefault\r\n-\t{margin-bottom:10.0pt;\r\n-\tline-height:115%;}\r\n-@page WordSection1\r\n-\t{size:8.5in 11.0in;\r\n-\tmargin:1.0in 1.0in 1.0in 1.0in;}\r\n-div.WordSection1\r\n-\t{page:WordSection1;}\r\n--->\r\n-</style>\r\n-\r\n-</head>\r\n-\r\n-<body lang=EN-US>\r\n-\r\n-<div class=WordSection1>\r\n-\r\n-<p class=MsoNormalCxSpFirst style=\'text-align:justify\'><b><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Info\r\n-table</span></b></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>This\r\n-table contains information on different characteristics of SHM. For all\r\n-characteristics information can be found for all sequences or only sequences of\r\n-a certain (sub)class. All results are based on the sequences that passed the filter\r\n-settings chosen on the start page of the SHM &amp; CSR pipeline and only\r\n-include details on the analysed region as determined by the setting of the\r\n-sequence starts at filter. 
All data in this table can be downloaded via the\r\n-\x93downloads\x94 tab.</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Mutation\r\n-frequency:</span></u></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK83"></a><a\r\n-name="OLE_LINK82"></a><a name="OLE_LINK81"><span lang=EN-GB style=\'font-size:\r\n-12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>These values\r\n-give information on the level of SHM. </span></a><a name="OLE_LINK22"></a><a\r\n-name="OLE_LINK21"></a><a name="OLE_LINK20"><span lang=EN-GB style=\'font-size:\r\n-12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>More information\r\n-on the values found in healthy individuals of different ages can be found in </span></a><a\r\n-name="OLE_LINK15"></a><a name="OLE_LINK14"></a><a name="OLE_LINK13"><span\r\n-lang=EN-GB style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>IJspeert\r\n-and van Schouwenburg et al, PMID: 27799928</span></a></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><i><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Number\r\n-of mutations:</span></i><span lang=EN-GB style=\'font-size:12.0pt;line-height:\r\n-115%;font-family:"Times New Roman","serif"\'> Shows the number of total\r\n-mutations / the number of sequenced bases (the % of mutated bases).</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><i><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Median\r\n-number of mutations:</span></i><span lang=EN-GB style=\'font-size:12.0pt;\r\n-line-height:115%;font-family:"Times New Roman","serif"\'> Shows the median % of\r\n-SHM of all sequences.</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle 
style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Patterns\r\n-of SHM:</span></u></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK72"></a><a\r\n-name="OLE_LINK71"></a><a name="OLE_LINK70"><span lang=EN-GB style=\'font-size:\r\n-12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>These values\r\n-give insights into the targeting and patterns of '..b'-size:12.0pt;line-height:115%;\n+font-family:"Times New Roman","serif"\'>Shows the total number of sequenced <a\n+name="OLE_LINK59"></a><a name="OLE_LINK58">guanine</a>s / The total number of\n+sequenced bases (the percentage of sequenced bases that were guanines).</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK69"><b><span\n+lang=EN-GB style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Graphs</span></b></a></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK75"></a><a\n+name="OLE_LINK74"></a><a name="OLE_LINK73"><span lang=EN-GB style=\'font-size:\n+12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>These graphs visualize\n+information on the patterns and targeting of SHM and thereby give information\n+into the repair pathways used to repair the U:G mismatches introduced by AID. The\n+data represented in these graphs can be downloaded in the download tab. 
More\n+information on the values found in healthy individuals of different ages can be\n+found in IJspeert and van Schouwenburg et al, PMID: 27799928</span></a><span\n+lang=EN-GB style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>.\n+<a name="OLE_LINK85"></a><a name="OLE_LINK84"></a></span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Percentage\n+of mutations in AID and pol eta motives</span></u></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Visualizes\n+<a name="OLE_LINK80"></a><a name="OLE_LINK79"></a><a name="OLE_LINK78">for each\n+(sub)class </a>the percentage of mutations that are present in AID (RGYW or\n+WRCY) or polymerase eta motives (WA or TW) in the different subclasses </span><span\n+lang=EN-GB style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>(R=Purine,\n+Y=pyrimidine, W = A or T).</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=NL\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Relative\n+mutation patterns</span></u></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Visualizes\n+for each (sub)class the distribution of mutations between mutations at AT\n+locations and transitions or transversions at GC locations. 
</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=NL\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Absolute\n+mutation patterns</span></u></p>\n+\n+<p class=MsoNormalCxSpLast style=\'text-align:justify\'><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Visualizes\n+for each (sub)class the percentage of sequenced AT and GC bases that are\n+mutated. The mutations at GC bases are divided into transition and transversion\n+mutations<a name="OLE_LINK77"></a><a name="OLE_LINK76">. </a></span></p>\n+\n+<p class=MsoNormal><span lang=NL style=\'font-size:12.0pt;line-height:115%;\n+font-family:"Times New Roman","serif"\'>Hanna IJspeert, Pauline A. van\n+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,\n+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Evaluation\n+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and\n+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a\n+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span\n+style=\'color:windowtext\'>doi:10.3389/fimmu.2016.00410</span></a>][<a\n+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span\n+style=\'color:windowtext\'>Link</span></a>]</span></p>\n+\n+</div>\n+\n+</body>\n+\n+</html>\n'
b
diff -r 43a1aa648537 -r ba33b94637ca shm_selection.htm
--- a/shm_selection.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_selection.htm Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,128 +1,128 @@\n-<html>\r\n-\r\n-<head>\r\n-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">\r\n-<meta name=Generator content="Microsoft Word 14 (filtered)">\r\n-<style>\r\n-<!--\r\n- /* Font Definitions */\r\n- @font-face\r\n-\t{font-family:Calibri;\r\n-\tpanose-1:2 15 5 2 2 2 4 3 2 4;}\r\n-@font-face\r\n-\t{font-family:UICTFontTextStyleBody;}\r\n- /* Style Definitions */\r\n- p.MsoNormal, li.MsoNormal, div.MsoNormal\r\n-\t{margin-top:0in;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:11.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-a:link, span.MsoHyperlink\r\n-\t{color:blue;\r\n-\ttext-decoration:underline;}\r\n-a:visited, span.MsoHyperlinkFollowed\r\n-\t{color:purple;\r\n-\ttext-decoration:underline;}\r\n-span.apple-converted-space\r\n-\t{mso-style-name:apple-converted-space;}\r\n-.MsoChpDefault\r\n-\t{font-family:"Calibri","sans-serif";}\r\n-.MsoPapDefault\r\n-\t{margin-bottom:10.0pt;\r\n-\tline-height:115%;}\r\n-@page WordSection1\r\n-\t{size:8.5in 11.0in;\r\n-\tmargin:1.0in 1.0in 1.0in 1.0in;}\r\n-div.WordSection1\r\n-\t{page:WordSection1;}\r\n--->\r\n-</style>\r\n-\r\n-</head>\r\n-\r\n-<body lang=EN-US link=blue vlink=purple>\r\n-\r\n-<div class=WordSection1>\r\n-\r\n-<p class=MsoNormalCxSpFirst style=\'text-align:justify\'><b><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>References</span></b></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";\r\n-color:black\'>Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying\r\n-selection in high-throughput Immunoglobulin sequencing data sets. In<span\r\n-class=apple-converted-space>&nbsp;</span><em>Nucleic Acids Research, 40 (17),\r\n-pp. 
e134\x96e134.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><span\r\n-lang=EN-GB><a href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";\r\n-color:#303030\'>doi:10.1093/nar/gks457</span></a></span><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";\r\n-color:black\'>][</span><span lang=EN-GB><a\r\n-href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";\r\n-color:#303030\'>Link</span></a></span><span lang=EN-GB style=\'font-size:12.0pt;\r\n-line-height:115%;font-family:"Times New Roman","serif";color:black\'>]</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><b><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Graphs</span></b></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>AA\r\n-mutation frequency</span></u></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>For\r\n-each class, the frequency of replacement mutations at each amino acid position\r\n-is shown, which is calculated by dividing the number of replacement mutations\r\n-at a particular amino acid position/the number sequences that have an amino\r\n-acid at that particular position. Since the length of the CDR1 and CDR2 region\r\n-is not the same for every VH gene, some amino acids positions are absent.\r\n-Therefore we calculate the frequency using the number of amino acids present at\r\n-that that particular location. 
</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Antigen\r\n-selection (BASELINe)</span></u></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Shows\r\n-the results of the an'..b'target="_blank"><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";\n+color:#303030\'>doi:10.1093/nar/gks457</span></a></span><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";\n+color:black\'>][</span><span lang=EN-GB><a\n+href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";\n+color:#303030\'>Link</span></a></span><span lang=EN-GB style=\'font-size:12.0pt;\n+line-height:115%;font-family:"Times New Roman","serif";color:black\'>]</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><b><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Graphs</span></b></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>AA\n+mutation frequency</span></u></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>For\n+each class, the frequency of replacement mutations at each amino acid position\n+is shown, which is calculated by dividing the number of replacement mutations\n+at a particular amino acid position/the number sequences that have an amino\n+acid at that particular position. 
Since the length of the CDR1 and CDR2 region\n+is not the same for every VH gene, some amino acid positions are absent.\n+Therefore we calculate the frequency using the number of amino acids present at\n+that particular location. </span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Antigen\n+selection (BASELINe)</span></u></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Shows\n+the results of the analysis of antigen selection as performed using BASELINe.\n+Details on the analysis performed by BASELINe can be found in Yaari et al,\n+PMID: 22641856. The settings used for the analysis are</span><span lang=EN-GB\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>:\n+focused, SHM targeting model: human Tri-nucleotide, custom boundaries. The\n+custom boundaries are dependent on the \x91sequence starts at filter\x92. </span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'line-height:normal\'><span lang=NL\n+style=\'font-family:UICTFontTextStyleBody;color:black\'>Leader:\n+1:26:38:55:65:104:-</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'line-height:normal\'><span lang=NL\n+style=\'font-family:UICTFontTextStyleBody;color:black\'>FR1: 27:27:38:55:65:104:-</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'line-height:normal\'><span lang=NL\n+style=\'font-family:UICTFontTextStyleBody;color:black\'>CDR1:&nbsp;27:27:38:55:65:104:-</span></p>\n+\n+<p class=MsoNormalCxSpLast style=\'line-height:normal\'><span lang=NL\n+style=\'font-family:UICTFontTextStyleBody;color:black\'>FR2:&nbsp;27:27:38:55:65:104:-</span></p>\n+\n+<p class=MsoNormal><span lang=NL style=\'font-size:12.0pt;line-height:115%;\n+font-family:"Times New Roman","serif"\'>Hanna IJspeert, Pauline A. 
van\n+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,\n+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Evaluation\n+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and\n+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a\n+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span\n+style=\'color:windowtext\'>doi:10.3389/fimmu.2016.00410</span></a>][<a\n+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span\n+style=\'color:windowtext\'>Link</span></a>]</span></p>\n+\n+</div>\n+\n+</body>\n+\n+</html>\n'
b
diff -r 43a1aa648537 -r ba33b94637ca shm_transition.htm
--- a/shm_transition.htm Thu Dec 07 03:44:38 2017 -0500
+++ b/shm_transition.htm Tue Jan 29 03:54:09 2019 -0500
[
b'@@ -1,120 +1,120 @@\n-<html>\r\n-\r\n-<head>\r\n-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">\r\n-<meta name=Generator content="Microsoft Word 14 (filtered)">\r\n-<style>\r\n-<!--\r\n- /* Font Definitions */\r\n- @font-face\r\n-\t{font-family:Calibri;\r\n-\tpanose-1:2 15 5 2 2 2 4 3 2 4;}\r\n- /* Style Definitions */\r\n- p.MsoNormal, li.MsoNormal, div.MsoNormal\r\n-\t{margin-top:0in;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:11.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-a:link, span.MsoHyperlink\r\n-\t{color:blue;\r\n-\ttext-decoration:underline;}\r\n-a:visited, span.MsoHyperlinkFollowed\r\n-\t{color:purple;\r\n-\ttext-decoration:underline;}\r\n-p.msochpdefault, li.msochpdefault, div.msochpdefault\r\n-\t{mso-style-name:msochpdefault;\r\n-\tmargin-right:0in;\r\n-\tmargin-left:0in;\r\n-\tfont-size:12.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-p.msopapdefault, li.msopapdefault, div.msopapdefault\r\n-\t{mso-style-name:msopapdefault;\r\n-\tmargin-right:0in;\r\n-\tmargin-bottom:10.0pt;\r\n-\tmargin-left:0in;\r\n-\tline-height:115%;\r\n-\tfont-size:12.0pt;\r\n-\tfont-family:"Times New Roman","serif";}\r\n-span.apple-converted-space\r\n-\t{mso-style-name:apple-converted-space;}\r\n-.MsoChpDefault\r\n-\t{font-size:10.0pt;\r\n-\tfont-family:"Calibri","sans-serif";}\r\n-.MsoPapDefault\r\n-\t{margin-bottom:10.0pt;\r\n-\tline-height:115%;}\r\n-@page WordSection1\r\n-\t{size:8.5in 11.0in;\r\n-\tmargin:1.0in 1.0in 1.0in 1.0in;}\r\n-div.WordSection1\r\n-\t{page:WordSection1;}\r\n--->\r\n-</style>\r\n-\r\n-</head>\r\n-\r\n-<body lang=EN-US link=blue vlink=purple>\r\n-\r\n-<div class=WordSection1>\r\n-\r\n-<p class=MsoNormalCxSpFirst style=\'text-align:justify\'><span style=\'font-size:\r\n-12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>These graphs and\r\n-tables give insight into the targeting and patterns of SHM. 
This can give\r\n-insight into the DNA repair pathways used to solve the U:G mismatches\r\n-introduced by AID. More information on the values found in healthy individuals\r\n-of different ages can be found in IJspeert and van Schouwenburg et al, PMID:\r\n-27799928.</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><b><span\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Graphs\r\n-</span></b></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK93"></a><a\r\n-name="OLE_LINK92"></a><a name="OLE_LINK91"><u><span style=\'font-size:12.0pt;\r\n-line-height:115%;font-family:"Times New Roman","serif"\'>Heatmap transition\r\n-information</span></u></a></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK98"></a><a\r\n-name="OLE_LINK97"><span style=\'font-size:12.0pt;line-height:115%;font-family:\r\n-"Times New Roman","serif"\'>Heatmaps visualizing for each subclass the frequency\r\n-of all possible substitutions. On the x-axes the original base is shown, while\r\n-the y-axes shows the new base. The darker the shade of blue, the more frequent\r\n-this type of substitution is occurring.\xa0 </span></a></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Bargraph\r\n-transition information</span></u></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span style=\'font-size:\r\n-12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Bar graph\r\n-visualizing for each original base the distribution of substitutions into the other\r\n-bases. A graph is included for each (sub)class. 
</span></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><b><span\r\n-style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Tables</span></b></p>\r\n-\r\n-<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span style=\'font-size:\r\n-12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Transition\r\n-tables are shown for each (sub)class. All the original bases are listed\r\n-horizontally, while the new bases are listed vertically. </span></p>\r\n-\r\n-<p class'..b't, div.msochpdefault\n+\t{mso-style-name:msochpdefault;\n+\tmargin-right:0in;\n+\tmargin-left:0in;\n+\tfont-size:12.0pt;\n+\tfont-family:"Calibri","sans-serif";}\n+p.msopapdefault, li.msopapdefault, div.msopapdefault\n+\t{mso-style-name:msopapdefault;\n+\tmargin-right:0in;\n+\tmargin-bottom:10.0pt;\n+\tmargin-left:0in;\n+\tline-height:115%;\n+\tfont-size:12.0pt;\n+\tfont-family:"Times New Roman","serif";}\n+span.apple-converted-space\n+\t{mso-style-name:apple-converted-space;}\n+.MsoChpDefault\n+\t{font-size:10.0pt;\n+\tfont-family:"Calibri","sans-serif";}\n+.MsoPapDefault\n+\t{margin-bottom:10.0pt;\n+\tline-height:115%;}\n+@page WordSection1\n+\t{size:8.5in 11.0in;\n+\tmargin:1.0in 1.0in 1.0in 1.0in;}\n+div.WordSection1\n+\t{page:WordSection1;}\n+-->\n+</style>\n+\n+</head>\n+\n+<body lang=EN-US link=blue vlink=purple>\n+\n+<div class=WordSection1>\n+\n+<p class=MsoNormalCxSpFirst style=\'text-align:justify\'><span style=\'font-size:\n+12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>These graphs and\n+tables give insight into the targeting and patterns of SHM. This can give\n+insight into the DNA repair pathways used to solve the U:G mismatches\n+introduced by AID. 
More information on the values found in healthy individuals\n+of different ages can be found in IJspeert and van Schouwenburg et al, PMID:\n+27799928.</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><b><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Graphs\n+</span></b></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK93"></a><a\n+name="OLE_LINK92"></a><a name="OLE_LINK91"><u><span style=\'font-size:12.0pt;\n+line-height:115%;font-family:"Times New Roman","serif"\'>Heatmap transition\n+information</span></u></a></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><a name="OLE_LINK98"></a><a\n+name="OLE_LINK97"><span style=\'font-size:12.0pt;line-height:115%;font-family:\n+"Times New Roman","serif"\'>Heatmaps visualizing for each subclass the frequency\n+of all possible substitutions. On the x-axes the original base is shown, while\n+the y-axes shows the new base. The darker the shade of blue, the more frequent\n+this type of substitution is occurring.\xa0 </span></a></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><u><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Bargraph\n+transition information</span></u></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span style=\'font-size:\n+12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Bar graph\n+visualizing for each original base the distribution of substitutions into the other\n+bases. A graph is included for each (sub)class. 
</span></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><b><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Tables</span></b></p>\n+\n+<p class=MsoNormalCxSpMiddle style=\'text-align:justify\'><span style=\'font-size:\n+12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Transition\n+tables are shown for each (sub)class. All the original bases are listed\n+horizontally, while the new bases are listed vertically. </span></p>\n+\n+<p class=MsoNormal><span lang=NL style=\'font-size:12.0pt;line-height:115%;\n+font-family:"Times New Roman","serif"\'>Hanna IJspeert, Pauline A. van\n+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,\n+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span\n+style=\'font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"\'>Evaluation\n+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and\n+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a\n+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span\n+style=\'color:windowtext\'>doi:10.3389/fimmu.2016.00410</span></a>][<a\n+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span\n+style=\'color:windowtext\'>Link</span></a>]</span></p>\n+\n+</div>\n+\n+</body>\n+\n+</html>\n'