# HG changeset patch
# User davidvanzessen
# Date 1482417567 18000
# Node ID a24f8c93583a59e55e7579fb262c517270eca446
# Parent  05c62efdc3931ca72699927cf1910044cd22c5e0
Uploaded

diff -r 05c62efdc393 -r a24f8c93583a aa_histogram.r
--- a/aa_histogram.r	Tue Dec 20 09:03:15 2016 -0500
+++ b/aa_histogram.r	Thu Dec 22 09:39:27 2016 -0500
@@ -15,9 +15,12 @@
 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="")
 
 for(gene in genes){
+	graph.title = paste(gene, "AA mutation frequency")
 	if(gene == ""){
 		mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),]
 		absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),]
+		
+		graph.title = "AA mutation frequency all"
 	} else {
 		mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),]
 		absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),]
@@ -33,6 +36,7 @@
 	dat_freq = mutations.at.position / aa.at.position
 	dat_freq[is.na(dat_freq)] = 0
 	dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq)
+	
 
 	print("---------------- plot ----------------")
 
@@ -43,7 +47,7 @@
 	m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2")
 	m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2")
 	m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3")
-	m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) 
+	m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) 
 	m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
 	#m = m + scale_colour_manual(values=c("black"))
 
diff -r 05c62efdc393 -r a24f8c93583a baseline/comparePDFs.r
--- a/baseline/comparePDFs.r	Tue Dec 20 09:03:15 2016 -0500
+++ b/baseline/comparePDFs.r	Thu Dec 22 09:39:27 2016 -0500
@@ -95,7 +95,7 @@
   grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1))
   grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3))
 
-  grid.text("Density", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex))
+  grid.text("All", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex))
   grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex))
   
   if(pdf1==pdf2 & length(listPDFs[pdf2][[1]][["FWR"]])>1 & length(listPDFs[pdf2][[1]][["CDR"]])>1 ){
diff -r 05c62efdc393 -r a24f8c93583a pattern_plots.r
--- a/pattern_plots.r	Tue Dec 20 09:03:15 2016 -0500
+++ b/pattern_plots.r	Thu Dec 22 09:39:27 2016 -0500
@@ -56,10 +56,10 @@
 
 write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
 
-p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL))
+p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) + ggtitle("Percentage of mutations in AID and pol eta motives")
 p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
 #p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4"))
-png(filename=plot1.png, width=480, height=300)
+png(filename=plot1.png, width=510, height=300)
 print(p)
 dev.off()
 
@@ -93,7 +93,7 @@
 
 write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
 
-p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations")
+p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") + ggtitle("Relative mutation patterns")
 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
 png(filename=plot2.png, width=480, height=300)
@@ -122,7 +122,7 @@
 data3 = melt(t(data3[8:10,]))
 names(data3) = c("Class", "Type", "value")
 
-chk = is.na(data1$data3)
+chk = is.na(data3$value)
 if(any(chk)){
 	data3[chk, "value"] = 0
 }
@@ -131,7 +131,7 @@
 
 write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
 
-p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL))
+p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) + ggtitle("Absolute mutation patterns")
 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
 png(filename=plot3.png, width=480, height=300)
diff -r 05c62efdc393 -r a24f8c93583a sequence_overview.r
--- a/sequence_overview.r	Tue Dec 20 09:03:15 2016 -0500
+++ b/sequence_overview.r	Thu Dec 22 09:39:27 2016 -0500
@@ -70,7 +70,8 @@
 	res = paste(res, "</table>")
 }
 
-cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=F)
+cat("<center><img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAPCAYAAAA71pVKAAAAzElEQVQoka2TwQ2CQBBFpwTshw4ImW8ogJMlUIMmhNCDxgasAi50oSXA8XlAjCG7aqKTzGX/vsnM31mzR0gk7tTudO5MEizpzvQ4ryUSe408J3Xn+grE0p1rnpOamVmWsZG4rS+dzzAMsN8Hi9yyjI1JNGtxu4VxBJgLRLpoTKIPiW0LlwtUVRTubW2OBGUJu92cZRmdfbKQMAw8o+vi5v0fLorZ7Y9waGYJjsf38DJz0O1PsEQffOcv4Sa6YYfDDJ5Obzbsp93+5VfdATueO1fdLdI0AAAAAElFTkSuQmCC'> Please note that this tab is based on all sequences before filter unique sequences and the remove duplicates based on filters are applied. In this table only sequences according more than once are included. </center>", file=main.html, append=F)
+cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=T)
 
 if(empty.region.filter == "leader"){
 	cat("<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
@@ -305,7 +306,11 @@
 	names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep=""))
 }
 
-write.table(NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")], NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
+NToverview.tmp = NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")]
+
+names(NToverview.tmp) = c("Sequence.ID", "best_match", "Sequence of the analysed region", "A", "C", "G", "T")
+
+write.table(NToverview.tmp, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
 
 NToverview = NToverview[!grepl("unmatched", NToverview$best_match),]
 
diff -r 05c62efdc393 -r a24f8c93583a shm_clonality.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_clonality.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,144 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+@font-face
+	{font-family:Tahoma;
+	panose-1:2 11 6 4 3 5 4 4 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+p
+	{margin-right:0in;
+	margin-left:0in;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
+	{mso-style-link:"Balloon Text Char";
+	margin:0in;
+	margin-bottom:.0001pt;
+	font-size:8.0pt;
+	font-family:"Tahoma","sans-serif";}
+p.msochpdefault, li.msochpdefault, div.msochpdefault
+	{mso-style-name:msochpdefault;
+	margin-right:0in;
+	margin-left:0in;
+	font-size:12.0pt;
+	font-family:"Calibri","sans-serif";}
+p.msopapdefault, li.msopapdefault, div.msopapdefault
+	{mso-style-name:msopapdefault;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+span.BalloonTextChar
+	{mso-style-name:"Balloon Text Char";
+	mso-style-link:"Balloon Text";
+	font-family:"Tahoma","sans-serif";}
+.MsoChpDefault
+	{font-size:10.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><b><span lang=EN-GB style='color:black'>References</span></b></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>Gupta,
+Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria,
+Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). <a name="OLE_LINK106"></a><a
+name="OLE_LINK107"></a>Change-O: a toolkit for analyzing large-scale B cell
+immunoglobulin repertoire sequencing data: Table 1. In<span
+class=apple-converted-space>&nbsp;</span><em>Bioinformatics, 31 (20), pp.
+3356–3358.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><a
+href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span
+lang=EN-GB style='color:#303030'>doi:10.1093/bioinformatics/btv359</span></a><span
+lang=EN-GB style='color:black'>][</span><a
+href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span
+lang=EN-GB style='color:#303030'>Link</span></a><span lang=EN-GB
+style='color:black'>]</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>&nbsp;</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><a name="OLE_LINK110"><u><span lang=EN-GB
+style='color:black'>All, IGA, IGG, IGM and IGE tabs</span></u></a></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>In
+these tabs information on the clonal relation of transcripts can be found. To
+calculate clonal relation Change-O is used (Gupta et al, PMID: 26069265).
+Transcripts are considered clonally related if they have maximal three nucleotides
+difference in their CDR3 sequence and the same first V segment (as assigned by
+IMGT). Results are represented in a table format showing the clone size and the
+number of clones or sequences with this clone size. Change-O settings used are
+the </span><span lang=EN-GB>nucleotide hamming distance substitution model with
+a complete distance of maximal three. For clonal assignment the first gene
+segments were used, and the distances were not normalized. In case of
+asymmetric distances, the minimal distance was used.<span style='color:black'> </span></span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>&nbsp;</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><u><span lang=EN-GB style='color:black'>Overlap
+tab</span></u><span lang=EN-GB style='color:black'> </span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>This
+tab gives information on with which (sub)classe(s) each unique analyzed region
+(based on the exact nucleotide sequence of the analyzes region and the CDR3
+nucleotide sequence) is found with. This gives information if the combination
+of the exact same nucleotide sequence of the analyzed region and the CDR3
+sequence can be found in multiple (sub)classes.</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span style='color:black'><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAPCAYAAAA71pVKAAAAzElEQVQoka2TwQ2CQBBFpwTshw4ImW8ogJMlUIMmhNCDxgasAi50oSXA8XlAjCG7aqKTzGX/vsnM31mzR0gk7tTudO5MEizpzvQ4ryUSe408J3Xn+grE0p1rnpOamVmWsZG4rS+dzzAMsN8Hi9yyjI1JNGtxu4VxBJgLRLpoTKIPiW0LlwtUVRTubW2OBGUJu92cZRmdfbKQMAw8o+vi5v0fLorZ7Y9waGYJjsf38DJz0O1PsEQffOcv4Sa6YYfDDJ5Obzbsp93+5VfdATueO1fdLdI0AAAAAElFTkSuQmCC"> Please note that this tab is based on all
+sequences before filter unique sequences and the remove duplicates based on
+filters are applied. In this table only sequences according more than once are
+included. </span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a shm_csr.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,95 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
+graphs in this tab give insight into the subclass distribution of IGG and IGA
+transcripts. </span><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Human Cµ, C&#945;, C&#947; and C&#949;
+constant genes are assigned using a </span><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>custom script
+specifically designed for human (sub)class assignment in repertoire data as
+described in van Schouwenburg and IJspeert et al, submitted for publication. In
+this script the reference sequences for the subclasses are divided in 8
+nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are
+then individually aligned in the right order to each input sequence. The
+percentage of the chunks identified in each rearrangement is calculated in the
+‘chunk hit percentage’. </span><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>C&#945; and C&#947;
+subclasses are very homologous and only differ in a few nucleotides. To assign
+subclasses the </span><span lang=EN-GB style='font-size:12.0pt;line-height:
+115%;font-family:"Times New Roman","serif"'>‘nt hit percentage’ is calculated.
+This percentage indicates how well the chunks covering the subclass specific
+nucleotide match with the different subclasses. </span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Information
+on normal distribution of subclasses in healthy individuals of different ages
+can be found in IJspeert and van Schouwenburg et al, PMID: 27799928.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK100"></a><a
+name="OLE_LINK99"></a><a name="OLE_LINK25"><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGA
+subclass distribution</span></u></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
+chart showing the relative distribution of IGA1 and IGA2 transcripts in the
+sample.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGG
+subclass distribution</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
+chart showing the relative distribution of IGG1, IGG2, IGG3 and IGG4
+transcripts in the sample.</span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a shm_csr.r
--- a/shm_csr.r	Tue Dec 20 09:03:15 2016 -0500
+++ b/shm_csr.r	Thu Dec 22 09:39:27 2016 -0500
@@ -302,14 +302,14 @@
 			print("Plotting heatmap and transition")
 			png(filename=paste("transitions_stacked_", name, ".png", sep=""))
 			p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
-			p = p + xlab("From base") + ylab("") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
+			p = p + xlab("From base") + ylab("") + ggtitle("Bargraph transition information") + guides(fill=guide_legend(title=NULL))
 			p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4"))
 			#p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
 			print(p)
 			dev.off()
 			png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
 			p = ggplot(transition2, aes(factor(reorder(variable, -order.y)), factor(reorder(id, -order.x)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap
-			p = p + xlab("To base") + ylab("From Base") + ggtitle("Mutations frequency from base to base")  + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
+			p = p + xlab("To base") + ylab("From Base") + ggtitle("Heatmap transition information")  + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
 			print(p)
 			dev.off()
 		} else {
@@ -388,7 +388,7 @@
 	pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4"))
 	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
 	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
-	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")"))
+	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclass distribution", "( n =", sum(genesForPlot$Freq), ")"))
 	write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
 	png(filename="IGA.png")
@@ -409,7 +409,7 @@
 	pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred"))
 	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
 	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
-	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")"))
+	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclass distribution", "( n =", sum(genesForPlot$Freq), ")"))
 	write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
 	png(filename="IGG.png")
@@ -430,7 +430,7 @@
 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA)
 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
-p = p + scale_colour_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
+p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
 
 png(filename="scatter.png")
 print(p)
@@ -454,7 +454,7 @@
 
 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
-p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
+p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
 
 png(filename="frequency_ranges.png")
 print(p)
diff -r 05c62efdc393 -r a24f8c93583a shm_csr.xml
--- a/shm_csr.xml	Tue Dec 20 09:03:15 2016 -0500
+++ b/shm_csr.xml	Thu Dec 22 09:39:27 2016 -0500
@@ -96,11 +96,11 @@
 
 **Input files**
 
-IMGT/HighV-QUEST .zip and .txz are accepted as input files.
+IMGT/HighV-QUEST .zip and .txz are accepted as input files. The file to be analysed can be selected using the dropdown menu.
 
 .. class:: infomark
 
-Note: Files can be uploaded by using â€œget dataâ€ and â€œupload fileâ€ and selecting â€œIMGT archiveâ€œ as a file type. 
+Note: Files can be uploaded by using â€œget dataâ€ and â€œupload fileâ€ and selecting â€œIMGT archiveâ€œ as a file type. Special characters should be prevented in the file names of the uploaded samples as these can give errors when running the immune repertoire pipeline. Underscores are allowed in the file names.
 
 -----
 
@@ -108,15 +108,15 @@
 
 Identifies the region which will be included in the analysis (analysed region)
 
-- Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded
-- Sequences containing an ambiguous base in the analysed region are excluded
-- All other filtering/analysis is based on the analysed region
+- Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded. 
+- Sequences containing an ambiguous base in the analysed region or the CDR3 are excluded. 
+- All other filtering/analysis is based on the analysed region.
 
 -----
 
 **Functionality filter**
 
-Allows filtering on productive rearrangement, unproductive rearrangements or both based on the assignment provided by IMGT. 
+Allows filtering on productive rearrangements, unproductive rearrangements or both based on the assignment provided by IMGT. 
 
 **Filter unique sequences**
 
@@ -125,13 +125,13 @@
 
 This filter consists of two different steps.
 
-Step 1: removes all sequences of which the nucleotide sequence in the â€œanalysed regionâ€ (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
+Step 1: removes all sequences of which the nucleotide sequence in the â€œanalysed regionâ€ and the CDR3 (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
 
-Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
+Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region, the CDR3 and the same (sub)class).
 
 .. class:: infomark
 
-Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
+This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
 
 *Keep unique:*
 
@@ -167,7 +167,7 @@
 
 .. class:: infomark
 
-Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first matched sequence is unmatched (no subclass assigned) the first matched sequence will be included. This means that altering the data order (by for instance sorting) can change the sequence which is included in the analysis and therefore slightly influence results. 
+Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first matched sequence is unmatched (no subclass assigned) the first matched sequence will be included. This means that altering the data order (by for instance sorting) can change the sequence which is included in the analysis and therefore slightly influences the results. 
 
 -----
 
@@ -175,21 +175,27 @@
 
 .. class:: warningmark
 
-Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the "do not assign (sub)class" option to prevent errors when running the pipeline. 
+Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the do not assign (sub)class option to prevent errors when running the pipeline. 
 
 The class percentage is based on the â€˜chunk hit percentageâ€™ (see below). The subclass percentage is based on the â€˜nt hit percentageâ€™ (see below).
 
 The SHM & CSR pipeline identifies human CÂµ, CÎ±, CÎ³ and CÎµ constant genes by dividing the reference sequences for the subclasses (NG_001019) in 8 nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunck hit percentage and the nt hit percentage. 
 
-*Chunk hit percentage*: the percentage of the chunks that is aligned 
+*Chunk hit percentage*: The percentage of the chunks that is aligned 
 
-*Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% â€˜nt hit percentageâ€™ which means that 5 out of 7 subclass specific nucleotides for CÎ± or 6 out of 8 subclass specific nucleotides of CÎ³ should match with the specific subclass. 
+*Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% â€˜nt hit percentageâ€™ which means that 5 out of 7 subclass specific nucleotides for CÎ± or 6 out of 8 subclass specific nucleotides of CÎ³ should match with the specific subclass.
 
 -----
 
 **Output new IMGT archives per class into your history?**
 
-If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as IGGalaxy. 
+If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as the Immune repertoire pipeline.  
+
+-----
+
+**Execute**
+
+Upon pressing execute a new analysis is added to your history (right side of the page). Initially this analysis will be grey, after initiating the analysis colour of the analysis in the history will change to yellow. When the analysis is finished it will turn green in the history. Now the analysis can be opened by clicking on the eye icon on the analysis of interest. When an analysis turns red an error has occurred when running the analysis. If you click on the analysis title additional information can be found on the analysis. In addition a bug icon appears. Here more information on the error can be found.
 
 ]]>
 	</help>
diff -r 05c62efdc393 -r a24f8c93583a shm_downloads.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_downloads.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,538 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+p.MsoNoSpacing, li.MsoNoSpacing, div.MsoNoSpacing
+	{margin:0in;
+	margin-bottom:.0001pt;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Info</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The complete
+dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Allows downloading of the complete parsed data set.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The filtered
+dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Allows downloading of all parsed IMGT information of all transcripts that
+passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The alignment
+info on the unmatched sequences:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Provides information of the subclass
+alignment of all unmatched sequences. For each sequence the chunck hit
+percentage and the nt hit percentage is shown together with the best matched
+subclass.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Overview</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The SHM Overview
+table as a dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Allows downloading of the SHM Overview
+table as a data set.  </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Motif data per
+sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> Provides a file that contains information for each
+transcript on the number of mutations present in WA/TW and RGYW/WRCY motives.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Mutation data
+per sequence ID: </span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'>Provides a file containing information
+on the number of sequences bases, the number and location of mutations and the
+type of mutations found in each transcript. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Base count for
+every sequence:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> links to a page showing for each transcript the
+sequence of the analysed region (as dependent on the sequence starts at filter),
+the assigned subclass and the number of sequenced A,C,G and T’s.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the percentage of mutations in AID and pol eta motives plot:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Provides a file containing the values used to generate the percentage of
+mutations in AID and pol eta motives plot in the SHM overview tab.</span></p>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
+data used to generate the relative mutation patterns plot:</span></u><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Provides a download with the data used to generate the relative mutation
+patterns plot in the SHM overview tab.</span></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
+data used to generate the absolute mutation patterns plot:</span></u><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Provides a download with the data used to generate the absolute mutation
+patterns plot in the SHM overview tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Frequency</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data
+generate the frequency scatter plot:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows
+downloading the data used to generate the frequency scatter plot in the SHM
+frequency tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the frequency by class plot:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows
+downloading the data used to generate frequency by class plot included in the
+SHM frequency tab.           </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for
+frequency by subclass:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Provides information of the number and
+percentage of sequences that have 0%, 0-2%, 2-5%, 5-10%, 10-15%, 15-20%,
+&gt;20% SHM. Information is provided for each subclass.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Transition
+Tables</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'all' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGA' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGA sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGA1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGA1 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGA2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGA2 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG1 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG2 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG3' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG3 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG4' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG4 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGM' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGM sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGE' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the
+information used to generate the transition table for all IGE sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Antigen
+selection</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>AA mutation data
+per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> Provides for each transcript information on whether
+there is replacement mutation at each amino acid location (as defined by IMGT).
+For all amino acids outside of the analysed region the value 0 is given.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Presence of AA
+per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> Provides for each transcript information on which
+amino acid location (as defined by IMGT) is present. </span><span lang=NL
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>0 is absent, 1
+is present. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
+data used to generate the aa mutation frequency plot for all sequences in the
+antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGA:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>  Provides the
+data used to generate the aa mutation frequency plot for all IGA sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGG:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
+data used to generate the aa mutation frequency plot for all IGG sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGM:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
+data used to generate the aa mutation frequency plot for all IGM sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGE:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>   Provides the
+data used to generate the aa mutation frequency plot for all IGE sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline PDF (</span></u><span
+lang=EN-GB><a href="http://selection.med.yale.edu/baseline/"><span
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>http://selection.med.yale.edu/baseline/</span></a></span><u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>):</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF
+containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline data:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculation of antigen selection as
+performed by BASELINe are shown for each individual sequence and the sum of all
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA
+PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculation of antigen selection as
+performed by BASELINe are shown for each individual IGA sequence and the sum of
+all IGA sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG
+PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGG
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculation of antigen selection as
+performed by BASELINe are shown for each individual IGG sequence and the sum of
+all IGG sequences.        </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM PDF:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF
+containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGM
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculation of antigen selection as
+performed by BASELINe are shown for each individual IGM sequence and the sum of
+all IGM sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE
+PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGE
+sequences.</span><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculation of antigen selection as
+performed by BASELINe are shown for each individual IGE sequence and the sum of
+all IGE sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>CSR</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA
+subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> </span><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Data used for
+the generation of the </span><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'>IGA subclass distribution plot provided
+in the CSR tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA
+subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Data used for the generation of the </span><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGG
+subclass distribution plot provided in the CSR tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=NL
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Clonal relation</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Sequence overlap
+between subclasses:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Link to the overlap table as provided
+under the clonality overlap tab.         </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones and subclass annotation:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Downloads a table with the calculation of clonal relation between all
+sequences. For each individual transcript the results of the clonal assignment
+as provided by Change-O are provided. Sequences with the same number in the CLONE
+column are considered clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Gives a summary of the total number of
+clones in all sequences and their clone size.           </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGA:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
+calculation of clonal relation between all IGA sequences. For each individual
+transcript the results of the clonal assignment as provided by Change-O are
+provided. Sequences with the same number in the CLONE column are considered
+clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGA:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGA sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGG:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
+calculation of clonal relation between all IGG sequences. For each individual
+transcript the results of the clonal assignment as provided by Change-O are
+provided. Sequences with the same number in the CLONE column are considered
+clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGG:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGG sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGM:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table
+with the calculation of clonal relation between all IGM sequences. For each
+individual transcript the results of the clonal assignment as provided by
+Change-O are provided. Sequences with the same number in the CLONE column are
+considered clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGM:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGM sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGE:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
+calculation of clonal relation between all IGE sequences. For each individual
+transcript the results of the clonal assignment as provided by Change-O are
+provided. Sequences with the same number in the CLONE column are considered
+clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGE:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGE sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Filtered IMGT
+output files</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGA sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGA
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGA1 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGA1
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGA2 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGA2
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGG
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG1 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGG1
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG2 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGG2
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG3 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGG3
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG4 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGG4
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGM sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGM
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGE sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGE
+sequences that have passed the chosen filter settings.</span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a shm_first.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_first.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,127 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'>Table showing the order of each
+filtering step and the number and percentage of sequences after each filtering
+step. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'>Input:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The
+number of sequences in the original IMGT file. This is always 100% of the
+sequences.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'>After &quot;no results&quot; filter: </span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IMGT
+classifies sequences either as &quot;productive&quot;, &quot;unproductive&quot;, &quot;unknown&quot;, or &quot;no
+results&quot;. Here, the number and percentages of sequences that are not classified
+as &quot;no results&quot; are reported.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'>After functionality filter:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The
+number and percentages of sequences that have passed the functionality filter. The
+filtering performed is dependent on the settings of the functionality filter.
+Details on the functionality filter <a name="OLE_LINK12"></a><a
+name="OLE_LINK11"></a><a name="OLE_LINK10">can be found on the start page of
+the SHM&amp;CSR pipeline</a>.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+removal sequences that are missing a gene region:</span></u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+In this step all sequences that are missing a gene region (FR1, CDR1, FR2,
+CDR2, FR3) that should be present are removed from analysis. The sequence
+regions that should be present are dependent on the settings of the sequence
+starts at filter. <a name="OLE_LINK9"></a><a name="OLE_LINK8">The number and
+percentage of sequences that pass this filter step are reported.</a> </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+N filter:</span></u><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> In this step all sequences that contain
+an ambiguous base (n) in the analysed region or the CDR3 are removed from the
+analysis. The analysed region is determined by the setting of the sequence
+starts at filter. The number and percentage of sequences that pass this filter
+step are reported.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+filter unique sequences</span></u><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>: The number and
+percentage of sequences that pass the &quot;filter unique sequences&quot; filter. Details
+on this filter </span><span lang=EN-GB style='font-size:12.0pt;line-height:
+115%;font-family:"Times New Roman","serif"'>can be found on the start page of
+the SHM&amp;CSR pipeline</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+remove duplicate based on filter:</span></u><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> The number and
+percentage of sequences that passed the remove duplicate filter. Details on the
+&quot;remove duplicate filter based on filter&quot; can be found on the start page of the
+SHM&amp;CSR pipeline.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK17"></a><a
+name="OLE_LINK16"><u><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Number of matches sequences:</span></u></a><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+The number and percentage of sequences that passed all the filters described
+above and have a (sub)class assigned.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
+of unmatched sequences</span></u><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>: The number and percentage
+of sequences that passed all the filters described above and do not have
+subclass assigned.</span></p>
+
+<p class=MsoNormal><span lang=EN-GB>&nbsp;</span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a shm_frequency.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_frequency.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,87 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><b><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>SHM
+frequency tab</span></u></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
+graphs give insight into the level of SHM. The data represented in these graphs
+can be downloaded in the download tab. <a name="OLE_LINK24"></a><a
+name="OLE_LINK23"></a><a name="OLE_LINK90"></a><a name="OLE_LINK89">More
+information on the values found in healthy individuals of different ages can be
+found in IJspeert and van Schouwenburg et al, PMID: 27799928. </a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Frequency
+scatter plot</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
+dot plot showing the percentage of SHM in each transcript divided into the
+different (sub)classes. </span><span lang=NL style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>In the graph each dot
+represents an individual transcript.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
+frequency by class</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
+bar graph showing the percentage of transcripts that contain 0%, 0-2%, 2-5%,
+5-10% 10-15%, 15-20% or more than 20% SHM for each subclass. </span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a shm_overview.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_overview.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,332 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Info
+table</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>This
+table contains information on different characteristics of SHM. For all
+characteristics information can be found for all sequences or only sequences of
+a certain (sub)class. All results are based on the sequences that passed the filter
+settings chosen on the start page of the SHM &amp; CSR pipeline and only
+include details on the analysed region as determined by the setting of the
+sequence starts at filter. All data in this table can be downloaded via the
+“downloads” tab.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
+frequency:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK83"></a><a
+name="OLE_LINK82"></a><a name="OLE_LINK81"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values
+give information on the level of SHM. </span></a><a name="OLE_LINK22"></a><a
+name="OLE_LINK21"></a><a name="OLE_LINK20"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information
+on the values found in healthy individuals of different ages can be found in </span></a><a
+name="OLE_LINK15"></a><a name="OLE_LINK14"></a><a name="OLE_LINK13"><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IJspeert
+and van Schouwenburg et al, PMID: 27799928</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
+of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:
+115%;font-family:"Times New Roman","serif"'> Shows the number of total
+mutations / the number of sequenced bases (the % of mutated bases).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Median
+number of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'> Shows the median % of
+SHM of all sequences.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Patterns
+of SHM:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK72"></a><a
+name="OLE_LINK71"></a><a name="OLE_LINK70"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values
+give insights into the targeting and patterns of SHM. These values can give
+insight into the repair pathways used to repair the U:G mismatches introduced
+by AID. </span></a><a name="OLE_LINK40"></a><a name="OLE_LINK39"></a><a
+name="OLE_LINK38"></a><a name="OLE_LINK60"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information
+on the values found in healthy individuals of different ages can be found in
+IJspeert and van Schouwenburg et al, PMID: 27799928</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Shows the number of transition mutations / the number of total mutations (the
+percentage of mutations that are transitions). Transition mutations are C&gt;T,
+T&gt;C, A&gt;G, G&gt;A. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transversions:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Shows the number of transversion mutations / the number of total mutations (the
+percentage of mutations that are transitions). Transversion mutations are
+C&gt;A, C&gt;G, T&gt;A, T&gt;G, A&gt;T, A&gt;C, G&gt;T, G&gt;C.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions
+at GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK2"></a><a
+name="OLE_LINK1">Shows the number of transitions at GC locations (C&gt;T,
+G&gt;A) / the total number of mutations at GC locations (the percentage of
+mutations at GC locations that are transitions).</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting
+of GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK7"></a><a
+name="OLE_LINK6"></a><a name="OLE_LINK3">Shows the number of mutations at GC
+locations / the total number of mutations (the percentage of total mutations
+that are at GC locations).</a> </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions
+at AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of transitions at AT
+locations (T&gt;C, A&gt;G) / the total number of mutations at AT locations (the
+percentage of mutations at AT locations that are transitions).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting
+of AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of mutations at AT
+locations / the total number of mutations (the percentage of total mutations
+that are at AT locations).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>RGYW:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+<a name="OLE_LINK28"></a><a name="OLE_LINK27"></a><a name="OLE_LINK26">Shows
+the number of mutations that are in a RGYW motive / The number of total mutations
+(the percentage of mutations that are in a RGYW motive). </a><a
+name="OLE_LINK62"></a><a name="OLE_LINK61">RGYW motives are known to be
+preferentially targeted by AID </a></span><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
+Y=pyrimidine, W = A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WRCY:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+<a name="OLE_LINK34"></a><a name="OLE_LINK33">Shows the number of mutations
+that are in a </a><a name="OLE_LINK32"></a><a name="OLE_LINK31"></a><a
+name="OLE_LINK30"></a><a name="OLE_LINK29">WRCY</a> motive / The number of
+total mutations (the percentage of mutations that are in a WRCY motive). WRCY
+motives are known to be preferentially targeted by AID </span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
+Y=pyrimidine, W = A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WA:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+<a name="OLE_LINK37"></a><a name="OLE_LINK36"></a><a name="OLE_LINK35">Shows
+the number of mutations that are in a WA motive / The number of total mutations
+(the percentage of mutations that are in a WA motive). It is described that
+polymerase eta preferentially makes errors at WA motives </a></span><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W
+= A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>TW:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Shows the number of mutations that are in a TW motive / The number of total mutations
+(the percentage of mutations that are in a TW motive). It is described that
+polymerase eta preferentially makes errors at TW motives </span><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W
+= A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen
+selection:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
+values give insight into antigen selection. It has been described that during
+antigen selection, there is selection against replacement mutations in the FR
+regions as these can cause instability of the B-cell receptor. In contrast
+replacement mutations in the CDR regions are important for changing the
+affinity of the B-cell receptor and therefore there is selection for this type
+of mutations. Silent mutations do not alter the amino acid sequence and
+therefore do not play a role in selection. More information on the values found
+in healthy individuals of different ages can be found in IJspeert and van
+Schouwenburg et al, PMID: 27799928</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>FR
+R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK43"></a><a
+name="OLE_LINK42"></a><a name="OLE_LINK41">Shows the number of replacement
+mutations in the FR regions / The number of silent mutations in the FR regions
+(the number of replacement mutations in the FR regions divided by the number of
+silent mutations in the FR regions)</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>CDR
+R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of replacement
+mutations in the CDR regions / The number of silent mutations in the CDR
+regions (the number of replacement mutations in the CDR regions divided by the
+number of silent mutations in the CDR regions)</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
+of sequences nucleotides:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
+values give information on the number of sequenced nucleotides.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt
+in FR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK46"></a><a
+name="OLE_LINK45"></a><a name="OLE_LINK44">Shows the number of sequences bases
+that are located in the FR regions / The total number of sequenced bases (the
+percentage of sequenced bases that are present in the FR regions).</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt
+in CDR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of sequenced bases
+that are located in the CDR regions / <a name="OLE_LINK48"></a><a
+name="OLE_LINK47">The total number of sequenced bases (the percentage of
+sequenced bases that are present in the CDR regions).</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A:
+</span></i><a name="OLE_LINK51"></a><a name="OLE_LINK50"></a><a
+name="OLE_LINK49"><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Shows the total number of sequenced
+adenines / The total number of sequenced bases (the percentage of sequenced
+bases that were adenines).</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>C:
+</span></i><a name="OLE_LINK53"></a><a name="OLE_LINK52"><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
+the total number of sequenced cytosines / The total number of sequenced bases
+(the percentage of sequenced bases that were cytosines).</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>T:
+</span></i><a name="OLE_LINK57"></a><a name="OLE_LINK56"><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
+the total number of sequenced </span></a><a name="OLE_LINK55"></a><a
+name="OLE_LINK54"><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>thymines</span></a><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+/ The total number of sequenced bases (the percentage of sequenced bases that
+were thymines).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>G:
+</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Shows the total number of sequenced <a
+name="OLE_LINK59"></a><a name="OLE_LINK58">guanine</a>s / The total number of
+sequenced bases (the percentage of sequenced bases that were guanines).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK69"><b><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK75"></a><a
+name="OLE_LINK74"></a><a name="OLE_LINK73"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs visualize
+information on the patterns and targeting of SHM and thereby give information
+into the repair pathways used to repair the U:G mismatches introduced by AID. The
+data represented in these graphs can be downloaded in the download tab. More
+information on the values found in healthy individuals of different ages can be
+found in IJspeert and van Schouwenburg et al, PMID: 27799928</span></a><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>.
+<a name="OLE_LINK85"></a><a name="OLE_LINK84"></a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Percentage
+of mutations in AID and pol eta motives</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes
+<a name="OLE_LINK80"></a><a name="OLE_LINK79"></a><a name="OLE_LINK78">for each
+(sub)class </a>the percentage of mutations that are present in AID (RGYW or
+WRCY) or polymerase eta motives (WA or TW) in the different subclasses </span><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
+Y=pyrimidine, W = A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Relative
+mutation patterns</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes
+for each (sub)class the distribution of mutations between mutations at AT
+locations and transitions or transversions at GC locations. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Absolute
+mutation patterns</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualized
+for each (sub)class the percentage of sequenced AT and GC bases that are
+mutated. The mutations at GC bases are divided into transition and transversion
+mutations<a name="OLE_LINK77"></a><a name="OLE_LINK76">. </a></span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a shm_selection.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_selection.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,128 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+@font-face
+	{font-family:UICTFontTextStyleBody;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>References</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:black'>Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying
+selection in high-throughput Immunoglobulin sequencing data sets. In<span
+class=apple-converted-space>&nbsp;</span><em>Nucleic Acids Research, 40 (17),
+pp. e134–e134.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><span
+lang=EN-GB><a href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:#303030'>doi:10.1093/nar/gks457</span></a></span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:black'>][</span><span lang=EN-GB><a
+href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:#303030'>Link</span></a></span><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif";color:black'>]</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>AA
+mutation frequency</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>For
+each class, the frequency of replacement mutations at each amino acid position
+is shown, which is calculated by dividing the number of replacement mutations
+at a particular amino acid position/the number sequences that have an amino
+acid at that particular position. Since the length of the CDR1 and CDR2 region
+is not the same for every VH gene, some amino acids positions are absent.
+Therefore we calculate the frequency using the number of amino acids present at
+that that particular location. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen
+selection (BASELINe)</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
+the results of the analysis of antigen selection as performed using BASELINe.
+Details on the analysis performed by BASELINe can be found in Yaari et al,
+PMID: 22641856. The settings used for the analysis are</span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>:
+focused, SHM targeting model: human Tri-nucleotide, custom bounderies. The
+custom boundries are dependent on the ‘sequence starts at filter’. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>Leader:
+1:26:38:55:65:104:-</span></p>
+
+<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>FR1: 27:27:38:55:65:104:-</span></p>
+
+<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>CDR1:&nbsp;27:27:38:55:65:104:-</span></p>
+
+<p class=MsoNormalCxSpLast style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>FR2:&nbsp;27:27:38:55:65:104:-</span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a shm_transition.htm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_transition.htm	Thu Dec 22 09:39:27 2016 -0500
@@ -0,0 +1,120 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+p.msochpdefault, li.msochpdefault, div.msochpdefault
+	{mso-style-name:msochpdefault;
+	margin-right:0in;
+	margin-left:0in;
+	font-size:12.0pt;
+	font-family:"Calibri","sans-serif";}
+p.msopapdefault, li.msopapdefault, div.msopapdefault
+	{mso-style-name:msopapdefault;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+.MsoChpDefault
+	{font-size:10.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><span style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs and
+tables give insight into the targeting and patterns of SHM. This can give
+insight into the DNA repair pathways used to solve the U:G mismatches
+introduced by AID. More information on the values found in healthy individuals
+of different ages can be found in IJspeert and van Schouwenburg et al, PMID:
+27799928.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs
+</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK93"></a><a
+name="OLE_LINK92"></a><a name="OLE_LINK91"><u><span style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>Heatmap transition
+information</span></u></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK98"></a><a
+name="OLE_LINK97"><span style='font-size:12.0pt;line-height:115%;font-family:
+"Times New Roman","serif"'>Heatmaps visualizing for each subclass the frequency
+of all possible substitutions. On the x-axes the original base is shown, while
+the y-axes shows the new base. The darker the shade of blue, the more frequent
+this type of substitution is occurring.  </span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bargraph
+transition information</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bar graph
+visualizing for each original base the distribution of substitutions into the other
+bases. A graph is included for each (sub)class. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Tables</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transition
+tables are shown for each (sub)class. All the original bases are listed
+horizontally, while the new bases are listed vertically. </span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
diff -r 05c62efdc393 -r a24f8c93583a wrapper.sh
--- a/wrapper.sh	Tue Dec 20 09:03:15 2016 -0500
+++ b/wrapper.sh	Thu Dec 22 09:39:27 2016 -0500
@@ -247,7 +247,7 @@
 	echo "---------------- pattern_plots.r ----------------"
 	echo "---------------- pattern_plots.r ----------------<br />" >> $log
 
-	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/plot1 $outdir/plot2 $outdir/plot3 $outdir/shm_overview.txt 2>&1
+	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/abolute_mutations $outdir/shm_overview.txt 2>&1
 	
 	echo "<table class='pure-table pure-table-striped'>" >> $output
 	echo "<thead><tr><th>info</th>" >> $output
@@ -296,10 +296,11 @@
 	#echo "<a href='data_${func}.txt'>Download data</a>" >> $output
 done
 
-echo "<img src='plot1.png' /><br />" >> $output
-echo "<img src='plot2.png' /><br />" >> $output
-echo "<img src='plot3.png' /><br />" >> $output
-
+echo "<img src='aid_motives.png' /><br />" >> $output
+echo "<img src='relative_mutations.png' /><br />" >> $output
+echo "<img src='abolute_mutations.png' /><br />" >> $output
+echo "<br />" >> $output
+cat $dir/shm_overview.htm >> $output
 echo "</div>" >> $output #SHM overview tab end
 
 echo "---------------- images ----------------"
@@ -316,6 +317,9 @@
 	echo "<img src='frequency_ranges.png'/><br />" >> $output
 fi
 
+echo "<br />" >> $output
+cat $dir/shm_frequency.htm >> $output
+
 echo "</div>" >> $output #SHM frequency tab end
 
 echo "<div class='tabbertab' title='Transition tables' style='width: 3000px;'>" >> $output
@@ -380,6 +384,9 @@
 
 echo "</table>" >> $output
 
+echo "<br />" >> $output
+cat $dir/shm_transition.htm >> $output
+
 echo "</div>" >> $output #transition tables tab end
 
 echo "<div class='tabbertab' title='Antigen Selection'>" >> $output
@@ -428,7 +435,7 @@
 	mkdir $outdir/baseline/IGA_IGG_IGM
 	if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
 		cd $outdir/baseline/IGA_IGG_IGM
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"	
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"	
 	else
 		echo "No sequences" > "$outdir/baseline.txt"
 	fi
@@ -496,6 +503,9 @@
 	fi
 fi
 
+echo "<br />" >> $output
+cat $dir/shm_selection.htm >> $output
+
 echo "</div>" >> $output #antigen selection tab end
 
 echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab
@@ -509,6 +519,9 @@
 	echo "<img src='IGG.png'/><br />" >> $output
 fi
 
+echo "<br />" >> $output
+cat $dir/shm_csr.htm >> $output
+
 echo "</div>" >> $output #CSR tab end
 
 if [[ "$fast" == "no" ]] ; then
@@ -562,7 +575,7 @@
 
 	PWD="$tmp"
 
-	echo "<div class='tabbertab' title='Clonality'>" >> $output #clonality tab
+	echo "<div class='tabbertab' title='Clonal Relation' style='width: 7000px;'>" >> $output #clonality tab
 
 	function clonality_table {
 		local infile=$1
@@ -606,13 +619,15 @@
 	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
 	echo "</div>" >> $output
 
-	echo "<div class='tabbertab' title='Overlap'>" >> $output
-	cat "$outdir/sequence_overview/index.html" | sed "s%href='\(.*\).html%href='sequence_overview/\1.html%g" >> $output # rewrite href to 'sequence_overview/..."
+	echo "<div class='tabbertab' title='Overlap' style='width: 7000px;'>" >> $output
+	cat "$outdir/sequence_overview/index.html" | sed -e 's:</td>:</td>\n:g' | sed "s:href='\(.*\).html:href='sequence_overview/\1.html:g" >> $output # rewrite href to 'sequence_overview/..."
 	echo "</div>" >> $output
-
-
+	
 	echo "</div>" >> $output #clonality tabber end
-
+	
+	echo "<br />" >> $output
+	cat $dir/shm_clonality.htm >> $output
+	
 	echo "</div>" >> $output #clonality tab end
 
 fi
@@ -630,9 +645,9 @@
 echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the RGYW/WRCY and TW/WA plot</td><td><a href='plot1.txt' download='plot1.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the relative transition and transversion plot</td><td><a href='plot2.txt' download='plot2.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the absolute transition and transversion plot</td><td><a href='plot3.txt' download='plot3.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the percentage of mutations in AID and pol eta motives plot</td><td><a href='aid_motives.txt' download='aid_motives.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the relative mutation patterns plot</td><td><a href='relative_mutations.txt' download='relative_mutations.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the absolute mutation patterns plot</td><td><a href='absolute_mutations.txt' download='abolute_mutations.txt' >Download</a></td></tr>" >> $output
 
 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Frequency</td></tr>" >> $output
 echo "<tr><td>The data  generate the frequency scatter plot</td><td><a href='scatter.txt' download='scatter.txt' >Download</a></td></tr>" >> $output
@@ -654,7 +669,7 @@
 
 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Antigen Selection</td></tr>" >> $output
 echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Presence of AA per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output
 
 echo "<tr><td>The data used to generate the aa mutation frequency plot</td><td><a href='aa_histogram_sum.txt' download='aa_histogram_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data used to generate the aa mutation frequency plot for IGA</td><td><a href='aa_histogram_sum_IGA.txt' download='aa_histogram_sum_IGA.txt' >Download</a></td></tr>" >> $output
@@ -674,10 +689,10 @@
 echo "<tr><td>Baseline IGE data</td><td><a href='baseline_IGE.txt' download='baseline_IGE.txt' >Download</a></td></tr>" >> $output
 
 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>CSR</td></tr>" >> $output
-echo "<tr><td>The data for the CSR IGA pie plot</td><td><a href='IGA_pie.txt' download='IGA_pie.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the CSR IGG pie plot</td><td><a href='IGG_pie.txt' download='IGG_pie.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the IGA subclass distribution plot</td><td><a href='IGA_pie.txt' download='IGA_pie.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the IGG subclass distribution plot</td><td><a href='IGG_pie.txt' download='IGG_pie.txt' >Download</a></td></tr>" >> $output
 
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonality</td></tr>" >> $output
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonal Relation</td></tr>" >> $output
 echo "<tr><td>Sequence overlap between subclasses</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output
@@ -705,6 +720,9 @@
 
 echo "</table>" >> $output
 
+echo "<br />" >> $output
+cat $dir/shm_downloads.htm >> $output
+
 echo "</div>" >> $output #downloads tab end
 
 echo "</div>" >> $output #tabs end 
@@ -748,7 +766,10 @@
 		echo "<td>${perc}%</td>" >> $log
 		echo "</tr>" >> $log
 done < $outdir/filtering_steps.txt
-echo "</table border></center></html>" >> $log
+echo "</table>" >> $log
+echo "<br />" >> $log
+cat $dir/shm_first.htm >> $log
+echo "</center></html>" >> $log
 
 IFS="$tIFS"