changeset 20:9185c3dfc679 draft

Uploaded
author davidvanzessen
date Fri, 27 Jan 2017 03:44:18 -0500
parents 3ef457aa5df6
children 9c843eb06416
files complete.sh.old complete_immunerepertoire.xml.old experimental_design.xml igblastn.xml igparse.xml imgt_loader.xml imgt_loader/imgt_loader.r imgt_loader/imgt_loader.sh report_clonality/RScript.r report_clonality/RScript.r.old report_clonality/r_wrapper.sh.old report_clonality_igg.xml
diffstat 12 files changed, 1901 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/complete.sh.old	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,67 @@
+#!/bin/bash
+set -e
+inputFiles=($1)
+outputDir=$3
+outputFile=$3/index.html #$1
+clonalType=$4
+species=$5
+locus=$6
+filterproductive=$7
+clonality_method=$8
+
+html=$2
+dir="$(cd "$(dirname "$0")" && pwd)"
+array=("$@")
+echo "<html><h3>Progress</h3><table><tr><td>info</td></tr>" > $html
+echo "<tr><td>-----------------------------------</td></tr>" >> $html
+
+#mkdir $PWD/igblastdatabase
+#unzip $dir/database.zip -d $PWD/igblastdatabase/
+#export IGDATA=$PWD/igblastdatabase/
+
+id=""
+forwardSlash="/"
+mergerInput=()
+echo "Before loop"
+count=1
+for current in "${inputFiles[@]}"
+do
+	if [[ "$current" != *"$forwardSlash"* ]]; then
+			id="$current"
+			mergerInput+=($id)
+			count=1
+			continue
+	fi
+	echo "working on $current"
+	fileName=$(basename $current)
+	fileName="${fileName%.*}"
+	parsedFileName="$PWD/$fileName.parsed"
+	f=$(file $current)
+	zipType="Zip archive"
+	zxType="XZ compressed data"
+  	if [[ "$f" == *"$zipType"* ]] || [[ "$f" == *"$zxType"* ]]
+	then
+		echo "<tr><td>Sample $count of patient $id is an archive file, using IMGT Loader</td></tr>" >> $html
+	  	fileName=$(basename $current)
+		bash ${dir}/imgt_loader/imgt_loader.sh $current $parsedFileName "${fileName}"
+	else
+		echo "<tr><td>Sample $count of patient $id is not a zip file so assuming fasta/fastq, using igBLASTn</td></tr>" >> $html
+		bash ${dir}/igblast/igblast.sh $current "$species" $locus $parsedFileName
+	fi
+	mergerInput+=($parsedFileName)
+	count=$((count+1))
+done
+
+echo "<tr><td>-----------------------------------</td></tr>" >> $html
+echo "<tr><td>merging</td></tr>" >> $html
+
+bash $dir/experimental_design/experimental_design.sh ${mergerInput[*]} $PWD/merged.txt
+
+echo "<tr><td>done</td></tr>" >> $html
+echo "<tr><td>-----------------------------------</td></tr>" >> $html
+echo "<tr><td>plotting</td></tr>" >> $html
+
+echo "after ED"
+
+bash $dir/report_clonality/r_wrapper.sh $PWD/merged.txt $2 $outputDir $clonalType "$species" "$locus" $filterproductive $clonality_method
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/complete_immunerepertoire.xml.old	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,203 @@
+<tool id="complete_immunerepertoire_igg" name="Immune Repertoire pipeline" version="1.0">
+	<description> </description>
+	<command interpreter="bash">
+complete.sh "
+#for $i, $f in enumerate($patients)
+	"${f.id}"
+	#for $j, $g in enumerate($f.samples)
+		${g.sample}
+	#end for
+#end for
+" $out_file $out_file.files_path "$clonaltype"
+#if $gene_selection.source == "imgtdb"		
+	"${gene_selection.species}" "${gene_selection.locus}" $filterproductive ${clonality_method}
+#else
+	"custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method
+#end if
+	</command>
+	<inputs>
+		<repeat name="patients" title="Donor" min="1" default="1">
+			<repeat name="samples" title="Sample" min="1" default="1">
+					<param name="sample" type="data" label="Sample to Process" />
+			</repeat>
+			<param name="id" type="text" label="ID" />
+		</repeat>
+		<param name="clonaltype" type="select" label="Clonal Type Definition">
+			<option value="none">Don't remove duplicates based on clonaltype</option>
+			<option value="Top.V.Gene,CDR3.Seq">Top.V.Gene, CDR3 (AA)</option>
+			<option value="Top.V.Gene,CDR3.Seq.DNA">Top.V.Gene, CDR3 (nt)</option>
+			<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq">Top.V.Gene, Top.J.Gene, CDR3 (AA)</option>
+			<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.J.Gene, CDR3 (nt)</option>
+			<option value="Top.V.Gene,Top.D.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.D.Gene, Top.J.Gene, CDR3 (nt)</option>
+		</param>
+		
+		<conditional name="gene_selection" >
+			<param name="source" type="select" label="Order of V(D)J genes in graphs" help="" >
+					<option value="imgtdb" selected="true">IMGT-DB</option>
+					<option value="custom">User defined</option>
+			</param>
+			<when value="imgtdb">
+				<param name="species" type="select" label="Species">
+					<option value="Homo sapiens functional">Homo sapiens functional</option>
+					<option value="Homo sapiens">Homo sapiens</option>
+					<option value="Homo sapiens non-functional">Homo sapiens non-functional</option>
+					<option value="Bos taurus">Bos taurus</option>
+					<option value="Bos taurus functional">Bos taurus functional</option>
+					<option value="Bos taurus non-functional">Bos taurus non-functional</option>
+					<option value="Camelus dromedarius">Camelus dromedarius</option>
+					<option value="Camelus dromedarius functional">Camelus dromedarius functional</option>
+					<option value="Camelus dromedarius non-functional">Camelus dromedarius non-functional</option>
+					<option value="Canis lupus familiaris">Canis lupus familiaris</option>
+					<option value="Canis lupus familiaris functional">Canis lupus familiaris functional</option>
+					<option value="Canis lupus familiaris non-functional">Canis lupus familiaris non-functional</option>
+					<option value="Danio rerio">Danio rerio</option>
+					<option value="Danio rerio functional">Danio rerio functional</option>
+					<option value="Danio rerio non-functional">Danio rerio non-functional</option>
+					<option value="Macaca mulatta">Macaca mulatta</option>
+					<option value="Macaca mulatta functional">Macaca mulatta functional</option>
+					<option value="Macaca mulatta non-functional">Macaca mulatta non-functional</option>
+					<option value="Mus musculus">Mus musculus</option>
+					<option value="Mus musculus functional">Mus musculus functional</option>
+					<option value="Mus musculus non-functional">Mus musculus non-functional</option>
+					<option value="Mus spretus">Mus spretus</option>
+					<option value="Mus spretus functional">Mus spretus functional</option>
+					<option value="Mus spretus non-functional">Mus spretus non-functional</option>
+					<option value="Oncorhynchus mykiss">Oncorhynchus mykiss</option>
+					<option value="Oncorhynchus mykiss functional">Oncorhynchus mykiss functional</option>
+					<option value="Oncorhynchus mykiss non-functional">Oncorhynchus mykiss non-functional</option>
+					<option value="Ornithorhynchus anatinus">Ornithorhynchus anatinus</option>
+					<option value="Ornithorhynchus anatinus functional">Ornithorhynchus anatinus functional</option>
+					<option value="Ornithorhynchus anatinus non-functional">Ornithorhynchus anatinus non-functional</option>
+					<option value="Oryctolagus cuniculus">Oryctolagus cuniculus</option>
+					<option value="Oryctolagus cuniculus functional">Oryctolagus cuniculus functional</option>
+					<option value="Oryctolagus cuniculus non-functional">Oryctolagus cuniculus non-functional</option>
+					<option value="Rattus norvegicus">Rattus norvegicus</option>
+					<option value="Rattus norvegicus functional">Rattus norvegicus functional</option>
+					<option value="Rattus norvegicus non-functional">Rattus norvegicus non-functional</option>
+					<option value="Sus scrofa">Sus scrofa</option>
+					<option value="Sus scrofa functional">Sus scrofa functional</option>
+					<option value="Sus scrofa non-functional">Sus scrofa non-functional</option>
+				</param>
+			
+				<param name="locus" type="select" label="Locus">
+					<option value="TRA">TRA</option>
+					<option value="TRD">TRD</option>
+					<option value="TRG">TRG</option>
+					<option value="TRB">TRB</option>
+					<option value="IGH">IGH</option>
+					<option value="IGI">IGI</option>
+					<option value="IGK">IGK</option>
+					<option value="IGL">IGL</option>
+				</param>
+			</when>
+			<when value="custom">
+				<param name="species" type="hidden" value="custom" size="50" />
+				<param name="vgenes" type="text" label="V Genes, add the custom genes comma seperated, no spaces" size="100" />
+				<param name="dgenes" type="text" label="D Genes" size="100" />
+				<param name="jgenes" type="text" label="J Genes" size="100" />
+			</when>
+		</conditional>
+		
+		<param name="filterproductive" type="select" label="Remove the unproductive sequences from graphs ">
+			<option value="yes">Yes</option>
+			<option value="no">No</option>
+		</param>
+		
+		<param name="clonality_method" type="select" label="Old clonality algorithm or the newer R package">
+			<option value="old">Old</option>
+			<option value="boyd">R Package (needs 3 replicate minimum)</option>
+		</param>
+	</inputs>
+	<outputs>
+		<data format="html" name="out_file" />
+	</outputs>
+	<requirements>
+		<requirement type="package" version="0.6">igblastwrp</requirement>
+		<requirement type="package" version="3.3">weblogo</requirement>
+		<!--<requirement type="package" version="0.20">circostools</requirement>-->
+	</requirements>
+	<help>
+		The entire Immune Repertoire pipeline as a single tool, input several FASTA files or IMGT zip/txz files, give them an ID and it will BLAST/parse, merge and plot them.
+
+		.. class:: warningmark
+
+Custom gene ordering based on position on genome: 
+
+**Human**
+
+IGH::
+
+    V:
+    IGHV7-81,IGHV3-74,IGHV3-73,IGHV3-72,IGHV3-71,IGHV2-70,IGHV1-69,IGHV3-66,IGHV3-64,IGHV4-61,IGHV4-59,IGHV1-58,IGHV3-53,IGHV3-52,IGHV5-a,IGHV5-51,IGHV3-49,IGHV3-48,IGHV3-47,IGHV1-46,IGHV1-45,IGHV3-43,IGHV4-39,IGHV3-35,IGHV4-34,IGHV3-33,IGHV4-31,IGHV4-30-4,IGHV4-30-2,IGHV3-30-3,IGHV3-30,IGHV4-28,IGHV2-26,IGHV1-24,IGHV3-23,IGHV3-22,IGHV3-21,IGHV3-20,IGHV3-19,IGHV1-18,IGHV3-15,IGHV3-13,IGHV3-11,IGHV3-9,IGHV1-8,IGHV3-7,IGHV2-5,IGHV7-4-1,IGHV4-4,IGHV4-b,IGHV1-3,IGHV1-2,IGHV6-1
+    D:
+    IGHD1-1,IGHD2-2,IGHD3-3,IGHD6-6,IGHD1-7,IGHD2-8,IGHD3-9,IGHD3-10,IGHD4-11,IGHD5-12,IGHD6-13,IGHD1-14,IGHD2-15,IGHD3-16,IGHD4-17,IGHD5-18,IGHD6-19,IGHD1-20,IGHD2-21,IGHD3-22,IGHD4-23,IGHD5-24,IGHD6-25,IGHD1-26,IGHD7-27
+    J:
+    IGHJ1,IGHJ2,IGHJ3,IGHJ4,IGHJ5,IGHJ6
+
+
+IGK::
+
+    V:
+    IGKV3D-7,IGKV1D-8,IGKV1D-43,IGKV3D-11,IGKV1D-12,IGKV1D-13,IGKV3D-15,IGKV1D-16,IGKV1D-17,IGKV3D-20,IGKV2D-26,IGKV2D-28,IGKV2D-29,IGKV2D-30,IGKV1D-33,IGKV1D-39,IGKV2D-40,IGKV2-40,IGKV1-39,IGKV1-33,IGKV2-30,IGKV2-29,IGKV2-28,IGKV1-27,IGKV2-24,IGKV3-20,IGKV1-17,IGKV1-16,IGKV3-15,IGKV1-13,IGKV1-12,IGKV3-11,IGKV1-9,IGKV1-8,IGKV1-6,IGKV1-5,IGKV5-2,IGKV4-1
+    J:
+    IGKJ1,IGKJ2,IGKJ3,IGKJ4,IGKJ5
+
+
+IGL::
+
+    V:
+    IGLV4-69,IGLV8-61,IGLV4-60,IGLV6-57,IGLV5-52,IGLV1-51,IGLV9-49,IGLV1-47,IGLV7-46,IGLV5-45,IGLV1-44,IGLV7-43,IGLV1-41,IGLV1-40,IGLV5-39,IGLV5-37,IGLV1-36,IGLV3-27,IGLV3-25,IGLV2-23,IGLV3-22,IGLV3-21,IGLV3-19,IGLV2-18,IGLV3-16,IGLV2-14,IGLV3-12,IGLV2-11,IGLV3-10,IGLV3-9,IGLV2-8,IGLV4-3,IGLV3-1
+    J:
+    IGLJ1,IGLJ2,IGLJ3,IGLJ6,IGLJ7
+
+
+TRB::
+
+    V:
+    TRBV2,TRBV3-1,TRBV4-1,TRBV5-1,TRBV6-1,TRBV4-2,TRBV6-2,TRBV4-3,TRBV6-3,TRBV7-2,TRBV6-4,TRBV7-3,TRBV9,TRBV10-1,TRBV11-1,TRBV10-2,TRBV11-2,TRBV6-5,TRBV7-4,TRBV5-4,TRBV6-6,TRBV5-5,TRBV7-6,TRBV5-6,TRBV6-8,TRBV7-7,TRBV6-9,TRBV7-8,TRBV5-8,TRBV7-9,TRBV13,TRBV10-3,TRBV11-3,TRBV12-3,TRBV12-4,TRBV12-5,TRBV14,TRBV15,TRBV16,TRBV18,TRBV19,TRBV20-1,TRBV24-1,TRBV25-1,TRBV27,TRBV28,TRBV29-1,TRBV30
+    D:
+    TRBD1,TRBD2
+    J:
+    TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ1-6,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7
+
+
+TRA::
+
+    V:
+    TRAV1-1,TRAV1-2,TRAV2,TRAV3,TRAV4,TRAV5,TRAV6,TRAV7,TRAV8-1,TRAV9-1,TRAV10,TRAV12-1,TRAV8-2,TRAV8-3,TRAV13-1,TRAV12-2,TRAV8-4,TRAV13-2,TRAV14/DV4,TRAV9-2,TRAV12-3,TRAV8-6,TRAV16,TRAV17,TRAV18,TRAV19,TRAV20,TRAV21,TRAV22,TRAV23/DV6,TRAV24,TRAV25,TRAV26-1,TRAV27,TRAV29/DV5,TRAV30,TRAV26-2,TRAV34,TRAV35,TRAV36/DV7,TRAV38-1,TRAV38-2/DV8,TRAV39,TRAV40,TRAV41
+    J:
+    TRAJ57,TRAJ56,TRAJ54,TRAJ53,TRAJ52,TRAJ50,TRAJ49,TRAJ48,TRAJ47,TRAJ46,TRAJ45,TRAJ44,TRAJ43,TRAJ42,TRAJ41,TRAJ40,TRAJ39,TRAJ38,TRAJ37,TRAJ36,TRAJ34,TRAJ33,TRAJ32,TRAJ31,TRAJ30,TRAJ29,TRAJ28,TRAJ27,TRAJ26,TRAJ24,TRAJ23,TRAJ22,TRAJ21,TRAJ20,TRAJ18,TRAJ17,TRAJ16,TRAJ15,TRAJ14,TRAJ13,TRAJ12,TRAJ11,TRAJ10,TRAJ9,TRAJ8,TRAJ7,TRAJ6,TRAJ5,TRAJ4,TRAJ3
+
+
+TRG::
+
+    V:
+    TRGV9,TRGV8,TRGV5,TRGV4,TRGV3,TRGV2
+    J:
+    TRGJ2,TRGJP2,TRGJ1,TRGJP1
+
+
+TRD::
+
+    V:
+    TRDV1,TRDV2,TRDV3
+    D:
+    TRDD1,TRDD2,TRDD3
+    J:
+    TRDJ1,TRDJ4,TRDJ2,TRDJ3
+
+
+**Mouse**
+
+TRB::
+
+    V:
+    TRBV1,TRBV2,TRBV3,TRBV4,TRBV5,TRBV12-1,TRBV13-1,TRBV12-2,TRBV13-2,TRBV13-3,TRBV14,TRBV15,TRBV16,TRBV17,TRBV19,TRBV20,TRBV23,TRBV24,TRBV26,TRBV29,TRBV30,TRBV31
+    D:
+    TRBD1,TRBD2
+    J:
+    TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7
+    
+	</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experimental_design.xml	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,57 @@
+<tool id="experimentaldesign_igg" name="ExperimentalDesign" version="1.0">
+	<description> </description>
+	<command interpreter="bash">
+		experimental_design/experimental_design.sh 
+		#for $i, $f in enumerate($patients)
+            "$f.id"
+            #for $j, $g in enumerate($f.samples)
+            	${g.sample}
+            #end for
+		#end for
+		$out_file
+	</command>
+	<inputs>
+		<repeat name="patients" title="Patient" min="1" default="1">
+            <repeat name="samples" title="Sample" min="1" default="1">
+                <param name="sample" format="tabular" type="data" label="Sample to Process" />
+            </repeat>
+			<param name="id" type="text" label="ID" />
+		</repeat>
+	</inputs>
+	<outputs>
+		<data format="tabular" name="out_file"/>
+	</outputs>
+	<help>
+Takes the ARGalaxy proprietary format and merges several samples and/or patients together.
+	</help>
+ <citations>
+    <!-- Example of annotating a citation using a DOI. -->
+    <citation type="doi">10.1093/bioinformatics/btq281</citation>
+
+    <!-- Example of annotating a citation using a BibTex entry. -->
+    <citation type="bibtex">@ARTICLE{Kim07aninterior-point,
+    author = {Seung-jean Kim and Kwangmoo Koh and Michael Lustig and Stephen Boyd and Dimitry Gorinevsky},
+    title = {An interior-point method for large-scale l1-regularized logistic regression},
+    journal = {Journal of Machine Learning Research},
+    year = {2007},
+    volume = {8},
+    pages = {1519-1555}
+    }</citation>
+  </citations>
+  <tests>
+    <test>
+      <param name="input" value="1.bed"/>
+      <param name="column" value="1"/>
+      <param name="order" value="ASC"/>
+      <param name="style" value="num"/>
+      <output name="out_file1" file="sort1_num.bed"/>
+    </test>
+    <test>
+      <param name="input" value="7.bed"/>
+      <param name="column" value="1"/>
+      <param name="order" value="ASC"/>
+      <param name="style" value="alpha"/>
+      <output name="out_file1" file="sort1_alpha.bed"/>
+    </test>
+  </tests>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblastn.xml	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,107 @@
+<tool id="igblastn" name="igBLASTn" version="0.1.0">
+    <description> </description>
+    <command interpreter="bash">
+		igblast/igblast.sh $input $species $locus $output
+	</command>
+	<inputs>
+		<param name="input" type="data" format="fasta" label="Fasta file"/>
+		<param name="species" type="select" label="Species">
+			<option value="human">Homo sapiens</option>
+			<option value="mouse">Mus musculus</option>
+			<option value="rat">Rattus norvegicus</option>
+			<option value="rabbit">Oryctolagus cuniculus</option>
+			<option value="rhesus_monkey">Macaca mulatta</option>
+			<option value="BosTaurus">BosTaurus</option>
+			<option value="CamelusDromedarius">CamelusDromedarius</option>
+			<option value="CanisLupusFamiliaris">CanisLupusFamiliaris</option>
+			<option value="DanioRerio">DanioRerio</option>
+			<option value="MusSpretus">MusSpretus</option>
+			<option value="OncorhynchusMykiss">OncorhynchusMykiss</option>
+			<option value="SusScrofa">SusScrofa</option>
+			<option value="GallusGallus">GallusGallus</option>
+			<option value="AnasPlatyrhynchos">AnasPlatyrhynchos</option>
+				</param>
+		<param name="locus" type="select" label="Locus">
+			<option value="TRA">TRA</option>
+			<option value="TRB">TRB</option>
+			<option value="TRG">TRG</option>
+			<option value="TRD">TRD</option>
+			<option value="IGH">IGH</option>
+			<option value="IGK">IGK</option>
+			<option value="IGL">IGL</option>
+		</param>
+	</inputs>
+	<outputs>
+		<data name="output" format="tabular" type="data" label="${input.name}-igBLASTn aligned"/>
+	<!--<data name="log" format="text" label="log"/>-->
+	</outputs>
+	<requirements>
+		<requirement type="package" version="0.6">igblastwrp</requirement>
+	</requirements>
+	<help>
+============
+iReport
+============
+
+This tool uses the online igBLAST website hosted by NCBI to blast a FASTA file, it retrieves the result and generates a convenient tabular format for further processing.
+
+**NOTE**
+
+.. class:: warningmark
+
+- Everything goes through the servers of NCBI, so if you have sensitive data that that isn't allowed to leave your local network, this isn't the tool the use.
+
+**USAGE**
+
+.. class:: infomark
+
+- This tool uses a free service provided by NCBI, and although there doesn't seem to be any restrictions on usage, avoid unnecessary usage to lighten the load on NCBI's servers.
+
+
+**INPUT**
+
+This tool accepts FASTA files as input:
+
+::
+
+		>lcl|FLN1FA002RWEZA.1| 
+		ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
+		tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
+		gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
+		cag
+		>lcl|FLN1FA001BLION.1| 
+		aggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccat
+		taccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtg
+		cgagagtgggcagcagctggtctgatgcttttgattatctggggccaagggacaatggtcaccgtctcctcag
+
+**OUTPUT**
+
+The following data is used for ARGalaxy
+
++-----------------+----------------------------------------------+
+| Column name     | Column contents                              |
++-----------------+----------------------------------------------+
+| ID              | The Sequence ID provided by the sequencer.   |
++-----------------+----------------------------------------------+
+| VDJ Frame       | In-frame/Out-frame                           |
++-----------------+----------------------------------------------+
+| Top V Gene      | The best matching V gene found.              |
++-----------------+----------------------------------------------+
+| Top D Gene      | The best matching D gene found.              |
++-----------------+----------------------------------------------+
+| Top J Gene      | The best matching J gene found.              |
++-----------------+----------------------------------------------+
+| CDR3 Seq        | The CDR3 region.                             |
++-----------------+----------------------------------------------+
+| CDR3 Length     | The length of the CDR3 region.               |
++-----------------+----------------------------------------------+
+| CDR3 Seq DNA    | The CDR3 sequence region.                    |
++-----------------+----------------------------------------------+
+| CDR3 Length DNA | The length of the CDR3 sequence region.      |
++-----------------+----------------------------------------------+
+| Functionality   | If sequence is productive/unproductive       |
++-----------------+----------------------------------------------+
+
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igparse.xml	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,15 @@
+<tool id="igblastparser_igg" name="igBLASTparser" version="0.2.0">
+	<description> </description>
+	<command interpreter="perl">
+		igblastparser/igparse.pl $input 0 2>/dev/null | grep -v "D:" | cut -f2- > $output
+	</command>
+	<inputs>
+		<param name="input" type="data" format="text" label="igBLASTn report"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="tabular" label="${input.name}-parsed" />
+	</outputs>
+	<help>
+		Step 2 of the Immune Repertoire tools, extracts the relevant information needed from the reports generated by igblast (Step 1)
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/imgt_loader.xml	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,48 @@
+<tool id="imgt_loader_igg" name="IMGT Loader" version="1.0">
+	<description> </description>
+	<command interpreter="bash">
+		imgt_loader/imgt_loader.sh $in_file $out_file "tmp"
+	</command>
+	<inputs>
+        <param name="in_file" type="data" label="Archive with files" />
+	</inputs>
+	<outputs>
+		<data format="tabular" name="out_file" label="IMGT Loader on ${in_file.name}"/>
+	</outputs>
+	<help>
+**INPUT**
+
+This tool accepts an IMGT/HIGHV-QUEST ZIP file
+
+**OUTPUT**
+
+The following data is used for ARGalaxy
+
++-----------------+----------------------------------------------+
+| Column name     | Column contents                              |
++-----------------+----------------------------------------------+
+| ID              | The Sequence ID provided by the sequencer.   |
++-----------------+----------------------------------------------+
+| VDJ Frame       | In-frame/Out-frame                           |
++-----------------+----------------------------------------------+
+| Top V Gene      | The best matching V gene found.              |
++-----------------+----------------------------------------------+
+| Top D Gene      | The best matching D gene found.              |
++-----------------+----------------------------------------------+
+| Top J Gene      | The best matching J gene found.              |
++-----------------+----------------------------------------------+
+| CDR3 Seq        | The CDR3 region.                             |
++-----------------+----------------------------------------------+
+| CDR3 Length     | The length of the CDR3 region.               |
++-----------------+----------------------------------------------+
+| CDR3 Seq DNA    | The CDR3 sequence region.                    |
++-----------------+----------------------------------------------+
+| CDR3 Length DNA | The length of the CDR3 sequence region.      |
++-----------------+----------------------------------------------+
+| Functionality   | If sequence is productive/unproductive       |
++-----------------+----------------------------------------------+
+
+
+	</help>
+
+</tool>
--- a/imgt_loader/imgt_loader.r	Thu Dec 22 03:43:02 2016 -0500
+++ b/imgt_loader/imgt_loader.r	Fri Jan 27 03:44:18 2017 -0500
@@ -1,11 +1,13 @@
 args <- commandArgs(trailingOnly = TRUE)
 
 summ.file = args[1]
-aa.file = args[2]
-junction.file = args[3]
-out.file = args[4]
+sequences.file = args[2]
+aa.file = args[3]
+junction.file = args[4]
+out.file = args[5]
 
 summ = read.table(summ.file, sep="\t", header=T, quote="", fill=T)
+sequences = read.table(sequences.file, sep="\t", header=T, quote="", fill=T)
 aa = read.table(aa.file, sep="\t", header=T, quote="", fill=T)
 junction = read.table(junction.file, sep="\t", header=T, quote="", fill=T)
 
@@ -30,8 +32,8 @@
 out[,"CDR3.Seq"] = aa[,"CDR3.IMGT"]
 out[,"CDR3.Length"] = summ[,"CDR3.IMGT.length"]
 
-out[,"CDR3.Seq.DNA"] = junction[,"JUNCTION"]
-out[,"CDR3.Length.DNA"] = nchar(as.character(junction[,"JUNCTION"]))
+out[,"CDR3.Seq.DNA"] = sequences[,"CDR3.IMGT"]
+out[,"CDR3.Length.DNA"] = nchar(as.character(sequences[,"CDR3.IMGT"]))
 out[,"Strand"] = summ[,"Orientation"]
 out[,"CDR3.Found.How"] = "a"
 
--- a/imgt_loader/imgt_loader.sh	Thu Dec 22 03:43:02 2016 -0500
+++ b/imgt_loader/imgt_loader.sh	Fri Jan 27 03:44:18 2017 -0500
@@ -61,9 +61,10 @@
 	tar xJf $input -C $PWD/$name/files
 fi
 find $PWD/$name/files -iname "1_*" -exec cat {} + > $PWD/$name/summ.txt
+find $PWD/$name/files -iname "3_*" -exec cat {} + > $PWD/$name/sequences.txt
 find $PWD/$name/files -iname "5_*" -exec cat {} + > $PWD/$name/aa.txt
 find $PWD/$name/files -iname "6_*" -exec cat {} + > $PWD/$name/junction.txt
 
 #python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output
 
-Rscript --verbose $dir/imgt_loader.r $PWD/$name/summ.txt $PWD/$name/aa.txt $PWD/$name/junction.txt $output 2>&1
+Rscript --verbose $dir/imgt_loader.r $PWD/$name/summ.txt $PWD/$name/sequences.txt $PWD/$name/aa.txt $PWD/$name/junction.txt $output 2>&1
--- a/report_clonality/RScript.r	Thu Dec 22 03:43:02 2016 -0500
+++ b/report_clonality/RScript.r	Fri Jan 27 03:44:18 2017 -0500
@@ -596,19 +596,22 @@
       
       res[is.na(res)] = 0
       
-      write.table(res, file=paste("raw_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)
+      write.table(res, file=paste("raw_clonality_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=F)
+      write.table(as.matrix(res[,2:ncol(res)]), file=paste("raw_clonality2_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=F)
+      
+      res = read.table(paste("raw_clonality_", sample_id, ".txt", sep=""), header=F, sep="\t", quote="", stringsAsFactors=F, fill=T, comment.char="")
       
       infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))
       
       #print(infer.result)
       
-      write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)
+      write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=F)
       
       res$type = rowSums(res[,2:ncol(res)])
       
       coincidence.table = data.frame(table(res$type))
       colnames(coincidence.table) = c("Coincidence Type",  "Raw Coincidence Freq")
-      write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
+      write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
     }
   } else if(clonality_method == "old") {
     clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/report_clonality/RScript.r.old	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,877 @@
+# ---------------------- load/install packages ----------------------
+
+if (!("gridExtra" %in% rownames(installed.packages()))) {
+  install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") 
+}
+library(gridExtra)
+if (!("ggplot2" %in% rownames(installed.packages()))) {
+  install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") 
+}
+library(ggplot2)
+if (!("plyr" %in% rownames(installed.packages()))) {
+  install.packages("plyr", repos="http://cran.xl-mirror.nl/") 
+}
+library(plyr)
+
+if (!("data.table" %in% rownames(installed.packages()))) {
+  install.packages("data.table", repos="http://cran.xl-mirror.nl/") 
+}
+library(data.table)
+
+if (!("reshape2" %in% rownames(installed.packages()))) {
+  install.packages("reshape2", repos="http://cran.xl-mirror.nl/")
+}
+library(reshape2)
+
+if (!("lymphclon" %in% rownames(installed.packages()))) {
+  install.packages("lymphclon", repos="http://cran.xl-mirror.nl/")
+}
+library(lymphclon)
+
+# ---------------------- parameters ----------------------
+
+args <- commandArgs(trailingOnly = TRUE)
+
+infile = args[1] #path to input file
+outfile = args[2] #path to output file
+outdir = args[3] #path to output folder (html/images/data)
+clonaltype = args[4] #clonaltype definition, or 'none' for no unique filtering
+ct = unlist(strsplit(clonaltype, ","))
+species = args[5] #human or mouse
+locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD
+filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)
+clonality_method = args[8]
+
+
+# ---------------------- Data preperation ----------------------
+
+print("Report Clonality - Data preperation")
+
+inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="", stringsAsFactors=F)
+
+print(paste("nrows: ", nrow(inputdata)))
+
+setwd(outdir)
+
+# remove weird rows
+inputdata = inputdata[inputdata$Sample != "",]
+
+print(paste("nrows: ", nrow(inputdata)))
+
+#remove the allele from the V,D and J genes
+inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene)
+inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene)
+inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene)
+
+print(paste("nrows: ", nrow(inputdata)))
+
+#filter uniques
+inputdata.removed = inputdata[NULL,]
+
+print(paste("nrows: ", nrow(inputdata)))
+
+inputdata$clonaltype = 1:nrow(inputdata)
+
+#keep track of the count of sequences in samples or samples/replicates for the front page overview
+input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")])
+input.rep.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample", "Replicate")])
+
+PRODF = inputdata
+UNPROD = inputdata
+if(filterproductive){
+  if("Functionality" %in% colnames(inputdata)) { # "Functionality" is an IMGT column
+    #PRODF = inputdata[inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)", ]
+    PRODF = inputdata[inputdata$Functionality %in% c("productive (see comment)","productive"),]
+    
+    PRODF.count = data.frame(data.table(PRODF)[, list(count=.N), by=c("Sample")])
+    
+    UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ]
+  } else {
+    PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ]
+    UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ]
+  }
+}
+
+for(i in 1:nrow(UNPROD)){
+    if(!is.numeric(UNPROD[i,"CDR3.Length"])){
+        UNPROD[i,"CDR3.Length"] = 0
+    }
+}
+
+prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")])
+prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")])
+
+unprod.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive=.N), by=c("Sample")])
+unprod.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive=.N), by=c("Sample", "Replicate")])
+
+clonalityFrame = PRODF
+
+#remove duplicates based on the clonaltype
+if(clonaltype != "none"){
+  clonaltype = paste(clonaltype, ",Sample", sep="") #add sample column to clonaltype, unique within samples
+  PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":"))
+  PRODF = PRODF[!duplicated(PRODF$clonaltype), ]
+    
+  UNPROD$clonaltype = do.call(paste, c(UNPROD[unlist(strsplit(clonaltype, ","))], sep = ":"))
+  UNPROD = UNPROD[!duplicated(UNPROD$clonaltype), ]
+  
+  #again for clonalityFrame but with sample+replicate
+  clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":"))
+  clonalityFrame$clonality_clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(paste(clonaltype, ",Replicate", sep=""), ","))], sep = ":"))
+  clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$clonality_clonaltype), ]
+}
+
+print("SAMPLE TABLE:")
+print(table(PRODF$Sample))
+
+prod.unique.sample.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample")])
+prod.unique.rep.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample", "Replicate")])
+
+unprod.unique.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample")])
+unprod.unique.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample", "Replicate")])
+
+PRODF$freq = 1
+
+if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*"
+  PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID)
+  PRODF$freq = gsub("_.*", "", PRODF$freq)
+  PRODF$freq = as.numeric(PRODF$freq)
+  if(any(is.na(PRODF$freq))){ #if there was an "_" in the ID, but not the frequency, go back to frequency of 1 for every sequence
+    PRODF$freq = 1
+  }
+}
+
+#make a names list with sample -> color
+naive.colors = c('blue4', 'darkred', 'olivedrab3', 'red', 'gray74', 'darkviolet', 'lightblue1', 'gold', 'chartreuse2', 'pink', 'Paleturquoise3', 'Chocolate1', 'Yellow', 'Deeppink3', 'Mediumorchid1', 'Darkgreen', 'Blue', 'Gray36', 'Hotpink', 'Yellow4')
+unique.samples = unique(PRODF$Sample)
+
+if(length(unique.samples) <= length(naive.colors)){
+	sample.colors = naive.colors[1:length(unique.samples)]
+} else {
+	sample.colors = rainbow(length(unique.samples))
+}
+
+names(sample.colors) = unique.samples
+
+print("Sample.colors")
+print(sample.colors)
+
+
+#write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive
+write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T)
+write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T)
+write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T)
+
+#write the samples to a file
+sampleFile <- file("samples.txt")
+un = unique(inputdata$Sample)
+un = paste(un, sep="\n")
+writeLines(un, sampleFile)
+close(sampleFile)
+
+# ---------------------- Counting the productive/unproductive and unique sequences ----------------------
+
+print("Report Clonality - counting productive/unproductive/unique")
+
+#create the table on the overview page with the productive/unique counts per sample/replicate
+#first for sample
+sample.count = merge(input.sample.count, prod.sample.count, by="Sample", all.x=T)
+sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100)
+sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T)
+sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100)
+
+sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T)
+sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100)
+sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T)
+sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100)
+
+#then sample/replicate
+rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T)
+rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100)
+rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T)
+rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100)
+
+rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate"), all.x=T)
+rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100)
+rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T)
+rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100)
+
+rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_")
+rep.count = rep.count[,names(rep.count) != "Replicate"]
+
+count = rbind(sample.count, rep.count)
+
+
+
+write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F)
+
+# ---------------------- V+J+CDR3 sequence count ----------------------
+
+VJCDR3.count = data.frame(table(clonalityFrame$Top.V.Gene, clonalityFrame$Top.J.Gene, clonalityFrame$CDR3.Seq.DNA))
+names(VJCDR3.count) = c("Top.V.Gene", "Top.J.Gene", "CDR3.Seq.DNA", "Count")
+
+VJCDR3.count = VJCDR3.count[VJCDR3.count$Count > 0,]
+VJCDR3.count = VJCDR3.count[order(-VJCDR3.count$Count),]
+
+write.table(x=VJCDR3.count, file="VJCDR3_count.txt", sep="\t",quote=F,row.names=F,col.names=T)
+
+# ---------------------- Frequency calculation for V, D and J ----------------------
+
+print("Report Clonality - frequency calculation V, D and J")
+
+PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")])
+Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length)))
+PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
+PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total))
+
+PRODFD = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.D.Gene")])
+Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length)))
+PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
+PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total))
+
+PRODFJ = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.J.Gene")])
+Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length)))
+PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
+PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
+
+# ---------------------- Setting up the gene names for the different species/loci ----------------------
+
+print("Report Clonality - getting genes for species/loci")
+
+Vchain = ""
+Dchain = ""
+Jchain = ""
+
+if(species == "custom"){
+	print("Custom genes: ")
+	splt = unlist(strsplit(locus, ";"))
+	print(paste("V:", splt[1]))
+	print(paste("D:", splt[2]))
+	print(paste("J:", splt[3]))
+	
+	Vchain = unlist(strsplit(splt[1], ","))
+	Vchain = data.frame(v.name = Vchain, chr.orderV = 1:length(Vchain))
+	
+	Dchain = unlist(strsplit(splt[2], ","))
+	if(length(Dchain) > 0){
+		Dchain = data.frame(v.name = Dchain, chr.orderD = 1:length(Dchain))
+	} else {
+		Dchain = data.frame(v.name = character(0), chr.orderD = numeric(0))
+	}
+	
+	Jchain = unlist(strsplit(splt[3], ","))
+	Jchain = data.frame(v.name = Jchain, chr.orderJ = 1:length(Jchain))
+
+} else {
+	genes = read.table("genes.txt", sep="\t", header=TRUE, fill=T, comment.char="")
+
+	Vchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "V",c("IMGT.GENE.DB", "chr.order")]
+	colnames(Vchain) = c("v.name", "chr.orderV")
+	Dchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "D",c("IMGT.GENE.DB", "chr.order")]
+	colnames(Dchain) = c("v.name", "chr.orderD")
+	Jchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "J",c("IMGT.GENE.DB", "chr.order")]
+	colnames(Jchain) = c("v.name", "chr.orderJ")
+}
+useD = TRUE
+if(nrow(Dchain) == 0){
+  useD = FALSE
+  cat("No D Genes in this species/locus")
+}
+print(paste(nrow(Vchain), "genes in V"))
+print(paste(nrow(Dchain), "genes in D"))
+print(paste(nrow(Jchain), "genes in J"))
+
+# ---------------------- merge with the frequency count ----------------------
+
+PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
+
+PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE)
+
+PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE)
+
+# ---------------------- Create the V, D and J frequency plots and write the data.frame for every plot to a file ----------------------
+
+print("Report Clonality - V, D and J frequency plots")
+
+pV = ggplot(PRODFV)
+pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
+pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors)
+pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
+
+png("VPlot.png",width = 1280, height = 720)
+pV
+dev.off();
+
+if(useD){
+  pD = ggplot(PRODFD)
+  pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
+  pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors)
+  pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+  write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
+  
+  png("DPlot.png",width = 800, height = 600)
+  print(pD)
+  dev.off();
+}
+
+pJ = ggplot(PRODFJ)
+pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
+pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors)
+pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
+
+png("JPlot.png",width = 800, height = 600)
+pJ
+dev.off();
+
+# ---------------------- Now the frequency plots of the V, D and J families ----------------------
+
+print("Report Clonality - V, D and J family plots")
+
+VGenes = PRODF[,c("Sample", "Top.V.Gene")]
+VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene)
+VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")])
+TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample])
+VGenes = merge(VGenes, TotalPerSample, by="Sample")
+VGenes$Frequency = VGenes$Count * 100 / VGenes$total
+VPlot = ggplot(VGenes)
+VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+  ggtitle("Distribution of V gene families") + 
+  ylab("Percentage of sequences") +
+  scale_fill_manual(values=sample.colors) +
+  theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+png("VFPlot.png")
+VPlot
+dev.off();
+write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
+
+if(useD){
+  DGenes = PRODF[,c("Sample", "Top.D.Gene")]
+  DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene)
+  DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")])
+  TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample])
+  DGenes = merge(DGenes, TotalPerSample, by="Sample")
+  DGenes$Frequency = DGenes$Count * 100 / DGenes$total
+  DPlot = ggplot(DGenes)
+  DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+    ggtitle("Distribution of D gene families") + 
+    ylab("Percentage of sequences") + 
+    scale_fill_manual(values=sample.colors) +
+    theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+  png("DFPlot.png")
+  print(DPlot)
+  dev.off();
+  write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
+}
+
+# ---------------------- Plotting the cdr3 length ----------------------
+
+print("Report Clonality - CDR3 length plot")
+
+CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length")])
+TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample])
+CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample")
+CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total
+CDR3LengthPlot = ggplot(CDR3Length)
+CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = factor(reorder(CDR3.Length, as.numeric(CDR3.Length))), y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+  ggtitle("Length distribution of CDR3") + 
+  xlab("CDR3 Length") + 
+  ylab("Percentage of sequences") +
+  scale_fill_manual(values=sample.colors) +
+  theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+png("CDR3LengthPlot.png",width = 1280, height = 720)
+CDR3LengthPlot
+dev.off()
+write.table(x=CDR3Length, file="CDR3LengthPlot.csv", sep=",",quote=F,row.names=F,col.names=T)
+
+# ---------------------- Plot the heatmaps ----------------------
+
+#get the reverse order for the V and D genes
+revVchain = Vchain
+revDchain = Dchain
+revVchain$chr.orderV = rev(revVchain$chr.orderV)
+revDchain$chr.orderD = rev(revDchain$chr.orderD)
+
+if(useD){
+  print("Report Clonality - Heatmaps VD")
+  plotVD <- function(dat){
+    if(length(dat[,1]) == 0){
+      return()
+    }
+    
+    img = ggplot() + 
+      geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + 
+      theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+      scale_fill_gradient(low="gold", high="blue", na.value="white") + 
+      ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + 
+      xlab("D genes") + 
+      ylab("V Genes") +
+      theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
+    
+    png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name)))
+    print(img)
+    dev.off()
+    write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
+  }
+  
+  VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")])
+  
+  VandDCount$l = log(VandDCount$Length)
+  maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")])
+  VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T)
+  VandDCount$relLength = VandDCount$l / VandDCount$max
+  check = is.nan(VandDCount$relLength)
+  if(any(check)){
+	VandDCount[check,"relLength"] = 0
+  }
+  
+  cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name)
+  
+  completeVD = merge(VandDCount, cartegianProductVD, by.x=c("Top.V.Gene", "Top.D.Gene"), by.y=c("Top.V.Gene", "Top.D.Gene"), all=TRUE)
+ 
+  completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE)
+ 
+  completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
+  
+  fltr = is.nan(completeVD$relLength)
+  if(all(fltr)){
+	  completeVD[fltr,"relLength"] = 0
+  }
+  
+  VDList = split(completeVD, f=completeVD[,"Sample"])
+  lapply(VDList, FUN=plotVD)
+}
+
+print("Report Clonality - Heatmaps VJ")
+
+plotVJ <- function(dat){
+  if(length(dat[,1]) == 0){
+    return()
+  }
+  cat(paste(unique(dat[3])[1,1]))
+  img = ggplot() + 
+    geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + 
+    theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+    scale_fill_gradient(low="gold", high="blue", na.value="white") + 
+    ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + 
+    xlab("J genes") + 
+    ylab("V Genes") +
+    theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
+  
+  png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name)))
+  print(img)
+  dev.off()
+  write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
+}
+
+VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")])
+
+VandJCount$l = log(VandJCount$Length)
+maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")])
+VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T)
+VandJCount$relLength = VandJCount$l / VandJCount$max
+
+check = is.nan(VandJCount$relLength)
+if(any(check)){
+	VandJCount[check,"relLength"] = 0
+}
+
+cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name)
+
+completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE)
+completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE)
+completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE)
+
+fltr = is.nan(completeVJ$relLength)
+if(any(fltr)){
+	completeVJ[fltr,"relLength"] = 1
+}
+
+VJList = split(completeVJ, f=completeVJ[,"Sample"])
+lapply(VJList, FUN=plotVJ)
+
+
+
+if(useD){
+  print("Report Clonality - Heatmaps DJ")	
+  plotDJ <- function(dat){
+    if(length(dat[,1]) == 0){
+      return()
+    }
+    img = ggplot() + 
+      geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=relLength)) + 
+      theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
+      scale_fill_gradient(low="gold", high="blue", na.value="white") + 
+      ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + 
+      xlab("J genes") + 
+      ylab("D Genes") +
+      theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
+    
+    png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name)))
+    print(img)
+    dev.off()
+    write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
+  }
+  
+  
+  DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")])
+  
+  DandJCount$l = log(DandJCount$Length)
+  maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")])
+  DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T)
+  DandJCount$relLength = DandJCount$l / DandJCount$max
+  
+  check = is.nan(DandJCount$relLength)
+  if(any(check)){
+    DandJCount[check,"relLength"] = 0
+  }
+  
+  cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name)
+  
+  completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE)
+  completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
+  completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE)
+  
+  fltr = is.nan(completeDJ$relLength)
+  if(any(fltr)){
+	  completeDJ[fltr, "relLength"] = 1
+  }
+  
+  DJList = split(completeDJ, f=completeDJ[,"Sample"])
+  lapply(DJList, FUN=plotDJ)
+}
+
+
+# ---------------------- output tables for the circos plots ----------------------
+
+print("Report Clonality - Circos data")
+
+for(smpl in unique(PRODF$Sample)){
+	PRODF.sample = PRODF[PRODF$Sample == smpl,]
+	
+	fltr = PRODF.sample$Top.V.Gene == ""
+	if(any(fltr, na.rm=T)){
+	  PRODF.sample[fltr, "Top.V.Gene"] = "NA"
+	}
+	
+	fltr = PRODF.sample$Top.D.Gene == ""
+	if(any(fltr, na.rm=T)){
+	  PRODF.sample[fltr, "Top.D.Gene"] = "NA"
+	}
+
+	fltr = PRODF.sample$Top.J.Gene == ""
+	if(any(fltr, na.rm=T)){
+	  PRODF.sample[fltr, "Top.J.Gene"] = "NA"
+	}
+	
+	v.d = table(PRODF.sample$Top.V.Gene, PRODF.sample$Top.D.Gene)
+	v.j = table(PRODF.sample$Top.V.Gene, PRODF.sample$Top.J.Gene)
+	d.j = table(PRODF.sample$Top.D.Gene, PRODF.sample$Top.J.Gene)
+
+	write.table(v.d, file=paste(smpl, "_VD_circos.txt", sep=""), sep="\t", quote=F, row.names=T, col.names=NA)
+	write.table(v.j, file=paste(smpl, "_VJ_circos.txt", sep=""), sep="\t", quote=F, row.names=T, col.names=NA)
+	write.table(d.j, file=paste(smpl, "_DJ_circos.txt", sep=""), sep="\t", quote=F, row.names=T, col.names=NA)
+}
+
+# ---------------------- calculating the clonality score ----------------------
+
+if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available
+{
+  print("Report Clonality - Clonality")
+  write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
+  if(clonality_method == "boyd"){
+    samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)
+   
+    for (sample in samples){
+      res = data.frame(paste=character(0))
+      sample_id = unique(sample$Sample)[[1]]
+      for(replicate in unique(sample$Replicate)){
+        tmp = sample[sample$Replicate == replicate,]
+        clone_table = data.frame(table(tmp$clonaltype))
+        clone_col_name = paste("V", replicate, sep="")
+        colnames(clone_table) = c("paste", clone_col_name)
+        res = merge(res, clone_table, by="paste", all=T)
+      }
+      
+      res[is.na(res)] = 0      
+      infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))
+      
+      #print(infer.result)
+      
+      write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)
+      
+      res$type = rowSums(res[,2:ncol(res)])
+      
+      coincidence.table = data.frame(table(res$type))
+      colnames(coincidence.table) = c("Coincidence Type",  "Raw Coincidence Freq")
+      write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
+    }
+  } else {
+    clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
+    
+    #write files for every coincidence group of >1
+    samples = unique(clonalFreq$Sample)
+    for(sample in samples){
+		clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,]
+		if(max(clonalFreqSample$Type) > 1){
+			for(i in 2:max(clonalFreqSample$Type)){
+				clonalFreqSampleType = clonalFreqSample[clonalFreqSample$Type == i,]
+				clonalityFrame.sub = clonalityFrame[clonalityFrame$clonaltype %in% clonalFreqSampleType$clonaltype,]
+				clonalityFrame.sub = clonalityFrame.sub[order(clonalityFrame.sub$clonaltype),]
+				write.table(clonalityFrame.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+			}
+		}
+	}
+	
+    clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
+    clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
+    clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
+    clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
+    
+    ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15')
+    tcct = textConnection(ct)
+    CT  = read.table(tcct, sep="\t", header=TRUE)
+    close(tcct)
+    clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)
+    clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight
+    
+    ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")])
+    ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])
+    clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)
+    ReplicateReads$Reads = as.numeric(ReplicateReads$Reads)
+    ReplicateReads$squared = as.numeric(ReplicateReads$Reads * ReplicateReads$Reads)
+    
+    ReplicatePrint <- function(dat){
+      write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+    }
+    
+    ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
+    lapply(ReplicateSplit, FUN=ReplicatePrint)
+    
+    ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])
+    clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)
+    
+    ReplicateSumPrint <- function(dat){
+      write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+    }
+    
+    ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
+    lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)
+    
+    clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])
+    clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)
+    clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow
+    clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)
+    clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)
+    
+    ClonalityScorePrint <- function(dat){
+      write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+    }
+    
+    clonalityScore = clonalFreqCount[c("Sample", "Result")]
+    clonalityScore = unique(clonalityScore)
+    
+    clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])
+    lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)
+    
+    clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]
+    
+    
+    
+    ClonalityOverviewPrint <- function(dat){
+	  dat = dat[order(dat[,2]),]
+      write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+    }
+    
+    clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
+    lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
+  }
+}
+
+bak = PRODF
+
+imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
+if(all(imgtcolumns %in% colnames(inputdata)))
+{
+  print("found IMGT columns, running junction analysis")
+  
+  if(locus %in% c("IGK","IGL", "TRA", "TRG")){
+	  print("VJ recombination, no filtering on absent D")
+  } else {
+	  print("VDJ recombination, using N column for junction analysis")
+	  fltr = nchar(PRODF$Top.D.Gene) < 4
+	  print(paste("Removing", sum(fltr), "sequences without a identified D"))
+	  PRODF = PRODF[!fltr,]
+  }
+  
+  
+  #ensure certain columns are in the data (files generated with older versions of IMGT Loader)
+  col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb")
+  for(col.check in col.checks){
+	  if(!(col.check %in% names(PRODF))){
+		  print(paste(col.check, "not found adding new column"))
+		  if(nrow(PRODF) > 0){ #because R is anoying...
+			PRODF[,col.check] = 0
+		  } else {
+			PRODF = cbind(PRODF, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0)))
+		  }
+		  if(nrow(UNPROD) > 0){
+			UNPROD[,col.check] = 0
+		  } else {
+			UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0)))
+		  }
+	  }
+  }
+  
+  num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) }
+  
+  newData = data.frame(data.table(PRODF)[,list(unique=.N, 
+                                               VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                               P1=mean(.SD$P3V.nt.nb, na.rm=T),
+                                               N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
+                                               P2=mean(.SD$P5D.nt.nb, na.rm=T),
+                                               DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                               DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                               P3=mean(.SD$P3D.nt.nb, na.rm=T),
+                                               N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                               P4=mean(.SD$P5J.nt.nb, na.rm=T),
+                                               DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+                                               Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+                                               Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                               Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+                                               Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+                                         by=c("Sample")])
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+  newData = data.frame(data.table(PRODF)[,list(unique=.N, 
+                                               VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                               P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                               N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
+                                               P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                               DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                               DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                               P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                               N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                               P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                               DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+											   Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+											   Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+											   Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+											   Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+                                         by=c("Sample")])
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+  newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
+                                                VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                                P1=mean(.SD$P3V.nt.nb, na.rm=T),
+                                                N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
+                                                P2=mean(.SD$P5D.nt.nb, na.rm=T),
+                                                DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                                DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                                P3=mean(.SD$P3D.nt.nb, na.rm=T),
+                                                N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                                P4=mean(.SD$P5J.nt.nb, na.rm=T),
+                                                DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+                                                Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+                                                Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                                Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+                                                Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+                                          by=c("Sample")])
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+  
+    newData = data.frame(data.table(UNPROD)[,list(unique=.N, 
+                                                VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
+                                                P1=num_median(.SD$P3V.nt.nb, na.rm=T),
+                                                N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
+                                                P2=num_median(.SD$P5D.nt.nb, na.rm=T),
+                                                DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
+                                                DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T),
+                                                P3=num_median(.SD$P3D.nt.nb, na.rm=T),
+                                                N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                                P4=num_median(.SD$P5J.nt.nb, na.rm=T),
+                                                DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
+                                                Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
+                                                Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
+                                                Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+                                                Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+															by=c("Sample")])
+															
+  newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
+  write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+}
+
+PRODF = bak
+
+
+# ---------------------- D reading frame ----------------------
+
+D.REGION.reading.frame = PRODF[,c("Sample", "D.REGION.reading.frame")]
+
+chck = is.na(D.REGION.reading.frame$D.REGION.reading.frame)
+if(any(chck)){
+	D.REGION.reading.frame[chck,"D.REGION.reading.frame"] = "No D"
+}
+
+D.REGION.reading.frame = data.frame(data.table(D.REGION.reading.frame)[, list(Freq=.N), by=c("Sample", "D.REGION.reading.frame")])
+
+write.table(D.REGION.reading.frame, "DReadingFrame.csv" , sep="\t",quote=F,row.names=F,col.names=T)
+
+D.REGION.reading.frame = ggplot(D.REGION.reading.frame)
+D.REGION.reading.frame = D.REGION.reading.frame + geom_bar(aes( x = D.REGION.reading.frame, y = Freq, fill=Sample), stat='identity', position='dodge' ) + ggtitle("D reading frame") + xlab("Frequency") + ylab("Frame")
+D.REGION.reading.frame = D.REGION.reading.frame + scale_fill_manual(values=sample.colors)
+D.REGION.reading.frame = D.REGION.reading.frame + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+
+png("DReadingFrame.png")
+D.REGION.reading.frame
+dev.off()
+
+
+
+
+# ---------------------- AA composition in CDR3 ----------------------
+
+AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]
+
+TotalPerSample = data.frame(data.table(AACDR3)[, list(total=sum(nchar(as.character(.SD$CDR3.Seq)))), by=Sample])
+
+AAfreq = list()
+
+for(i in 1:nrow(TotalPerSample)){
+	sample = TotalPerSample$Sample[i]
+  AAfreq[[i]] = data.frame(table(unlist(strsplit(as.character(AACDR3[AACDR3$Sample == sample,c("CDR3.Seq")]), ""))))
+  AAfreq[[i]]$Sample = sample
+}
+
+AAfreq = ldply(AAfreq, data.frame)
+AAfreq = merge(AAfreq, TotalPerSample, by="Sample", all.x = T)
+AAfreq$freq_perc = as.numeric(AAfreq$Freq / AAfreq$total * 100)
+
+
+AAorder = read.table(sep="\t", header=TRUE, text="order.aa\tAA\n1\tR\n2\tK\n3\tN\n4\tD\n5\tQ\n6\tE\n7\tH\n8\tP\n9\tY\n10\tW\n11\tS\n12\tT\n13\tG\n14\tA\n15\tM\n16\tC\n17\tF\n18\tL\n19\tV\n20\tI")
+AAfreq = merge(AAfreq, AAorder, by.x='Var1', by.y='AA', all.x=TRUE)
+
+AAfreq = AAfreq[!is.na(AAfreq$order.aa),]
+
+AAfreqplot = ggplot(AAfreq)
+AAfreqplot = AAfreqplot + geom_bar(aes( x=factor(reorder(Var1, order.aa)), y = freq_perc, fill = Sample), stat='identity', position='dodge' )
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
+AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
+AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors)
+AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+
+png("AAComposition.png",width = 1280, height = 720)
+AAfreqplot
+dev.off()
+write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T)
+
+# ---------------------- AA median CDR3 length ----------------------
+
+median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")])
+write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/report_clonality/r_wrapper.sh.old	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,315 @@
+#!/bin/bash
+
+inputFile=$1
+outputDir=$3
+outputFile=$3/index.html #$2
+clonalType=$4
+species=$5
+locus=$6
+filterproductive=$7
+clonality_method=$8
+
+dir="$(cd "$(dirname "$0")" && pwd)"
+useD="false"
+if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then
+	echo "species D region in reference db"
+	useD="true"
+fi
+echo "$species"
+if [[ "$species" == *"custom"* ]] ; then
+	loci=(${locus//;/ })
+	useD="true"
+	echo "${loci[@]}"
+	if [[ "${#loci[@]}" -eq "2" ]] ; then
+		useD="false"
+	fi
+fi
+mkdir $3
+cp $dir/genes.txt $outputDir
+Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1
+cp $dir/tabber.js $outputDir
+cp $dir/style.css $outputDir
+cp $dir/script.js $outputDir
+cp $dir/jquery-1.11.0.min.js $outputDir
+cp $dir/pure-min.css $outputDir
+samples=`cat $outputDir/samples.txt`
+
+echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $2
+echo "<table border = 1>" >> $2
+echo "<thead><tr><th>Sample/Replicate</th><th>All</th><th>Productive</th><th>Unique Productive</th><th>Unproductive</th><th>Unique Unproductive</th></tr></thead>" >> $2
+while IFS=, read sample all productive perc_prod productive_unique perc_prod_un unproductive perc_unprod unproductive_unique perc_unprod_un
+	do
+		echo "<tr><td>$sample</td>" >> $2
+		echo "<td>$all</td>" >> $2
+		echo "<td>$productive (${perc_prod}%)</td>" >> $2
+		echo "<td>$productive_unique (${perc_prod_un}%)</td>" >> $2
+		echo "<td>$unproductive (${perc_unprod}%)</td>" >> $2
+		echo "<td>$unproductive_unique (${perc_unprod_un}%)</td></tr>" >> $2
+done < $outputDir/productive_counting.txt
+echo "</table border></center></html>" >> $2
+
+echo "<html><head><title>Report on:" >> $outputFile
+
+mkdir $outputDir/circos
+cp $dir/circos/* $outputDir/circos/
+#CIRCOSTOOLS="/data/galaxy/galaxy-dist/toolsheddependencies/circos/0.64/saskia-hiltemann/cg_circos_plots/bbfdd52d64fd/circos-tools-0.21/tools"
+#CIRCOSDIR="/data/galaxy/galaxy-dist/toolsheddependencies/circos/0.64/saskia-hiltemann/cg_circos_plots/bbfdd52d64fd/bin/"
+
+#CIRCOSTOOLS="/home/galaxy/circos/circos-tools-0.22/tools"
+#CIRCOSDIR="/home/galaxy/Anaconda3/bin"
+
+USECIRCOS="no"
+if [ -d "$CIRCOSDIR" ]; then
+	USECIRCOS="yes"
+else
+	if [ -d "/data/galaxy/galaxy-dist/toolsheddependencies/circos/0.64/saskia-hiltemann/cg_circos_plots/bbfdd52d64fd/bin/" ]; then #hopefully temporary fix
+		USECIRCOS="yes"
+		CIRCOSTOOLS="/data/galaxy/galaxy-dist/toolsheddependencies/circos/0.64/saskia-hiltemann/cg_circos_plots/bbfdd52d64fd/circos-tools-0.21/tools"
+		CIRCOSDIR="/data/galaxy/galaxy-dist/toolsheddependencies/circos/0.64/saskia-hiltemann/cg_circos_plots/bbfdd52d64fd/bin/"
+	fi
+	
+	if [ -d "/home/galaxy/Anaconda3/bin" ]; then #hopefully temporary fix
+		USECIRCOS="yes"
+		CIRCOSTOOLS="/home/galaxy/circos/circos-tools-0.22/tools"
+		CIRCOSDIR="/home/galaxy/Anaconda3/bin"
+	fi
+fi
+
+echo "Using Circos: $USECIRCOS"
+sed -i "s%DATA_DIR%$outputDir/circos%" $outputDir/circos/circos.conf
+for sample in $samples; do #output the samples to a file and create the circos plots with the R script output
+	echo " $sample" >> $outputFile
+	
+	if [[ "$USECIRCOS" != "yes" ]]; then
+		continue
+	fi
+	
+	circos_file="$outputDir/${sample}_VJ_circos.txt"
+	echo -e -n "labels$(cat ${circos_file})" > ${circos_file}
+	cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/
+	$CIRCOSDIR/circos -conf $outputDir/circos/circos.conf 2>&1
+	mv $outputDir/circos/circos.png $outputDir/circosVJ_${sample}.png
+	
+	
+	if [[ "$useD" == "true" ]] ; then
+		circos_file="$outputDir/${sample}_VD_circos.txt"
+		echo -e -n "labels$(cat ${circos_file})" > ${circos_file}
+		cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/
+		$CIRCOSDIR/circos -conf $outputDir/circos/circos.conf 2>&1
+		mv $outputDir/circos/circos.png $outputDir/circosVD_${sample}.png
+		
+		circos_file="$outputDir/${sample}_DJ_circos.txt"
+		echo -e -n "labels$(cat ${circos_file})" > ${circos_file}
+		cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/
+		$CIRCOSDIR/circos -conf $outputDir/circos/circos.conf 2>&1
+		mv $outputDir/circos/circos.png $outputDir/circosDJ_${sample}.png
+		
+	fi
+done
+echo "</title><script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $outputFile
+echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> $outputFile
+echo "<script type='text/javascript' src='tabber.js'></script>" >> $outputFile
+echo "<script type='text/javascript' src='script.js'></script>" >> $outputFile
+echo "<link rel='stylesheet' type='text/css' href='style.css'></head>" >> $outputFile
+echo "<div class='tabber'><div class='tabbertab' title='Gene frequencies'>" >> $outputFile
+
+
+echo "<img src='VFPlot.png'/>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<img src='DFPlot.png'/>" >> $outputFile
+fi
+echo "<img src='VPlot.png'/>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<img src='DPlot.png'/>" >> $outputFile
+fi
+echo "<img src='JPlot.png'/>" >> $outputFile
+echo "</div>" >> $outputFile
+
+echo "<div class='tabbertab' title='CDR3 Characteristics'>" >> $outputFile
+echo "<img src='CDR3LengthPlot.png'/><br />" >> $outputFile
+echo "<img src='AAComposition.png'/>" >> $outputFile
+echo "<img src='DReadingFrame.png'/>" >> $outputFile
+
+echo "<table class='pure-table pure-table-striped'>" >> $outputFile
+echo "<thead><tr><th>Sample</th><th>Median CDR3 Length</th></tr></thead>" >> $outputFile
+while IFS=, read Sample median
+do
+	echo "<tr><td>$Sample</td><td>$median</td></tr>" >> $outputFile
+done < $outputDir/AAMedianBySample.csv
+echo "</table>" >> $outputFile
+
+echo "</div>" >> $outputFile
+
+#Heatmaps
+
+count=1
+echo "<div class='tabbertab' title='Heatmaps'><div class='tabber'>" >> $outputFile
+for sample in $samples; do
+	echo "<div class='tabbertab' title='$sample'><table border='1'><tr>" >> $outputFile
+	if [[ "$useD" == "true" ]] ; then
+		echo "<td><img src='HeatmapVD_$sample.png'/></td>" >> $outputFile
+	fi
+	echo "<td><img src='HeatmapVJ_$sample.png'/></td>" >> $outputFile
+	if [[ "$useD" == "true" ]] ; then
+		echo "<td><img src='HeatmapDJ_$sample.png'/></td>" >> $outputFile
+	fi
+	echo "</tr></table></div>" >> $outputFile
+	count=$((count+1))
+done
+echo "</div></div>" >> $outputFile
+
+#circos
+
+if [[ "$USECIRCOS" == "yes" ]]; then
+
+	echo "<div class='tabbertab' title='Circos'><div class='tabber'>" >> $outputFile
+	for sample in $samples; do
+		echo "<div class='tabbertab' title='$sample'><table border='1'><center>" >> $outputFile
+		if [[ "$useD" == "true" ]] ; then
+			echo "<tr><td>V-D</td><td><img src='circosVD_${sample}.png' width='700' height='700'/></td></tr>" >> $outputFile
+		fi
+		echo "<tr><td>V-J</td><td><img src='circosVJ_${sample}.png' width='700' height='700'/></td></tr>" >> $outputFile
+		if [[ "$useD" == "true" ]] ; then
+			echo "<tr><td>D-J</td><td><img src='circosDJ_${sample}.png' width='700' height='700'/></td></tr>" >> $outputFile
+		fi
+		echo "<center></table></div>" >> $outputFile
+		count=$((count+1))
+	done
+	echo "</div></div>" >> $outputFile
+fi
+#echo "<div class='tabbertab' title='Interactive'><svg class='chart'></svg><script src='http://d3js.org/d3.v3.min.js'></script></div>" >> $outputFile
+
+hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)"
+echo "$hasReplicateColumn"
+#if its a 'new' merged file with replicate info
+if [[ "$hasReplicateColumn" == "Yes" ]] ; then
+	echo "<div class='tabbertab' title='Clonality'><div class='tabber'>" >> $outputFile
+	for sample in $samples; do
+		echo "${clonality_method}"
+		if [[ "${clonality_method}" == "old" ]] ; then
+			echo "in old"
+			clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)"
+			echo "<div class='tabbertab' title='$sample'><table class='pure-table pure-table-striped'>" >> $outputFile
+			echo "<thead><tr><th colspan='4'>Clonality Score: $clonalityScore</th></tr></thead>" >> $outputFile
+
+			#replicate,reads,squared
+			echo "<tr><td>Replicate ID</td><td>Number of Reads</td></tr>" >> $outputFile
+			while IFS=, read replicate reads squared
+			do
+				echo "<tr><td>$replicate</td><td>$reads</td></tr>" >> $outputFile
+			done < $outputDir/ReplicateReads_$sample.csv
+			
+			#sum of reads and reads squared
+			while IFS=, read readsSum squaredSum
+				do
+					echo "<tr><td>Sum</td><td>$readsSum</td></tr>" >> $outputFile
+			done < $outputDir/ReplicateSumReads_$sample.csv
+			
+			#overview
+			echo "<tr><td>Coincidence Type</td><td>Raw Coincidence Freq</td></tr>" >> $outputFile
+			while IFS=, read type count weight weightedCount
+			do
+				if [[ "$type" -eq "1" ]]; then
+					echo "<tr><td>$type</td><td>$count</td></tr>" >> $outputFile
+				else
+					echo "<tr><td><a href='coincidences_${sample}_${type}.txt'>$type</a></td><td>$count</td></tr>" >> $outputFile
+				fi
+				
+			done < $outputDir/ClonalityOverView_$sample.csv
+			echo "</table></div>" >> $outputFile
+		else
+			echo "in new"
+			clonalityScore="$(cat $outputDir/lymphclon_clonality_${sample}.csv)"
+			echo "<div class='tabbertab' title='$sample'>" >> $outputFile
+			echo "Lymphclon clonality score: <br />$clonalityScore<br /><br />" >> $outputFile
+			echo "<table border = 1>" >> $outputFile
+			while IFS=, read type count
+			do
+				echo "<tr><td>$type</td><td>$count</td></tr>" >> $outputFile
+			done < $outputDir/lymphclon_coincidences_$sample.csv
+			echo "</table></div>" >> $outputFile
+		fi
+	done
+	echo "</div></div>" >> $outputFile
+fi
+
+#hasJunctionData="$(if head -n 1 $inputFile | grep -qE '3V.REGION.trimmed.nt.nb'; then echo 'Yes'; else echo 'No'; fi)"
+
+#if [[ "$hasJunctionData" == "Yes" ]] ; then
+if [ -a "$outputDir/junctionAnalysisProd_mean.csv" ] ; then
+	echo "<div class='tabbertab' title='Junction Analysis'>" >> $outputFile
+	echo "<table class='pure-table pure-table-striped' id='junction_table'> <caption>Productive mean</caption><thead><tr><th>Sample</th><th>count</th><th>V.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.D</th><th>D.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.J</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th><th>Median.CDR3</th><thead></tr><tbody>" >> $outputFile
+	while IFS=, read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
+	do
+		echo "<tr><td>$Sample</td><td>$unique</td><td>$VDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELD</td><td>$DDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJ</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td><td>$median</td></tr>" >> $outputFile
+	done < $outputDir/junctionAnalysisProd_mean.csv
+	echo "</tbody></table>" >> $outputFile
+	
+	echo "<table class='pure-table pure-table-striped' id='junction_table'> <caption>Unproductive mean</caption><thead><tr><th>Sample</th><th>count</th><th>V.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.D</th><th>D.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.J</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th><th>Median.CDR3</th><thead></tr><tbody>" >> $outputFile
+	while IFS=, read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
+	do
+		echo "<tr><td>$Sample</td><td>$unique</td><td>$VDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELD</td><td>$DDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJ</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td><td>$median</td></tr>" >> $outputFile
+	done < $outputDir/junctionAnalysisUnProd_mean.csv
+	echo "</tbody></table>" >> $outputFile
+	
+	echo "<table class='pure-table pure-table-striped' id='junction_table'> <caption>Productive median</caption><thead><tr><th>Sample</th><th>count</th><th>V.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.D</th><th>D.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.J</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th><th>Median.CDR3</th><thead></tr><tbody>" >> $outputFile
+	while IFS=, read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
+	do
+		echo "<tr><td>$Sample</td><td>$unique</td><td>$VDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELD</td><td>$DDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJ</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td><td>$median</td></tr>" >> $outputFile
+	done < $outputDir/junctionAnalysisProd_median.csv
+	echo "</tbody></table>" >> $outputFile
+	
+	echo "<table class='pure-table pure-table-striped' id='junction_table'> <caption>Unproductive median</caption><thead><tr><th>Sample</th><th>count</th><th>V.DEL</th><th>P1</th><th>N1</th><th>P2</th><th>DEL.D</th><th>D.DEL</th><th>P3</th><th>N2</th><th>P4</th><th>DEL.J</th><th>Total.Del</th><th>Total.N</th><th>Total.P</th><th>Median.CDR3</th><thead></tr><tbody>" >> $outputFile
+	while IFS=, read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
+	do
+		echo "<tr><td>$Sample</td><td>$unique</td><td>$VDEL</td><td>$P1</td><td>$N1</td><td>$P2</td><td>$DELD</td><td>$DDEL</td><td>$P3</td><td>$N2</td><td>$P4</td><td>$DELJ</td><td>$TotalDel</td><td>$TotalN</td><td>$TotalP</td><td>$median</td></tr>" >> $outputFile
+	done < $outputDir/junctionAnalysisUnProd_median.csv
+	echo "</tbody></table>" >> $outputFile
+	
+	echo "</div>" >> $outputFile
+fi
+
+echo "<div class='tabbertab' title='Comparison'><table class='pure-table pure-table-striped'><thead><tr><th>ID</th><th>Include</th></tr></thead>" >> $outputFile
+for sample in $samples; do
+	echo "<tr><td>$sample</td><td><input type='checkbox' onchange=\"javascript:compareAdd('$sample')\" id='compare_checkbox_$sample'/></td></tr>" >> $outputFile
+done
+echo "</table><div name='comparisonarea'>" >> $outputFile
+echo "<table><tr id='comparison_table_vd'></tr></table>" >> $outputFile
+echo "<table><tr id='comparison_table_vj'></tr></table>" >> $outputFile
+echo "<table><tr id='comparison_table_dj'></tr></table>" >> $outputFile
+echo "</div></div>" >> $outputFile
+
+echo "<div class='tabbertab' title='Downloads'>" >> $outputFile
+echo "<table class='pure-table pure-table-striped'>" >> $outputFile
+echo "<thead><tr><th>Description</th><th>Link</th></tr></thead>" >> $outputFile
+echo "<tr><td>The dataset used to generate the frequency graphs and the heatmaps (Unique based on clonaltype, $clonalType)</td><td><a href='allUnique.csv'>Download</a></td></tr>" >> $outputFile
+echo "<tr><td>The dataset used to calculate clonality score (Unique based on clonaltype, $clonalType)</td><td><a href='clonalityComplete.csv'>Download</a></td></tr>" >> $outputFile
+
+echo "<tr><td>The dataset used to generate the CDR3 length frequency graph</td><td><a href='CDR3LengthPlot.csv'>Download</a></td></tr>" >> $outputFile
+
+echo "<tr><td>The dataset used to generate the V gene family frequency graph</td><td><a href='VFFrequency.csv'>Download</a></td></tr>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<tr><td>The dataset used to generate the D gene family frequency graph</td><td><a href='DFFrequency.csv'>Download</a></td></tr>" >> $outputFile
+fi
+
+echo "<tr><td>The dataset used to generate the V gene frequency graph</td><td><a href='VFrequency.csv'>Download</a></td></tr>" >> $outputFile
+if [[ "$useD" == "true" ]] ; then
+	echo "<tr><td>The dataset used to generate the D gene frequency graph</td><td><a href='DFrequency.csv'>Download</a></td></tr>" >> $outputFile
+fi
+echo "<tr><td>The dataset used to generate the J gene frequency graph</td><td><a href='JFrequency.csv'>Download</a></td></tr>" >> $outputFile
+echo "<tr><td>The dataset used to generate the AA composition graph</td><td><a href='AAComposition.csv'>Download</a></td></tr>" >> $outputFile
+
+for sample in $samples; do
+	if [[ "$useD" == "true" ]] ; then
+		echo "<tr><td>The data used to generate the VD heatmap for $sample.</td><td><a href='HeatmapVD_$sample.csv'>Download</a></td></tr>" >> $outputFile
+	fi
+	echo "<tr><td>The data used to generate the VJ heatmap for $sample.</td><td><a href='HeatmapVJ_$sample.csv'>Download</a></td></tr>" >> $outputFile
+	if [[ "$useD" == "true" ]] ; then
+		echo "<tr><td>The data used to generate the DJ heatmap for $sample.</td><td><a href='HeatmapDJ_$sample.csv'>Download</a></td></tr>" >> $outputFile
+	fi
+done
+
+echo "<tr><td>A frequency count of V Gene + J Gene + CDR3</td><td><a href='VJCDR3_count.txt'>Download</a></td></tr>" >> $outputFile
+
+echo "</table>" >> $outputFile
+echo "</div></html>" >> $outputFile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/report_clonality_igg.xml	Fri Jan 27 03:44:18 2017 -0500
@@ -0,0 +1,197 @@
+<tool id="report_clonality_igg" name="Report Clonality" version="1.0">
+	<description> </description>
+	<command interpreter="bash">
+#if $gene_selection.source == "imgtdb"		
+	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method
+#else
+	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method
+#end if
+	</command>
+	<inputs>
+		<param name="in_file" format="tabular" type="data" label="Data to Process" />
+		<param name="clonaltype" type="select" label="Clonal Type Definition (Needed for clonality calculation)">
+			<option value="none">Don't remove duplicates based on clonaltype</option>
+			<option value="Top.V.Gene,CDR3.Seq">Top.V.Gene, CDR3 (AA)</option>
+			<option value="Top.V.Gene,CDR3.Seq.DNA">Top.V.Gene, CDR3 (nt)</option>
+			<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq">Top.V.Gene, Top.J.Gene, CDR3 (AA)</option>
+			<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.J.Gene, CDR3 (nt)</option>
+			<option value="Top.V.Gene,Top.D.Gene,Top.J.Gene,CDR3.Seq.DNA">Top.V.Gene, Top.D.Gene, Top.J.Gene, CDR3 (nt)</option>
+			<option value="Top.V.Gene,Top.D.Gene,Top.J.Gene,CDR3.Seq">Top.V.Gene, Top.D.Gene, Top.J.Gene, CDR3 (AA)</option>
+		</param>
+		
+		<conditional name="gene_selection" >
+			<param name="source" type="select" label="Gene reference" help="" >
+					<option value="imgtdb" selected="true">IMGT-DB</option>
+					<option value="custom">User defined</option>
+			</param>
+			<when value="imgtdb">
+				<param name="species" type="select" label="Species">
+					<option value="Homo sapiens functional">Homo sapiens functional</option>
+					<option value="Homo sapiens">Homo sapiens</option>
+					<option value="Homo sapiens non-functional">Homo sapiens non-functional</option>
+					<option value="Bos taurus">Bos taurus</option>
+					<option value="Bos taurus functional">Bos taurus functional</option>
+					<option value="Bos taurus non-functional">Bos taurus non-functional</option>
+					<option value="Camelus dromedarius">Camelus dromedarius</option>
+					<option value="Camelus dromedarius functional">Camelus dromedarius functional</option>
+					<option value="Camelus dromedarius non-functional">Camelus dromedarius non-functional</option>
+					<option value="Canis lupus familiaris">Canis lupus familiaris</option>
+					<option value="Canis lupus familiaris functional">Canis lupus familiaris functional</option>
+					<option value="Canis lupus familiaris non-functional">Canis lupus familiaris non-functional</option>
+					<option value="Danio rerio">Danio rerio</option>
+					<option value="Danio rerio functional">Danio rerio functional</option>
+					<option value="Danio rerio non-functional">Danio rerio non-functional</option>
+					<option value="Macaca mulatta">Macaca mulatta</option>
+					<option value="Macaca mulatta functional">Macaca mulatta functional</option>
+					<option value="Macaca mulatta non-functional">Macaca mulatta non-functional</option>
+					<option value="Mus musculus">Mus musculus</option>
+					<option value="Mus musculus functional">Mus musculus functional</option>
+					<option value="Mus musculus non-functional">Mus musculus non-functional</option>
+					<option value="Mus spretus">Mus spretus</option>
+					<option value="Mus spretus functional">Mus spretus functional</option>
+					<option value="Mus spretus non-functional">Mus spretus non-functional</option>
+					<option value="Oncorhynchus mykiss">Oncorhynchus mykiss</option>
+					<option value="Oncorhynchus mykiss functional">Oncorhynchus mykiss functional</option>
+					<option value="Oncorhynchus mykiss non-functional">Oncorhynchus mykiss non-functional</option>
+					<option value="Ornithorhynchus anatinus">Ornithorhynchus anatinus</option>
+					<option value="Ornithorhynchus anatinus functional">Ornithorhynchus anatinus functional</option>
+					<option value="Ornithorhynchus anatinus non-functional">Ornithorhynchus anatinus non-functional</option>
+					<option value="Oryctolagus cuniculus">Oryctolagus cuniculus</option>
+					<option value="Oryctolagus cuniculus functional">Oryctolagus cuniculus functional</option>
+					<option value="Oryctolagus cuniculus non-functional">Oryctolagus cuniculus non-functional</option>
+					<option value="Rattus norvegicus">Rattus norvegicus</option>
+					<option value="Rattus norvegicus functional">Rattus norvegicus functional</option>
+					<option value="Rattus norvegicus non-functional">Rattus norvegicus non-functional</option>
+					<option value="Sus scrofa">Sus scrofa</option>
+					<option value="Sus scrofa functional">Sus scrofa functional</option>
+					<option value="Sus scrofa non-functional">Sus scrofa non-functional</option>
+				</param>
+			
+				<param name="locus" type="select" label="Locus">
+					<option value="TRA">TRA</option>
+					<option value="TRD">TRD</option>
+					<option value="TRG">TRG</option>
+					<option value="TRB">TRB</option>
+					<option value="IGH">IGH</option>
+					<option value="IGI">IGI</option>
+					<option value="IGK">IGK</option>
+					<option value="IGL">IGL</option>
+				</param>
+			</when>
+			<when value="custom">
+				<param name="species" type="hidden" value="custom" size="50" />
+				<param name="vgenes" type="text" label="V Genes, add the custom genes comma seperated, no spaces" size="100" />
+				<param name="dgenes" type="text" label="D Genes" size="100" />
+				<param name="jgenes" type="text" label="J Genes" size="100" />
+			</when>
+		</conditional>
+		
+		<param name="filterproductive" type="select" label="Remove the unproductive sequences from graphs ">
+			<option value="yes">Yes</option>
+			<option value="no">No</option>
+		</param>
+		
+		<param name="clonality_method" type="select" label="Old clonality algorithm or the newer R package">
+			<option value="old">Old</option>
+			<option value="boyd">R Package</option>
+		</param>
+		
+	</inputs>
+	<outputs>
+		<data format="html" name="out_file" />
+	</outputs>
+	<requirements>
+		<requirement type="package" version="3.3">weblogo</requirement>
+		<!--<requirement type="package" version="0.20">circostools</requirement>-->
+	</requirements>
+	<help>		
+**INPUT**
+
+One or more ARGalaxy proprietary format files combined with the ARGalaxy Experimental Design tool
+
+
+.. class:: warningmark
+
+Custom gene ordering based on position on genome: 
+
+**Human**
+
+IGH::
+
+    V:
+    IGHV7-81,IGHV3-74,IGHV3-73,IGHV3-72,IGHV3-71,IGHV2-70,IGHV1-69,IGHV3-66,IGHV3-64,IGHV4-61,IGHV4-59,IGHV1-58,IGHV3-53,IGHV3-52,IGHV5-a,IGHV5-51,IGHV3-49,IGHV3-48,IGHV3-47,IGHV1-46,IGHV1-45,IGHV3-43,IGHV4-39,IGHV3-35,IGHV4-34,IGHV3-33,IGHV4-31,IGHV4-30-4,IGHV4-30-2,IGHV3-30-3,IGHV3-30,IGHV4-28,IGHV2-26,IGHV1-24,IGHV3-23,IGHV3-22,IGHV3-21,IGHV3-20,IGHV3-19,IGHV1-18,IGHV3-15,IGHV3-13,IGHV3-11,IGHV3-9,IGHV1-8,IGHV3-7,IGHV2-5,IGHV7-4-1,IGHV4-4,IGHV4-b,IGHV1-3,IGHV1-2,IGHV6-1
+    D:
+    IGHD1-1,IGHD2-2,IGHD3-3,IGHD6-6,IGHD1-7,IGHD2-8,IGHD3-9,IGHD3-10,IGHD4-11,IGHD5-12,IGHD6-13,IGHD1-14,IGHD2-15,IGHD3-16,IGHD4-17,IGHD5-18,IGHD6-19,IGHD1-20,IGHD2-21,IGHD3-22,IGHD4-23,IGHD5-24,IGHD6-25,IGHD1-26,IGHD7-27
+    J:
+    IGHJ1,IGHJ2,IGHJ3,IGHJ4,IGHJ5,IGHJ6
+
+
+IGK::
+
+    V:
+    IGKV3D-7,IGKV1D-8,IGKV1D-43,IGKV3D-11,IGKV1D-12,IGKV1D-13,IGKV3D-15,IGKV1D-16,IGKV1D-17,IGKV3D-20,IGKV2D-26,IGKV2D-28,IGKV2D-29,IGKV2D-30,IGKV1D-33,IGKV1D-39,IGKV2D-40,IGKV2-40,IGKV1-39,IGKV1-33,IGKV2-30,IGKV2-29,IGKV2-28,IGKV1-27,IGKV2-24,IGKV3-20,IGKV1-17,IGKV1-16,IGKV3-15,IGKV1-13,IGKV1-12,IGKV3-11,IGKV1-9,IGKV1-8,IGKV1-6,IGKV1-5,IGKV5-2,IGKV4-1
+    J:
+    IGKJ1,IGKJ2,IGKJ3,IGKJ4,IGKJ5
+
+
+IGL::
+
+    V:
+    IGLV4-69,IGLV8-61,IGLV4-60,IGLV6-57,IGLV5-52,IGLV1-51,IGLV9-49,IGLV1-47,IGLV7-46,IGLV5-45,IGLV1-44,IGLV7-43,IGLV1-41,IGLV1-40,IGLV5-39,IGLV5-37,IGLV1-36,IGLV3-27,IGLV3-25,IGLV2-23,IGLV3-22,IGLV3-21,IGLV3-19,IGLV2-18,IGLV3-16,IGLV2-14,IGLV3-12,IGLV2-11,IGLV3-10,IGLV3-9,IGLV2-8,IGLV4-3,IGLV3-1
+    J:
+    IGLJ1,IGLJ2,IGLJ3,IGLJ6,IGLJ7
+
+
+TRB::
+
+    V:
+    TRBV2,TRBV3-1,TRBV4-1,TRBV5-1,TRBV6-1,TRBV4-2,TRBV6-2,TRBV4-3,TRBV6-3,TRBV7-2,TRBV6-4,TRBV7-3,TRBV9,TRBV10-1,TRBV11-1,TRBV10-2,TRBV11-2,TRBV6-5,TRBV7-4,TRBV5-4,TRBV6-6,TRBV5-5,TRBV7-6,TRBV5-6,TRBV6-8,TRBV7-7,TRBV6-9,TRBV7-8,TRBV5-8,TRBV7-9,TRBV13,TRBV10-3,TRBV11-3,TRBV12-3,TRBV12-4,TRBV12-5,TRBV14,TRBV15,TRBV16,TRBV18,TRBV19,TRBV20-1,TRBV24-1,TRBV25-1,TRBV27,TRBV28,TRBV29-1,TRBV30
+    D:
+    TRBD1,TRBD2
+    J:
+    TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ1-6,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7
+
+
+TRA::
+
+    V:
+    TRAV1-1,TRAV1-2,TRAV2,TRAV3,TRAV4,TRAV5,TRAV6,TRAV7,TRAV8-1,TRAV9-1,TRAV10,TRAV12-1,TRAV8-2,TRAV8-3,TRAV13-1,TRAV12-2,TRAV8-4,TRAV13-2,TRAV14/DV4,TRAV9-2,TRAV12-3,TRAV8-6,TRAV16,TRAV17,TRAV18,TRAV19,TRAV20,TRAV21,TRAV22,TRAV23/DV6,TRAV24,TRAV25,TRAV26-1,TRAV27,TRAV29/DV5,TRAV30,TRAV26-2,TRAV34,TRAV35,TRAV36/DV7,TRAV38-1,TRAV38-2/DV8,TRAV39,TRAV40,TRAV41
+    J:
+    TRAJ57,TRAJ56,TRAJ54,TRAJ53,TRAJ52,TRAJ50,TRAJ49,TRAJ48,TRAJ47,TRAJ46,TRAJ45,TRAJ44,TRAJ43,TRAJ42,TRAJ41,TRAJ40,TRAJ39,TRAJ38,TRAJ37,TRAJ36,TRAJ34,TRAJ33,TRAJ32,TRAJ31,TRAJ30,TRAJ29,TRAJ28,TRAJ27,TRAJ26,TRAJ24,TRAJ23,TRAJ22,TRAJ21,TRAJ20,TRAJ18,TRAJ17,TRAJ16,TRAJ15,TRAJ14,TRAJ13,TRAJ12,TRAJ11,TRAJ10,TRAJ9,TRAJ8,TRAJ7,TRAJ6,TRAJ5,TRAJ4,TRAJ3
+
+
+TRG::
+
+    V:
+    TRGV9,TRGV8,TRGV5,TRGV4,TRGV3,TRGV2
+    J:
+    TRGJ2,TRGJP2,TRGJ1,TRGJP1
+
+
+TRD::
+
+    V:
+    TRDV1,TRDV2,TRDV3
+    D:
+    TRDD1,TRDD2,TRDD3
+    J:
+    TRDJ1,TRDJ4,TRDJ2,TRDJ3
+
+
+**Mouse**
+
+TRB::
+
+    V:
+    TRBV1,TRBV2,TRBV3,TRBV4,TRBV5,TRBV12-1,TRBV13-1,TRBV12-2,TRBV13-2,TRBV13-3,TRBV14,TRBV15,TRBV16,TRBV17,TRBV19,TRBV20,TRBV23,TRBV24,TRBV26,TRBV29,TRBV30,TRBV31
+    D:
+    TRBD1,TRBD2
+    J:
+    TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7
+    
+
+**OUTPUT**
+
+It generates the following result:
+	</help>
+</tool>