Mercurial > repos > davidvanzessen > shm_csr

--- a/CHANGELOG.md	Mon Dec 12 12:32:44 2022 +0000
+++ b/CHANGELOG.md	Mon Mar 06 11:36:32 2023 +0000
@@ -1,3 +1,13 @@
+version 1.7.0
+-----------------
++ Use the name of the input file to generate the name of the output IMGT
+  archives.
++ Add same duplicate filters as immune repertoire pipeline.
++ Add a new "Everything is IGM" class filter for captured IGM sequences.
++ Fix bug where empty tables would cause crashes when generating plots.
++ Fix bug where R script errors were not written to stderr, causing Galaxy to
+  mistake failed jobs for successful ones.
+
 version 1.6.0
 -------------
 + Faster runtime due to faster gene identification, sequence overview creation
Binary file CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz has changed
Binary file __pycache__/igm_naive_mutations.cpython-39.pyc has changed
Binary file a.out has changed
--- a/merge_and_filter.r	Mon Dec 12 12:32:44 2022 +0000
+++ b/merge_and_filter.r	Mon Mar 06 11:36:32 2023 +0000
@@ -163,8 +163,8 @@
 	result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"])
 }

-if(class.filter == "101_101"){
-	result$best_match = "all"
+if(splt[1] == "101" && splt[2] == "101"){ # scalar condition: short-circuit && rather than elementwise &
+	result$best_match = splt[3]
 }

 write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T)
--- a/sequence_overview.py	Mon Dec 12 12:32:44 2022 +0000
+++ b/sequence_overview.py	Mon Mar 06 11:36:32 2023 +0000
@@ -30,7 +30,9 @@
             "IGG3": 0,
             "IGG4": 0,
             "IGM": 0,
-            "unmatched": 0}
+            "unmatched": 0,
+            "all": 0,
+        }
         self.table_rows: List[SequenceTableRow] = []
--- a/shm_csr.r	Mon Dec 12 12:32:44 2022 +0000
+++ b/shm_csr.r	Mon Mar 06 11:36:32 2023 +0000
@@ -439,19 +439,20 @@

 dat.clss = rbind(dat, dat.clss)

+write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
+
+if (nrow(dat) > 0) {
 p = ggplot(dat.clss, aes(best_match, percentage_mutations))
 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA)
 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
 p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
-
 png(filename="scatter.png")
 print(p)
 dev.off()

 pdfplots[["scatter.pdf"]] <- p
-
-write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
+}

 print("Plotting frequency ranges plot")

@@ -467,6 +468,7 @@

 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)

+if (nrow(frequency_bins_data) > 0) {
 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
@@ -476,6 +478,7 @@
 dev.off()

 pdfplots[["frequency_ranges.pdf"]] <- p
+}

 save(pdfplots, file="pdfplots.RData")

@@ -483,10 +486,12 @@

 frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),]

+
 frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins)
-frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
-frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
-
+if (nrow(frequency_bins_data_by_class) > 0) {
+    frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
+    frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
+}
 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T)

 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
@@ -499,9 +504,10 @@

 frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),]
 frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins)
-frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
-frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
-
+if (nrow(frequency_bins_data) > 0) {
+    frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
+    frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
+}
 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T)
--- a/shm_csr.xml	Mon Dec 12 12:32:44 2022 +0000
+++ b/shm_csr.xml	Mon Mar 06 11:36:32 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.6.0">
+<tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.7.0" profile="16.04">
 	<description></description>
 	<requirements>
 		<requirement type="package" version="3.7.1">python</requirement>
@@ -21,11 +21,34 @@
 		<requirement type="package" version="0.83">font-ttf-ubuntu</requirement>
 	</requirements>
 	<command interpreter="bash">
+		<![CDATA[
+		#set $input=$in_file.name
+		ln -s "$in_file" "$input" &&
 		#if str ( $filter_unique.filter_unique_select ) == "remove":
-			wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast
+			$__tool_directory__/wrapper.sh "$input"
+			custom $out_file $out_file.files_path
+			"${in_file.name}" "-" $functionality $unique
+			$naive_output_cond.naive_output $naive_output_ca
+			$naive_output_cg $naive_output_cm $naive_output_ce
+			$naive_output_all $filter_unique.filter_unique_select
+			$filter_unique.filter_unique_clone_count
+			$class_filter_cond.class_filter
+			$empty_region_filter
+			$fast
 		#else:
-			wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast
+			$__tool_directory__/wrapper.sh
+			"$input" custom
+			$out_file $out_file.files_path
+			"${in_file.name}" "-" $functionality $unique
+			$naive_output_cond.naive_output $naive_output_ca $naive_output_cg
+			$naive_output_cm $naive_output_ce $naive_output_all
+			$filter_unique.filter_unique_select
+			2
+			$class_filter_cond.class_filter
+			$empty_region_filter
+			$fast
 		#end if
+	]]>
 	</command>
 	<inputs>
 		<param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" />
@@ -56,13 +79,16 @@
 		<param name="unique" type="select" label="Remove duplicates based on" help="" >
 			<option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
 			<option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
+			<option value="VGene,JGene,CDR3.IMGT.AA">Top.V.Gene, Top.J.Gene, CDR3 (AA)</option>
 			<option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
 			<option value="CDR3.IMGT.AA">CDR3 (AA)</option>
-
+
 			<option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option>
 			<option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
+			<option value="VGene,JGene,CDR3.IMGT.seq">Top.V.Gene, Top.J.Gene, CDR3 (nt)</option>
 			<option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option>
 			<option value="CDR3.IMGT.seq">CDR3 (nt)</option>
+			<option value="VGene,DGene,JGene,CDR3.IMGT.seq">Top.V.Gene, Top.D.Gene, Top.J.Gene, CDR3 (nt)</option>
 			<option value="Sequence.ID" selected="true">Don't remove duplicates</option>
 		</param>
 		<conditional name="class_filter_cond">
@@ -72,14 +98,9 @@
 				<option value="70_0">>70% class</option>
 				<option value="60_0">>60% class</option>
 				<option value="19_0">>19% class</option>
-				<option value="101_101">Do not assign (sub)class</option>
+				<option value="101_101_all">Do not assign (sub)class</option>
+				<option value="101_101_IGM">Everything is IGM</option>
 			</param>
-			<when value="70_70"></when>
-			<when value="60_55"></when>
-			<when value="70_0"></when>
-			<when value="60_0"></when>
-			<when value="19_0"></when>
-			<when value="101_101"></when>
 		</conditional>
 		<conditional name="naive_output_cond">
 			<param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">
Binary file show_time_as_float has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/show_time_as_float.c	Mon Mar 06 11:36:32 2023 +0000
@@ -0,0 +1,16 @@
+/* script adapted from https://www.nu42.com/2021/07/windows-c-time-in-nanoseconds.html */
+#include <stdio.h>
+#include <time.h>
+
+/* Print the current UTC time as "<seconds>.<nanoseconds>"; return 1 on failure. */
+int main(void)
+{
+    struct timespec ts;
+    if (timespec_get(&ts, TIME_UTC) != TIME_UTC) {
+        fputs("timespec_get failed!", stderr);
+        return 1;
+    }
+    /* Zero-pad tv_nsec to 9 digits so 5 ns prints as .000000005 rather than .5. */
+    printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
+    return 0;
+}
Binary file tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc has changed
Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.0.pyc has changed
Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/data/.~lock.handleiding activeren pas.docx#	Mon Mar 06 11:36:32 2023 +0000
@@ -0,0 +1,1 @@
+Vorderman\, R.H.P. (MOLEPI) ,rhpvorderman,sasc-pc-6,21.02.2023 15:01,file:///home/rhpvorderman/.config/libreoffice/4;
\ No newline at end of file
Binary file tests/data/handleiding activeren pas.docx has changed
--- a/tests/test_shm_csr.py	Mon Dec 12 12:32:44 2022 +0000
+++ b/tests/test_shm_csr.py	Mon Mar 06 11:36:32 2023 +0000
@@ -43,11 +43,20 @@
     return container.text


+def ignore_files(src, files):
+    """copytree ignore hook: skip everything inside .venv and .git."""
+    inside_skipped_dir = os.path.basename(src) in {".venv", ".git"}
+    # Returning every name makes copytree copy nothing from this directory.
+    return files if inside_skipped_dir else ()
+
 @pytest.fixture(scope="module")
 def shm_csr_result():
     temp_dir = Path(tempfile.mkdtemp())
     tool_dir = temp_dir / "shm_csr"
-    shutil.copytree(GIT_ROOT, tool_dir)
+    shutil.copytree(
+        GIT_ROOT, tool_dir,
+        # Ignore .venv and .git directories.
+        ignore=ignore_files)
     working_dir = temp_dir / "working"
     working_dir.mkdir(parents=True)
     output_dir = temp_dir / "outputs"
Binary file time_ns has changed
--- a/wrapper.sh	Mon Dec 12 12:32:44 2022 +0000
+++ b/wrapper.sh	Mon Mar 06 11:36:32 2023 +0000
@@ -22,12 +22,15 @@
 empty_region_filter=${18}
 fast=${19}

+BASENAME=$(basename "$input")
+# Strip the last extension (e.g. .txz or .tgz); $input is quoted as names may contain spaces
+NEW_IMGT_PREFIX="new_IMGT_${BASENAME%.*}"
+
 #exec 5> debug_output.txt
 #BASH_XTRACEFD="5"
-## Busybox date does not support '+%s.%N'. So use the slower python instead.
-## Using -S python does not do 'import site' which shortens the command
-## to 10 milliseconds.
-#PS4='$(python -Sc "import time; print(time.time())") $LINENO: '
+## Busybox date does not support '+%s.%N'. So use a custom program. Can be
+## Compiled with cc -Os show_time_as_float.c -o show_time_as_float
+#PS4='$(${dir}/show_time_as_float) $LINENO: '
 #set -x

 mkdir -p $outdir
@@ -39,7 +42,7 @@

 echo "unpacking IMGT file"

-type="`file $input`"
+type="$(file -L "$input")"  # -L: inspect the symlink target; quoted so names with spaces stay one argument
 if [[ "$type" == *"Zip archive"* ]] ; then
 	echo "Zip archive"
 	echo "unzip $input -d $PWD/files/"
@@ -85,13 +88,30 @@
 echo "---------------- merge_and_filter.r ----------------"
 echo "---------------- merge_and_filter.r ----------------<br />" >> $log

-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1
+Rscript $dir/merge_and_filter.r \
+  $PWD/summary.txt \
+  $PWD/sequences.txt \
+  $PWD/mutationanalysis.txt \
+  $PWD/mutationstats.txt \
+  $PWD/hotspots.txt \
+  "$PWD/gapped_aa.txt" \
+  $outdir/identified_genes.txt \
+  $outdir/merged.txt \
+  $outdir/before_unique_filter.txt \
+  $outdir/unmatched.txt \
+  $method \
+  $functionality \
+  $unique \
+  ${filter_unique} \
+  ${filter_unique_count} \
+  ${class_filter} \
+  ${empty_region_filter}

 echo "---------------- creating new IMGT zips ----------------"
 echo "---------------- creating new IMGT zips ----------------<br />" >> $log

 python $dir/split_imgt_file.py --outdir $outdir $input $outdir/merged.txt \
-  --prefix new_IMGT \
+  --prefix "${NEW_IMGT_PREFIX}" \
   - IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE


@@ -100,7 +120,7 @@

 classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched"
 echo "R mutation analysis"
-Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1
+Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter}

 echo "---------- Split naive memory IGM ---------"
 echo "---------- Split naive memory IGM ---------<br />" >> $log
@@ -108,20 +128,20 @@
 python $dir/igm_naive_mutations.py $outdir/scatter.txt $outdir/igm_naive_mutations.txt \
   $outdir/igm_naive_memory_mutations.txt

-python $dir/split_imgt_file.py --outdir $outdir $outdir/new_IMGT_IGM.txz \
+python $dir/split_imgt_file.py --outdir $outdir $outdir/${NEW_IMGT_PREFIX}_IGM.txz \
   $outdir/igm_naive_mutations.txt \
-  --prefix new_IMGT_IGM_NAIVE -
+  --prefix "${NEW_IMGT_PREFIX}_IGM_NAIVE" -

-python $dir/split_imgt_file.py --outdir $outdir $outdir/new_IMGT_IGM.txz \
+python $dir/split_imgt_file.py --outdir $outdir $outdir/${NEW_IMGT_PREFIX}_IGM.txz \
   $outdir/igm_naive_memory_mutations.txt \
-  --prefix new_IMGT_IGM_NAIVE_MEMORY -
+  --prefix "${NEW_IMGT_PREFIX}_IGM_NAIVE_MEMORY" -

 echo "---------------- plot_pdfs.r ----------------"
 echo "---------------- plot_pdfs.r ----------------<br />" >> $log

-echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir 2>&1"
+echo "Rscript $dir/plot_pdf.r $outdir/pdfplots.RData $outdir"  # log the plot_pdf.r command run below

-Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1
+Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir"

 echo "---------------- shm_csr.py ----------------"
 echo "---------------- shm_csr.py ----------------<br />" >> $log
@@ -131,7 +151,11 @@
 echo "---------------- aa_histogram.r ----------------"
 echo "---------------- aa_histogram.r ----------------<br />" >> $log

-Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1
+Rscript $dir/aa_histogram.r \
+  $outdir/aa_id_mutations.txt \
+  $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" \
+  $outdir/
+
 if [ -e "$outdir/aa_histogram_.png" ]; then
         mv $outdir/aa_histogram_.png $outdir/aa_histogram.png
         mv $outdir/aa_histogram_.pdf $outdir/aa_histogram.pdf
@@ -153,7 +177,12 @@

 python $dir/sequence_overview.py --before-unique $outdir/before_unique_filter.txt \
   --outdir $outdir/sequence_overview --empty-region-filter ${empty_region_filter}
-Rscript $dir/nt_overview.r $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt ${empty_region_filter} 2>&1
+Rscript $dir/nt_overview.r \
+  $outdir/merged.txt \
+  $outdir/sequence_overview \
+  $classes \
+  $outdir/hotspot_analysis_sum.txt \
+  ${empty_region_filter}

 echo "<table border='1'>" > $outdir/base_overview.html

@@ -198,12 +227,17 @@
 	echo "---------------- pattern_plots.r ----------------"
 	echo "---------------- pattern_plots.r ----------------<br />" >> $log

-	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1
+	Rscript $dir/pattern_plots.r \
+	  $outdir/data_${func}.txt \
+	  $outdir/aid_motives \
+	  $outdir/relative_mutations \
+	  $outdir/absolute_mutations \
+	  $outdir/shm_overview.txt

 	echo "<table class='pure-table pure-table-striped'>" >> $output
 	echo "<thead><tr><th>info</th>" >> $output

-	if [ "${class_filter}" != "101_101" ] ; then
+	if [ "${class_filter}" != "101_101_all" ] ; then

 		for gene in ${genes[@]}
 		do
@@ -397,41 +431,41 @@
 	echo "<p>${header_substring}</p></center>" >> $output

 	mkdir $outdir/baseline/IGA_IGG_IGM
-	if [[ "$(count_imgt_lines $outdir/new_IMGT.txz)" -gt "1" ]]; then
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}.txz)" -gt "1" ]]; then
 		cd $outdir/baseline/IGA_IGG_IGM
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
 	else
 		echo "No sequences" > "$outdir/baseline.txt"
 	fi

 	mkdir $outdir/baseline/IGA
-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGA.txz)" -gt "1" ]]; then
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGA.txz)" -gt "1" ]]; then
 		cd $outdir/baseline/IGA
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt"
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt"
 	else
 		echo "No IGA sequences" > "$outdir/baseline_IGA.txt"
 	fi

 	mkdir $outdir/baseline/IGG
-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGG.txz)" -gt "1" ]]; then
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGG.txz)" -gt "1" ]]; then
 		cd $outdir/baseline/IGG
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGG.txz "IGG" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt"
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGG.txz "IGG" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt"
 	else
 		echo "No IGG sequences" > "$outdir/baseline_IGG.txt"
 	fi

 	mkdir $outdir/baseline/IGM
-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGM.txz)" -gt "1" ]]; then
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGM.txz)" -gt "1" ]]; then
 		cd $outdir/baseline/IGM
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt"
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt"
 	else
 		echo "No IGM sequences" > "$outdir/baseline_IGM.txt"
 	fi

 	mkdir $outdir/baseline/IGE
-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGE.txz)" -gt "1" ]]; then
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGE.txz)" -gt "1" ]]; then
 		cd $outdir/baseline/IGE
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt"
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt"
 	else
 		echo "No IGE sequences" > "$outdir/baseline_IGE.txt"
 	fi
@@ -498,24 +532,32 @@

 	cd $outdir/change_o

-	bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt
+	bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}.txz false false false $outdir/change_o/change-o-db.txt
 	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt
-	Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-db-defined_first_clones.txt 2>&1
+	Rscript $dir/change_o/select_first_in_clone.r \
+	  $outdir/change_o/change-o-db-defined_clones.txt \
+	  $outdir/change_o/change-o-db-defined_first_clones.txt

-	python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_first_seq_of_clone \
-	  $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones.txt \
+	python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_first_seq_of_clone \
+	  $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones.txt \
     "-"

-	Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1
-	echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"
+	Rscript $dir/merge.r \
+	  $outdir/change_o/change-o-db-defined_clones.txt \
+	  $outdir/merged.txt \
+	  "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" \
+	  $outdir/change_o/change-o-db-defined_clones.txt
+	echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt"  # mirrors the merge.r call above

-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGA.txz)" -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGA.txz)" -gt "1" ]]; then
+		bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt
 		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-db-defined_first_clones-IGA.txt 2>&1
+		Rscript $dir/change_o/select_first_in_clone.r \
+		  $outdir/change_o/change-o-db-defined_clones-IGA.txt \
+		  $outdir/change_o/change-o-db-defined_first_clones-IGA.txt

-    python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGA_first_seq_of_clone \
-      $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGA.txt \
+    python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGA_first_seq_of_clone \
+      $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGA.txt \
       "-"

 	else
@@ -523,13 +565,15 @@
 		echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt"
 	fi

-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGG.txz)" -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGG.txz)" -gt "1" ]]; then
+		bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt
 		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-db-defined_first_clones-IGG.txt 2>&1
+		Rscript $dir/change_o/select_first_in_clone.r \
+		  $outdir/change_o/change-o-db-defined_clones-IGG.txt \
+		  $outdir/change_o/change-o-db-defined_first_clones-IGG.txt

-    python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGG_first_seq_of_clone \
-       $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGG.txt \
+    python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGG_first_seq_of_clone \
+       $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGG.txt \
       "-"

 	else
@@ -537,13 +581,15 @@
 		echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt"
 	fi

-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGM.txz)" -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGM.txz)" -gt "1" ]]; then
+		bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt
 		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-db-defined_first_clones-IGM.txt 2>&1
+		Rscript $dir/change_o/select_first_in_clone.r \
+		  $outdir/change_o/change-o-db-defined_clones-IGM.txt \
+		  $outdir/change_o/change-o-db-defined_first_clones-IGM.txt

-    python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGM_first_seq_of_clone \
-      $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGM.txt \
+    python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGM_first_seq_of_clone \
+      $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGM.txt \
       "-"

 	else
@@ -551,13 +597,15 @@
 		echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt"
 	fi

-	if [[ "$(count_imgt_lines $outdir/new_IMGT_IGE.txz)" -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGE.txz false false false $outdir/change_o/change-o-db-IGE.txt
+	if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGE.txz)" -gt "1" ]]; then
+		bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGE.txz false false false $outdir/change_o/change-o-db-IGE.txt
 		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGE.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-defined_clones-summary-IGE.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-db-defined_first_clones-IGE.txt 2>&1
+		Rscript $dir/change_o/select_first_in_clone.r \
+		  $outdir/change_o/change-o-db-defined_clones-IGE.txt \
+		  $outdir/change_o/change-o-db-defined_first_clones-IGE.txt

-    python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGE_first_seq_of_clone \
-      $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGE.txt \
+    python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGE_first_seq_of_clone \
+      $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGE.txt \
       "-"

 	else
@@ -714,38 +762,38 @@
 echo "<tr><td>Sequence overlap between subclasses</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone</td><td><a href='new_IMGT_first_seq_of_clone.txz' download='new_IMGT_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone</td><td><a href='${NEW_IMGT_PREFIX}_first_seq_of_clone.txz' download='${NEW_IMGT_PREFIX}_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output

 echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGA)</td><td><a href='new_IMGT_IGA_first_seq_of_clone.txz' download='new_IMGT_IGA_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGA)</td><td><a href='${NEW_IMGT_PREFIX}_IGA_first_seq_of_clone.txz' download='${NEW_IMGT_PREFIX}_IGA_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output

 echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGG)</td><td><a href='new_IMGT_IGG_first_seq_of_clone.txz' download='new_IMGT_IGG_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGG)</td><td><a href='${NEW_IMGT_PREFIX}_IGG_first_seq_of_clone.txz' download='${NEW_IMGT_PREFIX}_IGG_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output

 echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGM)</td><td><a href='new_IMGT_IGM_first_seq_of_clone.txz' download='new_IMGT_IGM_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGM)</td><td><a href='${NEW_IMGT_PREFIX}_IGM_first_seq_of_clone.txz' download='${NEW_IMGT_PREFIX}_IGM_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output

 echo "<tr><td>The Change-O DB file with defined clones of IGE</td><td><a href='change_o/change-o-db-defined_clones-IGE.txt' download='change_o/change-o-db-defined_clones-IGE.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB defined clones summary file of IGE</td><td><a href='change_o/change-o-defined_clones-summary-IGE.txt' download='change_o/change-o-defined_clones-summary-IGE.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGE)</td><td><a href='new_IMGT_IGE_first_seq_of_clone.txz' download='new_IMGT_IGE_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGE)</td><td><a href='${NEW_IMGT_PREFIX}_IGE_first_seq_of_clone.txz' download='${NEW_IMGT_PREFIX}_IGE_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output

 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Filtered IMGT output files</td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href='new_IMGT_IGA.txz' download='new_IMGT_IGA.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href='new_IMGT_IGA1.txz' download='new_IMGT_IGA1.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGA2 sequences</td><td><a href='new_IMGT_IGA2.txz' download='new_IMGT_IGA2.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGG sequences</td><td><a href='new_IMGT_IGG.txz' download='new_IMGT_IGG.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGG1 sequences</td><td><a href='new_IMGT_IGG1.txz' download='new_IMGT_IGG1.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGG2 sequences</td><td><a href='new_IMGT_IGG2.txz' download='new_IMGT_IGG2.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGG3 sequences</td><td><a href='new_IMGT_IGG3.txz' download='new_IMGT_IGG3.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href='new_IMGT_IGG4.txz' download='new_IMGT_IGG4.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href='new_IMGT_IGM.txz' download='new_IMGT_IGM.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered IGE sequences</td><td><a href='new_IMGT_IGE.txz' download='new_IMGT_IGE.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered naive IGM sequences (mutations below 2%)</td><td><a href='new_IMGT_IGM_NAIVE.txz' download='new_IMGT_IGM_NAIVE.txz' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered naive memory IGM sequences (mutations 2% or higher)</td><td><a href='new_IMGT_IGM_NAIVE_MEMORY.txz' download='new_IMGT_IGM_NAIVE_MEMORY.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='${NEW_IMGT_PREFIX}.txz' download='${NEW_IMGT_PREFIX}.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGA.txz' download='${NEW_IMGT_PREFIX}_IGA.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGA1.txz' download='${NEW_IMGT_PREFIX}_IGA1.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGA2 sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGA2.txz' download='${NEW_IMGT_PREFIX}_IGA2.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGG.txz' download='${NEW_IMGT_PREFIX}_IGG.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG1 sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGG1.txz' download='${NEW_IMGT_PREFIX}_IGG1.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG2 sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGG2.txz' download='${NEW_IMGT_PREFIX}_IGG2.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG3 sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGG3.txz' download='${NEW_IMGT_PREFIX}_IGG3.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGG4.txz' download='${NEW_IMGT_PREFIX}_IGG4.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGM.txz' download='${NEW_IMGT_PREFIX}_IGM.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGE sequences</td><td><a href='${NEW_IMGT_PREFIX}_IGE.txz' download='${NEW_IMGT_PREFIX}_IGE.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered naive IGM sequences (mutations below 2%)</td><td><a href='${NEW_IMGT_PREFIX}_IGM_NAIVE.txz' download='${NEW_IMGT_PREFIX}_IGM_NAIVE.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered naive memory IGM sequences (mutations 2% or higher)</td><td><a href='${NEW_IMGT_PREFIX}_IGM_NAIVE_MEMORY.txz' download='${NEW_IMGT_PREFIX}_IGM_NAIVE_MEMORY.txz' >Download</a></td></tr>" >> $output
 echo "</table>" >> $output

 echo "<br />" >> $output
@@ -764,16 +812,16 @@
 if [[ "$naive_output" == "yes" ]]
 then
 	echo "output naive output"
-	if [[ "${class_filter}" == "101_101" ]]
+	if [[ "${class_filter}" == "101_101_all" ]]
 	then
-		echo "copy new_IMGT.txz to ${naive_output_all}"
-		cp $outdir/new_IMGT.txz ${naive_output_all}
+		echo "copy ${NEW_IMGT_PREFIX}.txz to ${naive_output_all}"
+		cp $outdir/${NEW_IMGT_PREFIX}.txz ${naive_output_all}
 	else
 		echo "copy for classes"
-		cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
-		cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}
-		cp $outdir/new_IMGT_IGM.txz ${naive_output_cm}
-		cp $outdir/new_IMGT_IGE.txz ${naive_output_ce}
+		cp $outdir/${NEW_IMGT_PREFIX}_IGA.txz ${naive_output_ca}
+		cp $outdir/${NEW_IMGT_PREFIX}_IGG.txz ${naive_output_cg}
+		cp $outdir/${NEW_IMGT_PREFIX}_IGM.txz ${naive_output_cm}
+		cp $outdir/${NEW_IMGT_PREFIX}_IGE.txz ${naive_output_ce}
 	fi
 fi