Galaxy |

Changeset 93:8fcf31272f6e (2023-03-06)

Previous changeset 92:cf8ad181628f (2022-12-12) Next changeset 94:84e9e5c8c101 (2023-03-24)

Commit message:
planemo upload commit a43893724cc769bed8a1f19a5b19ec1ba20cb63c

modified:
CHANGELOG.md
merge_and_filter.r
sequence_overview.py
shm_csr.r
shm_csr.xml
tests/test_shm_csr.py
wrapper.sh

added:
CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz
a.out
show_time_as_float
show_time_as_float.c
tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc
tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc
tests/data/.~lock.handleiding activeren pas.docx#
tests/data/handleiding activeren pas.docx
time_ns

removed:
__pycache__/igm_naive_mutations.cpython-39.pyc
tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.0.pyc

diff -r cf8ad181628f -r 8fcf31272f6e CHANGELOG.md
--- a/CHANGELOG.md Mon Dec 12 12:32:44 2022 +0000
+++ b/CHANGELOG.md Mon Mar 06 11:36:32 2023 +0000

@@ -1,3 +1,13 @@
+version 1.7.0
+-----------------
++ Use the name of the input file to generate the name of the output IMGT
+ archives.
++ Add same duplicate filters as immune repertoire pipeline.
++ Add a new "Everything is IGM" class filter for captured IGM sequences.
++ Fix bug where empty tables would cause crashes when generating plots.
++ Fix bug where R script errors were not written to stderr, causing Galaxy to
+ mistake failed jobs for successful ones.
+
version 1.6.0
-------------
+ Faster runtime due to faster gene identification, sequence overview creation

diff -r cf8ad181628f -r 8fcf31272f6e CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz

Binary file CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz has changed

diff -r cf8ad181628f -r 8fcf31272f6e __pycache__/igm_naive_mutations.cpython-39.pyc

Binary file __pycache__/igm_naive_mutations.cpython-39.pyc has changed

diff -r cf8ad181628f -r 8fcf31272f6e a.out

Binary file a.out has changed

diff -r cf8ad181628f -r 8fcf31272f6e merge_and_filter.r
--- a/merge_and_filter.r Mon Dec 12 12:32:44 2022 +0000
+++ b/merge_and_filter.r Mon Mar 06 11:36:32 2023 +0000

[

@@ -163,8 +163,8 @@
result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"])
}

-if(class.filter == "101_101"){
- result$best_match = "all"
+if(splt[1] == "101" && splt[2] == "101"){ # &&: scalar, short-circuit test as required by if()
+ result$best_match = splt[3] # NOTE(review): presumably the label after "101_101_" (e.g. "all", "IGM") - confirm where splt is built
+}

write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T)

diff -r cf8ad181628f -r 8fcf31272f6e sequence_overview.py
--- a/sequence_overview.py Mon Dec 12 12:32:44 2022 +0000
+++ b/sequence_overview.py Mon Mar 06 11:36:32 2023 +0000

[

@@ -30,7 +30,9 @@
             "IGG3": 0,
             "IGG4": 0,
             "IGM": 0,
-            "unmatched": 0}
+            "unmatched": 0,
+            "all": 0,
+        }
         self.table_rows: List[SequenceTableRow] = []

diff -r cf8ad181628f -r 8fcf31272f6e shm_csr.r
--- a/shm_csr.r Mon Dec 12 12:32:44 2022 +0000
+++ b/shm_csr.r Mon Mar 06 11:36:32 2023 +0000

[

@@ -439,19 +439,20 @@

dat.clss = rbind(dat, dat.clss)

+write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
+
+if (nrow(dat) > 0) {
p = ggplot(dat.clss, aes(best_match, percentage_mutations))
p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA)
p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
-
png(filename="scatter.png")
print(p)
dev.off()

pdfplots[["scatter.pdf"]] <- p
-
-write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
+}

print("Plotting frequency ranges plot")

@@ -467,6 +468,7 @@

frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)

+if (nrow(frequency_bins_data) > 0) {
p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
@@ -476,6 +478,7 @@
dev.off()

pdfplots[["frequency_ranges.pdf"]] <- p
+}

save(pdfplots, file="pdfplots.RData")

@@ -483,10 +486,12 @@

frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),]

+
frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins)
-frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
-frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
-
+if (nrow(frequency_bins_data_by_class) > 0) {
+ frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
+ frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
+}
write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T)

frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
@@ -499,9 +504,10 @@

frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),]
frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins)
-frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
-frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
-
+if (nrow(frequency_bins_data) > 0) {
+ frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
+ frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
+}
write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T)

diff -r cf8ad181628f -r 8fcf31272f6e shm_csr.xml
--- a/shm_csr.xml Mon Dec 12 12:32:44 2022 +0000
+++ b/shm_csr.xml Mon Mar 06 11:36:32 2023 +0000

[

@@ -1,4 +1,4 @@
-<tool id="shm_csr" name="SHM & CSR pipeline" version="1.6.0">
+<tool id="shm_csr" name="SHM & CSR pipeline" version="1.7.0" profile="16.04">
<description></description>
<requirements>
<requirement type="package" version="3.7.1">python</requirement>
@@ -21,11 +21,34 @@
<requirement type="package" version="0.83">font-ttf-ubuntu</requirement>
</requirements>
<command interpreter="bash">
+ <![CDATA[
+ #set $input=$in_file.name
+ ln -s "$in_file" "$input" &&
#if str ( $filter_unique.filter_unique_select ) == "remove":
- wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast
+ $__tool_directory__/wrapper.sh "$input"
+ custom $out_file $out_file.files_path
+ "${in_file.name}" "-" $functionality $unique
+ $naive_output_cond.naive_output $naive_output_ca
+ $naive_output_cg $naive_output_cm $naive_output_ce
+ $naive_output_all $filter_unique.filter_unique_select
+ $filter_unique.filter_unique_clone_count
+ $class_filter_cond.class_filter
+ $empty_region_filter
+ $fast
#else:
- wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast
+ $__tool_directory__/wrapper.sh
+ "$input" custom
+ $out_file $out_file.files_path
+ "${in_file.name}" "-" $functionality $unique
+ $naive_output_cond.naive_output $naive_output_ca $naive_output_cg
+ $naive_output_cm $naive_output_ce $naive_output_all
+ $filter_unique.filter_unique_select
+ 2
+ $class_filter_cond.class_filter
+ $empty_region_filter
+ $fast
#end if
+ ]]>
</command>
<inputs>
<param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" />
@@ -56,13 +79,16 @@
<param name="unique" type="select" label="Remove duplicates based on" help="" >
<option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
<option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
+ <option value="VGene,JGene,CDR3.IMGT.AA">Top.V.Gene, Top.J.Gene, CDR3 (AA)</option>
<option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
<option value="CDR3.IMGT.AA">CDR3 (AA)</option>
-
+
<option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option>
<option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
+ <option value="VGene,JGene,CDR3.IMGT.seq">Top.V.Gene, Top.J.Gene, CDR3 (nt)</option>
<option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option>
<option value="CDR3.IMGT.seq">CDR3 (nt)</option>
+ <option value="VGene,DGene,JGene,CDR3.IMGT.seq">Top.V.Gene, Top.D.Gene, Top.J.Gene, CDR3 (nt)</option>
<option value="Sequence.ID" selected="true">Don't remove duplicates</option>
</param>
<conditional name="class_filter_cond">
@@ -72,14 +98,9 @@
<option value="70_0">>70% class</option>
<option value="60_0">>60% class</option>
<option value="19_0">>19% class</option>
- <option value="101_101">Do not assign (sub)class</option>
+ <option value="101_101_all">Do not assign (sub)class</option>
+ <option value="101_101_IGM">Everything is IGM</option>
</param>
- <when value="70_70"></when>
- <when value="60_55"></when>
- <when value="70_0"></when>
- <when value="60_0"></when>
- <when value="19_0"></when>
- <when value="101_101"></when>
</conditional>
<conditional name="naive_output_cond">
<param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">

diff -r cf8ad181628f -r 8fcf31272f6e show_time_as_float

Binary file show_time_as_float has changed

diff -r cf8ad181628f -r 8fcf31272f6e show_time_as_float.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/show_time_as_float.c Mon Mar 06 11:36:32 2023 +0000

@@ -0,0 +1,29 @@
+/*
+ * Print the current UTC time as "<seconds>.<nanoseconds>" on stdout.
+ *
+ * The nanosecond field is zero-padded to nine digits so the output is a
+ * valid decimal number of seconds (e.g. 5 ns past the second prints as
+ * "S.000000005", not "S.5").
+ *
+ * Exit status: 0 on success, 1 if timespec_get() fails.
+ *
+ * Script adapted from https://www.nu42.com/2021/07/windows-c-time-in-nanoseconds.html
+ */
+#include <stdio.h>
+#include <time.h>
+
+int main(void)
+{
+    struct timespec ts;
+
+    /* timespec_get() returns the time base on success and 0 on failure. */
+    if (timespec_get(&ts, TIME_UTC) != TIME_UTC)
+    {
+        fputs("timespec_get failed!", stderr);
+        return 1;
+    }
+    /* time_t has no portable printf conversion, so widen it explicitly;
+     * tv_nsec is a long and needs %09ld to keep its leading zeros. */
+    printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
+    return 0;
+}

diff -r cf8ad181628f -r 8fcf31272f6e tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc

Binary file tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc has changed

diff -r cf8ad181628f -r 8fcf31272f6e tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.0.pyc

Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.0.pyc has changed

diff -r cf8ad181628f -r 8fcf31272f6e tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc

Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc has changed

diff -r cf8ad181628f -r 8fcf31272f6e tests/data/.~lock.handleiding activeren pas.docx#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/data/.~lock.handleiding activeren pas.docx# Mon Mar 06 11:36:32 2023 +0000

@@ -0,0 +1,1 @@
+Vorderman\, R.H.P. (MOLEPI) ,rhpvorderman,sasc-pc-6,21.02.2023 15:01,file:///home/rhpvorderman/.config/libreoffice/4;
\ No newline at end of file

diff -r cf8ad181628f -r 8fcf31272f6e tests/data/handleiding activeren pas.docx

Binary file tests/data/handleiding activeren pas.docx has changed

diff -r cf8ad181628f -r 8fcf31272f6e tests/test_shm_csr.py
--- a/tests/test_shm_csr.py Mon Dec 12 12:32:44 2022 +0000
+++ b/tests/test_shm_csr.py Mon Mar 06 11:36:32 2023 +0000

@@ -43,11 +43,20 @@
     return container.text

+def ignore_files(src, files):
+    "Skip the contents of .venv and .git directories so the tmp copy stays small."
+    # Used as the copytree ignore callback: the returned names are not copied.
+    inside_skipped_dir = os.path.basename(src) in {".venv", ".git"}
+    return files if inside_skipped_dir else ()
+
@pytest.fixture(scope="module")
def shm_csr_result():
     temp_dir = Path(tempfile.mkdtemp())
     tool_dir = temp_dir / "shm_csr"
-    shutil.copytree(GIT_ROOT, tool_dir)
+    shutil.copytree(
+        GIT_ROOT, tool_dir,
+        # Ignore .venv and .git directories.
+        ignore=ignore_files)
     working_dir = temp_dir / "working"
     working_dir.mkdir(parents=True)
     output_dir = temp_dir / "outputs"

diff -r cf8ad181628f -r 8fcf31272f6e time_ns

Binary file time_ns has changed

diff -r cf8ad181628f -r 8fcf31272f6e wrapper.sh
--- a/wrapper.sh Mon Dec 12 12:32:44 2022 +0000
+++ b/wrapper.sh Mon Mar 06 11:36:32 2023 +0000

[

b'@@ -22,12 +22,15 @@\n empty_region_filter=${18}\n fast=${19}\n \n+BASENAME=$(basename $input)\n+# Cut off .txz or .tgz suffix\n+NEW_IMGT_PREFIX="new_IMGT_${BASENAME%.*}"\n+\n #exec 5> debug_output.txt\n #BASH_XTRACEFD="5"\n-## Busybox date does not support \'+%s.%N\'. So use the slower python instead.\n-## Using -S python does not do \'import site\' which shortens the command\n-## to 10 milliseconds.\n-#PS4=\'$(python -Sc "import time; print(time.time())") $LINENO: \'\n+## Busybox date does not support \'+%s.%N\'. So use a custom program. Can be\n+## Compiled with cc -Os show_time_as_float.c -o show_time_as_float\n+#PS4=\'$(${dir}/show_time_as_float) $LINENO: \'\n #set -x\n \n mkdir -p $outdir\n@@ -39,7 +42,7 @@\n \n echo "unpacking IMGT file"\n \n-type="`file $input`"\n+type="`file -L $input`"\n if [[ "$type" == *"Zip archive"* ]] ; then\n \techo "Zip archive"\n \techo "unzip $input -d $PWD/files/"\n@@ -85,13 +88,30 @@\n echo "---------------- merge_and_filter.r ----------------"\n echo "---------------- merge_and_filter.r ---------------- " >> $log\n \n-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1\n+Rscript $dir/merge_and_filter.r \\\n+ $PWD/summary.txt \\\n+ $PWD/sequences.txt \\\n+ $PWD/mutationanalysis.txt \\\n+ $PWD/mutationstats.txt \\\n+ $PWD/hotspots.txt \\\n+ "$PWD/gapped_aa.txt" \\\n+ $outdir/identified_genes.txt \\\n+ $outdir/merged.txt \\\n+ $outdir/before_unique_filter.txt \\\n+ $outdir/unmatched.txt \\\n+ $method \\\n+ $functionality \\\n+ $unique \\\n+ ${filter_unique} \\\n+ ${filter_unique_count} \\\n+ ${class_filter} \\\n+ ${empty_region_filter}\n \n echo "---------------- creating new IMGT zips ----------------"\n echo 
"---------------- creating new IMGT zips ---------------- " >> $log\n \n python $dir/split_imgt_file.py --outdir $outdir $input $outdir/merged.txt \\\n- --prefix new_IMGT \\\n+ --prefix "${NEW_IMGT_PREFIX}" \\\n - IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE\n \n \n@@ -100,7 +120,7 @@\n \n classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched"\n echo "R mutation analysis"\n-Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1\n+Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter}\n \n echo "---------- Split naive memory IGM ---------"\n echo "---------- Split naive memory IGM --------- " >> $log\n@@ -108,20 +128,20 @@\n python $dir/igm_naive_mutations.py $outdir/scatter.txt $outdir/igm_naive_mutations.txt \\\n $outdir/igm_naive_memory_mutations.txt\n \n-python $dir/split_imgt_file.py --outdir $outdir $outdir/new_IMGT_IGM.txz \\\n+python $dir/split_imgt_file.py --outdir $outdir $outdir/${NEW_IMGT_PREFIX}_IGM.txz \\\n $outdir/igm_naive_mutations.txt \\\n- --prefix new_IMGT_IGM_NAIVE -\n+ --prefix "${NEW_IMGT_PREFIX}_IGM_NAIVE" -\n \n-python $dir/split_imgt_file.py --outdir $outdir $outdir/new_IMGT_IGM.txz \\\n+python $dir/split_imgt_file.py --outdir $outdir $outdir/${NEW_IMGT_PREFIX}_IGM.txz \\\n $outdir/igm_naive_memory_mutations.txt \\\n- --prefix new_IMGT_IGM_NAIVE_MEMORY -\n+ --prefix "${NEW_IMGT_PREFIX}_IGM_NAIVE_MEMORY" -\n \n echo "---------------- plot_pdfs.r ----------------"\n echo "---------------- plot_pdfs.r ---------------- " >> $log\n \n-echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir 2>&1"\n+echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir"\n \n-Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1\n+Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir"\n \n echo "---------------- shm_csr.py ----------------"\n echo "---------------- shm_csr.py ---------------- " >> $log\n@@ -131,7 +151,11 @@\n echo "---------------- aa_histogram.r 
----------------"\n echo "---------------- aa_histogram.r ---------------- " >> $log\n \n-Rsc'..b' 2%)</td><td><a href=\'new_IMGT_IGM_NAIVE.txz\' download=\'new_IMGT_IGM_NAIVE.txz\' >Download</a></td></tr>" >> $output\n-echo "<tr><td>An IMGT archive with just the matched and filtered naive memory IGM sequences (mutations 2% or higher)</td><td><a href=\'new_IMGT_IGM_NAIVE_MEMORY.txz\' download=\'new_IMGT_IGM_NAIVE_MEMORY.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href=\'${NEW_IMGT_PREFIX}.txz\' download=\'${NEW_IMGT_PREFIX}.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGA.txz\' download=\'${NEW_IMGT_PREFIX}_IGA.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGA1.txz\' download=\'${NEW_IMGT_PREFIX}_IGA1.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGA2 sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGA2.txz\' download=\'${NEW_IMGT_PREFIX}_IGA2.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGG sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGG.txz\' download=\'${NEW_IMGT_PREFIX}_IGG.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGG1 sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGG1.txz\' download=\'${NEW_IMGT_PREFIX}_IGG1.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGG2 sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGG2.txz\' download=\'${NEW_IMGT_PREFIX}_IGG2.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGG3 sequences</td><td><a 
href=\'${NEW_IMGT_PREFIX}_IGG3.txz\' download=\'${NEW_IMGT_PREFIX}_IGG3.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGG4.txz\' download=\'${NEW_IMGT_PREFIX}_IGG4.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGM.txz\' download=\'${NEW_IMGT_PREFIX}_IGM.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered IGE sequences</td><td><a href=\'${NEW_IMGT_PREFIX}_IGE.txz\' download=\'${NEW_IMGT_PREFIX}_IGE.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered naive IGM sequences (mutations below 2%)</td><td><a href=\'${NEW_IMGT_PREFIX}_IGM_NAIVE.txz\' download=\'${NEW_IMGT_PREFIX}_IGM_NAIVE.txz\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just the matched and filtered naive memory IGM sequences (mutations 2% or higher)</td><td><a href=\'${NEW_IMGT_PREFIX}_IGM_NAIVE_MEMORY.txz\' download=\'${NEW_IMGT_PREFIX}_IGM_NAIVE_MEMORY.txz\' >Download</a></td></tr>" >> $output\n echo "</table>" >> $output\n \n echo " " >> $output\n@@ -764,16 +812,16 @@\n if [[ "$naive_output" == "yes" ]]\n then\n \techo "output naive output"\n-\tif [[ "${class_filter}" == "101_101" ]]\n+\tif [[ "${class_filter}" == "101_101_all" ]]\n \tthen\n-\t\techo "copy new_IMGT.txz to ${naive_output_all}"\n-\t\tcp $outdir/new_IMGT.txz ${naive_output_all}\n+\t\techo "copy ${NEW_IMGT_PREFIX}.txz to ${naive_output_all}"\n+\t\tcp $outdir/${NEW_IMGT_PREFIX}.txz ${naive_output_all}\n \telse\n \t\techo "copy for classes"\n-\t\tcp $outdir/new_IMGT_IGA.txz ${naive_output_ca}\n-\t\tcp $outdir/new_IMGT_IGG.txz ${naive_output_cg}\n-\t\tcp $outdir/new_IMGT_IGM.txz ${naive_output_cm}\n-\t\tcp $outdir/new_IMGT_IGE.txz ${naive_output_ce}\n+\t\tcp 
$outdir/${NEW_IMGT_PREFIX}_IGA.txz ${naive_output_ca}\n+\t\tcp $outdir/${NEW_IMGT_PREFIX}_IGG.txz ${naive_output_cg}\n+\t\tcp $outdir/${NEW_IMGT_PREFIX}_IGM.txz ${naive_output_cm}\n+\t\tcp $outdir/${NEW_IMGT_PREFIX}_IGE.txz ${naive_output_ce}\n \tfi\n fi\n \n'