changeset 2:2f1d464ebfd2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
author iuc
date Thu, 16 Jul 2020 07:30:45 -0400
parents b75174403a65
children 289b7d6b69a4
files check_bcfile.py macros.xml macros_process.xml stacks_clonefilter.xml test-data/clonefilter/Discarded1_0001.1.1.fq.gz test-data/clonefilter/Discarded1_0001.1.1.fq.single.gz test-data/clonefilter/Removed1_0001.1.1.fq.gz test-data/clonefilter/Removed1_0001.1.1.fq.single-alt.gz test-data/clonefilter/Removed1_0001.1.1.fq.single.gz test-data/cstacks/catalog.alleles.tsv test-data/cstacks/catalog.snps.tsv test-data/cstacks/catalog.tags.tsv test-data/denovo_map/denovo_map.log test-data/denovo_map/popmap_cstacks_genotypes.tsv test-data/gentest.sh test-data/gstacks/catalog.calls.vcf test-data/gstacks/catalog.fa.gz test-data/gstacks/gstacks.log test-data/gstacks/gstacks.log.distribs test-data/kmerfilter/Removed1_0001.1.1.fq.single.gz test-data/kmerfilter/kfreq.tsv test-data/kmerfilter/kfreqdist.tsv test-data/populations/populations.CP.joinmap.loc test-data/populations/populations.log test-data/populations/populations.phistats_summary.tsv test-data/procrad/barcodes-duplicate test-data/procrad/barcodes-duplicate2 test-data/procrad/barcodes-duplicate3 test-data/refmap/catalog.calls.vcf test-data/refmap/catalog.fa.gz test-data/shortreads/PopA_01.forward.fq.gz test-data/shortreads/PopA_01.reverse.fq.gz test-data/shortreads/process_shortreads.out test-data/sstacks/PopA_01.matches.tsv test-data/sstacks/PopA_02.matches.tsv test-data/stacks_outputs/catalog.calls test-data/stacks_outputs/tsv2bam.log test-data/tsv2bam/tsv2bam.log test-data/ustacks/PopA_01.alleles.tsv test-data/ustacks/PopA_01.snps.tsv test-data/ustacks/PopA_01.tags.tsv test-data/ustacks/PopA_02.alleles.tsv test-data/ustacks/PopA_02.snps.tsv test-data/ustacks/PopA_02.tags.tsv
diffstat 44 files changed, 931 insertions(+), 171 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_bcfile.py	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# Sanity-check a barcode file: enough entries, equal column counts, unique samples.
+import argparse
+import sys
+
+parser = argparse.ArgumentParser()
+parser.add_argument('bcfile', help='barcode file')
+args = parser.parse_args()
+
+barcodes = []
+
+with open(args.bcfile, "r") as fh:
+    for line in fh:
+        if not line.strip():  # lines keep their newline, so test the stripped text
+            continue
+        if line.startswith("#"):
+            continue
+        barcodes.append(line.split())
+
+if len(barcodes) <= 1:  # NOTE(review): a single entry is reported as empty too
+    sys.exit("barcode file is empty")
+
+# check that all lines have the same number of columns
+ncol = None
+for bc in barcodes:
+    if ncol is None:
+        ncol = len(bc)
+    elif ncol != len(bc):
+        sys.exit("barcode file has inconsistent number of columns")
+
+isname = False  # True if any row's last column has characters other than A/C/G/T
+for bc in barcodes:
+    if len(bc[-1].strip("ATCGatcg")) > 0:
+        isname = True
+        break
+
+names = set()  # sample ids seen so far: the last column, or all columns joined
+for bc in barcodes:
+    if isname:
+        n = bc[-1]
+    else:
+        n = '-'.join(bc)
+    if n in names:
+        sys.exit("duplicate sample %s in barcode file" % n)
+    names.add(n)
--- a/macros.xml	Mon Sep 30 14:19:47 2019 -0400
+++ b/macros.xml	Thu Jul 16 07:30:45 2020 -0400
@@ -3,12 +3,14 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@STACKS_VERSION@">stacks</requirement>
+            <requirement type="package" version="3.7">python</requirement>
+            <requirement type="package" version="4.6.0">findutils</requirement>
             <yield/>
         </requirements>
     </xml>
 
-    <token name="@STACKS_VERSION@">2.4</token>
-    <token name="@WRAPPER_VERSION@">1</token>
+    <token name="@STACKS_VERSION@">2.53</token>
+    <token name="@WRAPPER_VERSION@">0</token>
     <!-- fix to 18.01 since https://github.com/galaxyproject/galaxy/pull/7032 -->
     <token name="@PROFILE@">18.01</token>
 
@@ -107,16 +109,11 @@
     <!-- log file handling -->
     <token name="@TEE_APPEND_LOG@"><![CDATA[
         #if $output_log
-            2>> '$output_log' &&
-        #end if
-    ]]></token>
-    <token name="@CAT_LOG_TO_STDERR@"><![CDATA[
-        #if $output_log
-            cat '$output_log' 2>&1
+            2> '$output_log'
         #end if
     ]]></token>
     <xml name="in_log">
-        <param name="add_log" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Add log output as dataset" />
+        <param name="add_log" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Add log output as dataset"/>
     </xml>
     <xml name="out_log">
         <data format="txt" name="output_log" label="${tool.name} on ${on_string} log file">
@@ -126,19 +123,19 @@
 
     <!-- inputs from previous pipeline steps -->
     <xml name="input_stacks_macro">
-        <param name="input_stacks" format="tabular,txt" type="data_collection" collection_type="list" label="Loci and polymorphism" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or ustacks" />
+        <param name="input_stacks" format="tabular,txt" type="data_collection" collection_type="list" label="Loci and polymorphism" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or ustacks"/>
     </xml>
     <xml name="input_cat_macro">
-        <param name="input_cat" format="tabular,txt" type="data_collection" collection_type="list" label="Catalog of loci" help="output from a previous Stacks pipeline steps e.g. denovo_map, refmap or cstacks" />
+        <param name="input_cat" format="tabular,txt" type="data_collection" collection_type="list" label="Catalog of loci" help="output from a previous Stacks pipeline steps e.g. denovo_map, refmap or cstacks"/>
     </xml>
     <xml name="input_matches_macro">
-        <param name="input_matches" format="tabular,txt" type="data_collection" collection_type="list" label="Matches to the catalog" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or sstacks" />
+        <param name="input_matches" format="tabular,txt" type="data_collection" collection_type="list" label="Matches to the catalog" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or sstacks"/>
     </xml>
     <xml name="bam_input_macro">
-        <param name="input_bam" format="bam" type="data" multiple="true" optional="false" label="Aligned data" help="either the matches to the catalog (bam), i.e. tsv2bam, or reads aligned to a reference" />
+        <param name="input_bam" format="bam" type="data" multiple="true" optional="false" label="Aligned data" help="either the matches to the catalog (bam), i.e. tsv2bam, or reads aligned to a reference"/>
     </xml>
     <xml name="input_aln_macro">
-        <param name="input_aln" format="vcf,fasta.gz" type="data_collection" collection_type="list" label="Assembled contigs and variant sites" help="output from previous Stacks pipeline steps (e.g. gstacks, denovo_map, or refmap)" argument="-P" />
+        <param name="input_aln" format="vcf,fasta.gz" type="data_collection" collection_type="list" label="Assembled contigs and variant sites" help="output from previous Stacks pipeline steps (e.g. gstacks, denovo_map, or refmap)" argument="-P"/>
     </xml>
 
     <!-- code for creating links to the data sets from previous pipeline steps
@@ -195,15 +192,15 @@
                 <option value="paired">Paired-end files</option>
             </param>
             <when value="single">
-                <param name="fqinputs" argument="-f" type="data" format="fastqsanger,fastqsanger.gz" multiple="@MULTIPLE@" label="Singles-end reads" />
+                <param name="fqinputs" argument="-f" type="data" format="fastqsanger,fastqsanger.gz" multiple="@MULTIPLE@" label="Singles-end reads"/>
                 <param name="barcode_encoding" type="select" label="Barcode location">
-                    <expand macro="barcode_encoding_single" type="Barcode" />
+                    <expand macro="barcode_encoding_single" type="Barcode"/>
                 </param>
             </when>
             <when value="paired">
                 <param name="fqinputs" type="data_collection" collection_type="@LISTTYPE@" label="Paired-end reads" format="fastqsanger,fastqsanger.gz"/>
                 <param name="barcode_encoding" type="select" label="Barcode location">
-                    <expand macro="barcode_encoding_pair" type="Barcode" />
+                    <expand macro="barcode_encoding_pair" type="Barcode"/>
                 </param>
             </when>
         </conditional>
@@ -212,7 +209,7 @@
 
     <xml name="fastq_input_bc_file" token_multiple="false" token_listtype="paired">
         <expand macro="fastq_input_bc" multiple="@MULTIPLE@" listtype="@LISTTYPE@">
-            <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file" />
+            <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file"/>
         </expand>
     </xml>
 
@@ -392,27 +389,27 @@
     <!-- TODO tags, snps, and alleles could go to sub collections; same for other tools -->
     <xml name="ustacks_outputs_macro" token_tooladd="">
         <collection name="tabs" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Loci and polymorphism">
-            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs" />
-            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs" />
-            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.alleles)\.tsv$" ext="tabular" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs"/>
+            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs"/>
+            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.alleles)\.tsv$" ext="tabular" directory="stacks_outputs"/>
         </collection>
     </xml>
     <!-- cstacks outputs collection containing catalog.tags.tsv, catalog.snps.tsv, catalog.alleles.tsv -->
     <xml name="cstacks_outputs_macro" token_tooladd="">
         <collection name="catalog" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Catalog of loci">
-            <discover_datasets pattern="(?P&lt;name&gt;catalog\.(tags|snps|alleles))\.tsv$" ext="tabular" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;catalog\.(tags|snps|alleles))\.tsv$" ext="tabular" directory="stacks_outputs"/>
         </collection>
     </xml>
     <!-- sstacks outputs collection containing SAMPLE.matches.tsv -->
     <xml name="sstacks_outputs_macro" token_tooladd="">
         <collection name="matches" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Matches to the catalog">
-            <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.tsv$" ext="tabular" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.tsv$" ext="tabular" directory="stacks_outputs"/>
         </collection>
     </xml>
     <!-- tsv2bam outputs collection containing SAMPLE.matches.bam -->
     <xml name="tsv2bam_outputs_macro" token_tooladd="">
         <collection name="bams" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Matches to the catalog (bam)">
-            <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.bam$" ext="bam" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.bam$" ext="bam" directory="stacks_outputs"/>
         </collection>
     </xml>
     <!-- gstacks outputs collection containing catalog.calls.vcf and catalog.fa.gz
@@ -423,7 +420,7 @@
             <filter>add_log_distribs</filter>
         </data>
         <collection name="gstacks_alns_out" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Read alignments">
-            <discover_datasets pattern="(?P&lt;name&gt;.*).alns.bam$" ext="bam" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;.*).alns.bam$" ext="bam" directory="stacks_outputs"/>
             <filter>mode_cond['mode_select'] == 'denovo' and mode_cond['advanced_cond']['advanced_select'] == "yes" and mode_cond['advanced_cond']['write_alignments'] != "" and popmap!=None</filter>
         </collection>
         <data name="gstacks_aln_out" format="bam" label="${tool.name} @TOOLADD@ on ${on_string} Read alignment" from_work_dir="stacks_outputs/alignments.bam">
@@ -432,24 +429,27 @@
     </xml>
     <xml name="gstacks_outputs_macro" token_tooladd="">
         <collection name="gstacks_out" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Assembled contigs and variant sites">
-            <discover_datasets pattern="(?P&lt;name&gt;catalog\.calls\.vcf)$" ext="vcf" directory="stacks_outputs" />
-            <discover_datasets pattern="(?P&lt;name&gt;catalog\.fa\.gz)$" ext="fasta.gz" directory="stacks_outputs" />
+            <discover_datasets pattern="(?P&lt;name&gt;catalog\.calls\.vcf)$" ext="vcf" directory="stacks_outputs"/>
+            <discover_datasets pattern="(?P&lt;name&gt;catalog\.fa\.gz)$" ext="fasta.gz" directory="stacks_outputs"/>
         </collection>
     </xml>
 
     <!-- default output of populations -->
     <xml name="populations_output_light" token_tooladd="">
-        <data format="tabular" name="out_haplotypes" label="${tool.name} @TOOLADD@ on ${on_string} Raw Genotypes/Haplotypes" from_work_dir="stacks_outputs/populations.haplotypes.tsv" />
-        <data format="tabular" name="out_hapstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level haplotype summary statistics" from_work_dir="stacks_outputs/populations.hapstats.tsv" />
-        <data format="txt" name="out_populations_log_distribs" label="${tool.name} @TOOLADD@ on ${on_string} Populations log distributions" from_work_dir="stacks_outputs/populations.log.distribs" />
-        <data format="tabular" name="out_sumstats_sum" label="${tool.name} @TOOLADD@ on ${on_string} Summary of Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats_summary.tsv" />
-        <data format="tabular" name="out_sumstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats.tsv" />
-        <data format="tabular" name="out_sql" label="${tool.name} @TOOLADD@ on ${on_string} Genotyping markers" from_work_dir="stacks_outputs/populations.markers.tsv" />
+        <data format="tabular" name="out_haplotypes" label="${tool.name} @TOOLADD@ on ${on_string} Raw Genotypes/Haplotypes" from_work_dir="stacks_outputs/populations.haplotypes.tsv"/>
+        <data format="tabular" name="out_hapstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level haplotype summary statistics" from_work_dir="stacks_outputs/populations.hapstats.tsv"/>
+        <data format="txt" name="out_populations_log_distribs" label="${tool.name} @TOOLADD@ on ${on_string} Populations log distributions" from_work_dir="stacks_outputs/populations.log.distribs"/>
+        <data format="tabular" name="out_sumstats_sum" label="${tool.name} @TOOLADD@ on ${on_string} Summary of Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats_summary.tsv"/>
+        <data format="tabular" name="out_sumstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats.tsv"/>
     </xml>
 
     <xml name="populations_output_full">
         <expand macro="populations_output_light"/>
 
+        <data format="txt" name="out_sql" label="${tool.name} @TOOLADD@ on ${on_string} Genotyping markers" from_work_dir="stacks_outputs/populations.sql.tsv">
+            <filter>genetic_map_options['map_type'] and genetic_map_options['map_format']</filter>
+        </data>
+
         <!-- log_fst_comp populations.fst_summary.tsv populations.phistats_summary.tsv populations.phistats.tsv-->
         <data format="tabular" name="out_phistats" label="${tool.name} on ${on_string} Phi_st statistics" from_work_dir="stacks_outputs/populations.phistats.tsv">
             <filter>advanced_options['log_fst_comp'] and fstats_conditional['fstats']=='yes'</filter>
@@ -533,6 +533,26 @@
         </data>
     </xml>
 
+    <!-- fastq output for kmer/clone-filter -->
+    <xml name="fastq_output_filter">
+        <data name="clean" format_source="fqinputs" label="${tool.name} on ${on_string}">
+            <filter>input_type['input_type_select'] == 'single'</filter>
+            <yield/>
+        </data>
+        <collection name="clean_pair" type="paired" format_source="fqinputs" label="${tool.name} on ${on_string}">
+            <filter>input_type['input_type_select'] == 'paired'</filter>
+            <yield/>
+        </collection>
+        <data name="discarded" format_source="fqinputs" label="${tool.name} on ${on_string}: discarded reads">
+            <filter>capture and input_type['input_type_select'] == 'single'</filter>
+            <yield/>
+        </data>
+        <collection name="discarded_pair" format_source="fqinputs" type="paired" label="${tool.name} on ${on_string}: discarded reads">
+            <filter>capture and input_type['input_type_select'] == 'paired'</filter>
+            <yield/>
+        </collection>
+    </xml>
+
     <xml name="snp_options_alpha">
         <param argument="--alpha" type="select" label="Chi square significance level required to call a heterozygote or homozygote" >
             <option value="0.1">0.1</option>
@@ -554,7 +574,7 @@
             </when>
             <when value="bounded">
                 <param argument="--bound_low" type="float" value="0.0" min="0.0" max="1.0" label="Lower bound for epsilon, the error rate" help="between 0 and 1.0"/>
-                <param argument="--bound_high" type="float" value="1.0" min="0.0" max="1.0" label="Upper bound for epsilon, the error rate" help="between 0 and 1.0" />
+                <param argument="--bound_high" type="float" value="1.0" min="0.0" max="1.0" label="Upper bound for epsilon, the error rate" help="between 0 and 1.0"/>
                 <expand macro="snp_options_alpha"/>
             </when>
             <when value="fixed">
@@ -574,8 +594,8 @@
          "Error: No value was provided for \-\-var-alpha and there is no default for this model)"
 	-->
     <xml name="variant_calling_options_vg" token_varalpha_default="">
-        <param argument="--var-alpha" name="var_alpha" type="float" value="@VARALPHA_DEFAULT@" min="0" label="Alpha threshold for discovering SNPs" help="Default is 0.01 if the marukilow model is used (which is the case in refmap and denovomap), otherwise no default value is available." />
-        <param argument="--gt-alpha" name="gt_alpha" type="float" value="0.05" min="0" label="Alpha threshold for calling genotypes" />
+        <param argument="--var-alpha" name="var_alpha" type="float" value="@VARALPHA_DEFAULT@" min="0" label="Alpha threshold for discovering SNPs" help="Default is 0.01 if the marukilow model is used (which is the case in refmap and denovomap), otherwise no default value is available."/>
+        <param argument="--gt-alpha" name="gt_alpha" type="float" value="0.05" min="0" label="Alpha threshold for calling genotypes"/>
     </xml>
 
     <xml name="barcode_encoding_single" token_type="">
--- a/macros_process.xml	Mon Sep 30 14:19:47 2019 -0400
+++ b/macros_process.xml	Thu Jul 16 07:30:45 2020 -0400
@@ -29,12 +29,12 @@
 
     <xml name="discover_faqgz_output_macro" token_pattern="" token_dir="">
         <expand macro="discover_faq_output_macro" pattern="@PATTERN@" dir="@DIR@"/>
-        <discover_datasets pattern="@PATTERN@\.fq\.gz$" ext="fastqsanger.gz" directory="@DIR@/" />
-        <discover_datasets pattern="@PATTERN@\.fa\.gz$" ext="fasta.gz" directory="@DIR@/" />
+        <discover_datasets pattern="@PATTERN@\.fq\.gz$" ext="fastqsanger.gz" directory="@DIR@/"/>
+        <discover_datasets pattern="@PATTERN@\.fa\.gz$" ext="fasta.gz" directory="@DIR@/"/>
     </xml>
     <xml name="discover_faq_output_macro" token_pattern="" token_dir="">
-        <discover_datasets pattern="@PATTERN@\.fq$" ext="fastqsanger" directory="@DIR@/" />
-        <discover_datasets pattern="@PATTERN@\.fa$" ext="fasta" directory="@DIR@/" />
+        <discover_datasets pattern="@PATTERN@\.fq$" ext="fastqsanger" directory="@DIR@/"/>
+        <discover_datasets pattern="@PATTERN@\.fa$" ext="fasta" directory="@DIR@/"/>
     </xml>
 
     <xml name="process_outputs">
@@ -71,17 +71,17 @@
                 <option value="no" selected="true">No</option>
             </param>
             <when value="yes">
-                <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1" />
-                <param name="score" type="integer" value="10" min="0" max="40" argument="-s" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded" />
-                <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base" />
+                <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1"/>
+                <param name="score" type="integer" value="10" min="0" max="40" argument="-s" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded"/>
+                <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base"/>
                 <param name="discard" type="boolean" checked="false" truevalue="-q" falsevalue="" argument="-q" label="Discard reads with low quality scores"/>
-                <param argument="--filter-illumina" name="filter_illumina" type="boolean" checked="false" truevalue="--filter-illumina" falsevalue="" label="Discard reads that have been marked by Illumina's chastity/purity filter as failing" />
+                <param argument="--filter-illumina" name="filter_illumina" type="boolean" checked="false" truevalue="--filter-illumina" falsevalue="" label="Discard reads that have been marked by Illumina's chastity/purity filter as failing"/>
             </when>
             <when value="no">
                 <param argument="--len_limit" type="integer" value="" optional="true" label="Minimum sequence length" help="useful if your data has already been trimmed"/>
             </when>
         </conditional>
-        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
+        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file"/>
     </xml>
     <token name="@PROCESS_FILTER@"><![CDATA[
     #if $filter_cond.filter_select == 'yes':
@@ -104,30 +104,30 @@
 
         ## fix the _R[12]_0 that was added for preparing the input
         #if $input_type.input_type_select == 'paired':
-            && find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/_R1_0/.1/; s/_R2_0/.2/;')"; done
+            && (find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/_R1_0/.1/; s/_R2_0/.2/;')"; done)
         #end if
         ## also remove the gz which is added by procrad (but its uncompressed)
-        && find stacks_outputs/discarded/ -type f -iname "*.gz.discards" | while read file; do mv "\$file" "\$(echo \$file | sed 's/.gz.discards$/.discards/;')"; done
+        && (find stacks_outputs/discarded/ -type f -iname "*.gz.discards" | while read file; do mv "\$file" "\$(echo \$file | sed 's/.gz.discards$/.discards/;')"; done)
 
         ## the discard files are named fastq even if the output is fasta
         #if str($outype).endswith("fasta"):
-            && find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fa/;')"; done
+            && (find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fa/;')"; done)
         #else
-            && find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fq/;')"; done
+            && (find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fq/;')"; done)
         #end if
     #end if
     ## prepare paired read output for processing in galaxy
     #if $input_type.input_type_select == 'paired':
         && mkdir stacks_outputs/remaining
-        && find stacks_outputs -iregex ".*\.rem\.[12]\.f[aq]\(\.gz\)?" | while read file; do mv "\$file" stacks_outputs/remaining/; done
-        && find stacks_outputs/ -iregex ".*.f[aq]\(\.gz\)?" | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.1\./.forward./; s/\.2\./.reverse./')"; done
+        && (find stacks_outputs -iregex ".*\.rem\.[12]\.f[aq]\(\.gz\)?" | while read file; do mv "\$file" stacks_outputs/remaining/; done)
+        && (find stacks_outputs/ -iregex ".*.f[aq]\(\.gz\)?" | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.1\./.forward./; s/\.2\./.reverse./')"; done)
     #end if
     ]]></token>
 
     <!-- adapter trimming options -->
     <xml name="process_adapter">
-            <param argument="--adapter_1" type="text" value="" optional="true" label="Adaptor sequence that may occur on the first read" />
-            <param argument="--adapter_2" type="text" value="" optional="true" label="Adaptor sequence that may occur on the paired-read" />
+            <param argument="--adapter_1" type="text" value="" optional="true" label="Adaptor sequence that may occur on the first read"/>
+            <param argument="--adapter_2" type="text" value="" optional="true" label="Adaptor sequence that may occur on the paired-read"/>
             <param argument="--adapter_mm" type="integer" value="" optional="true" label="Number of mismatches allowed in the adapter sequence"/>
     </xml>
     <token name="@PROCESS_ADAPTER@"><![CDATA[
@@ -171,8 +171,8 @@
 
     <!-- advanced options that are shared -->
     <xml name="common_advanced">
-        <param name="truncate" type="integer" value="" optional="True" argument="-t" label="Truncate final read length to this value" />
-        <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" label="Retain unmodified FASTQ headers in the output" />
+        <param name="truncate" type="integer" value="" optional="True" argument="-t" label="Truncate final read length to this value"/>
+        <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" label="Retain unmodified FASTQ headers in the output"/>
     </xml>
     <token name="@COMMON_ADVANCED@"><![CDATA[
     #if str($options_advanced.truncate)
@@ -181,3 +181,4 @@
     $options_advanced.retain_header
     ]]></token>
 </macros>
+
--- a/stacks_clonefilter.xml	Mon Sep 30 14:19:47 2019 -0400
+++ b/stacks_clonefilter.xml	Thu Jul 16 07:30:45 2020 -0400
@@ -8,6 +8,7 @@
     <command detect_errors="aggressive"><![CDATA[
 @FASTQ_INPUT_FUNCTIONS@
 
+trap ">&2 cat '$output_log'" err exit &&
 mkdir stacks_inputs stacks_outputs &&
 
 #set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
@@ -37,7 +38,6 @@
 ## the program outputs empty files for fasta/fastq
 -y gzfastq
 @TEE_APPEND_LOG@
-@CAT_LOG_TO_STDERR@
 
 ## move outputs such that Galaxy can find them
 #if $capture:
@@ -57,95 +57,87 @@
 ]]></command>
     <inputs>
         <expand macro="fastq_input_bc"/>
-        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
+        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file"/>
         <param name="oligo_len_1" type="integer" value="0" label="Length of the single-end oligo sequence in dataset"/>
         <param name="oligo_len_2" optional="true" type="integer" label="Length of the paired-end oligo sequence in dataset"/>
-        <param argument="--retain_oligo" type="boolean" checked="false" truevalue="--retain_oligo" falsevalue="" label="Do not trim off the random oligo sequence (if oligo is inline)" />
+        <param argument="--retain_oligo" type="boolean" checked="false" truevalue="--retain_oligo" falsevalue="" label="Do not trim off the random oligo sequence (if oligo is inline)"/>
         <expand macro="in_log"/>
     </inputs>
     <outputs>
         <expand macro="out_log"/>
-        <data format="fastqsanger.gz" name="clean" from_work_dir="outputs/R1.fq.gz" label="${tool.name} on ${on_string}">
-            <filter>input_type['input_type_select'] == 'single'</filter>
-        </data>
-        <collection name="clean_pair" type="paired" label="${tool.name} on ${on_string}">
-            <filter>input_type['input_type_select'] == 'paired'</filter>
-        </collection>
-        <data name="discarded" format="fastqsanger" label="${tool.name} on ${on_string}: discarded reads">
-            <filter>capture and input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
-        </data>
-        <collection name="discarded_pair" type="paired" label="${tool.name} on ${on_string}: discarded reads">
-            <filter>capture and input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
-        </collection>
+        <expand macro="fastq_output_filter"/>
     </outputs>
     <tests>
         <!-- single end, defaults-->
-        <test>
+        <test expect_num_outputs="2">
             <conditional name="input_type">
-                <param name="input_type_select" value="single" />
-                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+                <param name="input_type_select" value="single"/>
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
             </conditional>
-            <param name="oligo_len_1" value="6" />
-            <param name="add_log" value="yes" />
+            <param name="oligo_len_1" value="6"/>
+            <param name="add_log" value="yes"/>
             <output name="output_log" ftype="txt" file="clonefilter/clonefilter.log" lines_diff="8"/>
-            <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.single.gz"/>
+            <output name="clean" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed1_0001.1.1.fq.single.gz"/>
         </test>
         <!-- single end, alt BCencoding, capture-->
-        <test>
+        <test expect_num_outputs="3">
             <conditional name="input_type">
-                <param name="input_type_select" value="single" />
-                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
-                <param name="barcode_encoding" value="--index_null" />
+                <param name="input_type_select" value="single"/>
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
+                <param name="barcode_encoding" value="--index_null"/>
             </conditional>
-            <param name="capture" value="-D" />
-            <param name="oligo_len_1" value="6" />
+            <param name="capture" value="-D"/>
+            <param name="oligo_len_1" value="6"/>
             <assert_command>
-                <has_text text="-D" />
+                <has_text text="-D"/>
             </assert_command>
-            <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
-            <output name="discarded" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
+            <output name="output_log" ftype="txt"><assert_contents><has_text text="5 pairs of reads input."/></assert_contents></output>
+            <output name="clean"     compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed1_0001.1.1.fq.single-alt.gz"/>
+            <output name="discarded" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Discarded1_0001.1.1.fq.single.gz"/>
         </test>
         <!-- paired end, defaults-->
-        <test>
+        <test expect_num_outputs="4">
             <conditional name="input_type">
-                <param name="input_type_select" value="paired" />
+                <param name="input_type_select" value="paired"/>
                 <param name="fqinputs">
                     <collection type="paired">
-                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz" />
-                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" />
+                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz" ftype="fastqsanger.gz"/>
+                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" ftype="fastqsanger.gz"/>
                     </collection>
                 </param>
             </conditional>
-            <param name="oligo_len_1" value="6" />
+            <param name="oligo_len_1" value="6"/>
+            <output name="output_log" ftype="txt"><assert_contents><has_text text="4 pairs of reads input."/></assert_contents></output>
             <output_collection name="clean_pair" type="paired">
-                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
-                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+                <element name="forward" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
+                <element name="reverse" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed2_0001.2.2.fq.gz"/>
             </output_collection>
         </test>
         <!-- paired end, non defaults -->
-        <test>
+        <test expect_num_outputs="7">
             <conditional name="input_type">
-                <param name="input_type_select" value="paired" />
+                <param name="input_type_select" value="paired"/>
                 <param name="fqinputs">
                     <collection type="paired">
-                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz" />
-                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" />
+                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz" ftype="fastqsanger.gz"/>
+                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" ftype="fastqsanger.gz"/>
                     </collection>
                 </param>
             </conditional>
-            <param name="oligo_len_1" value="6" />
-            <param name="capture" value="-D" />
-            <param name="retain_oligo" value="--retain_oligo" />
+            <param name="oligo_len_1" value="6"/>
+            <param name="capture" value="-D"/>
+            <param name="retain_oligo" value="--retain_oligo"/>
             <assert_command>
-                <has_text text="--retain_oligo" />
+                <has_text text="--retain_oligo"/>
             </assert_command>
+            <output name="output_log" ftype="txt"><assert_contents><has_text text="4 pairs of reads input."/></assert_contents></output>
             <output_collection name="clean_pair" type="paired">
-                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
-                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+                <element name="forward" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
+                <element name="reverse" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed2_0001.2.2.fq.gz"/>
             </output_collection>
             <output_collection name="discarded_pair" type="paired">
-                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
-                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+                <element name="forward" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
+                <element name="reverse" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="clonefilter/Removed2_0001.2.2.fq.gz"/>
             </output_collection>
         </test>
     </tests>
@@ -158,5 +150,5 @@
 @STACKS_INFOS@
 ]]>
     </help>
-    <expand macro="citation" />
+    <expand macro="citation"/>
 </tool>
Binary file test-data/clonefilter/Discarded1_0001.1.1.fq.gz has changed
Binary file test-data/clonefilter/Discarded1_0001.1.1.fq.single.gz has changed
Binary file test-data/clonefilter/Removed1_0001.1.1.fq.gz has changed
Binary file test-data/clonefilter/Removed1_0001.1.1.fq.single-alt.gz has changed
Binary file test-data/clonefilter/Removed1_0001.1.1.fq.single.gz has changed
--- a/test-data/cstacks/catalog.alleles.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/cstacks/catalog.alleles.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,3 +1,4 @@
-# cstacks version 2.4; catalog generated on 2019-06-18 10:34:45
+# cstacks version 2.52; catalog generated on 2020-03-16 15:39:40
 0	1	AC	0	0
 0	1	CA	0	0
+# cstacks completed on 2020-03-16 15:39:40
--- a/test-data/cstacks/catalog.snps.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/cstacks/catalog.snps.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,3 +1,4 @@
-# cstacks version 2.4; catalog generated on 2019-06-18 10:34:45
+# cstacks version 2.52; catalog generated on 2020-03-16 15:39:40
 0	1	33	E	0	A	C	-	-
 0	1	88	E	0	A	C	-	-
+# cstacks completed on 2020-03-16 15:39:40
--- a/test-data/cstacks/catalog.tags.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/cstacks/catalog.tags.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,5 @@
-# cstacks version 2.4; catalog generated on 2019-06-18 10:34:45
+# cstacks version 2.52; catalog generated on 2020-03-16 15:39:40
 0	1	consensus	0	1_1,2_1	AATTCGTTTGCTGCTTCAGGAATCTCTCGTATAATCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC	0	0	0
 0	2	consensus	0	1_2,2_2	AATTCGGCTTGCAACGCAAGTGACGATTCCCACGGACATAACTGATCTAAGTAACTTCCAAATCTGGGAATGGGATTTCATAATTAAGGACTAT	0	0	0
 0	3	consensus	0	1_3,2_3	AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA	0	0	0
+# cstacks completed on 2020-03-16 15:39:40
--- a/test-data/denovo_map/denovo_map.log	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/denovo_map/denovo_map.log	Thu Jul 16 07:30:45 2020 -0400
@@ -1,5 +1,5 @@
-denovo_map.pl version 2.4 started at 2019-06-18 10:34:45
-/home/berntm/miniconda3/envs/mulled-v1-2b57e7596f85ebb3b321e6c9681e8fd9250523a80d97945c46ac7743359454e7/bin/denovo_map.pl --samples demultiplexed --popmap denovo_map/popmap_cstacks.tsv -o stacks_outputs --paired
+denovo_map.pl version 2.52 started at 2020-03-16 15:39:40
+/home/berntm/miniconda3/envs/__stacks@2.52/bin/denovo_map.pl --samples demultiplexed --popmap denovo_map/popmap_cstacks.tsv -o stacks_outputs --paired
 
 ustacks
 ==========
@@ -110,7 +110,7 @@
 
 cstacks
 ==========
-cstacks -P stacks_outputs -M denovo_map/popmap_cstacks.tsv
+cstacks -M denovo_map/popmap_cstacks.tsv -P stacks_outputs
 
 cstacks parameters selected:
   Loci matched based on sequence identity.
@@ -279,8 +279,7 @@
 Working on 1 group(s) of populations:
     defaultgrp: 1
 
-Genotyping markers will be written to 'stacks_outputs/populations.markers.tsv'
-Raw Genotypes/Haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv'
+Raw haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv'
 Population-level summary statistics will be written to 'stacks_outputs/populations.sumstats.tsv'
 Population-level haplotype summary statistics will be written to 'stacks_outputs/populations.hapstats.tsv'
 
@@ -306,4 +305,4 @@
 Populations is done.
 denovo_map.pl is done.
 
-denovo_map.pl completed at 2019-06-18 10:34:45
+denovo_map.pl completed at 2020-03-16 15:39:40
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/denovo_map/popmap_cstacks_genotypes.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,2 @@
+PopA_01	parent
+PopA_02	progeny
--- a/test-data/gentest.sh	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/gentest.sh	Thu Jul 16 07:30:45 2020 -0400
@@ -1,7 +1,11 @@
 #!/usr/bin/env bash
+eval "$(conda shell.bash hook)"
+conda activate __stacks@2.52
+
+
+
 
 mkdir stacks_outputs
-
 denovo_map.pl --samples demultiplexed --popmap denovo_map/popmap_cstacks.tsv -o stacks_outputs --paired  && 
 gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf
 rm stacks_outputs/catalog.calls
--- a/test-data/gstacks/catalog.calls.vcf	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/gstacks/catalog.calls.vcf	Thu Jul 16 07:30:45 2020 -0400
@@ -1,6 +1,6 @@
 ##fileformat=VCFv4.2
-##fileDate=20190618
-##source="Stacks v2.4"
+##fileDate=20200316
+##source="Stacks v2.52"
 ##INFO=<ID=AD,Number=R,Type=Integer,Description="Total Depth for Each Allele">
 ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
 ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
Binary file test-data/gstacks/catalog.fa.gz has changed
--- a/test-data/gstacks/gstacks.log	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/gstacks/gstacks.log	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,4 @@
-gstacks v2.4, executed 2019-06-18 10:34:45 (zlib-1.2.11)
+gstacks v2.52, executed 2020-03-16 15:39:40 (zlib-1.2.11)
 gstacks -P stacks_outputs -M denovo_map/popmap_cstacks.tsv
 Locus/sample distributions will be written to 'stacks_outputs/gstacks.log.distribs'.
 
--- a/test-data/gstacks/gstacks.log.distribs	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/gstacks/gstacks.log.distribs	Thu Jul 16 07:30:45 2020 -0400
@@ -19,25 +19,25 @@
 Num. threads: 1
 Parallel time: 0.0
 Average thread time spent:
-     0.0  reading (3.1%)
-     0.0  processing (95.2%)
-             0.0 pre-alignments block (72.2%)
-             0.0  reformatting fw-reads (0.1%)
-             0.0  assembling (22.2%)
-             0.0  initializing alignments (5.4%)
-             0.0  aligning (42.9%)
-             0.0  merging read pairs (1.5%)
-             0.0 post-alignments block (21.2%)
+     0.0  reading (3.0%)
+     0.0  processing (95.7%)
+             0.0 pre-alignments block (74.4%)
+             0.0  reformatting fw-reads (0.2%)
+             0.0  assembling (27.3%)
+             0.0  initializing alignments (8.7%)
+             0.0  aligning (36.6%)
+             0.0  merging read pairs (1.6%)
+             0.0 post-alignments block (19.8%)
              0.0  filtering reads (0.0%)
-             0.0  counting nucleotides (3.5%)
-             0.0  genotyping (1.9%)
-             0.0  haplotyping (1.0%)
+             0.0  counting nucleotides (3.2%)
+             0.0  genotyping (1.5%)
+             0.0  haplotyping (0.9%)
              0.0  computing consensus (0.1%)
              0.0  building_fa (0.1%)
-             0.0  building_vcf (14.6%)
-     0.0  writing_fa (0.1%)
-     0.0  writing_vcf (1.3%)
-     0.0  clocking (0.2%)
-Total time spent writing vcf: 0.0 (1.3%)
+             0.0  building_vcf (14.0%)
+     0.0  writing_fa (0.0%)
+     0.0  writing_vcf (0.9%)
+     0.0  clocking (0.1%)
+Total time spent writing vcf: 0.0 (0.8%)
 VCFwrite block size: mean=1.0(n=3); max=1
 END clockings
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kmerfilter/Removed1_0001.1.1.fq.single.gz	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,20 @@
+@K00392:16:HJ2G2BBXX:7:1102:10825:462401 1:N:0:ATCACG
+GGACATTGGCTGCAGTACTCTGACCCTGGCCACCTCAACCTGTCTCTCTCGCACCGGAAACCTCCGATCCGCAGAAACATGAGCAACCCTACAGTTGACACACACAACAGTTTTCGACCGAAACTACACATTCCTCTGTCCCATTTCCTC
++
+--A-FF-F-<A<-FAF7AAF--F--FF-A<-JJFJFFFF-<<-7--<FFF--77<7-77-<---7-7<-7-AA--<<-7<----<-A-FJFF-7A-F--7FF-<7A-A-<7F7---7-7-77-<7<<A--<J7--<-)-7)-7---7F<-
+@K00392:16:HJ2G2BBXX:7:1102:10825:46240 1:N:0:ATCACG
+GGACATTGGCTGCAGTACTCTGACCCTGGCCACCTCAACCTGTCTCTCTCGCACCGGAAACCTCCGATCCGCAGAAACATGAGCAACCCTACAGTTGACACACACAACAGTTTTCGACCGAAACTACACATTCCTCTGTCCCATTTCCTC
++
+--A-FF-F-<A<-FAF7AAF--F--FF-A<-JJFJFFFF-<<-7--<FFF--77<7-77-<---7-7<-7-AA--<<-7<----<-A-FJFF-7A-F--7FF-<7A-A-<7F7---7-7-77-<7<<A--<J7--<-)-7)-7---7F<-
+@K00392:16:HJ2G2BBXX:8:1201:4929:9367 1:N:0:ATCACG
+GGATTGAGGATGCAGCAACGTTCTAACATCTAGTGGAAAGCCTTCCCAGAAGAGTGGAGGCTGTTATAGCAGCAAAGGGGGGACCAACTCCATATTATTGCCCATGACTTTTGAATGAGATGTTTGACGAGCAGGGGTCGACATACTTTT
++
+AAAFFJJJJJJJFJFJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJFAJAJFFJFJJFJFJ7FJAJJJAFFJJJJJJAJAFFFFA<JJJAJJFAFJA7<AFAFFA7F7F<-77<F<--7-7-))---)AFF-<A-7FA
+@K00392:16:HJ2G2BBXX:7:1104:19268:28727 1:N:0:ATCACG
+ATGCCGCGGCCCTTGCAGAGCAAGGGGAACCACTACTTCAACTCAAGGTCTCAAAGCGAGTGACGTAACTGATTGAAACGCTATTAGCGCGCACCACCGCTAACTAGCTATCCATTTCACATCCGTTACATATGTATGTATGTACACACA
++
+AAFFFJJJJJJFJJJJJJJJJJJJJJJJJJJJJJJJJJJJFJJJJJJJJJJJJJJJJJJJFJJJFJJJJJJJJJJJJJFFJJJJJJJJJJJJJJJJJJFJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJFJJJJJFAFJJJJJJJJ
+@K00392:16:HJ2G2BBXX:8:2125:3011:15574 1:N:0:ATCACG
+TCAGAAGAAAAACCCACAAAATAGAACCGGAGTCCTATTCCATTATTCCTAGCTGCGGTATTCAGGCGACCGGGCCTGCTTTGAACACTCTAATTTTTTCAAAGTAAACGCTTCGGACCCCGCGGGCCACTCAGTTAAGAGCATCGAGGG
++
+AAFFFJJJJJJJJFJFJFJFJJJJFJJJJJ<J-7FJFAJJJJJJJJAJJFJAJJJJJJJJFJJFAAFAJJJJJFJJJJJFAJJJJJFFJJ<J-FFAJJJF-J<7-7<<A<---AF<7JJJJ-A)77-7--)F-7----<--7A7A<--7-
--- a/test-data/kmerfilter/kfreq.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/kmerfilter/kfreq.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,3 @@
+# KmerFrequency	Count
+1	408
+2	136
--- a/test-data/kmerfilter/kfreqdist.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/kmerfilter/kfreqdist.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,3 +1,545 @@
-# KmerFrequency	Count
-1	408
-2	136
+#	Kmer	Count
+TAAGAGCATCGAGGG	1
+CTCAGTTAAGAGCAT	1
+ACTCAGTTAAGAGCA	1
+CCACTCAGTTAAGAG	1
+GCCACTCAGTTAAGA	1
+GGCCACTCAGTTAAG	1
+GGGCCACTCAGTTAA	1
+CCGCGGGCCACTCAG	1
+CCCGCGGGCCACTCA	1
+CCCCGCGGGCCACTC	1
+TTCGGACCCCGCGGG	1
+GCTTCGGACCCCGCG	1
+CGCTTCGGACCCCGC	1
+AACGCTTCGGACCCC	1
+GTAAACGCTTCGGAC	1
+AGTAAACGCTTCGGA	1
+AAGTAAACGCTTCGG	1
+AAAGTAAACGCTTCG	1
+CAAAGTAAACGCTTC	1
+TCAAAGTAAACGCTT	1
+TTTTCAAAGTAAACG	1
+TTTTTTCAAAGTAAA	1
+ATTTTTTCAAAGTAA	1
+CTAATTTTTTCAAAG	1
+CACTCTAATTTTTTC	1
+AACACTCTAATTTTT	1
+GAACACTCTAATTTT	1
+CTTTGAACACTCTAA	1
+GCTTTGAACACTCTA	1
+TGCTTTGAACACTCT	1
+CTGCTTTGAACACTC	1
+TAAACGCTTCGGACC	1
+CCTGCTTTGAACACT	1
+GCCTGCTTTGAACAC	1
+GGGCCTGCTTTGAAC	1
+ACCGGGCCTGCTTTG	1
+GACCGGGCCTGCTTT	1
+ATTCAGGCGACCGGG	1
+TATTCAGGCGACCGG	1
+GTATTCAGGCGACCG	1
+CGGTATTCAGGCGAC	1
+ACACTCTAATTTTTT	1
+GCGGTATTCAGGCGA	1
+TGCGGTATTCAGGCG	1
+CTGCGGTATTCAGGC	1
+TAGCTGCGGTATTCA	1
+CTAGCTGCGGTATTC	1
+CCTAGCTGCGGTATT	1
+TCCTAGCTGCGGTAT	1
+TTCCTAGCTGCGGTA	1
+CATTATTCCTAGCTG	1
+CCATTATTCCTAGCT	1
+ATTCCATTATTCCTA	1
+TCCTATTCCATTATT	1
+GTCCTATTCCATTAT	1
+GAGTCCTATTCCATT	1
+GGAGTCCTATTCCAT	1
+AACCGGAGTCCTATT	1
+GGACCCCGCGGGCCA	1
+GAACCGGAGTCCTAT	1
+AATAGAACCGGAGTC	1
+AAATAGAACCGGAGT	1
+AAAATAGAACCGGAG	1
+ACAAAATAGAACCGG	1
+CCACAAAATAGAACC	1
+ACCCACAAAATAGAA	1
+AACCCACAAAATAGA	1
+AAACCCACAAAATAG	1
+AAAACCCACAAAATA	1
+AAAAACCCACAAAAT	1
+GAAAAACCCACAAAA	1
+TTAAGAGCATCGAGG	1
+AGAAAAACCCACAAA	1
+AAGAAAAACCCACAA	1
+TGTTATAGCAGCAAA	1
+GTACTCTGACCCTGG	2
+GGTCGACATACTTTT	1
+AGCAAAGGGGGGACC	1
+TAGAACCGGAGTCCT	1
+AGGCTGTTATAGCAG	1
+AGAACCGGAGTCCTA	1
+GAGGCTGTTATAGCA	1
+GGAGGCTGTTATAGC	1
+AGAAGAGTGGAGGCT	1
+CCAGAAGAGTGGAGG	1
+TTCCTCTGTCCCATT	2
+CCTTCCCAGAAGAGT	1
+GCAGCAAAGGGGGGA	1
+TTTTCGACCGAAACT	2
+TCTAACATCTAGTGG	1
+AGTGGAGGCTGTTAT	1
+AGTTGACACACACAA	2
+TCTAGTGGAAAGCCT	1
+CTGATTGAAACGCTA	1
+CAGTTAAGAGCATCG	1
+ACGCTATTAGCGCGC	1
+TTGACACACACAACA	2
+GAACCACTACTTCAA	1
+CACTCAGTTAAGAGC	1
+CTAACATCTAGTGGA	1
+GCTGCGGTATTCAGG	1
+GAGATGTTTGACGAG	1
+CGGAGTCCTATTCCA	1
+ACCGAAACTACACAT	2
+GAAGAGTGGAGGCTG	1
+AAGCCTTCCCAGAAG	1
+AACCTCCGATCCGCA	2
+TGAGCAACCCTACAG	2
+ACGTTCTAACATCTA	1
+AACGTTCTAACATCT	1
+TATTCCTAGCTGCGG	1
+ATTATTCCTAGCTGC	1
+ATGTTTGACGAGCAG	1
+TCTCTCGCACCGGAA	2
+ATGAGATGTTTGACG	1
+GGCCTGCTTTGAACA	1
+AGCCTTCCCAGAAGA	1
+TTCCATTATTCCTAG	1
+CAACGTTCTAACATC	1
+ACTGATTGAAACGCT	1
+CTAGTGGAAAGCCTT	1
+TGAGATGTTTGACGA	1
+TCTGTCCCATTTCCT	2
+CCTCTGTCCCATTTC	2
+CATTCCTCTGTCCCA	2
+TTGAACACTCTAATT	1
+GAATGAGATGTTTGA	1
+ACACATTCCTCTGTC	2
+ATTCCTAGCTGCGGT	1
+CTCCATATTATTGCC	1
+GACCGAAACTACACA	2
+CGACCGAAACTACAC	2
+ACTACACATTCCTCT	2
+CTGCAGTACTCTGAC	2
+TTCTAACATCTAGTG	1
+CCGAAACTACACATT	2
+ATCCGTTACATATGT	1
+TACATATGTATGTAT	1
+CAGTACTCTGACCCT	2
+GTTCTAACATCTAGT	1
+ACAGTTTTCGACCGA	2
+TTGACGAGCAGGGGT	1
+CTCTGTCCCATTTCC	2
+CACAAAATAGAACCG	1
+TACTCTGACCCTGGC	2
+ATAGCAGCAAAGGGG	1
+TGAATGAGATGTTTG	1
+AACAGTTTTCGACCG	2
+TCGGACCCCGCGGGC	1
+ACCCTACAGTTGACA	2
+TCTCTCTCGCACCGG	2
+GTATGTATGTACACA	1
+ACACAACAGTTTTCG	2
+ACACACAACAGTTTT	2
+TCCCAGAAGAGTGGA	1
+ACCACCGCTAACTAG	1
+GAGTGGAGGCTGTTA	1
+ACACACACAACAGTT	2
+AACGCTATTAGCGCG	1
+GTAACTGATTGAAAC	1
+GTCTCTCTCGCACCG	2
+AACATCTAGTGGAAA	1
+CTTCCCAGAAGAGTG	1
+GTGGAAAGCCTTCCC	1
+CTACTTCAACTCAAG	1
+CGCGGGCCACTCAGT	1
+AACCTGTCTCTCTCG	2
+CTCGCACCGGAAACC	2
+CGCACCGGAAACCTC	2
+TATGTATGTACACAC	1
+ATCCATTTCACATCC	1
+ACTCTAATTTTTTCA	1
+GCTGCAGTACTCTGA	2
+ACATGAGCAACCCTA	2
+GACCCCGCGGGCCAC	1
+AAGAGTGGAGGCTGT	1
+CCGGAAACCTCCGAT	2
+GCAGCAACGTTCTAA	1
+GAAACTACACATTCC	2
+CTGACCCTGGCCACC	2
+ACCTCAACCTGTCTC	2
+AAACTACACATTCCT	2
+AGAGTGGAGGCTGTT	1
+GCGACCGGGCCTGCT	1
+CATCTAGTGGAAAGC	1
+CAGAAACATGAGCAA	2
+ACTTCAACTCAAGGT	1
+CGGGCCACTCAGTTA	1
+AGAAACATGAGCAAC	2
+CTATTCCATTATTCC	1
+TTTTGAATGAGATGT	1
+GGCGACCGGGCCTGC	1
+ACATCTAGTGGAAAG	1
+TAGCAGCAAAGGGGG	1
+GGACCAACTCCATAT	1
+GACATTGGCTGCAGT	2
+CCTATTCCATTATTC	1
+CAGTTTTCGACCGAA	2
+CAACCCTACAGTTGA	2
+TTTGACGAGCAGGGG	1
+GACGTAACTGATTGA	1
+CAGGCGACCGGGCCT	1
+ACTCTGACCCTGGCC	2
+CCGGGCCTGCTTTGA	1
+AGTGGAAAGCCTTCC	1
+TTGAAACGCTATTAG	1
+CGTTCTAACATCTAG	1
+TTGAGGATGCAGCAA	1
+CTCAAAGCGAGTGAC	1
+GGACATTGGCTGCAG	2
+ACCTGTCTCTCTCGC	2
+TTTGAACACTCTAAT	1
+TGACACACACAACAG	2
+CACATTCCTCTGTCC	2
+GTTTTCGACCGAAAC	2
+GCAGAAACATGAGCA	2
+CTGGCCACCTCAACC	2
+GATCCGCAGAAACAT	2
+TCCGATCCGCAGAAA	2
+AACCACTACTTCAAC	1
+GCTGTTATAGCAGCA	1
+AACTAGCTATCCATT	1
+CATGAGCAACCCTAC	2
+TCGACCGAAACTACA	2
+CCTACAGTTGACACA	2
+AAGGGGAACCACTAC	1
+AAAGCCTTCCCAGAA	1
+CACAACAGTTTTCGA	2
+GGAACCACTACTTCA	1
+CCTCAACCTGTCTCT	2
+AAACCTCCGATCCGC	2
+CTTCGGACCCCGCGG	1
+GGGGAACCACTACTT	1
+GGCTGCAGTACTCTG	2
+TTGGCTGCAGTACTC	2
+GAGCAAGGGGAACCA	1
+AGCAACGTTCTAACA	1
+TGCAGTACTCTGACC	2
+TGGCCACCTCAACCT	2
+GACTTTTGAATGAGA	1
+CAGAAGAAAAACCCA	1
+CACCTCAACCTGTCT	2
+CCGATCCGCAGAAAC	2
+GTTATAGCAGCAAAG	1
+TGGCTGCAGTACTCT	2
+CTGTCCCATTTCCTC	2
+AGATGTTTGACGAGC	1
+TACTTCAACTCAAGG	1
+AGCAGGGGTCGACAT	1
+CTTTTGAATGAGATG	1
+TCAACTCAAGGTCTC	1
+ACCGCTAACTAGCTA	1
+GCCACCTCAACCTGT	2
+TGACTTTTGAATGAG	1
+ACCACTACTTCAACT	1
+CCACCTCAACCTGTC	2
+CCATATTATTGCCCA	1
+CATTGGCTGCAGTAC	2
+CTCTAATTTTTTCAA	1
+ATCTAGTGGAAAGCC	1
+AGTACTCTGACCCTG	2
+TTCGACCGAAACTAC	2
+CGGAAACCTCCGATC	2
+AGCTATCCATTTCAC	1
+TGACCCTGGCCACCT	2
+CCGCGGCCCTTGCAG	1
+CTTCAACTCAAGGTC	1
+TTAGCGCGCACCACC	1
+GTGGAGGCTGTTATA	1
+CACCGGAAACCTCCG	2
+GATTGAGGATGCAGC	1
+AAACGCTTCGGACCC	1
+ACAGTTGACACACAC	2
+CGACCGGGCCTGCTT	1
+AGTCCTATTCCATTA	1
+AAGGTCTCAAAGCGA	1
+ACCCTGGCCACCTCA	2
+ACAACAGTTTTCGAC	2
+CTCTCTCGCACCGGA	2
+TGTCTCTCTCGCACC	2
+CAAAGCGAGTGACGT	1
+TTTTTCAAAGTAAAC	1
+TCTCGCACCGGAAAC	2
+GGCCACCTCAACCTG	2
+ATGCCGCGGCCCTTG	1
+TGTTTGACGAGCAGG	1
+CCCACAAAATAGAAC	1
+GAGGATGCAGCAACG	1
+AACCCTACAGTTGAC	2
+CCTGTCTCTCTCGCA	2
+TGCAGCAACGTTCTA	1
+AGCAAGGGGAACCAC	1
+TCTGACCCTGGCCAC	2
+GGCCCTTGCAGAGCA	1
+TAATTTTTTCAAAGT	1
+CTGTCTCTCTCGCAC	2
+ATAGAACCGGAGTCC	1
+GACCCTGGCCACCTC	2
+TGATTGAAACGCTAT	1
+GCGGGCCACTCAGTT	1
+TCAGAAGAAAAACCC	1
+TGGAGGCTGTTATAG	1
+AGCGCGCACCACCGC	1
+TCGCACCGGAAACCT	2
+TCAGTTAAGAGCATC	1
+ATTCCTCTGTCCCAT	2
+GACGAGCAGGGGTCG	1
+CCACTACTTCAACTC	1
+TCAACCTGTCTCTCT	2
+ACCGGAAACCTCCGA	2
+CCCTGGCCACCTCAA	2
+GAAACATGAGCAACC	2
+GCTAACTAGCTATCC	1
+AGTTTTCGACCGAAA	2
+CACACACAACAGTTT	2
+ATTTCACATCCGTTA	1
+TATTCCATTATTCCT	1
+CGATCCGCAGAAACA	2
+GGCTGTTATAGCAGC	1
+GGAAACCTCCGATCC	2
+GCAACGTTCTAACAT	1
+AGCAACCCTACAGTT	2
+TTATTCCTAGCTGCG	1
+CTCTGACCCTGGCCA	2
+CAGCAAAGGGGGGAC	1
+CAGAAGAGTGGAGGC	1
+TACAGTTGACACACA	2
+TAGCGCGCACCACCG	1
+AAACATGAGCAACCC	2
+AAACGCTATTAGCGC	1
+GAAACCTCCGATCCG	2
+GCCCATGACTTTTGA	1
+TCCATATTATTGCCC	1
+ACATTGGCTGCAGTA	2
+CAGTTGACACACACA	2
+TCAAGGTCTCAAAGC	1
+ATCCGCAGAAACATG	2
+CACACAACAGTTTTC	2
+GGATGCAGCAACGTT	1
+TTCAAAGTAAACGCT	1
+AACATGAGCAACCCT	2
+GAGTGACGTAACTGA	1
+CTGTTATAGCAGCAA	1
+TAACATCTAGTGGAA	1
+CTCTCGCACCGGAAA	2
+CGCAGAAACATGAGC	2
+GGATTGAGGATGCAG	1
+TCCTCTGTCCCATTT	2
+GGTATTCAGGCGACC	1
+ATGAGCAACCCTACA	2
+TGGAAAGCCTTCCCA	1
+AGGCGACCGGGCCTG	1
+ACCTCCGATCCGCAG	2
+TCCATTATTCCTAGC	1
+ACTACTTCAACTCAA	1
+GAGCAACCCTACAGT	2
+GCAACCCTACAGTTG	2
+AGCTGCGGTATTCAG	1
+ACTAGCTATCCATTT	1
+AGAAGAAAAACCCAC	1
+CCCTACAGTTGACAC	2
+CTACAGTTGACACAC	2
+CAAGGGGAACCACTA	1
+AGGATGCAGCAACGT	1
+TTATAGCAGCAAAGG	1
+GTTAAGAGCATCGAG	1
+TATAGCAGCAAAGGG	1
+GCAAAGGGGGGACCA	1
+CAAAGGGGGGACCAA	1
+ACCCCGCGGGCCACT	1
+GACCAACTCCATATT	1
+ACATATGTATGTATG	1
+AAGGGGGGACCAACT	1
+ACATTCCTCTGTCCC	2
+GCACCACCGCTAACT	1
+CCGCAGAAACATGAG	2
+AGGGGGGACCAACTC	1
+GGGGGGACCAACTCC	1
+GGGGGACCAACTCCA	1
+GCACCGGAAACCTCC	2
+GGGGACCAACTCCAT	1
+GCAGTACTCTGACCC	2
+ACCAACTCCATATTA	1
+CCAACTCCATATTAT	1
+ACATCCGTTACATAT	1
+TATTGCCCATGACTT	1
+CAACTCCATATTATT	1
+CTCAAGGTCTCAAAG	1
+CTATTAGCGCGCACC	1
+AACTCCATATTATTG	1
+ATTGAGGATGCAGCA	1
+CATATTATTGCCCAT	1
+GGGAACCACTACTTC	1
+ATATTATTGCCCATG	1
+TATTATTGCCCATGA	1
+TCTAATTTTTTCAAA	1
+TTTCGACCGAAACTA	2
+ATTATTGCCCATGAC	1
+ACTCCATATTATTGC	1
+CCGCTAACTAGCTAT	1
+TTATTGCCCATGACT	1
+CAACAGTTTTCGACC	2
+ATTGCCCATGACTTT	1
+TTGCCCATGACTTTT	1
+TGAGGATGCAGCAAC	1
+TGCCCATGACTTTTG	1
+CGCACCACCGCTAAC	1
+CCCATGACTTTTGAA	1
+TTCACATCCGTTACA	1
+CCATGACTTTTGAAT	1
+ATGACTTTTGAATGA	1
+ATGCAGCAACGTTCT	1
+GCGCACCACCGCTAA	1
+ACTTTTGAATGAGAT	1
+CATGACTTTTGAATG	1
+TTTGAATGAGATGTT	1
+AATTTTTTCAAAGTA	1
+ATGTATGTACACACA	1
+TCCGCAGAAACATGA	2
+TTGAATGAGATGTTT	1
+GATGTTTGACGAGCA	1
+TCACATCCGTTACAT	1
+ACGAGCAGGGGTCGA	1
+GAAGAAAAACCCACA	1
+CCCAGAAGAGTGGAG	1
+CGAGCAGGGGTCGAC	1
+GAGCAGGGGTCGACA	1
+GCAGAGCAAGGGGAA	1
+GTTGACACACACAAC	2
+GCAGGGGTCGACATA	1
+CGGACCCCGCGGGCC	1
+GATTGAAACGCTATT	1
+CAGGGGTCGACATAC	1
+ATTGAAACGCTATTA	1
+CAGCAACGTTCTAAC	1
+CAGAGCAAGGGGAAC	1
+AATGAGATGTTTGAC	1
+AGGGGTCGACATACT	1
+CTCCGATCCGCAGAA	2
+GGGGTCGACATACTT	1
+CAACCTGTCTCTCTC	2
+GGGTCGACATACTTT	1
+CAAAATAGAACCGGA	1
+TACACATTCCTCTGT	2
+AACTCAAGGTCTCAA	1
+TGCCGCGGCCCTTGC	1
+GCCGCGGCCCTTGCA	1
+CGCGGCCCTTGCAGA	1
+GCGGCCCTTGCAGAG	1
+CGGCCCTTGCAGAGC	1
+TTTCAAAGTAAACGC	1
+GCCTTCCCAGAAGAG	1
+TTTCACATCCGTTAC	1
+CCCTTGCAGAGCAAG	1
+CGGGCCTGCTTTGAA	1
+CTTGCAGAGCAAGGG	1
+TTGCAGAGCAAGGGG	1
+AAAGGGGGGACCAAC	1
+CCTTGCAGAGCAAGG	1
+TGCAGAGCAAGGGGA	1
+AGAGCAAGGGGAACC	1
+CCGGAGTCCTATTCC	1
+GCAAGGGGAACCACT	1
+TCCGTTACATATGTA	1
+AGGGGAACCACTACT	1
+CCTCCGATCCGCAGA	2
+CCTGGCCACCTCAAC	2
+CACTACTTCAACTCA	1
+TTCCCAGAAGAGTGG	1
+TTCAACTCAAGGTCT	1
+CAACTCAAGGTCTCA	1
+ACTCAAGGTCTCAAA	1
+GGTCTCAAAGCGAGT	1
+ACGCTTCGGACCCCG	1
+ATTGGCTGCAGTACT	2
+CAAGGTCTCAAAGCG	1
+GCTATCCATTTCACA	1
+GGGACCAACTCCATA	1
+TATCCATTTCACATC	1
+AACTACACATTCCTC	2
+AGGTCTCAAAGCGAG	1
+GATGCAGCAACGTTC	1
+GTCTCAAAGCGAGTG	1
+TGAACACTCTAATTT	1
+TGTATGTATGTACAC	1
+TCTCAAAGCGAGTGA	1
+TCAAAGCGAGTGACG	1
+AAGCGAGTGACGTAA	1
+CACCGCTAACTAGCT	1
+AGCGAGTGACGTAAC	1
+TTCAGGCGACCGGGC	1
+CACCACCGCTAACTA	1
+GCGAGTGACGTAACT	1
+AGTGACGTAACTGAT	1
+GTGACGTAACTGATT	1
+TATGTATGTATGTAC	1
+TGACGTAACTGATTG	1
+ACGTAACTGATTGAA	1
+CTCAACCTGTCTCTC	2
+CGTAACTGATTGAAA	1
+TAGTGGAAAGCCTTC	1
+TAACTGATTGAAACG	1
+GGAAAGCCTTCCCAG	1
+AACTGATTGAAACGC	1
+AGCAGCAAAGGGGGG	1
+TGAAACGCTATTAGC	1
+GAAACGCTATTAGCG	1
+CGCTATTAGCGCGCA	1
+GCTATTAGCGCGCAC	1
+AAAGCGAGTGACGTA	1
+TATTAGCGCGCACCA	1
+TCAGGCGACCGGGCC	1
+ATTAGCGCGCACCAC	1
+GCGCGCACCACCGCT	1
+ACCGGAGTCCTATTC	1
+CGCGCACCACCGCTA	1
+CCACCGCTAACTAGC	1
+GAAAGCCTTCCCAGA	1
+CGCTAACTAGCTATC	1
+CTAACTAGCTATCCA	1
+TAACTAGCTATCCAT	1
+GACACACACAACAGT	2
+CTAGCTATCCATTTC	1
+GCCCTTGCAGAGCAA	1
+TAGCTATCCATTTCA	1
+TGACGAGCAGGGGTC	1
+CTATCCATTTCACAT	1
+TCCATTTCACATCCG	1
+GTTTGACGAGCAGGG	1
+CATTTCACATCCGTT	1
+AGTTAAGAGCATCGA	1
+CACATCCGTTACATA	1
+ATGTATGTATGTACA	1
+CTACACATTCCTCTG	2
+CATCCGTTACATATG	1
+CCGTTACATATGTAT	1
+CGAGTGACGTAACTG	1
+CGTTACATATGTATG	1
+CCATTTCACATCCGT	1
+GTTACATATGTATGT	1
+TTACATATGTATGTA	1
+CGAAACTACACATTC	2
+CATATGTATGTATGT	1
+ATATGTATGTATGTA	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/populations/populations.CP.joinmap.loc	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,13 @@
+# Stacks v2.52; JoinMap; March 17, 2020
+# Parent: PopA_01
+name = populations.20200317
+popt = CP
+nloc = 3
+nind = 1
+
+1	<lmxll>	lm
+2	<lmxll>	--
+3	<lmxll>	lm
+
+individual names:
+PopA_02
--- a/test-data/populations/populations.log	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/populations/populations.log	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,4 @@
-populations v2.4, executed 2019-06-18 10:34:45 (zlib-1.2.11)
+populations v2.52, executed 2020-03-16 15:39:40 (zlib-1.2.11)
 populations -P stacks_outputs -M denovo_map/popmap_cstacks.tsv
 Locus/sample distributions will be written to 'stacks_outputs/populations.log.distribs'.
 populations parameters selected:
@@ -20,8 +20,7 @@
 Working on 1 group(s) of populations:
     defaultgrp: 1
 
-Genotyping markers will be written to 'stacks_outputs/populations.markers.tsv'
-Raw Genotypes/Haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv'
+Raw haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv'
 Population-level summary statistics will be written to 'stacks_outputs/populations.sumstats.tsv'
 Population-level haplotype summary statistics will be written to 'stacks_outputs/populations.hapstats.tsv'
 
--- a/test-data/populations/populations.phistats_summary.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/populations/populations.phistats_summary.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -3,3 +3,6 @@
 
 # Fst' Means
 	1
+
+# Dxy Means
+	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/procrad/barcodes-duplicate	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,41 @@
+ATGGGG	PopA_01
+GGGTAA	PopA_02
+AGGAAA	PopA_03
+TTTAAG	PopA_04
+GGTGTG	PopA_05
+TGATGT	PopA_06
+GGTTGT	PopA_07
+ATAAGT	PopA_08
+AAGATA	PopA_09
+TGTGAG	PopA_10
+ATAGTT	PopA_11
+GGAAGG	PopA_12
+TTTGTG	PopA_13
+TTAAAT	PopA_14
+AATAAG	PopA_15
+AAGAGG	PopA_16
+TAGTGT	PopA_17
+TGGAAG	PopA_18
+GGGTTG	PopA_19
+CATCAT	PopA_20
+GGAGAG	PopB_20
+GTTTTA	PopB_01
+TGATAA	PopB_02
+GTTGAT	PopB_03
+AGATTA	PopB_04
+GTATAG	PopB_05
+TTGGGA	PopB_06
+ATATAT	PopB_07
+GATGAG	PopB_08
+GGGAAT	PopB_09
+AGTAAT	PopB_10
+GGGATA	PopB_11
+GAGAAG	PopB_12
+AGTAGA	PopB_13
+AAGGAT	PopB_14
+AGGGTA	PopB_15
+TGTTTT	PopB_16
+ATGATG	PopB_17
+GAGTTA	PopB_18
+ATGTAG	PopB_19
+AAAAAA	PopA_01
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/procrad/barcodes-duplicate2	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,41 @@
+ATGGGG
+GGGTAA
+AGGAAA
+TTTAAG
+GGTGTG
+TGATGT
+GGTTGT
+ATAAGT
+AAGATA
+TGTGAG
+ATAGTT
+GGAAGG
+TTTGTG
+TTAAAT
+AATAAG
+AAGAGG
+TAGTGT
+TGGAAG
+GGGTTG
+CATCAT
+GGAGAG
+GTTTTA
+TGATAA
+GTTGAT
+AGATTA
+GTATAG
+TTGGGA
+ATATAT
+GATGAG
+GGGAAT
+AGTAAT
+GGGATA
+GAGAAG
+AGTAGA
+AAGGAT
+AGGGTA
+TGTTTT
+ATGATG
+GAGTTA
+ATGTAG
+ATGGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/procrad/barcodes-duplicate3	Thu Jul 16 07:30:45 2020 -0400
@@ -0,0 +1,41 @@
+ATGGGG	AAAAAA
+GGGTAA	AAAAAA
+AGGAAA	AAAAAA
+TTTAAG	AAAAAA
+GGTGTG	AAAAAA
+TGATGT	AAAAAA
+GGTTGT	AAAAAA
+ATAAGT	AAAAAA
+AAGATA	AAAAAA
+TGTGAG	AAAAAA
+ATAGTT	AAAAAA
+GGAAGG	AAAAAA
+TTTGTG	AAAAAA
+TTAAAT	AAAAAA
+AATAAG	AAAAAA
+AAGAGG	AAAAAA
+TAGTGT	AAAAAA
+TGGAAG	AAAAAA
+GGGTTG	AAAAAA
+CATCAT	AAAAAA
+GGAGAG	AAAAAA
+GTTTTA	AAAAAA
+TGATAA	AAAAAA
+GTTGAT	AAAAAA
+AGATTA	AAAAAA
+GTATAG	AAAAAA
+TTGGGA	AAAAAA
+ATATAT	AAAAAA
+GATGAG	AAAAAA
+GGGAAT	AAAAAA
+AGTAAT	AAAAAA
+GGGATA	AAAAAA
+GAGAAG	AAAAAA
+AGTAGA	AAAAAA
+AAGGAT	AAAAAA
+AGGGTA	AAAAAA
+TGTTTT	AAAAAA
+ATGATG	AAAAAA
+GAGTTA	AAAAAA
+ATGTAG	AAAAAA
+ATGGGG	AAAAAA
--- a/test-data/refmap/catalog.calls.vcf	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/refmap/catalog.calls.vcf	Thu Jul 16 07:30:45 2020 -0400
@@ -1,6 +1,6 @@
 ##fileformat=VCFv4.2
-##fileDate=20190617
-##source="Stacks v2.4"
+##fileDate=20200709
+##source="Stacks v2.53"
 ##INFO=<ID=AD,Number=R,Type=Integer,Description="Total Depth for Each Allele">
 ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
 ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
Binary file test-data/refmap/catalog.fa.gz has changed
Binary file test-data/shortreads/PopA_01.forward.fq.gz has changed
Binary file test-data/shortreads/PopA_01.reverse.fq.gz has changed
--- a/test-data/shortreads/process_shortreads.out	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/shortreads/process_shortreads.out	Thu Jul 16 07:30:45 2020 -0400
@@ -1,5 +1,5 @@
-process_shortreads v2.2, executed 2018-12-03 21:27:19
-process_shortreads -p stacks_inputs/ -i fastq -b /tmp/tmpiZT6X4/files/000/dataset_2.dat --inline_null -o stacks_outputs
+process_shortreads v2.53, executed 2020-07-10 13:51:53 (zlib-1.2.11)
+process_shortreads -p stacks_inputs/ -i fastq -b /tmp/tmpidt4mij_/files/7/7/8/dataset_778705df-5f0c-4210-9ad6-4c43e8aacd52.dat --inline_null -o stacks_outputs
 File	Retained Reads	Low Quality	Ambiguous Barcodes	Trimmed Reads	Orphaned paired-end reads	Total
 R1.fastq	7000	0	0	0	0	7000
 
--- a/test-data/sstacks/PopA_01.matches.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/sstacks/PopA_01.matches.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,5 +1,6 @@
-# sstacks version 2.4; generated on 2019-06-18 10:34:45
+# sstacks version 2.52; generated on 2020-03-16 15:39:40
 1	1	1	AC	9	94M
 1	1	1	CA	9	94M
 2	1	2	consensus	28	94M
 3	1	3	consensus	20	94M
+# sstacks completed on 2020-03-16 15:39:40
--- a/test-data/sstacks/PopA_02.matches.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/sstacks/PopA_02.matches.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,5 +1,6 @@
-# sstacks version 2.4; generated on 2019-06-18 10:34:45
+# sstacks version 2.52; generated on 2020-03-16 15:39:40
 1	2	1	AC	6	94M
 1	2	1	CA	6	94M
 2	2	2	consensus	28	94M
 3	2	3	consensus	20	94M
+# sstacks completed on 2020-03-16 15:39:40
Binary file test-data/stacks_outputs/catalog.calls has changed
--- a/test-data/stacks_outputs/tsv2bam.log	Mon Sep 30 14:19:47 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-tsv2bam v2.4, executed 2019-06-18 10:34:45 (zlib-1.2.11)
-tsv2bam -P stacks_outputs -M denovo_map/popmap_cstacks.tsv -R demultiplexed/
-Configuration for this run:
-  Stacks directory: 'stacks_outputs/'
-  Population map: 'denovo_map/popmap_cstacks.tsv'
-  Num. samples: 2
-  Paired-end reads directory: 'demultiplexed/'
-
-Paired-end reads files found, e.g. 'demultiplexed/PopA_01.2.fq'.
-Loading the catalog...
-Processing sample 'PopA_01'...
-Processing sample 'PopA_02'...
-
-Sample 'PopA_01': matched 3 sample loci to 3 catalog loci; found a paired-end read for 66 (100.0%) of the assembled forward reads; wrote 132 records.
-Sample 'PopA_02': matched 3 sample loci to 3 catalog loci; found a paired-end read for 60 (100.0%) of the assembled forward reads; wrote 120 records.
-
-tsv2bam is done.
--- a/test-data/tsv2bam/tsv2bam.log	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/tsv2bam/tsv2bam.log	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,4 @@
-tsv2bam v2.4, executed 2019-06-17 21:22:16 (zlib-1.2.11)
+tsv2bam v2.52, executed 2020-03-16 15:39:40 (zlib-1.2.11)
 tsv2bam -P stacks_outputs -M denovo_map/popmap_cstacks.tsv -R demultiplexed/
 Configuration for this run:
   Stacks directory: 'stacks_outputs/'
--- a/test-data/ustacks/PopA_01.alleles.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/ustacks/PopA_01.alleles.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,3 +1,4 @@
-# ustacks version 2.4; generated on 2019-06-18 10:34:45
+# ustacks version 2.52; generated on 2020-03-16 15:39:40
 1	1	AC	50.00	9
 1	1	CA	50.00	9
+# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_01.snps.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/ustacks/PopA_01.snps.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,4 @@
-# ustacks version 2.4; generated on 2019-06-18 10:34:45
+# ustacks version 2.52; generated on 2020-03-16 15:39:40
 1	1	0	O	24.95	A	-		
 1	1	1	O	24.95	A	-		
 1	1	2	O	24.95	T	-		
@@ -281,3 +281,4 @@
 1	3	91	O	27.73	A	-		
 1	3	92	O	27.73	G	-		
 1	3	93	O	27.73	A	-		
+# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_01.tags.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/ustacks/PopA_01.tags.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,4 @@
-# ustacks version 2.4; generated on 2019-06-18 10:34:45
+# ustacks version 2.52; generated on 2020-03-16 15:39:40
 1	1	consensus			AATTCGTTTGCTGCTTCAGGAATCTCTCGTATAATCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC	0	0	0
 1	1	model			OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOO			
 1	1	primary	0	lane1_fakedata7_0 1:N:0:/1	AATTCGTTTGCTGCTTCAGGAATCTCTCGTATACTCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC			
@@ -71,3 +71,4 @@
 1	3	primary	0	lane1_fakedata2_19 1:N:0:/1	AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA			
 1	3	secondary		lane1_fakedata2_5 1:N:0:/1	AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTAAAACACTCTGACTGCCACGCCAGCTACCTCTAGA			
 1	3	secondary		lane1_fakedata2_17 1:N:0:/1	AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCCAGA			
+# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_02.alleles.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/ustacks/PopA_02.alleles.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,3 +1,4 @@
-# ustacks version 2.4; generated on 2019-06-18 10:34:45
+# ustacks version 2.52; generated on 2020-03-16 15:39:40
 2	1	AC	50.00	6
 2	1	CA	50.00	6
+# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_02.snps.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/ustacks/PopA_02.snps.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,4 @@
-# ustacks version 2.4; generated on 2019-06-18 10:34:45
+# ustacks version 2.52; generated on 2020-03-16 15:39:40
 2	1	0	O	16.64	A	-		
 2	1	1	O	16.64	A	-		
 2	1	2	O	16.64	T	-		
@@ -281,3 +281,4 @@
 2	3	91	O	27.73	A	-		
 2	3	92	O	27.73	G	-		
 2	3	93	O	27.73	A	-		
+# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_02.tags.tsv	Mon Sep 30 14:19:47 2019 -0400
+++ b/test-data/ustacks/PopA_02.tags.tsv	Thu Jul 16 07:30:45 2020 -0400
@@ -1,4 +1,4 @@
-# ustacks version 2.4; generated on 2019-06-18 10:34:45
+# ustacks version 2.52; generated on 2020-03-16 15:39:40
 2	1	consensus			AATTCGTTTGCTGCTTCAGGAATCTCTCGTATAATCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC	0	0	0
 2	1	model			OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOO			
 2	1	primary	0	lane1_fakedata7_7 1:N:0:/1	AATTCGTTTGCTGCTTCAGGAATCTCTCGTATACTCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC			
@@ -65,3 +65,4 @@
 2	3	primary	0	lane1_fakedata2_18 1:N:0:/1	AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA			
 2	3	primary	0	lane1_fakedata2_19 1:N:0:/1	AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA			
 2	3	secondary		lane1_fakedata2_1 1:N:0:/1	AATTCTCTACACCACTGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA			
+# ustacks completed on 2020-03-16 15:39:40