Mercurial > repos > pjbriggs > pal_finder
changeset 8:4e625d3672ba draft
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
author | pjbriggs |
---|---|
date | Wed, 16 May 2018 07:39:16 -0400 |
parents | 5e133b7b79a6 |
children | 52dbe2089d14 |
files | README.rst fastq_subset.py pal_finder_macros.xml pal_finder_wrapper.sh pal_finder_wrapper.xml pal_finder_wrapper_utils.sh test-data/454_microsats.out.re_match~ test-data/illuminaPE_bad_primer_read_ids.out test-data/illuminaPE_filtered_microsats.out.re_match~ test-data/illuminaPE_filtered_microsats_occurrences.out.re_match~ test-data/illuminaPE_filtered_microsats_primers.out.re_match~ test-data/illuminaPE_filtered_microsats_rankmotifs.out.re_match~ test-data/illuminaPE_microsat_types.out.re_match~ test-data/illuminaPE_microsats.out.re_match~ test-data/illuminaPE_microsats_bad_ranges.out.re_match test-data/illuminaPE_microsats_subset.out.re_match test-data/illuminaPE_r1_bad_ranges.fq test-data/illuminaPE_r1_no_microsats.fq test-data/illuminaPE_r2_bad_ranges.fq test-data/illuminaPE_r2_no_microsats.fq |
diffstat | 20 files changed, 551 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Mon Mar 19 06:33:32 2018 -0400 +++ b/README.rst Wed May 16 07:39:16 2018 -0400 @@ -61,6 +61,11 @@ Version Changes ---------- ---------------------------------------------------------------------- +0.02.04.7 - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad + ranges being supplied to ``primer3_core`` for some reads via + ``PRIMER_PRODUCT_SIZE_RANGE`` (and enable 'bad' reads to be output + to a dataset); add new option to use a random subset of reads for + microsatellite detection. 0.02.04.6 - Update to get dependencies using ``conda`` when installed from the toolshed (this removes the explicit dependency on Perl 5.16 introduced in 0.02.04.2, as a result the outputs from the tool are
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_subset.py Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,74 @@ +#!/usr/bin/env python + +import argparse +import random +from Bio.SeqIO.QualityIO import FastqGeneralIterator + +def count_reads(fastq): + """ + Count number of reads in a Fastq file + """ + n = 0 + with open(fastq,'r') as fq: + while True: + buf = fq.read() + n += buf.count('\n') + if buf == "": break + return n/4 + +def fastq_subset(fastq_in,fastq_out,indices): + """ + Output a subset of reads from a Fastq file + + The reads to output are specifed by a list + of integer indices; only reads at those + positions in the input file will be written + to the output. + """ + with open(fastq_in,'r') as fq_in: + fq_out = open(fastq_out,'w') + i = 0 + for title,seq,qual in FastqGeneralIterator(fq_in): + if i in indices: + fq_out.write("@%s\n%s\n+\n%s\n" % (title, + seq, + qual)) + i += 1 + fq_out.close() + +if __name__ == "__main__": + + p = argparse.ArgumentParser() + p.add_argument("fastq_r1") + p.add_argument("fastq_r2") + p.add_argument("-n", + dest="subset_size", + default=None, + help="subset size") + p.add_argument("-s", + dest="seed", + type=int, + default=None, + help="seed for random number generator") + args = p.parse_args() + + print "Processing fastq pair:" + print "\t%s" % args.fastq_r1 + print "\t%s" % args.fastq_r2 + + nreads = count_reads(args.fastq_r1) + print "Counted %d reads in %s" % (nreads,args.fastq_r1) + + if args.subset_size is not None: + subset_size = float(args.subset_size) + if subset_size < 1.0: + subset_size = int(nreads*subset_size) + else: + subset_size = int(subset_size) + print "Extracting subset of reads: %s" % subset_size + if args.seed is not None: + print "Random number generator seed: %d" % args.seed + random.seed(args.seed) + subset = random.sample(xrange(nreads),subset_size) + fastq_subset(args.fastq_r1,"subset_r1.fq",subset) + fastq_subset(args.fastq_r2,"subset_r2.fq",subset)
--- a/pal_finder_macros.xml Mon Mar 19 06:33:32 2018 -0400 +++ b/pal_finder_macros.xml Wed May 16 07:39:16 2018 -0400 @@ -14,6 +14,7 @@ <has_line line="readsWithMicrosat:	13" /> <has_line line="totalBases:	2320" /> <has_line line="totalReads:	20	(2 x 10)" /> + <has_line line="readsWithBadRanges:	0" /> <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> <has_line_matching expression="(AC|TG)\t2\t7\t4\t7\t116\t0?\t0?\t0?\t0?" /> <has_line_matching expression="(AT|CG)\t2\t8\t0\t6\t106\t0?\t0?\t0?\t0?" /> @@ -21,6 +22,44 @@ </assert_contents> </output> </xml> + <xml name="output_illumina_microsat_subset_summary"> + <output name="output_microsat_summary"> + <assert_contents> + <has_line line="allExtended:	0" /> + <has_line line="allSpan:	0" /> + <has_line line="broken:	0" /> + <has_line line="compound:	2" /> + <has_line line="readsWithMicrosat:	7" /> + <has_line line="totalBases:	1160" /> + <has_line line="totalReads:	10	(2 x 5)" /> + <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> + <has_line_matching expression="(AC|TG)\t2\t6\t3\t6\t104\t0?\t0?\t0?\t0?" /> + <has_line_matching expression="(AT|CG)\t2\t3\t0\t3\t38\t0?\t0?\t0?\t0?" /> + <has_line_matching expression="(AG|TC)\t2\t0\t0\t0\t0\t0?\t0?\t0?\t0?" 
/> + </assert_contents> + </output> + </xml> + <xml name="output_illumina_microsat_summary_bad_ranges"> + <output name="output_microsat_summary"> + <assert_contents> + <has_line line="allExtended:	2" /> + <has_line line="allSpan:	0" /> + <has_line line="broken:	0" /> + <has_line line="compound:	4" /> + <has_line line="readsWithMicrosat:	12" /> + <has_line line="totalBases:	2231" /> + <has_line line="totalReads:	12	(2 x 6)" /> + <has_line line="readsWithBadRanges:	2" /> + <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> + <!-- + I'd like to do a basic check on the remainder of the file but + there are too many lines for the 'assert_contents' method (and + the tag doesn't provide the functionality to do a simple line + count + --> + </assert_contents> + </output> + </xml> <xml name="output_454_microsat_summary"> <output name="output_microsat_summary"> <assert_contents>
--- a/pal_finder_wrapper.sh Mon Mar 19 06:33:32 2018 -0400 +++ b/pal_finder_wrapper.sh Wed May 16 07:39:16 2018 -0400 @@ -26,11 +26,13 @@ # --primer-opt-tm VALUE: optimum melting temperature (Celsius) # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers # --output_config_file FNAME: write a copy of the config.txt file to FNAME -# --filter_microsats FNAME: write output of filter options FNAME +# --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME +# --filter_microsats FNAME: write output of filter options to FNAME # -assembly FNAME: run the 'assembly' filter option and write to FNAME # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option # -rankmotifs: run the 'rankmotifs' filter option +# --subset N: use a subset of reads of size N # # pal_finder is available from http://sourceforge.net/projects/palfinder/ # @@ -53,6 +55,9 @@ # Maximum size reporting log file contents MAX_LINES=500 # +# Get helper functions +. $(dirname $0)/pal_finder_wrapper_utils.sh +# # Initialise locations of scripts, data and executables # # Set these in the environment to overide at execution time @@ -63,31 +68,18 @@ # Filter script is in the same directory as this script PALFINDER_FILTER=$(dirname $0)/pal_filter.py if [ ! -f $PALFINDER_FILTER ] ; then - echo No $PALFINDER_FILTER script >&2 - exit 1 + fatal No $PALFINDER_FILTER script fi # # Check that we have all the components -function have_program() { - local program=$1 - local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") - if [ -z "$got_program" ] ; then - echo yes - else - echo no - fi -} if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then - echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2 - exit 1 + fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" fi if [ ! 
-f "${PALFINDER_DATA_DIR}/config.txt" ] ; then - echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2 - exit 1 + fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" fi if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then - echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2 - exit 1 + fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" fi # # Initialise parameters used in the config.txt file @@ -113,12 +105,14 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= +SUBSET= +RANDOM_SEED=568765 # # Collect command line arguments if [ $# -lt 2 ] ; then echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" - exits + fatal "Bad command line" fi if [ "$1" == "--454" ] ; then PLATFORM="454" @@ -212,6 +206,10 @@ shift OUTPUT_CONFIG_FILE=$1 ;; + --bad_primer_ranges) + shift + BAD_PRIMER_RANGES=$1 + ;; --filter_microsats) shift FILTERED_MICROSATS=$1 @@ -224,6 +222,10 @@ shift OUTPUT_ASSEMBLY=$1 ;; + --subset) + shift + SUBSET=$1 + ;; *) echo Unknown option: $1 >&2 exit 1 @@ -235,16 +237,33 @@ # Check that primer3_core is available got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` if [ -z "$got_primer3" ] ; then - echo ERROR primer3_core not found >&2 - exit 1 + fatal "primer3_core not found" fi # +# Check the n-mers specification +if [ $MIN_6_MER_REPS -ne 0 ] ; then + if [ $MIN_5_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" + fi +fi +if [ $MIN_5_MER_REPS -ne 0 ] ; then + if [ $MIN_4_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" + fi +fi +if [ $MIN_4_MER_REPS -ne 0 ] ; then + if [ $MIN_3_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" + fi +fi +if [ $MIN_2_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 
2-mer repeats cannot be zero" +fi # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input if [ $FASTQ_R1 == $FASTQ_R2 ] ; then - echo ERROR R1 and R2 fastqs are the same file >&2 - exit 1 + fatal ERROR R1 and R2 fastqs are the same file fi ln -s $FASTQ_R1 ln -s $FASTQ_R2 @@ -259,22 +278,19 @@ PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) mkdir Output # +# Use a subset of reads +if [ ! -z "$SUBSET" ] ; then + echo "### Extracting subset of reads ###" + $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 + fastq_r1="subset_r1.fq" + fastq_r2="subset_r2.fq" +fi +# # Copy in the default config.txt file echo "### Creating config.txt file for pal_finder run ###" /bin/cp $PALFINDER_DATA_DIR/config.txt . # # Update the config.txt file with new values -function set_config_value() { - local key=$1 - local value=$2 - local config_txt=$3 - if [ -z "$value" ] ; then - echo "No value for $key, left as default" - else - echo Setting "$key" to "$value" - sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt - fi -} # Input files set_config_value platform $PLATFORM config.txt if [ "$PLATFORM" == "Illumina" ] ; then @@ -299,6 +315,7 @@ # Primer3 settings set_config_value primer3input Output/pr3in.txt config.txt set_config_value primer3output Output/pr3out.txt config.txt +set_config_value keepPrimer3files 1 config.txt set_config_value primer3executable $PRIMER3_CORE_EXE config.txt set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt @@ -327,18 +344,53 @@ fi tail -$MAX_LINES pal_finder.log # -# Check that log ends with "Done!!" message -if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then - echo ERROR pal_finder failed to complete successfully >&2 +# Check for success/failure +if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. 
Ending script.')" ] ; then + # No microsatellites found + fatal ERROR pal_finder failed to locate any microsatellites exit 1 +elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then + # Log doesn't end with "Done!!" (indicates failure) + fatal ERROR pal_finder failed to complete successfully +fi +echo "### pal_finder finished ###" +# +# Check for errors in pal_finder output +echo "### Checking for errors ###" +if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then + echo WARNING primer3 terminated prematurely due to bad product size ranges + $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) + N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) + if [ -z "$BAD_PRIMER_RANGES" ] ; then + # No output file so report to stderr + cat <<EOF + +Pal_finder generated bad ranges for the following read IDs: + +EOF + cat bad_primer_ranges.txt + cat <<EOF + +This error can occur when input data contains short R1 reads and has +has not been properly trimmed and filtered. + +EOF + else + # Move the bad ranges to the specified file + echo "### Writing read IDs with bad primer ranges ###" + /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" + fi +else + N_BAD_PRIMERS=0 fi # # Sort microsat_summary output echo "### Sorting microsat summary output ###" head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted +echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted -tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted +tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted mv microsat_summary.sorted Output/microsat_summary.txt # # Sort PAL_summary output @@ -362,11 +414,9 @@ fi tail -$MAX_LINES pal_filter.log if [ $? 
-ne 0 ] ; then - echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 - exit 1 + fatal $PALFINDER_FILTER exited with non-zero status elif [ ! -f PAL_summary.filtered ] ; then - echo ERROR no output from $PALFINDER_FILTER >&2 - exit 1 + fatal no output from $PALFINDER_FILTER fi fi # @@ -386,8 +436,7 @@ if [ -f "$assembly" ] ; then /bin/mv $assembly "$OUTPUT_ASSEMBLY" else - echo ERROR no assembly output found >&2 - exit 1 + fatal no assembly output found fi fi if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
--- a/pal_finder_wrapper.xml Mon Mar 19 06:33:32 2018 -0400 +++ b/pal_finder_wrapper.xml Wed May 16 07:39:16 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.6"> +<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.7"> <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description> <macros> <import>pal_finder_macros.xml</import> @@ -9,7 +9,7 @@ <requirement type="package" version="1.65">biopython</requirement> <requirement type="package" version="2.8.1">pandaseq</requirement> </requirements> - <command><![CDATA[ + <command detect_errors="exit_code"><![CDATA[ @CONDA_PAL_FINDER_SCRIPT_DIR@ && @CONDA_PAL_FINDER_DATA_DIR@ && bash $__tool_directory__/pal_finder_wrapper.sh @@ -26,6 +26,9 @@ --454 "$platform.input_fasta" #end if $output_microsat_summary $output_pal_summary + #if $report_bad_primer_ranges + --bad_primer_ranges "$output_bad_primer_read_ids" + #end if #if $keep_config_file --output_config_file "$output_config_file" #end if @@ -61,6 +64,10 @@ #if str( $platform.assembly ) == '-assembly' $platform.assembly "$output_assembly" #end if + #set $use_all_reads = $platform.subset_conditional.use_all_reads + #if str( $use_all_reads ) != "yes" + --subset "$platform.subset_conditional.subset" + #end if #end if ]]></command> <inputs> @@ -88,6 +95,13 @@ label="Select FASTQ dataset collection with R1/R2 pair" /> </when> </conditional> + <conditional name="subset_conditional"> + <param name="use_all_reads" type="boolean" label="Use all reads for microsatellite detection?" checked="True" truevalue="yes" falsevalue="no" /> + <when value="no"> + <param name="subset" type="text" value="0.5" label="Number or fraction of reads to use" help="Either an integer number of reads or a decimal fraction (e.g. 
0.5 to select 50% of reads)" /> + </when> + <when value="yes" /> + </conditional> <param name="filters" type="select" display="checkboxes" multiple="True" label="Filters to apply to the pal_finder results" help="Apply none, one or more filters to refine results"> @@ -103,7 +117,7 @@ <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> </when> </conditional> - <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" /> + <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" min="1" help="Must detect at least one repeat of this n-mer unit" /> <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> @@ -155,7 +169,9 @@ label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" help="Temperature should be in degrees Celsius" /> </when> + <when value="default" /> </conditional> + <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer product size ranges" help="Can be used to screen reads in input Fastqs " /> <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" label="Output the config file to the history" help="Can be used to run pal_finder outside of Galaxy" /> @@ -169,6 +185,9 @@ <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for 
${primer_prefix}: assembly"> <filter>platform['assembly'] is True</filter> </data> + <data name="output_bad_primer_read_ids" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: read IDs generating bad primer ranges"> + <filter>report_bad_primer_ranges is True</filter> + </data> <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file"> <filter>keep_config_file is True</filter> </data> @@ -247,6 +266,77 @@ <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" /> </test> + <!-- Test with Illumina input using subset of reads --> + <test> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="use_all_reads" value="no" /> + <param name="subset" value="0.5" /> + <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + <expand macro="output_illumina_microsat_subset_summary" /> + <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_subset.out.re_match" /> + </test> + <!-- Test with Illumina input filter that doesn't find any + microsatellites --> + <test expect_failure="true"> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="min_2mer_repeats" value="8" /> + <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" /> + <assert_stderr> + <has_text text="pal_finder failed to locate any microsatellites" /> + </assert_stderr> + </test> + <!-- Test with Illumina input generating bad ranges --> + <test> + <param 
name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="min_2mer_repeats" value="8" /> + <param name="input_fastq_r1" value="illuminaPE_r1_bad_ranges.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2_bad_ranges.fq" ftype="fastqsanger" /> + <param name="min_2mer_repeats" value="8" /> + <param name="min_3mer_repeats" value="8" /> + <param name="min_4mer_repeats" value="8" /> + <param name="min_5mer_repeats" value="8" /> + <param name="min_6mer_repeats" value="8" /> + <param name="primer_options" value="custom" /> + <param name="primer_opt_size" value="25" /> + <param name="primer_min_size" value="21" /> + <param name="primer_max_size" value="30" /> + <param name="primer_min_gc" value="40.0" /> + <param name="primer_max_gc" value="60.0" /> + <param name="primer_gc_clamp" value="3" /> + <param name="primer_max_end_gc" value="5" /> + <param name="primer_min_tm" value="60.0" /> + <param name="primer_max_tm" value="80.0" /> + <param name="primer_opt_tm" value="68.0" /> + <param name="primer_pair_max_diff_tm" value="3.0" /> + <param name="report_bad_primer_ranges" value="true" /> + <expand macro="output_illumina_microsat_summary_bad_ranges" /> + <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_bad_ranges.out.re_match" /> + <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_read_ids.out" /> + </test> + <!-- Test with bad n-mers specified --> + <test expect_failure="true"> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="min_2mer_repeats" value="8" /> + <param name="min_3mer_repeats" value="8" /> + <param name="min_4mer_repeats" value="0" /> + <param name="min_5mer_repeats" value="8" /> + <param name="min_6mer_repeats" value="8" /> + <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" /> + 
<param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" /> + <assert_stderr> + <has_text text="Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" /> + </assert_stderr> + </test> <!-- Test with 454 input --> <test> <param name="platform_type" value="454" /> @@ -282,6 +372,52 @@ .. class:: infomark +**Known issues** + +.. class:: warning + +**Low number of reads used for microsatellite detection/bad primer product size ranges** + +For some datasets pal_finder may generate 'bad' product size ranges (where the +lower limit exceeds the upper limit) for one or more reads, for input into +primer3_core. In these cases primer3_core will terminate prematurely, which can +result in a substantially lower number of reads being used for microsatellite +detection and potentially sub-optimal primer design. + +The number of reads generating the bad size ranges are reported in the +*Summary of microsat types* output dataset as 'readsWithBadRanges'. Ideally +the reported value should be zero. + +The conditions which cause this issue within pal_finder are still unclear, +however we believe it to be associated with short or low quality reads. If this +problem affects your data then: + +* Ensure that the input data are sufficiently trimmed and filtered (using + e.g. the Trimmomatic tool) before rerunning pal_finder. + +* A list of read IDs for which pal_finder generates bad product size ranges can + be output by turning on *Output IDs for input reads which generate bad primer + ranges*. This outputs an additional dataset with a list of read IDs which can + be used to remove read pairs from the input Fastq files (using e.g. the *Filter + sequences by ID* tool) before rerunning pal_finder. + +.. 
class:: warning + +**Pal_finder takes a long time to run for large input datasets** + +pal_finder was originally developed using MiSeq data, and is not optimised for +working with the larger Fastqs that are output from other platforms such as +HiSeq and NextSeq. As a consequence pal_finder may take a very long time to +complete when operating on larger datasets. + +If this is a problem then the tool can be run using a subset of the input reads +by unchecking the *Use all reads...* option and entering either an integer number +of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads). + +------------- + +.. class:: infomark + **Credits** This Galaxy tool has been developed by Peter Briggs within the Bioinformatics Core
#!/bin/bash
#
# pal_finder_wrapper_utils.sh: new file added by this changeset
# (diff header: --- /dev/null  +++ b/pal_finder_wrapper_utils.sh  @@ -0,0 +1,75 @@)
#
# Helper functions for the pal_finder_wrapper.sh script
#
# Utility function for terminating on fatal error
#
# Usage: fatal MESSAGE...
# Writes "FATAL MESSAGE..." to stderr and exits the shell with status 1
function fatal() {
    echo "FATAL $*" >&2
    exit 1
}
#
# Check that specified program is available
#
# Usage: have_program PROGRAM
# Echoes "yes" if PROGRAM can be located, "no" otherwise
function have_program() {
    local program=$1
    # 'command -v' reports availability via its exit status; this
    # avoids depending on the wording of the message printed by
    # 'which', which differs between implementations
    if command -v "$program" >/dev/null 2>&1 ; then
        echo yes
    else
        echo no
    fi
}
#
# Set the value for a parameter in the pal_finder config file
#
# Usage: set_config_value KEY VALUE CONFIG_FILE
# Replaces the line starting "KEY " in CONFIG_FILE with "KEY VALUE";
# an empty VALUE leaves the file untouched
function set_config_value() {
    local key=$1
    local value=$2
    local config_txt=$3
    if [ -z "$value" ] ; then
        echo "No value for $key, left as default"
    else
        echo Setting "$key" to "$value"
        # Escape characters which are special in the replacement part
        # of the sed expression below (backslash, '&', and the ','
        # used as the delimiter) so the value is inserted literally
        local escaped
        escaped=$(printf '%s' "$value" | sed -e 's/[\\&,]/\\&/g')
        sed -i 's,^'"$key"' .*,'"$key"' '"$escaped"',' "$config_txt"
    fi
}
#
# Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE from pr3in.txt file
#
# Usage: find_bad_primer_ranges PR3IN OUTFILE
function find_bad_primer_ranges() {
    # Parses a pr3in.txt file from pal_finder and reports
    # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has
    # upper limit which is smaller than lower limit
    #
    # One line is written to OUTFILE for each bad read, of
    # the form: READ_ID<tab>(RANGE [RANGE...])
    #
    # Nothing is written to stdout (the wrapper script invokes
    # this function inside a command substitution)
    local pr3in=$1
    local outfile=$2
    local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)"
    local line first second seq_id size_range range lower upper bad_range
    # Always (re)create the output file so that callers can read
    # or move it even when no bad ranges are found
    : >"$outfile"
    # Consecutive SEQUENCE_ID and PRIMER_PRODUCT_SIZE_RANGE lines
    # are joined into a single line separated by '*'; reading them
    # with 'read' avoids word splitting and filename expansion
    while IFS= read -r line ; do
        case "$line" in
            *\**)
                first=${line%%\**}
                second=${line#*\*}
                ;;
            *)
                # Unpaired line (e.g. odd number of matches)
                continue
                ;;
        esac
        case "$first" in
            SEQUENCE_ID=*)
                # Lines look like:
                # SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535*PRIMER_PRODUCT_SIZE_RANGE=194-329 59-194
                seq_id=${first#*=}
                size_range=${second#*=}
                ;;
            PRIMER_PRODUCT_SIZE_RANGE=*)
                # Lines look like:
                # PRIMER_PRODUCT_SIZE_RANGE=194-329 59-194*SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535
                size_range=${first#*=}
                seq_id=${second#*=}
                ;;
            *)
                # Not a recognised pair, don't reuse stale values
                continue
                ;;
        esac
        # Strip the two leading '(...)' groups to leave the read ID
        seq_id=$(echo "$seq_id" | cut -d')' -f3)
        # Check the upper and lower limits in each range
        # to see if it's okay
        bad_range=
        for range in $size_range ; do
            lower=${range%%-*}
            upper=${range#*-}
            # Ignore anything which isn't a pair of integers
            if [[ ! "$lower" =~ ^[0-9]+$ ]] || [[ ! "$upper" =~ ^[0-9]+$ ]] ; then
                continue
            fi
            if [ "$lower" -gt "$upper" ] ; then
                bad_range=yes
                break
            fi
        done
        # Report if the range is wrong
        if [ ! -z "$bad_range" ] ; then
            echo "${seq_id}"$'\t'"(${size_range})" >>"$outfile"
        fi
    done < <(grep -E "$pattern" "$pr3in" | sed 'N;s/\n/*/')
}
--- a/test-data/454_microsats.out.re_match~ Mon Mar 19 06:33:32 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -SequenceID\ Repeat\ Motif\ Size\ Repeat\ Motif\ Number\ Tandem\ Repeats\ Primer\ Designed\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Total\ Repeats\ In\ Amplicon\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs -FW1N29Q04EP35X\ 2\ AC\ 18\ 0\ \ \ \ \ \ \ \ \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_bad_primer_read_ids.out Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,1 @@ +M00879:99:000000000-AH9KG:1:2107:14372:5471 (74-71 77-74)
--- a/test-data/illuminaPE_filtered_microsats.out.re_match~ Mon Mar 19 06:33:32 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\ R1\_Sequence\_ID\ R1\_Sequence\ R2\_Sequence\_ID\ R2\_Sequence -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ AC\(12\)\ \ 12\ \ \ 1\ test\_3\ AAGTACAGTGGGGAGGCTGG\ test\_6\ TTTTCTACACAGCTCAAGTAGCCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\ TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\ TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ AC\(12\)\ \ 12\ \ \ 1\ test\_7\ GCAGTAAACAAAGGCAAAGGG\ test\_4\ CCTGGGCAGAGGTGTTCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\ TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\ TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ AG\(14\)\ \ 14\ \ \ 1\ test\_5\ TTCTCCCACTATATTTTGCATTGG\ test\_1\ TCCAGACTGAAGCTACCCTGG\ AG\(14\)\ \ 14\ 1\ \ \ 1\ 1\ 1\ 1\ 
\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\ TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\ TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
--- a/test-data/illuminaPE_filtered_microsats_occurrences.out.re_match~ Mon Mar 19 06:33:32 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\ R1\_Sequence\_ID\ R1\_Sequence\ R2\_Sequence\_ID\ R2\_Sequence -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ AG\(14\)\ \ 14\ \ \ 1\ test\_7\ TTCTCCCACTATATTTTGCATTGG\ test\_6\ TCCAGACTGAAGCTACCCTGG\ AG\(14\)\ \ 14\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\ TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\ TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ AC\(12\)\ \ 12\ \ \ 1\ test\_5\ GCAGTAAACAAAGGCAAAGGG\ test\_2\ CCTGGGCAGAGGTGTTCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\ TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\ TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ AC\(12\)\ \ 12\ \ \ 1\ test\_8\ AAGTACAGTGGGGAGGCTGG\ test\_4\ TTTTCTACACAGCTCAAGTAGCCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 
1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\ TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\ TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC
--- a/test-data/illuminaPE_filtered_microsats_primers.out.re_match~ Mon Mar 19 06:33:32 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\ R1\_Sequence\_ID\ R1\_Sequence\ R2\_Sequence\_ID\ R2\_Sequence -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ AG\(14\)\ \ 14\ \ \ 1\ test\_7\ TTCTCCCACTATATTTTGCATTGG\ test\_2\ TCCAGACTGAAGCTACCCTGG\ AG\(14\)\ \ 14\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\ TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\ TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ TG\(12\)\ \ 12\ \ \ 1\ test\_5\ GCAGTAAACAAAGGCAAAGGG\ test\_3\ CCTGGGCAGAGGTGTTCC\ TG\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\ TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\ TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ TG\(12\)\ \ 12\ \ \ 1\ test\_4\ AAGTACAGTGGGGAGGCTGG\ test\_6\ TTTTCTACACAGCTCAAGTAGCCC\ TG\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1\ 
\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\ TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\ TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ TG\(12\)\ TG\(12\)\ \ 24\ \ \ 1\ test\_8\ TCTTTATCTAAACACATCCTGAAATACC\ test\_1\ AAACGCAATTATTTTGAGATGTCC\ TG\(12\)\ TG\(12\)\ \ 24\ 1\ \ \ 1\ 2\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ 1\:N\:0\:TCCTGA\ TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ 2\:N\:0\:TCCTGA\ TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC
--- a/test-data/illuminaPE_filtered_microsats_rankmotifs.out.re_match~ Mon Mar 19 06:33:32 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs\ R1\_Sequence\_ID\ R1\_Sequence\ R2\_Sequence\_ID\ R2\_Sequence -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ AC\(12\)\ \ 12\ \ \ 1\ test\_3\ AAGTACAGTGGGGAGGCTGG\ test\_4\ TTTTCTACACAGCTCAAGTAGCCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 1\:N\:0\:TCCTGA\ TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ 2\:N\:0\:TCCTGA\ TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ \ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\ 1\:N\:0\:TCCTGA\ TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\ 2\:N\:0\:TCCTGA\ TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\ TC\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ \ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\ 1\:N\:0\:TCCTGA\ 
TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\ 2\:N\:0\:TCCTGA\ TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ \ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ 1\:N\:0\:TCCTGA\ TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ 2\:N\:0\:TCCTGA\ TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ AC\(12\)\ \ 12\ \ \ 1\ test\_6\ GCAGTAAACAAAGGCAAAGGG\ test\_1\ CCTGGGCAGAGGTGTTCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 1\:N\:0\:TCCTGA\ TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ 2\:N\:0\:TCCTGA\ TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ AC\(36\)\ \ 36\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ \ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ 1\:N\:0\:TCCTGA\ TCGTAGCATGTGTATGCTTTGGGGTTTCATGCTGTTGATTCATAACTGCTGCTGGCTGTAGACTGAACCTTCTGGGTAGGAGGAATATGCTTAGACAAGCACACCAGTCAGCCCGA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ 2\:N\:0\:TCCTGA\ TCTGTGTGTGAGCACACACACACACACACACACACACACACACACACATGCAGGTACTTGCTCTGCCACCCCTGGCGGGCTGCGTGGTGTGCCTGACGACGTATTCTAATCCTACA -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ TC\(14\)\ \ 14\ \ \ 1\ test\_7\ TTCTCCCACTATATTTTGCATTGG\ test\_2\ TCCAGACTGAAGCTACCCTGG\ TC\(14\)\ \ 14\ 1\ \ \ 1\ 1\ 1\ 1\ 
\>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 1\:N\:0\:TCCTGA\ TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA\ \>ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ 2\:N\:0\:TCCTGA\ TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA
--- a/test-data/illuminaPE_microsat_types.out.re_match~ Mon Mar 19 06:33:32 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -allExtended\:\ 0 -allSpan\:\ 0 -broken\:\ 2 -compound\:\ 2 -readsWithMicrosat\:\ 13 -totalBases\:\ 2320 -totalReads\:\ 20\ \(2\ x\ 10\) - - -Microsat\ Type\ monomer\ length\ total\ loci\ loci\ w\/\ primers\ reads\ with\ loci\ total\ bases\ extended\ extended\ w\/\ primers\ spanning\ spanning\ w\/\ primers -AC\ 2\ 7\ 4\ 7\ 116\ 0\ 0\ 0\ 0 -AG\ 2\ 0\ 0\ 0\ 0\ 0\ 0\ 0\ 0 -AT\ 2\ 8\ 0\ 6\ 106\ 0\ 0\ 0\ 0 -CG\ 2\ 0\ 0\ 0\ 0\ 0\ 0\ 0\ 0 -TC\ 2\ 2\ 1\ 2\ 26\ \ \ \
--- a/test-data/illuminaPE_microsats.out.re_match~ Mon Mar 19 06:33:32 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:10979\:1695\ TC\(14\)\ \ 14\ \ \ 1\ test\_7\ TTCTCCCACTATATTTTGCATTGG\ test\_2\ TCCAGACTGAAGCTACCCTGG\ TC\(14\)\ \ 14\ 1\ \ \ 1\ 1\ 1\ 1 -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ AC\(36\)\ \ 36\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:19063\:1614\ AT\(14\)\ AT\(14\)\ AT\(14\)\ AT\(14\)\ \ 56\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:1978\:1220\ AC\(12\)\ \ 12\ \ \ 1\ test\_6\ GCAGTAAACAAAGGCAAAGGG\ test\_1\ CCTGGGCAGAGGTGTTCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1 -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5626\:1554\ AT\(14\)\ AC\(16\)\ AC\(16\)\ AT\(12\)\ \ 58\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:6204\:1090\ TC\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8044\:1926\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ AC\(12\)\ \ 12\ \ \ 1\ test\_3\ AAGTACAGTGGGGAGGCTGG\ test\_4\ TTTTCTACACAGCTCAAGTAGCCC\ AC\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1 -ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ AC\(12\)\ AC\(12\)\ \ 24\ \ \ 1\ test\_8\ TCTTTATCTAAACACATCCTGAAATACC\ test\_5\ AAACGCAATTATTTTGAGATGTCC\ AC\(12\)\ AC\(12\)\ \ 24\ 
1\ \ \ 1\ 2\ 1\ 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_bad_ranges.out.re_match Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,7 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +M00879\:99\:000000000\-AH9KG\:1\:2107\:10006\:2535\ AT\(16\)\ AT\(16\)\ \ 32\ AT\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:10032\:7900\ .*\ \ 164\ \ \ 1\ test\_.*\ (CGAAAGATGCTATAGAAGCGATGGGG|TATCTATCTATCAATCCGCTCCCC)\ test\_.*\ (GGACATCGAGATAGAAAGGGGACCG|TGATTGGACATCGAGATAGAAAGGG)\ .*\ \ 80\ 1\ \ \ .*\ .*\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10061\:6317\ .*\ \ 76\ \ \ 1\ test\_.*\ GAGAGAGTACATAGATATCTCACGGGGCG\ test\_.*\ GCAACGGCACAGATCTCTTCTACGG\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10072\:8112\ .*\ \ 44\ \ \ 1\ test\_.*\ AGTTTGTTACAGGGCATGACAACGG\ test\_.*\ TCCTGTTATCTTCTTGTTGCTTGGC\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10084\:6474\ .*\ \ 100\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:14372\:5471\ .*\ \ 68\ .*\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_subset.out.re_match Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,6 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ (AC|TG)\(36\)\ \ 36\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5626\:1554\ AT\(14\)\ (AC|TG)\(16\)\ (AC|TG)\(16\)\ AT\(12\)\ \ 58\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ (AC|TG)\(12\)\ \ 12\ \ \ 1\ test\_.*\ AAGTACAGTGGGGAGGCTGG\ test\_.*\ TTTTCTACACAGCTCAAGTAGCCC\ (AC|TG)\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1 +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ \ \ 1\ test\_.*\ TCTTTATCTAAACACATCCTGAAATACC\ test\_.*\ AAACGCAATTATTTTGAGATGTCC\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ 1\ \ \ 1\ 2\ 1\ 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_bad_ranges.fq Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 1:N:0:TAAGGCGA+TAGATCGC +ATATATATATATATATGTAGTATAATCTCACTGGAAAGGAGACGAAAACGAGCAGCTCCGAGCTTTCGACTTTATTTCAAGTCATCTTCAGGGCAACTGACAAATTTTTGTGTAGCAATAGTATATAGACACCAGACGAGATTCCTGACCTCACATCTGGGAGG ++ +CCCCCFGGGGGGGGGGGGGGGFCGGGGGGFGCG@FFGCDGGGGGGGGEGGGDGGFDCFGGGCDGGGGGGGGGGGGGGGGDGFGFGFGFFGGG@FEGGDEFGGGGGEFGGGGGGGFGFGGGGGGAFE?9FGGGGGGGG@F7F7AFDGGFFGFFGGG9EE9D8?>8 +@M00879:99:000000000-AH9KG:1:2107:10032:7900 1:N:0:TAAGGCGA+TAGATCGC +ATCTATGTATTTATCTATCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCCGCTCCCCTTTCTATCTCGATGTCCAATCATTATACACACACCTACACGAAAGATGCTATAGAAGCGATGGGGGACTATAGGTGTATAGCAACTCTATACATCAACCAGTCTCTGCGCTCGTCCCCCTGTCG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGGGGGFGGGCFGG@FGGEFGGDGGGGGGGGGGGGGGA9EFFC,@@F@FG8FGG9?EGG,,4:?@FFAGG@EFF<B,CFBEGGD:EFGD7F+>+,4,@,9E9,@ECEEEFC+8+4>B88;=E3,,2,@A68,7=@DDGED=A8=A8FGE+@DDD,++0@+=0+<3<>095?** +@M00879:99:000000000-AH9KG:1:2107:10061:6317 1:N:0:TAAGGCGA+TAGATCGC +TATATATATATATGAGAAAACTGGTGTCGGTGAGGACTCATCAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGTACATAGATATCTCACGGGGCGAGCTGCCGCTGCTGCTGCTGCCGCGGAGGGTGCAGAATAAGACGAGGAGGTGGAGAGAGGTGTGGAAAAAGTGCCGTAGAAGAGATCTGTGCCGTCGCACAGCTCTCGTGGTGCACCTATGGGAGAGGCGG ++ +CCCC@FFFFFGGFGGGGGGGG8EFG?FFGGEGGGFGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGFG@CFFGGGGGGGGECGGGGGGGF,9,,C,E,?,,9,5<,,:=+78B+++@,,,+++6+,8,,A55A,,+++@+87:*3*@3D79;,,,451***>B4<**6,=***4=;8*+++*/*21+0**3//CEG86+A9:1<++2**)*)2)**+629*9*715:***00*:*755*0*.00 +@M00879:99:000000000-AH9KG:1:2107:10072:8112 1:N:0:TAAGGCGA+TAGATCGC +GTGTGTGTGTGTGTGTGTGTGTTGGTTGTGAAAGTTATCTCAGCTTGTTATATGGAATGACGACTGTCAAGCTTACCAGTTTCTTACATGGGATGACAAAGGCTAGGCTTACCAACTTGTTACATGTAATGACATCGGTAAAGCTTATCAGTTTGTTACAGGGCATGACAACGGTCAAGCTTACCAGCATGATACATGTAGTGACGAAGGCCAAGCCACAAGAAGATAACAGGCAC ++ 
+CCCCCFGGGGFFFGGFFGFGGGGGGGGGGDFGGGCGFGGGG,FFGGFFFCDCDEFFCFGGFGCEGDCG<DGGFA@FCFCFDGFGCEFFGG,CEECFGFGFG,BAFFGGGGGGGAFEC,EFDGGGFGGGGGFGFGGGGEEFGBAFDFCGCAFGGGGEEFFGCFG+9EFGGGD88BECEGGG?EGGGG8,@EF,,@DDFA@3@:DA9CEEDE6AFGGF7D+5CEF591C:>7>CD*;F +@M00879:99:000000000-AH9KG:1:2107:14372:5471 1:N:0:TAAGGCGA+TAGATCGC +GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@M00879:99:000000000-AH9KG:1:2107:10084:6474 1:N:0:TAAGGCGA+TAGATCGC +CAATTACCTAAGTATCTGAATAGTCATTTCCCTATTAGGGGTGATCATAGTCGAACGGTAGCACTCCCGCTGGTCACGCTCTGGACCAGAGTTCAAATCTTGGCTGGTCGAGGTTTACGTGTTTTATATCTATCTATCTATCTATCTATCTATCTATCTATATATATATATATATACATATATAT ++ +CCCCCGGGGGGGFGGGGGGGGGGFGGGGGGGGGCFGGGGGGFGGGGGGGGGGGGGGG@FGGGGGFGGGGGGDGGGGGGGGGGGGGGGGGGFECCEEEFGGGGGGGG>FGG>C+BE,BF:FGFFGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGFDEEGGGGFGGGCFGGGGGGGGFDFDCFCD;E
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_no_microsats.fq Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,32 @@ +@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA +TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA ++ +IIIIIHIIIGHHIIDGHIIIIIIHIIIIIIIIIIIIHGIIIIHGIIIIHHHIHHGHIIHHEHHIGHGDHDHHHGGHEGAHGDGEHEGEGBGCEDBDDGEEFDEECGDEGDEDEDE8 +@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 1:N:0:TCCTGA +TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTG ++ +IIIIIIIGIEIIIIGIIIHHIHIGIGIIIIIHIIIIHGBEIIIIHIFHHIHFIIIIFHFIHIIHFF>EEEBDGDD;BD8DDBBDDBGHGHHHHEFE=DBCDEEEBEBEGHGAFH@E +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA +TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA ++ +DDDDDBDBDDB7?BBB5BB>1?<?88>DDD?4(6367;>?2<164=;>8<0DDD>>A6A?>;;6>+6>><>4-8484888&@D@@DD<D;D>>?<9DD;6DDC<DD;<88@##### +@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 1:N:0:TCCTGA +TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC ++ +#################################################################################################################### +@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 1:N:0:TCCTGA +TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA ++ +#################################################################################################################### +@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 1:N:0:TCCTGA +TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG ++ +HHHHHHHHHHHHHHHFHFGFHEFFDF92=@=>;;;=HHHGHHHHHHHHHHHHHHHHGHHHFHHHHFHFDHHHHHHHHHHHHHHHHGHHGGHHHHHHHHHHEHHHHGEHHHEFE>EB +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA 
+TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT ++ +HHHHHHHHHHHHHFHHHHHGHHHHHHGHHHHGHHHHHH>HHGGGGFFHHHHGHGHHFHHHHGGBGDCAC>CBDBDDCFGED@BDDB@BBBBECCD@<CC>C=?9==@CECB=BBB8 +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 1:N:0:TCCTGA +TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC ++ +IIIIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIIIGIIIIIIIIIIIIIIHHGIIIIHIHHHHIHHHHHHHGHHHHHGFHEHEHHHHHHHII>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r2_bad_ranges.fq Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 2:N:0:TAAGGCGA+TAGATCGC +CCTCCCAGATGTGAGGTCAGGAATCTCGTCTGGTGTCTATATACTATTGCTACACAAAAATTTGTCAGTTGCCCTGAAGATGACTTGAAATAAAGTCGAAAGCTCGGAGCTGCTCGTTTTCGTCTCCTTTCCAGTGAGATTATACTACATATATATATATATAT ++ +CCCCCGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFFGGGGGGFGGGGGGGCEFFGGGGGFGGGGGGGGGGGGGGGFGFCEGGFGG8EFEFDFGEFGFFGGCFGGFAFF,9AFDGGGGGGGGGGGGGGEADDFGFEA +@M00879:99:000000000-AH9KG:1:2107:10032:7900 2:N:0:TAAGGCGA+TAGATCGC +CTACAGGGGGCCGAGCACAGAGACTGGTTGATGTATAGAGTTGCTATACTCCTATAGTCCCCCATCGCTTCTATAGCATCTTTCGTGTAGGTGTGTCTATAATGATTGGACATCGAGATAGAAAGGGGACCGGATTGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAGAGAGAGAGAGAGAGAGAGAGCGAGGAGATAGATAAATACATCGAT ++ +CCCCCGGGGDGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGGGGGGGGCGFGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGFFGGGFGDGGGFGFGGGGGGGGGGEGFFGFFCGFGGGCFG9FFGGGGFCFGGGGGGGGG8DAFGFFGFGGDGDFGFCFFD72DAFF>?BCFFF5>9A>DFFFFFF*):494?>BFEE4<*9*@:=?*-(- +@M00879:99:000000000-AH9KG:1:2107:10061:6317 2:N:0:TAAGGCGA+TAGATCGC +GCCTCGGGACCTTGGCACGCCGCCTCTCCCATAGATGCACCATGAGATCCGTGCAACGGCACAGATCTCTTCTACGGAACTTTTGCCACACCTCACTTCACCTTCTTGCCTTATTCTGCACCACCTGCAGCAGCAGCAGCAGCAGCAGCTCGCCCCGAGTTACATCTATGTACTCTCTCCCTCTCTCTCTCTCTCTCTCTCTCCCTCTCTCTCTCCCTCCCTCTCCTCTCACCCCACACTCACACCCGC ++ +CCCCCFGGGGGGGFFGGGGGGGGGGGGGGGGGGFGGGFEGGGGEGFGGGGGGGGGGGEGGGGGGGGGCGFFGFGGFG@FGGFEF9FGGGCGGEFEFGGGDGG@FE?EEFGGGGG,EE,EFGGFGGFDG,@FFFFG8D8=E8>EEEGC=D=D6CEGC61C=8:ECFG8AA9<2CCC<C?C+=0==DAF9C7;;@0;0@09*@*9*)3;):/;.7.>*7):(54>3-(0(*(0*-(511(6(/(6)6/(,( +@M00879:99:000000000-AH9KG:1:2107:10072:8112 2:N:0:TAAGGCGA+TAGATCGC +GTTCCTGTTATCTTCTTGTTGCTTGGCCTTCGTCACTACATGTAACATGCTGGTAAGCTTGACCGTTGTCATTCCCTGTAACAAACTGATAAGCTTTACCGATGTCATTACATGTAACCAGTTGGTAAGCCTAGCCTTTGTCATCCCATGTAAGAAACTGGTAAGCTTGACAGTCGTCATTCCATATAACAAGCTGAGATAACTTTCACAACCAACACACACACACACACACACACCTGTCTCCTATAC ++ 
+CCCCCGGGGGGGGGGGGFFGFGGGG7FFGGGGGGGGGGGGGFGFFFAEGGGCFFG@AGGGC,EFGGGGGGGG@EEGGGGFGGFGGGFGGFGCFGGGGGGGGCEEGGCFEFGDEFGGFG,CFFFEGGGDG9EFFFGGGGFAFGGGGG84E=EFGGG;AGDDFFFGGFGG8=DAFGFG=D88FFG@9D@@FDD+;D56D?FFFFFFD=7*;2:)=855)=DF=>=AAD==@DDA)=@@5)):)3;9A***9 +@M00879:99:000000000-AH9KG:1:2107:14372:5471 2:N:0:TAAGGCGA+TAGATCGC +CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC ++ +ACCCCFGE<FFFGGGGCFFFEF@EGGGGGGGGGGG +@M00879:99:000000000-AH9KG:1:2107:10084:6474 2:N:0:TAAGGCGA+TAGATCGC +ATATATATGTATATATATATATATATAGATAGATAGATAGATAGATAGATAGATAGATATAAAACACGTAAACCTCGACCAGCCAAGATTTGAACTCTGGTCCAGAGCGTGACCAGCGGGAGTGCTACCGTTCGACTATGATCACCCCTAATAGGGAACTGACTATTCAGATACTTAGGTAATTG ++ +CCCCCGGGGGGGGGGGGGGGGGGGCFGFGGGCGGGFGFGFGCFGGFGGGGGGGGGGGGGGGGGGGGFCFFFGGGG<FEFGGFCEFGGGGGGFFAGFGGCCFGGEDGFDGGGGGGGGGG7@FG=AEFGGGGDGC8EGGGGFFEFEGGGGGGGDCF8@FE+==AF9=FFGGFGDGFDA=?DDG+3?9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r2_no_microsats.fq Wed May 16 07:39:16 2018 -0400 @@ -0,0 +1,32 @@ +@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA +TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA ++ +IIIIIHIIIIIHIIIIHIHIIIIIIIHIIIHIIIHIHGIIIIIIHEHHHHHHHHIIHIIHFHHEHHIGHHGHGIHGHHFGAGIEHHGEGHBFIFDBHHHGDFHBDBHGGFGD8EB@ +@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 2:N:0:TCCTGA +TATATATATATATAAACATATATATATATATTTTTTTCTCATTTCAGAACAAAAGTGAGATGAACTTTAATATGGTGGGGTGTATTTTGAGAGACTCTCTAGTTTGGGAGGAGTGA ++ +DDDDDDDDDDDD:D@D+DDDDDDDDDDDD6DDDDD>A@:5>@########################################################################## +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA +TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT ++ +B?/?################################################################################################################ +@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 2:N:0:TCCTGA +TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT ++ +HHHHHHHHHHHHHHHHHEHHGGGGFDHGHGHHHHHHFHHHHHGHEHEHEBEHEFB8EEEFEDAGEDBCEBBB@>BEC>@B@DCBBBECBB<AECBBBC>BA>B<;BA@A@###### +@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 2:N:0:TCCTGA +TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA ++ +HHHDBHHHEFGHHHHEHFHEHDDDDD@HHEHHEEHEFFEEEGDEGGGGEGEB>EBC>@@@@@@BB@FEBFBBB@A>AAA<;>A>;3>=??>>BB>?>@?1?>:9*@########## +@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 2:N:0:TCCTGA +TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT ++ +IIIIIIIIIIHIIIIIIIIIIIIIIGIIIIFIIIIIGHHIIIIIGGEIHGHGHHHGGCHHCGGGGGGHGHGEGEGEGGGDGBECCBGGEDGB;8?BBBBDABC@:3==;=:<30:6 +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA 
+TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC ++ +HFHHHHHHHHHHHHDHGHHDHHHHHHHHHHHHHHHHHHGGGHHGHHHHHHHHHHHGHHHHH@HHHFHGHFHHEHHHHHHDHBFEBBHFFDHE>EFHBEFD################ +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 2:N:0:TCCTGA +TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC ++ +IG+GGGIIIIIIIIIIDIIIIIIGHHIIHIIIIIEGHIHIHIIHIIIIIIIIIIIIIIHIIDIIIIIEDIIIDIHGHFIIIIIIIIIHIFFBHGIGBGGHBDBFEGEGCGEIEDEB