Repository 'breseq'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/breseq

Changeset 0:f848a7f97332 (2019-10-31)
Next changeset 1:85c57cc9b558 (2019-12-02)
Commit message:
"planemo upload commit fadaff2d55736bf8c580541d6089c83cd4106a1f"
added:
README.txt
breseq.xml
macros.xml
test-data/gdout.txt
test-data/gdtoolsout.html
test-data/genbank_files.loc
test-data/lambda.gbk
test-data/lambda.short_sequence_repeats.fastq
test-data/log.txt
test-data/out.tar.gz
test-data/report.html
tool-data/genbank_files.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r f848a7f97332 README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,1 @@
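+To view breseq's HTML output properly in Galaxy, a Galaxy administrator must add the breseq tool to the server's HTML display whitelist (the list of tools whose HTML output is rendered without sanitization).  Otherwise, the results will not be displayed properly and interactive controls will not function.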
\ No newline at end of file
b
diff -r 000000000000 -r f848a7f97332 breseq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/breseq.xml Thu Oct 31 19:40:40 2019 -0400
[
b'@@ -0,0 +1,354 @@\n+<tool id="breseq" name="breseq" version="@PACKAGE_VERSION@+@GALAXY_VERSION@">\n+\n+    <description>find mutations in haploid microbial genomes</description>\n+\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+\n+    <requirements>\n+        <requirement type="package" version="0.34.0">breseq</requirement>\n+    </requirements>\n+\n+    <version_command>breseq --version</version_command>\n+\n+    <command detect_errors="aggressive">\n+        <![CDATA[\n+            #set $ref_opts = ""\n+            #for $i, $r in enumerate( $references ):\n+                #if str($references[$i].reference.source) == "history":\n+                    #for $ref in $references[$i].reference.own_genome:\n+                        #if $ref\n+                            #set $ref_opts = $ref_opts + " --reference \'" + str($ref) + "\'"\n+                        #end if\n+                    #end for\n+                #else:\n+                    #set $ref_opts = $ref_opts + " --reference \'" + $references[$i].reference.fixed_genome.fields.path + "\'"\n+                #end if\n+            #end for\n+\n+            #if str($run.mode) == \'detect\'\n+                breseq\n+\n+                --num-processors \\${GALAXY_SLOTS:-4}\n+\n+                -o results\n+\n+                $ref_opts\n+\n+                #for $s in $run.fastqs:\n+                    ${s}\n+                #end for\n+\n+                #if $run.name\n+                    --name \'$run.name\'\n+                #end if\n+\n+                $run.polymorphism_prediction\n+                $run.predict_junctions\n+\n+                #if \'gd\' in str($run.output_options.formats).split(\',\'):\n+                    && cp results/output/output.gd \'$output\'\n+                #end if\n+\n+                #if \'html\' in str($run.output_options.formats).split(\',\'):\n+                    && cp results/output/index.html \'$report\'\n+                    && mkdir 
$report.extra_files_path\n+                    && cp -R results/output/* $report.extra_files_path\n+                #end if\n+\n+                #if \'zip\' in str($run.output_options.formats).split(\',\'):\n+                    && tar -zcf \'$zip_output\' results\n+                #end if\n+\n+                #if \'log\' in str($run.output_options.formats).split(\',\'):\n+                    && cp results/output/log.txt \'$log\'\n+                #end if\n+            #else\n+                #set $first = 1\n+                #for $o in str($run.output_options.formats).split(\',\'):\n+\n+                    #if $first == 0\n+                        &&\n+                    #end if\n+                    #set $first = 0\n+\n+                    gdtools ANNOTATE\n+\n+                    --format \'$o\'\n+\n+                    -o\n+                    #if $o == \'html\':\n+                        \'$annreport\'\n+                    #else if $o == \'gd\':\n+                        \'$genomediff\'\n+                    #else if $o == \'tsv\':\n+                        \'$tabdelim\'\n+                    #else if $o == \'phylip\':\n+                        \'$phylipout\'\n+                    #else if $o == \'json\':\n+                        \'$jsonout\'\n+                    #end if\n+\n+                    $ref_opts\n+\n+                    #for $s in $run.gds:\n+                        ${s}\n+                    #end for\n+                #end for\n+            #end if\n+        ]]>\n+    </command>\n+\n+    <inputs>\n+        <repeat name="references" title="Reference Genome" min="1">\n+            <conditional name="reference">\n+                <param name="source" type="select" label="Reference source" >\n+                    <option value="builtin">built-in</option>\n+                    <option value="history" selected="true">history</option>\n+                </param>\n+                <when value="builtin">\n+                    <param name="fixed_genome" 
argument="--reference" type="select" optional="false" label="Galaxy Built-in Reference(s)">\n+                        <options from_data_table="genbank_files">\n+                            <fi'..b'<output name="output" file="gdout.txt" lines_diff="8" />\n+        </test>\n+        <test>\n+            <repeat name="references">\n+                <conditional name="reference">\n+                    <param name="source" value="history" />\n+                    <param name="own_genome" value="lambda.gbk" />\n+                </conditional>\n+            </repeat>\n+            <conditional name="run">\n+                <param name="mode" value="annotate" />\n+                <param name="gds" value="gdout.txt" />\n+                <section name="output_options">\n+                    <param name="formats" value="html" />\n+                </section>\n+            </conditional>\n+\n+            <output name="annreport" file="gdtoolsout.html" compare="sim_size" delta="100" />\n+        </test>\n+    </tests>\n+\n+    <help>\n+        <![CDATA[\n+**Detect Variants**\n+\n+breseq (pronounced: \\\\br\xc4\x93z-\xcb\x88s\xc4\x93k\\\\ or breeze-seq) is a computational pipeline for\n+the analysis of short-read re-sequencing data (e.g. Illumina, 454, IonTorrent,\n+etc.). It uses reference-based alignment approaches to predict mutations in a\n+sample relative to an already sequenced genome. 
breseq is intended for microbial\n+genomes (<10 Mb) and re-sequenced samples that are only slightly diverged from\n+the reference sequence (<1 mutation per 1000 bp).\n+\n+breseq\'s primary advantages over other software programs are that it can:\n+\n+- Accurately predict new sequence junctions, such as those associated with mobile element insertions.\n+- Integrate multiple sources of evidence for genetic changes into mutation predictions.\n+- Produce annotated output describing biologically relevant mutational events.\n+\n+breseq was initially developed to analyze data from the Lenski long-term\n+evolution experiment with `E. coli`_. References: barrick2009a_ barrick2009b_.\n+\n+.. _`E. coli`: http://myxo.css.msu.edu/ecoli/\n+.. _barrick2009a: http://barricklab.org/twiki/pub/Lab/ToolsBacterialGenomeResequencing/documentation/references.html#barrick2009a\n+.. _barrick2009b: http://barricklab.org/twiki/pub/Lab/ToolsBacterialGenomeResequencing/documentation/references.html#barrick2009b\n+\n+However, breseq may be generally useful to researchers who are:\n+\n+- Tracking mutations over time in microbial evolution experiments.\n+- Checking strains for unwanted second-site mutations after genetic manipulations.\n+- Identifying mutations that occur during strain improvement or after long-term culture of engineered strains.\n+- Discovering what mutations arise in pathogens during infection or cause antibiotic resistance.\n+\n+\n+*Inputs*\n+\n+Breseq accepts files in FASTQ format. It does not take pair-end information into\n+account.\n+\n+You can either run in clonal (consensus) mode or search for polymorphisms in a\n+population.\n+\n+You can also select an external sequence (eg. a transposon) to detect for\n+insertions or horizontal transfer.\n+\n+\n+*Outputs*\n+\n+Breseq outputs a number of files. 
These are all condensed in a single zipped\n+file.\n+\n+It contains output files with the final results, accessible through\n+``output/index.html``\n+\n+It also contains data files with accessory data, including:\n+\n+- ``data/reference.fasta`` (file with reference genome: can be used in e.g. IGV browser)\n+- ``data/reference.gff`` (file with genomic annotations: can be used in e.g. IGV browser)\n+- ``data/reference.bam`` (file with read alignments: can be used in e.g. IGV browser)\n+- ``data/unmatched.*`` (files with reads that failed to align: can be used to build an assembly or to e.g. blast against NCBI)\n+\n+\n+----\n+\n+**Annotate Variants**\n+\n+Annotate a GenomeDiff file (generated by breseq) with information about\n+mutations (what genes they affect, amino acid substitutions, etc.). If multiple\n+input files are provided, then also COMPARE the frequencies for identical\n+mutations across samples.\n+        ]]>\n+    </help>\n+\n+    <citations>\n+        <citation type="doi">10.1007/978-1-4939-0554-6_12</citation>\n+    </citations>\n+\n+</tool>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r f848a7f97332 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,14 @@
+<macros> <!-- Shared tokens and XML macros; breseq.xml pulls this file in through its <import> element. -->
+    <token name="@PACKAGE_VERSION@" >0.34.0</token> <!-- First part of the tool version attribute in breseq.xml; the <requirement> there pins the same 0.34.0, so change both together. -->
+    <token name="@GALAXY_VERSION@" >1</token> <!-- Second part of the tool version attribute, joined to the package version with "+". -->
+    <xml name="annotate_format_opts"> <!-- Reusable "Output Options" section holding the required multi-select of output formats. -->
+        <section name="output_options" title="Output Options" expanded="false">
+            <param argument="--format" name="formats" type="select" multiple="true" optional="false" display="checkboxes" label="Output Formats" help="Note, this tool will run as many times (serially) as the number of output formats selected."> <!-- The command template in breseq.xml reads this as $run.output_options.formats and splits it on commas. -->
+                <yield /> <!-- Galaxy macro placeholder: replaced by the child elements supplied where this macro is expanded, so any extra options land ahead of the three below. -->
+                <option value="tsv">Annotated Variants (Tabular)</option>
+                <option value="json">Annotated Variants (Json)</option>
+                <option value="html">Report (Webpage)</option>
+            </param>
+        </section>
+    </xml>
+</macros>
b
diff -r 000000000000 -r f848a7f97332 test-data/gdout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gdout.txt Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,31 @@
+#=GENOME_DIFF 1.0
+#=TITLE smallest
+#=CREATED 16:09:36 17 Oct 2019
+#=PROGRAM breseq 0.34.0 
+#=COMMAND breseq --num-processors 1 -o results --reference /private/var/folders/8d/z84mms353qb2n0r3n2jhww0m0000gq/T/tmpYSLdP2/files/000/dataset_3.dat /private/var/folders/8d/z84mms353qb2n0r3n2jhww0m0000gq/T/tmpYSLdP2/files/000/dataset_1.dat --name smallest
+#=REFSEQ /private/var/folders/8d/z84mms353qb2n0r3n2jhww0m0000gq/T/tmpYSLdP2/files/000/dataset_3.dat
+#=READSEQ /private/var/folders/8d/z84mms353qb2n0r3n2jhww0m0000gq/T/tmpYSLdP2/files/000/dataset_1.dat
+#=CONVERTED-BASES 250000
+#=CONVERTED-READS 5000
+#=INPUT-BASES 250000
+#=INPUT-READS 5000
+#=MAPPED-BASES 243031
+#=MAPPED-READS 4872
+INS 1 11 NC_001416 41405 GCGGCGGCG repeat_length=3 repeat_new_copies=5 repeat_ref_copies=2 repeat_seq=GCG
+DEL 2 8,12 NC_001416 41601 3 repeat_length=3 repeat_new_copies=1 repeat_ref_copies=2 repeat_seq=AAC
+INS 3 13 NC_001416 41716 TTTT repeat_length=1 repeat_new_copies=9 repeat_ref_copies=5 repeat_seq=T
+DEL 4 9,14 NC_001416 44075 4 repeat_length=4 repeat_new_copies=1 repeat_ref_copies=2 repeat_seq=CAAA
+SNP 5 6 NC_001416 47317 T
+RA 6 . NC_001416 47317 0 N T consensus_score=103.5 frequency=1 major_base=T major_cov=17/17 major_frequency=1.000e+00 minor_base=N minor_cov=0/0 new_cov=17/17 polymorphism_frequency=1.000e+00 polymorphism_score=NA prediction=consensus ref_cov=0/0 total_cov=17/17
+MC 7 . NC_001416 1 38810 0 0 left_inside_cov=0 left_outside_cov=NA right_inside_cov=10 right_outside_cov=13
+MC 8 . NC_001416 41601 41603 0 0 left_inside_cov=0 left_outside_cov=29 right_inside_cov=0 right_outside_cov=28
+MC 9 . NC_001416 44075 44078 0 0 left_inside_cov=0 left_outside_cov=28 right_inside_cov=0 right_outside_cov=27
+MC 10 . NC_001416 48475 48502 0 0 left_inside_cov=10 left_outside_cov=12 right_inside_cov=0 right_outside_cov=NA
+JC 11 . NC_001416 41398 1 NC_001416 41405 -1 -1 alignment_overlap=-1 coverage_minus=6 coverage_plus=18 flanking_left=50 flanking_right=50 frequency=1 junction_possible_overlap_registers=48 key=NC_001416__41398__1__NC_001416__41405__-1__-1__C__50__50__0__0 max_left=44 max_left_minus=39 max_left_plus=44 max_min_left=24 max_min_left_minus=23 max_min_left_plus=24 max_min_right=23 max_min_right_minus=11 max_min_right_plus=23 max_pos_hash_score=96 max_right=47 max_right_minus=34 max_right_plus=47 neg_log10_pos_hash_p_value=0.0 new_junction_coverage=1.10 new_junction_read_count=27 polymorphism_frequency=9.168e-01 pos_hash_score=22 prediction=consensus side_1_annotate_key=gene side_1_continuation=0 side_1_coverage=0.04 side_1_overlap=0 side_1_possible_overlap_registers=49 side_1_read_count=1 side_1_redundant=0 side_2_annotate_key=gene side_2_continuation=0 side_2_coverage=0.16 side_2_overlap=0 side_2_possible_overlap_registers=49 side_2_read_count=4 side_2_redundant=0 total_non_overlap_reads=24 unique_read_sequence=C
+JC 12 . NC_001416 41600 -1 NC_001416 41604 1 0 alignment_overlap=4 coverage_minus=14 coverage_plus=8 flanking_left=50 flanking_right=50 frequency=1 junction_possible_overlap_registers=45 key=NC_001416__41600__-1__NC_001416__41600__1__4____50__50__0__0 max_left=42 max_left_minus=42 max_left_plus=38 max_min_left=22 max_min_left_minus=20 max_min_left_plus=22 max_min_right=20 max_min_right_minus=12 max_min_right_plus=20 max_pos_hash_score=90 max_right=44 max_right_minus=44 max_right_plus=37 neg_log10_pos_hash_p_value=0.0 new_junction_coverage=1.00 new_junction_read_count=23 polymorphism_frequency=1.000e+00 pos_hash_score=19 prediction=consensus side_1_annotate_key=gene side_1_continuation=0 side_1_coverage=0.00 side_1_overlap=4 side_1_possible_overlap_registers=49 side_1_read_count=0 side_1_redundant=0 side_2_annotate_key=gene side_2_continuation=0 side_2_coverage=0.00 side_2_overlap=0 side_2_possible_overlap_registers=45 side_2_read_count=0 side_2_redundant=0 total_non_overlap_reads=22
+JC 13 . NC_001416 41711 -1 NC_001416 41712 1 -4 alignment_overlap=-4 coverage_minus=10 coverage_plus=11 flanking_left=50 flanking_right=50 frequency=1 junction_possible_overlap_registers=45 key=NC_001416__41711__-1__NC_001416__41712__1__-4__TTTT__50__50__0__0 max_left=42 max_left_minus=42 max_left_plus=30 max_min_left=19 max_min_left_minus=19 max_min_left_plus=19 max_min_right=22 max_min_right_minus=19 max_min_right_plus=22 max_pos_hash_score=80 max_right=40 max_right_minus=37 max_right_plus=40 neg_log10_pos_hash_p_value=0.0 new_junction_coverage=1.30 new_junction_read_count=30 polymorphism_frequency=1.000e+00 pos_hash_score=20 prediction=consensus side_1_annotate_key=gene side_1_continuation=5 side_1_coverage=0.00 side_1_overlap=0 side_1_possible_overlap_registers=44 side_1_read_count=0 side_1_redundant=0 side_2_annotate_key=gene side_2_continuation=0 side_2_coverage=0.00 side_2_overlap=0 side_2_possible_overlap_registers=49 side_2_read_count=0 side_2_redundant=0 total_non_overlap_reads=21 unique_read_sequence=TTTT
+JC 14 . NC_001416 44074 -1 NC_001416 44079 1 0 alignment_overlap=5 coverage_minus=19 coverage_plus=5 flanking_left=50 flanking_right=50 frequency=1 junction_possible_overlap_registers=44 key=NC_001416__44074__-1__NC_001416__44074__1__5____50__50__0__0 max_left=44 max_left_minus=43 max_left_plus=44 max_min_left=21 max_min_left_minus=21 max_min_left_plus=18 max_min_right=22 max_min_right_minus=22 max_min_right_plus=12 max_pos_hash_score=88 max_right=43 max_right_minus=43 max_right_plus=27 neg_log10_pos_hash_p_value=0.0 new_junction_coverage=1.11 new_junction_read_count=25 polymorphism_frequency=1.000e+00 pos_hash_score=20 prediction=consensus side_1_annotate_key=gene side_1_continuation=0 side_1_coverage=0.00 side_1_overlap=5 side_1_possible_overlap_registers=49 side_1_read_count=0 side_1_redundant=0 side_2_annotate_key=gene side_2_continuation=0 side_2_coverage=0.00 side_2_overlap=0 side_2_possible_overlap_registers=44 side_2_read_count=0 side_2_redundant=0 total_non_overlap_reads=24
+UN 15 . NC_001416 1 38807
+UN 16 . NC_001416 41601 41603
+UN 17 . NC_001416 44075 44078
+UN 18 . NC_001416 48488 48502
b
diff -r 000000000000 -r f848a7f97332 test-data/gdtoolsout.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gdtoolsout.html Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,115 @@
+<!DOCTYPE html
+PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<html>
+<head>
+<title>Mutation Comparison</title>
+<style type = "text/css">
+body {font-family: sans-serif; font-size: 11pt;}
+th {background-color: rgb(0,0,0); color: rgb(255,255,255);}
+table {background-color: rgb(1,0,0); color: rgb(0,0,0);}
+tr {background-color: rgb(255,255,255);}
+.mutation_in_codon {color:red; text-decoration : underline;}
+.snp_type_synonymous{color:green;}
+.snp_type_nonsynonymous{color:blue;}
+.snp_type_nonsense{color:red;}
+.mutation_header_row {background-color: rgb(0,130,0);}
+.read_alignment_header_row {background-color: rgb(255,0,0);}
+.missing_coverage_header_row {background-color: rgb(0,100,100);}
+.new_junction_header_row {background-color: rgb(0,0,155);}
+.copy_number_header_row {background-color: rgb(153,102,0);}
+.alternate_table_row_0 {background-color: rgb(255,255,255);}
+.alternate_table_row_1 {background-color: rgb(235,235,235);}
+.gray_table_row {background-color: rgb(230,230,245);}
+.polymorphism_table_row {background-color: rgb(160,255,160);}
+.highlight_table_row {background-color: rgb(192,255,255);}
+.reject_table_row {background-color: rgb(255,200,165);}
+.user_defined_table_row {background-color: rgb(255,255,0);}
+.information_table_row {background-color: rgb(200,255,255);}
+.junction_repeat {background-color: rgb(255,165,0)}
+.junction_gene {}
+.hidden { display: none; }
+.unhidden { display: block; }
+
+</style>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+<script type="text/javascript">
+  function hideTog(divID) {
+    var item = document.getElementById(divID);
+    if (item) {
+      item.className=(item.className=='hidden')?'unhidden':'hidden';
+    }
+  }
+  function showTog(butID) {
+    var button = document.getElementById(butID);
+    if (button) {
+      button.value=(button.value=='Show')?'Hide':'Show';
+    }
+  }
+</script>
+
+</head>
+<body>
+<!--Output Html_Mutation_Table_String-->
+<table border="0" cellspacing="1" cellpadding="3">
+<tr><th colspan="5" align="left" class="mutation_header_row">Predicted mutations</th></tr><tr>
+<th>position</th>
+<th>mutation</th>
+<th>annotation</th>
+<th>gene</th>
+<th width="100%">description</th>
+</tr>
+
+<!-- Item Lines -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="right">41,405</td><!-- Position -->
+<td align="center">(GCG)<sub>2&rarr;5</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(325/873&nbsp;nt)</td>
+<td align="center"><i>NinC</i>&nbsp;&rarr;</td>
+<td align="left">NinC protein</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="right">41,601</td><!-- Position -->
+<td align="center">(AAC)<sub>2&rarr;1</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(521&#8209;523/873&nbsp;nt)</td>
+<td align="center"><i>NinC</i>&nbsp;&rarr;</td>
+<td align="left">NinC protein</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="right">41,716</td><!-- Position -->
+<td align="center">(T)<sub>5&rarr;9</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(636/873&nbsp;nt)</td>
+<td align="center"><i>NinC</i>&nbsp;&rarr;</td>
+<td align="left">NinC protein</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="right">44,075</td><!-- Position -->
+<td align="center">(CAAA)<sub>2&rarr;1</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(190&#8209;193/624&nbsp;nt)</td>
+<td align="center"><i>Q</i>&nbsp;&rarr;</td>
+<td align="left">late gene regulator</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="right">47,317</td><!-- Position -->
+<td align="center">N&rarr;T</td><!-- Cell Mutation -->
+<td align="center"><span style="white-space: nowrap"><font class="snp_type_nonsynonymous">?87I</font>&nbsp;(<font class="mutation_in_codon">N</font>TC&rarr;<font class="mutation_in_codon">A</font>TC)&nbsp;</span></td>
+<td align="center"><i>lambdap78</i>&nbsp;&larr;</td>
+<td align="left">putative envelope protein</td>
+</tr>
+<!-- End Table Row -->
+</table></body></html>
\ No newline at end of file
b
diff -r 000000000000 -r f848a7f97332 test-data/genbank_files.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genbank_files.loc Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,3 @@
+#This configures genbank files available for specific galaxy tools (eg. breseq)
+#<unique_build_id>\t<dbkey>\t<display_name>\t<file_path>
+lambda1 lambda Lambda ${__HERE__}/lambda.gbk
b
diff -r 000000000000 -r f848a7f97332 test-data/lambda.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/lambda.gbk Thu Oct 31 19:40:40 2019 -0400
b
b"@@ -0,0 +1,3280 @@\n+LOCUS       NC_001416              48502 bp    DNA     linear   PHG 21-APR-2009\n+DEFINITION  Enterobacteria phage lambda, complete genome.\n+ACCESSION   NC_001416\n+VERSION     NC_001416.1  GI:9626243\n+DBLINK      Project:14204\n+KEYWORDS    .\n+SOURCE      Enterobacteria phage lambda\n+  ORGANISM  Enterobacteria phage lambda\n+            Viruses; dsDNA viruses, no RNA stage; Caudovirales; Siphoviridae;\n+            Lambda-like viruses.\n+REFERENCE   1  (sites)\n+  AUTHORS   Chen,C.Y. and Richardson,J.P.\n+  TITLE     Sequence elements essential for rho-dependent transcription\n+            termination at lambda tR1\n+  JOURNAL   J. Biol. Chem. 262 (23), 11292-11299 (1987)\n+   PUBMED   3038914\n+REFERENCE   2  (sites)\n+  AUTHORS   Peltz,S.W., Brown,A.L., Hasan,N., Podhajska,A.J. and Szybalski,W.\n+  TITLE     Thermosensitivity of a DNA recognition site: activity of a\n+            truncated nutL antiterminator of coliphage lambda\n+  JOURNAL   Science 228 (4695), 91-93 (1985)\n+   PUBMED   3156406\n+REFERENCE   3  (sites)\n+  AUTHORS   Coleclough,C. and Erlitz,F.L.\n+  TITLE     Use of primer-restriction-end adapters in a novel cDNA cloning\n+            strategy\n+  JOURNAL   Gene 34 (2-3), 305-314 (1985)\n+   PUBMED   2408965\n+REFERENCE   4  (sites)\n+  AUTHORS   Place,N., Fien,K., Mahoney,M.E., Wulff,D.L., Ho,Y.S., Debouck,C.,\n+            Rosenberg,M., Shih,M.C. and Gussin,G.N.\n+  TITLE     Mutations that alter the DNA binding site for the bacteriophage\n+            lambda cII protein and affect the translation efficiency of the cII\n+            gene\n+  JOURNAL   J. Mol. Biol. 180 (4), 865-880 (1984)\n+   PUBMED   6241264\n+REFERENCE   5  (sites)\n+  AUTHORS   Frackman,S., Siegele,D.A. and Feiss,M.\n+  TITLE     A functional domain of bacteriophage lambda terminase for prohead\n+            binding\n+  JOURNAL   J. Mol. Biol. 180 (2), 283-300 (1984)\n+   PUBMED   6096564\n+REFERENCE   6  (sites)\n+  AUTHORS   Craig,N.L. 
and Nash,H.A.\n+  TITLE     E. coli integration host factor binds to specific sites in DNA\n+  JOURNAL   Cell 39 (3 PT 2), 707-716 (1984)\n+   PUBMED   6096022\n+REFERENCE   7  (sites)\n+  AUTHORS   Edlind,T.D., Cooley,T.E., Richards,S.H. and Ihler,G.M.\n+  TITLE     Long range base-pairing in the leftward transcription unit of\n+            bacteriophage lambda. Characterization by electron microscopy and\n+            computer-aided sequence analysis\n+  JOURNAL   J. Mol. Biol. 179 (3), 351-365 (1984)\n+   PUBMED   6096550\n+REFERENCE   8  (sites)\n+  AUTHORS   Warren,F. and Das,A.\n+  TITLE     Formation of termination-resistant transcription complex at phage\n+            lambda nut locus: effects of altered translation and a ribosomal\n+            mutation\n+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 81 (12), 3612-3616 (1984)\n+   PUBMED   6233610\n+REFERENCE   9  (sites)\n+  AUTHORS   Wulff,D.L., Mahoney,M., Shatzman,A. and Rosenberg,M.\n+  TITLE     Mutational analysis of a regulatory region in bacteriophage lambda\n+            that has overlapping signals for the initiation of transcription\n+            and translation\n+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 81 (2), 555-559 (1984)\n+   PUBMED   6229793\n+REFERENCE   10 (sites)\n+  AUTHORS   Hohn,B.\n+  TITLE     DNA sequences necessary for packaging of bacteriophage lambda DNA\n+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 80 (24), 7456-7460 (1983)\n+   PUBMED   6324174\n+REFERENCE   11 (bases 45901 to 46443)\n+  AUTHORS   Taylor,A., Benedik,M. and Campbell,A.\n+  TITLE     Location of the Rz gene in bacteriophage lambda\n+  JOURNAL   Gene 26 (2-3), 159-163 (1983)\n+   PUBMED   6323257\n+REFERENCE   12 (bases 48469 to 48498)\n+  AUTHORS   Miwa,T. and Matsubara,K.\n+  TITLE     Lambda phage DNA sequences affecting the packaging process\n+  JOURNAL   Gene 24 (2-3), 199-206 (1983)\n+   PUBMED   6227527\n+REFERENCE   13 (bases 29063 to 29140)\n+  AUTHORS   Benedik,M., Mascarenhas,D. 
and Campbell,A.\n+  TITLE     The integrase promoter and T'I terminator in bacteriophages lambda\n+            and 434\n+ "..b'atctcgc ttatataacg agcgtgttta\n+    45421 tcggctacat cggtactgac tcgattggtt cgcttatcaa acgcttcgct gctaaaaaag\n+    45481 ccggagtaga agatggtaga aatcaataat caacgtaagg cgttcctcga tatgctggcg\n+    45541 tggtcggagg gaactgataa cggacgtcag aaaaccagaa atcatggtta tgacgtcatt\n+    45601 gtaggcggag agctatttac tgattactcc gatcaccctc gcaaacttgt cacgctaaac\n+    45661 ccaaaactca aatcaacagg cgccggacgc taccagcttc tttcccgttg gtgggatgcc\n+    45721 taccgcaagc agcttggcct gaaagacttc tctccgaaaa gtcaggacgc tgtggcattg\n+    45781 cagcagatta aggagcgtgg cgctttacct atgattgatc gtggtgatat ccgtcaggca\n+    45841 atcgaccgtt gcagcaatat ctgggcttca ctgccgggcg ctggttatgg tcagttcgag\n+    45901 cataaggctg acagcctgat tgcaaaattc aaagaagcgg gcggaacggt cagagagatt\n+    45961 gatgtatgag cagagtcacc gcgattatct ccgctctggt tatctgcatc atcgtctgcc\n+    46021 tgtcatgggc tgttaatcat taccgtgata acgccattac ctacaaagcc cagcgcgaca\n+    46081 aaaatgccag agaactgaag ctggcgaacg cggcaattac tgacatgcag atgcgtcagc\n+    46141 gtgatgttgc tgcgctcgat gcaaaataca cgaaggagtt agctgatgct aaagctgaaa\n+    46201 atgatgctct gcgtgatgat gttgccgctg gtcgtcgtcg gttgcacatc aaagcagtct\n+    46261 gtcagtcagt gcgtgaagcc accaccgcct ccggcgtgga taatgcagcc tccccccgac\n+    46321 tggcagacac cgctgaacgg gattatttca ccctcagaga gaggctgatc actatgcaaa\n+    46381 aacaactgga aggaacccag aagtatatta atgagcagtg cagatagagt tgcccatatc\n+    46441 gatgggcaac tcatgcaatt attgtgagca atacacacgc gcttccagcg gagtataaat\n+    46501 gcctaaagta ataaaaccga gcaatccatt tacgaatgtt tgctgggttt ctgttttaac\n+    46561 aacattttct gcgccgccac aaattttggc tgcatcgaca gttttcttct gcccaattcc\n+    46621 agaaacgaag aaatgatggg tgatggtttc ctttggtgct actgctgccg gtttgttttg\n+    46681 aacagtaaac gtctgttgag cacatcctgt aataagcagg gccagcgcag tagcgagtag\n+    46741 catttttttc atggtgttat tcccgatgct ttttgaagtt cgcagaatcg tatgtgtaga\n+    46801 aaattaaaca aaccctaaac aatgagttga 
aatttcatat tgttaatatt tattaatgta\n+    46861 tgtcaggtgc gatgaatcgt cattgtattc ccggattaac tatgtccaca gccctgacgg\n+    46921 ggaacttctc tgcgggagtg tccgggaata attaaaacga tgcacacagg gtttagcgcg\n+    46981 tacacgtatt gcattatgcc aacgccccgg tgctgacacg gaagaaaccg gacgttatga\n+    47041 tttagcgtgg aaagatttgt gtagtgttct gaatgctctc agtaaatagt aatgaattat\n+    47101 caaaggtata gtaatatctt ttatgttcat ggatatttgt aacccatcgg aaaactcctg\n+    47161 ctttagcaag attttccctg tattgctgaa atgtgatttc tcttgatttc aacctatcat\n+    47221 aggacgtttc tataagatgc gtgtttcttg agaatttaac atttacaacc tttttaagtc\n+    47281 cttttattaa cacggtgtta tcgttttcta acacgangtg aatattatct gtggctagat\n+    47341 agtaaatata atgtgagacg ttgtgacgtt ttagttcaga ataaaacaat tcacagtcta\n+    47401 aatcttttcg cacttgatcg aatatttctt taaaaatggc aacctgagcc attggtaaaa\n+    47461 ccttccatgt gatacgaggg cgcgtagttt gcattatcgt ttttatcgtt tcaatctggt\n+    47521 ctgacctcct tgtgttttgt tgatgattta tgtcaaatat taggaatgtt ttcacttaat\n+    47581 agtattggtt gcgtaacaaa gtgcggtcct gctggcattc tggagggaaa tacaaccgac\n+    47641 agatgtatgt aaggccaacg tgctcaaatc ttcatacaga aagatttgaa gtaatatttt\n+    47701 aaccgctaga tgaagagcaa gcgcatggag cgacaaaatg aataaagaac aatctgctga\n+    47761 tgatccctcc gtggatctga ttcgtgtaaa aaatatgctt aatagcacca tttctatgag\n+    47821 ttaccctgat gttgtaattg catgtataga acataaggtg tctctggaag cattcagagc\n+    47881 aattgaggca gcgttggtga agcacgataa taatatgaag gattattccc tggtggttga\n+    47941 ctgatcacca taactgctaa tcattcaaac tatttagtct gtgacagagc caacacgcag\n+    48001 tctgtcactg tcaggaaagt ggtaaaactg caactcaatt actgcaatgc cctcgtaatt\n+    48061 aagtgaattt acaatatcgt cctgttcgga gggaagaacg cgggatgttc attcttcatc\n+    48121 acttttaatt gatgtatatg ctctcttttc tgacgttagt ctccgacggc aggcttcaat\n+    48181 gacccaggct gagaaattcc cggacccttt ttgctcaaga gcgatgttaa tttgttcaat\n+    48241 catttggtta ggaaagcgga tgttgcgggt tgttgttctg cgggttctgt tcttcgttga\n+    48301 catgaggttg ccccgtattc agtgtcgctg atttgtattg tctgaagttg tttttacgtt\n+    48361 
aagttgatgc agatcaatta atacgatacc tgcgtcataa ttgattattt gacgtggttt\n+    48421 gatggcctcc acgcacgttg tgatatgtag atgataatca ttatcacttt acgggtcctt\n+    48481 tccggtgatc cgacaggtta cg\n+//\n+\n'
b
diff -r 000000000000 -r f848a7f97332 test-data/lambda.short_sequence_repeats.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/lambda.short_sequence_repeats.fastq Thu Oct 31 19:40:40 2019 -0400
b
b'@@ -0,0 +1,20000 @@\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000000\n+TTCGGGAAAACGGGATCACCACGATGGAACAGGTTAACGCAGGAATGCGC\n++\n+HHIHHHHIHHIIIHGIFEIIFIGCGIGHIIDFAFII@EFHHGIII<IIGI\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000001\n+TCTGCGGGAGTGTCCGGGAATAATTAAAACGATGCACACAGGGTTTAGCG\n++\n+IHIIHIGHHIIIIGFEGIGIIIEIGIHIDCIDI?FIIIIH@<IIIDIBH=\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000002\n+AACGTGACGGACGTAACCACCGCGACATGTGTGTGCTGTTCCGCTGGGCA\n++\n+IIIHHIHIHHFIGIIIEIEIHHIGEDIGIDIBGHIDGIGEIIICGIHGII\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000003\n+GGCAACTCATGCAATTATTGTGAGCAATACACACGCGCTTCCAGCGGAGT\n++\n+IIIHIHIIHIIFHIHFGEIGFHHIHIDIDACIIIGIDDICIIEBDAIICI\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000004\n+TTTTTTAATGAGGTCATCACGGGATCCCATGTGCGTGACGGACATCGGGA\n++\n+HIIHIGHIIIIIIHHIIHGIICDHIHIIIICIDCFHIIIIIIIIEGII;I\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000005\n+AATTTAACATTTACAACCTTTTTAAGTCCTTTTATTAACACGGTGTTATC\n++\n+IHIHHHGIHIIIDFEFEGHGIIFIFICHGIHICI@EIIFCFIBHEI?ICI\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000006\n+TGAAAACGTAATGCACGTCTTTACCTGCCCGTCGCTTTTGCTCCATTAGC\n++\n+IIHIIHIIGIHIIHIIIGHHFEIIGIFIIICDIBIIG?IDIICDDI=GID\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000007\n+AGAATGTCGCTGGACGGTATCGCGAAAATGTATTCAGAAAATGATTATCA\n++\n+IHIHIIIHIGHFHIIIIFGGIFFIEIGDGDFIGHEGIIIBI@:IID=<9I\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000008\n+GGTAAGGTCTGGCGAACGGTGTATTACCGGTTTGCTACCAGGGAAGAACG\n++\n+IHIHIIIHHIIIIIFFIHEHFAIIGHIGIIIFIIHIIIIBI9GDIFIIEI\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000009\n+TGAGCAAATGATGATTGATGGCTTATCAGAGCGTGGAAACGTTAATCACT\n++\n+IIHHIIHHIGGIHIIDIIGIHGHHHGIIIIEAIFIIHIIBEIII:H?DIH\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000010\n+TTCAGTTCTCTGGCATTTTTGTCGCGCTGGGCTTTGTAGGTAATGGCGTT\n++\n+IHIIIHIIIGIHIEHIIIIIGECIDGIHIEIIIFFIFFFHIIHIIHII@C\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000011\
n+CTACACAAATCTTTCCACGCTAAATCATAACGTCCGGTTTCTTCCGTGTC\n++\n+IHHIIFHGIGFIIHFIGEIGIFEIEIFHIAGAGCBIIGGIF>II<GBI5?\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000012\n+ATTACAAAGTTACCTGTCAAACGGTGCAATGAAGCCAAGTTAGAACTCGT\n++\n+HHHHIIIHIIHHIGIIIHGCIIDCEIHIIFIIIICIIIFIFIIICIGDI;\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000013\n+AGTAATTGACGCGTTCGCCAGCTTCAGTTCTCTGGCATTTTTGTCGCGCT\n++\n+HHHIHIHIFGIHIIIFHGIIHHHIIIIGIFGFEFFIII@IIHB@ICIE@G\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000014\n+CGACTCAATGCTCTTACCTGTTGTGCAGATATAAAAAATCCCGAAACCGT\n++\n+IIIIHHHGHIIIGIGIIIIHGIIGIEICFGIIIIII?IDHII>GGHI<II\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000015\n+TCAGGCTGTCAGCCTTATGCTCGAACTGACCATAACCAGCGCCCGGCAGT\n++\n+IHHIIIIIGHIHHIHFEIEIGHDGDIDCFHICGAEIIIHIBIIDHIFIII\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000016\n+CTGCCGTTTTGCCCGTGCATATCGGTCACGAACAAATCTGATTACTTAAA\n++\n+IIHHHHIIIIIIFGIGHHIIDGGIFIIEEIBIIIIIIGGC@C?IEIBA;I\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000017\n+ACTGTCGATGCAGCCAAAATTTGTGGCGGCGCAGAAAATGTTGTTAAAAC\n++\n+IHHHIHHIHIIIHHHGIIICGGIHFIIIHIIIEIFI@FIFIHGIIIIII@\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000018\n+GAAATCAAAGGCGCGGACACGTTCATCTTTGGTCATACGCCAGCAGTGAA\n++\n+IHHIHHHIFGHFIHIIFIIIIIIGHBHIAIIIIFHIIIIGIIIEI>IIDH\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000019\n+GCCGTGTAGTTGAAGGTTTTTCCGTCAGATTCTTTTGGGATTGGCTTGGG\n++\n+IHIHHHIIHIGHHIIHEFFIF?IIBIIIEGDEIIIIIHIIHID=GIFEGI\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000020\n+CTGCCGCTCTGAATTGCAGCATCCGGTTTCACCACAGAAAGGTCGTTTTC\n++\n+IHIIHIHHIGIGIFFIIFGFGFGIIHDIGGFGGHCDIIDIBIAFI=IAII\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000021\n+TGTCCCCCTGTTTTGAGGGATAGCAATCCCCCAATTTGAGGGAGTGTTTT\n++\n+IIHGIHGHFHIIHHGGIHIHIEIIIBIHIGFDIIHIIHIIIFI(BIIHI=\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000022\n+TTTGAAGTAATATTTTAACCGCTAGATGAAGAGCAAGCGCATGGAGCCAC\n++\n+HHHIIHIIIIEIFGIFIGHIIIIICDE
IEHICIIIIIIGDIIIIIHI5FB\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000000023\n+CGCTG'..b'GTAGCCGATAAACACGCTCGTTATATAAGCGAG\n++\n+IIHIHIIIIIIIIIGFGHIHIFBIIEIFIIDIIFGDIIAEA?BIIAE@I=\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004977\n+GTACAGGCCGTGCGGTTGATATTGCCAAAACAGAGCTGTGGGGGAGAGTT\n++\n+IHHIIIHHHIHIHGHCHGIFEE=GHGCIIFIFFHIEGBIIHHCHI@HHGI\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004978\n+GAGTGTCCGGGAATAATTAAAACGATGCACACAGGGTTTAGCGCGTACGC\n++\n+IHHIIIIGGHHGHIHIHIIFIIEHIGIFDBEDFIIIII<BIHH=EIGI<I\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004979\n+ATGTGAGACGTTGTGACGTTTTAGTTCAGAATAAAACAATTCACAGTCTA\n++\n+HIIHGGIIGGIIIFIEIIIIHIIIHIIHIIDAEGIIHIEIEIIIIIBIBA\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004980\n+CAGGGGAATGTGATTAATTCCAGGCATTAAACGTTCTCTGCACCACGATC\n++\n+IIHHHHIIHGHGGEGIEIIGGHIIFHIG>IGIFHHFIHFHHI?IGHIGIH\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004981\n+ACGCCGGGGGCAAGATGCAGAGATTGCCATGGTACAGGCCGTGCGGTTGA\n++\n+HIHIIHIHIIIIIGIIIFIIH@IIGHGIIIIFAIGIIBFIIGABHI>HIG\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004982\n+ACCGTTCCGCCCGCTTCTTTGAATTTTGCAATCAGGCTGTCAGCCTTATG\n++\n+IHIHHHIHGHHIIIGIHFEGHHHICIBEIDHHIBIFIHIIDIIII9HIII\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004983\n+ACAGGCGCCGGACGCTACCAGCTTCTTTCCCGTTGGTGGGATGCCTACCG\n++\n+IIIHHHHIIIHIIHIGHIIIGGGDHGIIICFGIIIE@DGIBI8IICGIBI\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004984\n+TTTACACGAATCAGATCCACGGAGGGATCATCAGCAGATTGTTCTTTATT\n++\n+HIIHHHHHHGHHHIIHIIIEHFIFGHIHEGHHGIGCIIHII9DIIIIBG:\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004985\n+CATGCAGCTTCTGACCGCAGTTAGCGCACGTTAAAGCTCGCTCGACGCTT\n++\n+HHHHIHIIHHGGIGICIIIGIGIG@DIICIIIICID7GIID@IHIIHI?F\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004986\n+ATCTTCTACTCCGGCTTTTTTAGCAGCGAAGCGTTTGATAAGCGAACCAA\n++\n+IIHHGHIIIIIIGIGGIIIIIEIHDIIIIIGBIIIIIDIGGICDIIIIID\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004987\n+CTT
GCCCCCGGCGTCGCGGCACTACGGCAATAATCCGCATAAGCGAATGT\n++\n+IIHIHIHHIHGIIHHHFIHHFIDEIGIHFIIDIIIFIIIIIIII@GAI=I\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004988\n+ATGCGCTATTACGAAAAAATTGATGGCAGCAAATACCGAAATATTTGGGT\n++\n+IIIIIIHIIIHIGIIIIHHGIIEEIIGIFIIIAIIHAIHGHGIGADIIID\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004989\n+CCGGACATTACCGGACAACTGCTGCGGCACCTCAACTCCGATTTAATGTA\n++\n+HHHHHGIIFIIGFEIHIIIGHHGFIEIFGIHEICICG@EGIAIII?IF1D\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004990\n+GAATAGCGTTTTGCTGATGTGCTGGAGATCGAATTACAAAGGTTAGTTTT\n++\n+IHIIHIHIIIIGIIIIIIIIIIIIGEIGFFIIIIE<DHIGIEIIEIIFBG\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004991\n+TAACTTCGATTTTGGTCAATCACCTTGTTTTCCTCGCACGACGTCTTAGC\n++\n+IIHIIIHIHIHIIIIHHIHDHGIIFGGHIIIIHGEFIIGIIACIIFHCII\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004992\n+GGATATTCAGACGCGAATGCCTGTTCTGAAGCCATTTATCGATATGGTAA\n++\n+IHIIHIHIHIHIIHIHHFEIIIIFFIIIHEDIEF@FIDIHF?DH@ABI?>\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004993\n+TTTGTTGAGATTTATGTCAAATATTAGGAATGTTTTCACTTAATAGTATT\n++\n+HIHIIHHHIIIGHGIIFIIIIIIIIFIIIFFIIIIFIIIIIHIIIIIEI9\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004994\n+CCCGCTTTCACGAAGGTCATGTGTAAAAGGCCGCAGCGTGACTATTACTA\n++\n+IIHIIGIGIHGGHEHIIIIIHIGFIIIGIBEGIIGIIFI>IADHDIHIIG\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004995\n+GGTGCAATGCCACAAAGAAGAGTCAATCGCAGACAACATTTTGAATGCGG\n++\n+HIHHHHGHHHHHGGGHGIHFIGEDFIHEIGF@GDIGHEIFGIIIDFIIDB\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004996\n+ATCATTACCGTGATAACGCCATTACCTACAAAGCCCAGCGCGACAAAAAT\n++\n+HIHHIIHGGHIHIIGEHIIIGEFGDIHGIIFFHGCGIIHIBDIIAIIFBD\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004997\n+AAATATTTGGGTAGTTGGCGATCTGCACGGATGCTACACGAACCTGATGA\n++\n+IIIIIIHHHGHGHIHHIHCIHDIEFIIEICIIAIIA@IICGIFII>CCIC\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004998\n+CGTCACCTAAGCAGGCCCATAGCTTCCTGTTTTGGTCTAAGCTGCGGTTG\n++\n+HIHHHIHGHHGGGIGHIFIHHIIIHIIIIHDI
IHFBIIHDIIGI>IFDIG\n+@../data/lambda/lambda.short_sequence_repeats.fastq.000004999\n+ATAATATTCACATCGTGTTAGAAAACGATAACACCGTGTTAATAAAAGGA\n++\n+IHIHIHIGIGIIGGIFHDHHFIFHDFBCGCCEDIIIIIIFIFIICIIDHI\n'
b
diff -r 000000000000 -r f848a7f97332 test-data/log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/log.txt Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,4 @@
+Thu Oct 17 16:09:32 2019
+
+breseq --num-processors 1 -o results --reference /private/var/folders/8d/z84mms353qb2n0r3n2jhww0m0000gq/T/tmpYSLdP2/files/000/dataset_3.dat /private/var/folders/8d/z84mms353qb2n0r3n2jhww0m0000gq/T/tmpYSLdP2/files/000/dataset_1.dat --name smallest
+
b
diff -r 000000000000 -r f848a7f97332 test-data/out.tar.gz
b
Binary file test-data/out.tar.gz has changed
b
diff -r 000000000000 -r f848a7f97332 test-data/report.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/report.html Thu Oct 31 19:40:40 2019 -0400
[
@@ -0,0 +1,176 @@
+<!DOCTYPE html
+PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<html>
+<head>
+<title>smallest :: BRESEQ :: Mutation Predictions</title>
+<style type = "text/css">
+body {font-family: sans-serif; font-size: 11pt;}
+th {background-color: rgb(0,0,0); color: rgb(255,255,255);}
+table {background-color: rgb(1,0,0); color: rgb(0,0,0);}
+tr {background-color: rgb(255,255,255);}
+.mutation_in_codon {color:red; text-decoration : underline;}
+.snp_type_synonymous{color:green;}
+.snp_type_nonsynonymous{color:blue;}
+.snp_type_nonsense{color:red;}
+.mutation_header_row {background-color: rgb(0,130,0);}
+.read_alignment_header_row {background-color: rgb(255,0,0);}
+.missing_coverage_header_row {background-color: rgb(0,100,100);}
+.new_junction_header_row {background-color: rgb(0,0,155);}
+.copy_number_header_row {background-color: rgb(153,102,0);}
+.alternate_table_row_0 {background-color: rgb(255,255,255);}
+.alternate_table_row_1 {background-color: rgb(235,235,235);}
+.gray_table_row {background-color: rgb(230,230,245);}
+.polymorphism_table_row {background-color: rgb(160,255,160);}
+.highlight_table_row {background-color: rgb(192,255,255);}
+.reject_table_row {background-color: rgb(255,200,165);}
+.user_defined_table_row {background-color: rgb(255,255,0);}
+.information_table_row {background-color: rgb(200,255,255);}
+.junction_repeat {background-color: rgb(255,165,0)}
+.junction_gene {}
+.hidden { display: none; }
+.unhidden { display: block; }
+
+</style>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+<script type="text/javascript">
+  function hideTog(divID) {
+    var item = document.getElementById(divID);
+    if (item) {
+      item.className=(item.className=='hidden')?'unhidden':'hidden';
+    }
+  }
+  function showTog(butID) {
+    var button = document.getElementById(butID);
+    if (button) {
+      button.value=(button.value=='Show')?'Hide':'Show';
+    }
+  }
+</script>
+
+</head>
+<body>
+<table width="100%" border="0" cellspacing="0" cellpadding="3">
+<tr>
+<td><a href="http://barricklab.org/breseq"><img src="evidence/breseq_small.png" /></a></td>
+<td width="100%">
+<b><i>breseq</i></b>&nbsp;&nbsp;version 0.34.0&nbsp;&nbsp;
+<br><a href="index.html">mutation predictions</a> | 
+<a href="marginal.html">marginal predictions</a> | 
+<a href="summary.html">summary statistics</a> | 
+<a href="output.gd">genome diff</a> | 
+<a href="log.txt">command line log</a>
+</td></tr></table>
+
+<p>
+<!--Mutation Predictions -->
+<p>
+<!--Output Html_Mutation_Table_String-->
+<table border="0" cellspacing="1" cellpadding="3">
+<tr><th colspan="6" align="left" class="mutation_header_row">Predicted mutations</th></tr><tr>
+<th>evidence</th>
+<th>position</th>
+<th>mutation</th>
+<th>annotation</th>
+<th>gene</th>
+<th width="100%">description</th>
+</tr>
+
+<!-- Item Lines -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="center"><a href="evidence/JC_11.html">JC</a></td><!-- Evidence -->
+<td align="right">41,405</td><!-- Position -->
+<td align="center">(GCG)<sub>2&rarr;5</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(325/873&nbsp;nt)</td>
+<td align="center"><i>NinC</i>&nbsp;&rarr;</td>
+<td align="left">NinC protein</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="center"><a href="evidence/MC_PLOT_8.html">MC</a>&nbsp;<a href="evidence/JC_12.html">JC</a></td><!-- Evidence -->
+<td align="right">41,601</td><!-- Position -->
+<td align="center">(AAC)<sub>2&rarr;1</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(521&#8209;523/873&nbsp;nt)</td>
+<td align="center"><i>NinC</i>&nbsp;&rarr;</td>
+<td align="left">NinC protein</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="center"><a href="evidence/JC_13.html">JC</a></td><!-- Evidence -->
+<td align="right">41,716</td><!-- Position -->
+<td align="center">(T)<sub>5&rarr;9</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(636/873&nbsp;nt)</td>
+<td align="center"><i>NinC</i>&nbsp;&rarr;</td>
+<td align="left">NinC protein</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="center"><a href="evidence/MC_PLOT_9.html">MC</a>&nbsp;<a href="evidence/JC_14.html">JC</a></td><!-- Evidence -->
+<td align="right">44,075</td><!-- Position -->
+<td align="center">(CAAA)<sub>2&rarr;1</sub></td><!-- Cell Mutation -->
+<td align="center">coding&nbsp;(190&#8209;193/624&nbsp;nt)</td>
+<td align="center"><i>Q</i>&nbsp;&rarr;</td>
+<td align="left">late gene regulator</td>
+</tr>
+<!-- End Table Row -->
+
+<!-- Print The Table Row -->
+<tr class="normal_table_row">
+<td align="center"><a href="evidence/SNP_5.html">RA</a></td><!-- Evidence -->
+<td align="right">47,317</td><!-- Position -->
+<td align="center">N&rarr;T</td><!-- Cell Mutation -->
+<td align="center"><span style="white-space: nowrap"><font class="snp_type_nonsynonymous">?87I</font>&nbsp;(<font class="mutation_in_codon">N</font>TC&rarr;<font class="mutation_in_codon">A</font>TC)&nbsp;</span></td>
+<td align="center"><i>lambdap78</i>&nbsp;&larr;</td>
+<td align="left">putative envelope protein</td>
+</tr>
+<!-- End Table Row -->
+</table>
+<p>
+<table border="0" cellspacing="1" cellpadding="3" width="100%">
+<tr><th colspan="11" align="left" class="missing_coverage_header_row">Unassigned missing coverage evidence</th></tr>
+<tr><th>&nbsp;</th><th>&nbsp;</th><th>&nbsp;</th><th>seq&nbsp;id</th>
+<th>start</th>
+<th>end</th>
+<th>size</th>
+<th>&larr;reads</th>
+<th>reads&rarr;</th>
+<th>gene</th>
+<th width="100%">description</th>
+</tr>
+<tr>
+<td><a href="evidence/MC_SIDE_1_7.html">*</a></td>
+<td><a href="evidence/MC_SIDE_2_7.html">*</a></td>
+<td><a href="evidence/MC_PLOT_7.html">&divide;</a></td>
+<td>NC_001416</td>
+<td align="right">1</td>
+<td align="right">38810</td>
+<td align="right">38810</td>
+<td align="center">NA&nbsp;[0]</td>
+<td align="center">[10]&nbsp;13</td>
+<td align="center"><i>nu1&#8211;[O]</i></td>
+<td align="left">nu1,A,W,B,C,nu3,D,E,Fi,Fii,Z,U,V,G,T,H,M,L,K,I,J,lom,orf&#8209;401,orf206b,orf&#8209;314,orf&#8209;194,ea47,ea31,ea59,int,xis,lambdap35,ea8.5,ea22,orf61,orf63,orf60a,exo,bet,gam,kil,cIII,ea10,ral,orf28,lambdap48,N,rexb,rexa,cI,cro,cII,[O]</td>
+</tr>
+<tr>
+<td><a href="evidence/MC_SIDE_1_10.html">*</a></td>
+<td><a href="evidence/MC_SIDE_2_10.html">*</a></td>
+<td><a href="evidence/MC_PLOT_10.html">&divide;</a></td>
+<td>NC_001416</td>
+<td align="right">48475</td>
+<td align="right">48502</td>
+<td align="right">28</td>
+<td align="center">12&nbsp;[10]</td>
+<td align="center">[0]&nbsp;NA</td>
+<td align="center"><i>lambdap79/&#8211;</i></td>
+<td align="left">hypothetical protein/&#8211;</td>
+</tr>
+</table>
+</body></html>
\ No newline at end of file
b
diff -r 000000000000 -r f848a7f97332 tool-data/genbank_files.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/genbank_files.loc.sample Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,4 @@
+#This configures genbank files available for specific galaxy tools (eg. breseq)
+#<unique_build_id>\t<dbkey>\t<display_name>\t<file_path>
+#ecoliMC4100 ecoliMC4100 E. coli MC4100 /afs/igc.gulbenkian.pt/folders/UBI/PROJECTS/GALAXY/genomes/genbank/NC_012759.1_BW2952.gb
+#ecoliMG1655 ecoliMG1655 E. coli MG1655 /afs/igc.gulbenkian.pt/folders/UBI/PROJECTS/GALAXY/genomes/genbank/NC_012759.1_BW2952.gb
b
diff -r 000000000000 -r f848a7f97332 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="genbank_files" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/genbank_files.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r f848a7f97332 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Oct 31 19:40:40 2019 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="genbank_files" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/genbank_files.loc" />
+    </table>
+</tables>