changeset 15:479c4f2f4826 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 999eca8a05f17ae567f99b8ca3394f2105491173
author iuc
date Mon, 09 Jul 2018 13:22:58 -0400
parents 85ca751407c3
children c9ecd2a96ecf
files gbk2fa.py snpEff_create_db.xml snpEff_macros.xml test-data/pBR322.fna test-data/pBR322.gbk.gz test-data/pBR322.gff3 test-data/pBR322_test1.fna test-data/pBR322_test2.fna test-data/pBR322_test2.fna.gz
diffstat 9 files changed, 251 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/gbk2fa.py	Tue Jun 12 17:31:21 2018 -0400
+++ b/gbk2fa.py	Mon Jul 09 13:22:58 2018 -0400
@@ -22,7 +22,7 @@
     f_type = magic.from_file(args.genbank_file, mime=True)
     if f_type == 'text/plain':
         input_handle = open(gbk_filename, "r")
-    elif f_type == 'application/gzip':
+    elif f_type == 'application/gzip' or f_type == 'application/x-gzip':
         input_handle = gzip.open(gbk_filename, "rt")
     elif f_type == 'application/x-bzip2':
         input_handle = bz2.open(gbk_filename, "rt")
--- a/snpEff_create_db.xml	Tue Jun 12 17:31:21 2018 -0400
+++ b/snpEff_create_db.xml	Mon Jul 09 13:22:58 2018 -0400
@@ -1,5 +1,5 @@
-<tool id="snpEff_build_gb" name="SnpEff build:" version="@wrapper_version@.galaxy2">
-    <description> database from Genbank record</description>
+<tool id="snpEff_build_gb" name="SnpEff build:" version="@wrapper_version@.galaxy3">
+    <description> database from Genbank or GFF record</description>
     <macros>
         <import>snpEff_macros.xml</import>
     </macros>
@@ -14,22 +14,42 @@
     <expand macro="version_command" />
     <command><![CDATA[
 
-        #if str( $fasta.fasta_selector ) == "yes":
-            python3 '$__tool_directory__/gbk2fa.py' '${input_gbk}' '${output_fasta}'
-            #if $fasta.remove_version:
-                '${fasta.remove_version}'
+        #if str( $input_type.input_type_selector ) == "gb":
+            #if str( $input_type.fasta ) == "yes":
+                python3 '$__tool_directory__/gbk2fa.py' '${input_type.input_gbk}' '${output_fasta}'
+                #if $input_type.remove_version:
+                    '${input_type.remove_version}'
+                #end if
+                &&
             #end if
-            &&
         #end if
 
         mkdir -p '${snpeff_output.files_path}'/'${genome_version}' &&
 
-        ln -s '${input_gbk}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gbk &&
+        #if str( $input_type.input_type_selector ) == "gb":
+            #if $input_type.input_gbk.is_of_type("genbank"):
+                ln -s '${input_type.input_gbk}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gbk &&
+            #elif $input_type.input_gbk.is_of_type("genbank.gz"):
+                ln -s '${input_type.input_gbk}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gbk.gz &&
+            #end if
+        #elif str( $input_type.input_type_selector ) == "gff":
+            #if $input_type.input_fasta.is_of_type("fasta"):
+                ln -s '${input_type.input_fasta}' '${snpeff_output.files_path}'/'${genome_version}'/sequences.fa &&
+            #elif $input_type.input_fasta.is_of_type("fasta.gz"):
+                ln -s '${input_type.input_fasta}' '${snpeff_output.files_path}'/'${genome_version}'/sequences.fa.gz &&
+            #end if
+            ln -s '${input_type.input_gff}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gff &&
+        #end if
 
         snpEff @java_options@ build -v
         -configOption '${genome_version}'.genome='${genome_version}'
         -configOption '${genome_version}'.codonTable='${codon_table}'
-        -genbank -dataDir '${snpeff_output.files_path}' '${genome_version}' &&
+        #if str( $input_type.input_type_selector ) == "gb":
+            -genbank
+        #elif str( $input_type.input_type_selector ) == "gff":
+            -gff3
+        #end if
+        -dataDir '${snpeff_output.files_path}' '${genome_version}' &&
         echo "${genome_version}.genome : ${genome_version}" >> '${snpeff_output.files_path}'/snpEff.config &&
         echo "${genome_version}.codonTable : ${codon_table}" >> '${snpeff_output.files_path}'/snpEff.config
 
@@ -38,7 +58,24 @@
         <param name="genome_version" type="text" value="" label="Name for the database" help="for E. coli K12 you may want to use 'EcK12' etc.">
             <validator type="regex" message="A genome version name is required">\S+</validator>
         </param>
-        <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/>
+        <conditional name="input_type">
+            <param name="input_type_selector" type="select" display="radio" label="Input annotations are in" help="Specify format for annotations you are using to create SnpEff database">
+                <option value="gb" selected="true">GenBank</option>
+                <option value="gff">GFF</option>
+            </param>
+            <when value="gb">
+                <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/>
+                <param name="fasta" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format">
+                        <option value="yes" selected="true">Yes</option>
+                        <option value="no">No</option>
+                </param>
+                <param type="boolean" name="remove_version" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have vesion numbers such as B000564.2. This option removes them leaving only B000564" argument="--remove_version"/>
+            </when>
+            <when value="gff"> 
+                <param name="input_gff" type="data" format="gff3" label="GFF dataset to build database from" help="This GFF file will be used to generate snpEff database"/>
+                <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Genome in FASTA format" help="This dataset is required for generating SnpEff database. See help section below."/>
+            </when>
+        </conditional>
         <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options">
             <option selected="true" value="Standard">Standard</option>
             <option value="Vertebrate_Mitochondrial">Vertebrate_Mitochondrial</option>
@@ -66,61 +103,121 @@
             <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option>
             <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option>
         </param>
-        <conditional name="fasta">
-            <param name="fasta_selector" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format">
-                <option value="yes" selected="true">Yes</option>
-                <option value="no">No</option>
-            </param>
-            <when value="yes">
-                <param type="boolean" name="remove_version" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have vesion numbers such as B000564.2. This option removes them leaving only B000564" argument="--remove_version"/>
-            </when>
-            <when value="no"/>
-        </conditional>
     </inputs>
     <outputs>
         <data name="snpeff_output" format="snpeffdb" label="@snpeff_version@ database for ${genome_version}"/>
         <data name="output_fasta" format="fasta" label="Fasta sequences for ${genome_version}">
-            <filter>fasta['fasta_selector'] == 'yes'</filter>
+            <filter>input_type['input_type_selector'] == 'gb'</filter>
+            <filter>input_type['fasta'] == 'yes'</filter>
         </data>
     </outputs>
     <tests>
         <test>
             <param name="genome_version" value="pBR322"/>
+            <param name="input_type_selector" value="gb"/>
             <param name="input_gbk" value="pBR322.gbk" />
             <output name="snpeff_output">
                 <assert_contents>
                     <has_text text="pBR322" />
                 </assert_contents>
             </output>
-            <output name="output_fasta" value="pBR322.fna"/>
+            <output name="output_fasta" value="pBR322_test1.fna"/>
+        </test>
+        <test>
+            <param name="genome_version" value="pBR322"/>
+            <param name="input_type_selector" value="gb"/>
+            <param name="input_gbk" value="pBR322.gbk.gz" />
+            <output name="snpeff_output">
+                <assert_contents>
+                    <has_text text="pBR322" />
+                </assert_contents>
+            </output>
+            <output name="output_fasta" value="pBR322_test1.fna"/>
+        </test>
+        <test>
+            <param name="genome_version" value="pBR322"/>
+            <param name="input_type_selector" value="gff"/>
+            <param name="input_fasta" value="pBR322_test2.fna" />
+            <param name="input_gff" value="pBR322.gff3" />
+            <output name="snpeff_output">
+                <assert_contents>
+                    <has_text text="pBR322" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="genome_version" value="pBR322"/>
+            <param name="input_type_selector" value="gff"/>
+            <param name="input_fasta" value="pBR322_test2.fna.gz" />
+            <param name="input_gff" value="pBR322.gff3" />
+            <output name="snpeff_output">
+                <assert_contents>
+                    <has_text text="pBR322" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
 **What it does**
 
-This tool uses `"snpEff build -genbank"` command to create a snpEff database from a Genbank dataset. If **Parse Genbank into Fasta** is selected (the default behavior) the tool will also convert Genbank dataset into a single FASTA dataset.
+This tool uses `"snpEff build -genbank"` or `"snpEff build -gff3"` commands to create a snpEff database. 
+
+------
 
+.. class:: infomark
+
+**Working with Genbank files**
 
 Using Genbank data for creating databases has several advantages:
 
- #. Genbank files contains annotations (such as locations of genes) together with sequences. This was one ensures that these two are in sync with each other
- #. When you are analyzing small genomes it is much more convenient to create a database on the fly and use it.
+ #. Genbank files contain annotations (such as locations of genes) together with sequences. This ensures that these two are in sync with each other. 
+ #. When you are analyzing small genomes (or not so small) it is much more convenient to create a database on the fly and use it.
+
+ .. class:: warningmark
+
+ SnpEff errors out on highly fragmented genomes containing multiple scaffolds. This is because a single gene may be split between multiple scaffolds, causing SnpEff to crash. If this happens, use the GFF route described below.
+
+-------
+
+**Genbank usage scenario**
+
+Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool enables you to follow the following analysis steps:
+
+ #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome (note that all NCBI genomes have similar list of files associated with them).
+ #. Copy URL for file with extension `gbff.gz` 
+ #. Paste the URL into upload tool and set datatype to `genbank.gz`.
+ #. Use this tool to generate a snpEff database and FASTA sequences from the dataset you've uploaded during the previous step.
+ #. Use your Illumina reads to map against FASTA dataset generated in the previous step using BWA-MEM.
+ #. Call variants using **Freebayes**.
+ #. Annotate the VCF output of Freebayes with **SnpEff eff** using the database generated at step 4 (using *Custom* option for **Genome source** parameter).
+
+In this scenario the Genbank dataset is used twice. First, it is used to produce the FASTA sequences that BWA maps the reads against. Second, it is used to create the snpEff database. This guarantees that you will not have any issues related to reference sequence naming.
 
 -------
 
 .. class:: infomark
 
-**The usage scenario**
+**Working with GFF files**
+
+Alternatively you can create a SnpEff database from `GFF3 <https://en.wikipedia.org/wiki/General_feature_format>`_ files downloaded from NCBI or any other source. Using GFF dataset for building SnpEff database requires two inputs:
 
-Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool enables you to follow the following analysis steps:
+ #. The GFF file itself
+ #. A genome in FASTA format
+
+The GFF file contains coordinates of various features, but does not contain underlying sequences. This is why a FASTA file needs to be provided as well.
+
+------
 
- #. Download genome from `NCBI <https://www.ncbi.nlm.nih.gov>`_ into Galaxy.
- #. Use this tool to generate a snpEff database and FASTA sequences from the file you downloaded at step 1.
- #. Use your Illumina reads to map against FASTA dataset generated in the previous step using BWA-MEM.
- #. Call variants using **Freebayes**.
- #. Annotate vcf output of Freebayes with **SnpEff eff** using database generated at step 2 (using *Custom* option for **Genome source** parameter).
+**GFF usage scenario**   
+
+The following example also uses *E. coli* K-12 MG1655:
 
-In this scenario Genbank dataset is used twice. First, it is used to produce FASTA sequences that are using by BWA to map against. Second, it is used to create snpEff database. This guarantees that you will not have any issues related to reference sequence naming.
+#. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome.
+#. Copy URLs for files with `gff.gz` and `fna.gz` extensions. The first file contains annotations in GFF3 format. The second file contains entire genome as a FASTA record.
+#. Paste URLs into upload tool and set datatypes to `gff3` and `fasta.gz` for annotations and genome, respectively.
+#. Use this tool to generate a snpEff database from the GFF dataset.
+#. Map your reads against the FASTA dataset and continue as described in the above example.
+
 
 @snpeff_in_galaxy_info@
 @external_documentation@
--- a/snpEff_macros.xml	Tue Jun 12 17:31:21 2018 -0400
+++ b/snpEff_macros.xml	Mon Jul 09 13:22:58 2018 -0400
@@ -54,11 +54,11 @@
 
 In cases when you are dealing with bacterial or viral (or, frankly, any other) genomes it may be easier to create database yourself. For this you need:
 
- #. Download Genbank record corresponding to your genome of interest from NCBI.
+ #. Download Genbank record corresponding to your genome of interest from NCBI or use annotations in GFF format accompanied by the corresponding genome in FASTA format.
  #. Use **SnpEff build** to create the database.
  #. Use the database in **SnpEff eff** (using *Custom* option for **Genome source** parameter).
 
-Creating custom database has one benefit. The **SnpEff build** tool normally produces two outputs: (1) a SnpEff database and (2) FASTA file containing sequences from the Genbank file. If you are performing your experiment from the beginning by mapping reads against a genome and finding variants before annotating them with SnpEff you can use **this FASTA file** as a reference to map your reads against. This will guarantee that you will not have any issues related to reference sequence naming -- the most common source of SnpEff errors.
+Creating a custom database has one major advantage. It guarantees that you will not have any issues related to reference sequence naming -- the most common source of SnpEff errors.
 
 </token>
 
--- a/test-data/pBR322.fna	Tue Jun 12 17:31:21 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
->J01749
-TTCTCATGTTTGACAGCTTATCATCGATAAGCTTTAATGCGGTAGTTTATCACAGTTAAATTGCTAACGCAGTCAGGCACCGTGTATGAAATCTAACAATGCGCTCATCGTCATCCTCGGCACCGTCACCCTGGATGCTGTAGGCATAGGCTTGGTTATGCCGGTACTGCCGGGCCTCTTGCGGGATATCGTCCATTCCGACAGCATCGCCAGTCACTATGGCGTGCTGCTAGCGCTATATGCGTTGATGCAATTTCTATGCGCACCCGTTCTCGGAGCACTGTCCGACCGCTTTGGCCGCCGCCCAGTCCTGCTCGCTTCGCTACTTGGAGCCACTATCGACTACGCGATCATGGCGACCACACCCGTCCTGTGGATCCTCTACGCCGGACGCATCGTGGCCGGCATCACCGGCGCCACAGGTGCGGTTGCTGGCGCCTATATCGCCGACATCACCGATGGGGAAGATCGGGCTCGCCACTTCGGGCTCATGAGCGCTTGTTTCGGCGTGGGTATGGTGGCAGGCCCCGTGGCCGGGGGACTGTTGGGCGCCATCTCCTTGCATGCACCATTCCTTGCGGCGGCGGTGCTCAACGGCCTCAACCTACTACTGGGCTGCTTCCTAATGCAGGAGTCGCATAAGGGAGAGCGTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTCAGCTCCTTCCGGTGGGCGCGGGGCATGACTATCGTCGCCGCACTTATGACTGTCTTCTTTATCATGCAACTCGTAGGACAGGTGCCGGCAGCGCTCTGGGTCATTTTCGGCGAGGACCGCTTTCGCTGGAGCGCGACGATGATCGGCCTGTCGCTTGCGGTATTCGGAATCTTGCACGCCCTCGCTCAAGCCTTCGTCACTGGTCCCGCCACCAAACGTTTCGGCGAGAAGCAGGCCATTATCGCCGGCATGGCGGCCGACGCGCTGGGCTACGTCTTGCTGGCGTTCGCGACGCGAGGCTGGATGGCCTTCCCCATTATGATTCTTCTCGCTTCCGGCGGCATCGGGATGCCCGCGTTGCAGGCCATGCTGTCCAGGCAGGTAGATGACGACCATCAGGGACAGCTTCAAGGATCGCTCGCGGCTCTTACCAGCCTAACTTCGATCACTGGACCGCTGATCGTCACGGCGATTTATGCCGCCTCGGCGAGCACATGGAACGGGTTGGCATGGATTGTAGGCGCCGCCCTATACCTTGTCTGCCTCCCCGCGTTGCGTCGCGGTGCATGGAGCCGGGCCACCTCGACCTGAATGGAAGCCGGCGGCACCTCGCTAACGGATTCACCACTCCAAGAATTGGAGCCAATCAATTCTTGCGGAGAACTGTGAATGCGCAAACCAACCCTTGGCAGAACATATCCATCGCGTCCGCCATCTCCAGCAGCCGCACGCGGCGCATCTCGGGCAGCGTTGGGTCCTGGCCACGGGTGCGCATGATCGTGCTCCTGTCGTTGAGGACCCGGCTAGGCTGGCGGGGTTGCCTTACTGGTTAGCAGAATGAATCACCGATACGCGAGCGAACGTGAAGCGACTGCTGCTGCAAAACGTCTGCGACCTGAGCAACAACATGAATGGTCTTCGGTTTCCGTGTTTCGTAAAGTCTGGAAACGCGGAAGTCAGCGCCCTGCACCATTATGTTCCGGATCTGCATCGCAGGATGCTGCTGGCTACCCTGTGGAACACCTACATCTGTATTAACGAAGCGCTGGCATTGACCCTGAGTGATTTTTCTCTGGTCCCGCCGCATCCATACCGCCAGTTGTTTACCCTCACAACGTTCCAGTAACCGGGCATGTTCATCATCAGTAACCCGTATCGTGAGCATCCTCTCTCGTTTCATCGGTATCATTACCCCCATGAACAGAAATCCCCCTTACACGGAGGCATCAGTGACCAAACAGGAAAAAACCGCCCTTAACATGGCCCGCTTTATCAGAAGCCAGACATTAACGCTTCTGGAGAAACTCAACGAGC
TGGACGCGGATGAACAGGCAGACATCTGTGAATCGCTTCACGACCACGCTGATGAGCTTTACCGCAGCTGCCTCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGCAGACAAGCCCGTCAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCGCAGCCATGACCCAGTCACGTAGCGATAGCGGAGTGTATACTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCGGTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTGCAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAACACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTAC
CGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTATAAAAATAGGCGTATCACGAGGCCCTTTCGTCTTCAAGAA
Binary file test-data/pBR322.gbk.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pBR322.gff3	Mon Jul 09 13:22:58 2018 -0400
@@ -0,0 +1,51 @@
+##sequence-region J01749.1 1 4361
+##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=47470
+J01749.1	Genbank	region	1	4361	.	+	.	ID=id-1;Dbxref=taxon:47470;Is_circular=true;gbkey=Src;mol_type=other DNA;tissue-lib=ATCC 31344%2C ATCC 37017
+J01749.1	Genbank	region	1	1762	.	+	.	ID=id-J01749.1:1..1762;gbkey=Src
+J01749.1	Genbank	binding_site	24	27	.	+	.	ID=id-J01749.1:24..27;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	promoter	27	33	.	-	.	ID=id-J01749.1:27..33;Note=promoter P1 (6);gbkey=promoter
+J01749.1	Genbank	binding_site	39	42	.	+	.	ID=id-J01749.1:39..42;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	promoter	43	49	.	+	.	ID=id-J01749.1:43..49;Note=promoter P2 (6);gbkey=promoter
+J01749.1	Genbank	binding_site	53	56	.	+	.	ID=id-J01749.1:53..56;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	67	70	.	+	.	ID=id-J01749.1:67..70;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	80	83	.	+	.	ID=id-J01749.1:80..83;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	gene	86	1276	.	+	.	ID=gene-tet;Name=tet;gbkey=Gene;gene=tet
+J01749.1	Genbank	CDS	86	1276	.	+	0	ID=cds-AAB59735.1;Parent=gene-tet;Dbxref=NCBI_GP:AAB59735.1;Name=AAB59735.1;gbkey=CDS;gene=tet;product=tetracycline resistance protein;protein_id=AAB59735.1;transl_table=11
+J01749.1	Genbank	binding_site	411	414	.	+	.	ID=id-tet;bound_moiety=echinomycin;gbkey=misc_binding;gene=tet
+J01749.1	Genbank	binding_site	469	472	.	+	.	ID=id-tet-2;bound_moiety=echinomycin;gbkey=misc_binding;gene=tet
+J01749.1	Genbank	sequence_difference	426	426	.	+	.	ID=id-tet-3;Note=conflict;gbkey=misc_difference;gene=tet;replace=
+J01749.1	Genbank	sequence_feature	146	147	.	+	.	ID=id-tet-4;Note=Endo.Sce I cleavage site coordinated with site at base 142 (10);gbkey=misc_feature;gene=tet
+J01749.1	Genbank	region	526	528	.	+	.	ID=id-tet-5;gbkey=old_sequence;gene=tet
+J01749.1	Genbank	sequence_feature	141	142	.	-	.	ID=id-J01749.1:141..142;Note=Endo.Sce I cleavage site coordinated with site at base 146 (10);gbkey=misc_feature
+J01749.1	Genbank	direct_repeat	1515	1519	.	-	.	ID=id-J01749.1:1515..1519;Note=gamma-delta insertion target sequence;gbkey=repeat_region;rpt_type=direct
+J01749.1	Genbank	sequence_feature	1636	1762	.	+	.	ID=id-J01749.1:1636..1762;Note=from pSC101 (bp 1860-1986);gbkey=misc_feature
+J01749.1	Genbank	direct_repeat	1788	1792	.	-	.	ID=id-J01749.1:1788..1792;Note=gamma-delta insertion target sequence;gbkey=repeat_region;rpt_type=direct
+J01749.1	Genbank	sequence_difference	1891	1892	.	+	.	ID=id-J01749.1:1891..1892;Note=conflict;gbkey=misc_difference;replace=att
+J01749.1	Genbank	region	1892	1893	.	+	.	ID=id-J01749.1:1892..1893;gbkey=old_sequence
+J01749.1	Genbank	region	1905	1910	.	+	.	ID=id-J01749.1:1905..1910;gbkey=RBS
+J01749.1	Genbank	region	1905	1909	.	+	.	ID=id-J01749.1:1905..1909;Note=Shine-Dalgarno sequence;gbkey=RBS
+J01749.1	Genbank	sequence_difference	1913	1914	.	+	.	ID=id-J01749.1:1913..1914;Note=conflict;gbkey=misc_difference;replace=caa
+J01749.1	Genbank	region	1914	1915	.	+	.	ID=id-J01749.1:1914..1915;gbkey=old_sequence
+J01749.1	Genbank	CDS	1915	2106	.	+	0	ID=cds-AAB59736.1;Dbxref=NCBI_GP:AAB59736.1;Name=AAB59736.1;gbkey=CDS;product=ROP protein;protein_id=AAB59736.1;transl_table=11
+J01749.1	Genbank	sequence_feature	2011	2167	.	+	.	ID=id-J01749.1:2011..2167;Note=H-strand Y effector site;gbkey=misc_feature
+J01749.1	Genbank	direct_repeat	2245	2249	.	-	.	ID=id-J01749.1:2245..2249;Note=gamma-delta insertion target sequence;gbkey=repeat_region;rpt_type=direct
+J01749.1	Genbank	sequence_feature	2351	2414	.	-	.	ID=id-J01749.1:2351..2414;Note=L-strand Y effector site;gbkey=misc_feature
+J01749.1	Genbank	binding_site	2439	2447	.	+	.	ID=id-J01749.1:2439..2447;bound_moiety=dnaA;gbkey=misc_binding
+J01749.1	Genbank	origin_of_replication	2535	2535	.	+	.	ID=id-J01749.1:2535..2535;gbkey=rep_origin
+J01749.1	Genbank	region	2729	2730	.	+	.	ID=id-J01749.1:2729..2730;Note=revision according to (17);gbkey=old_sequence;replace=at
+J01749.1	Genbank	region	2729	2729	.	+	.	ID=id-J01749.1:2729..2729;gbkey=old_sequence
+J01749.1	Genbank	region	2730	2730	.	+	.	ID=id-J01749.1:2730..2730;Note=revision according to (16);gbkey=old_sequence;replace=t
+J01749.1	Genbank	mobile_genetic_element	3148	4361	.	+	.	ID=id-J01749.1:3148..4361;gbkey=mobile_element;mobile_element_type=transposon:Tn3
+J01749.1	Genbank	inverted_repeat	3148	3185	.	+	.	ID=id-J01749.1:3148..3185;Note=corresponds to one of the 38bp repeats found in Tn3 (bp 1-38 and complement (4920-4957));gbkey=repeat_region;rpt_type=inverted
+J01749.1	Genbank	gene	3293	4153	.	-	.	ID=gene-bla;Name=bla;gbkey=Gene;gene=bla
+J01749.1	Genbank	CDS	3293	4153	.	-	0	ID=cds-AAB59737.1;Parent=gene-bla;Dbxref=NCBI_GP:AAB59737.1;Name=AAB59737.1;Note=E-286;gbkey=CDS;gene=bla;product=beta-lactamase;protein_id=AAB59737.1;transl_table=11
+J01749.1	Genbank	region	4161	4165	.	-	.	ID=id-J01749.1:4161..4165;Note=Shine-Dalgarno sequence;gbkey=RBS
+J01749.1	Genbank	promoter	4188	4194	.	-	.	ID=id-J01749.1:4188..4194;Note=promoter P3 (6);gbkey=promoter
+J01749.1	Genbank	binding_site	4268	4271	.	-	.	ID=id-J01749.1:4268..4271;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	4280	4283	.	-	.	ID=id-J01749.1:4280..4283;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	4285	4288	.	-	.	ID=id-J01749.1:4285..4288;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	4296	4299	.	-	.	ID=id-J01749.1:4296..4299;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	4311	4314	.	-	.	ID=id-J01749.1:4311..4314;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	4317	4320	.	-	.	ID=id-J01749.1:4317..4320;bound_moiety=echinomycin;gbkey=misc_binding
+J01749.1	Genbank	binding_site	4331	4334	.	-	.	ID=id-J01749.1:4331..4334;bound_moiety=echinomycin;gbkey=misc_binding
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pBR322_test1.fna	Mon Jul 09 13:22:58 2018 -0400
@@ -0,0 +1,2 @@
+>J01749
+TTCTCATGTTTGACAGCTTATCATCGATAAGCTTTAATGCGGTAGTTTATCACAGTTAAATTGCTAACGCAGTCAGGCACCGTGTATGAAATCTAACAATGCGCTCATCGTCATCCTCGGCACCGTCACCCTGGATGCTGTAGGCATAGGCTTGGTTATGCCGGTACTGCCGGGCCTCTTGCGGGATATCGTCCATTCCGACAGCATCGCCAGTCACTATGGCGTGCTGCTAGCGCTATATGCGTTGATGCAATTTCTATGCGCACCCGTTCTCGGAGCACTGTCCGACCGCTTTGGCCGCCGCCCAGTCCTGCTCGCTTCGCTACTTGGAGCCACTATCGACTACGCGATCATGGCGACCACACCCGTCCTGTGGATCCTCTACGCCGGACGCATCGTGGCCGGCATCACCGGCGCCACAGGTGCGGTTGCTGGCGCCTATATCGCCGACATCACCGATGGGGAAGATCGGGCTCGCCACTTCGGGCTCATGAGCGCTTGTTTCGGCGTGGGTATGGTGGCAGGCCCCGTGGCCGGGGGACTGTTGGGCGCCATCTCCTTGCATGCACCATTCCTTGCGGCGGCGGTGCTCAACGGCCTCAACCTACTACTGGGCTGCTTCCTAATGCAGGAGTCGCATAAGGGAGAGCGTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTCAGCTCCTTCCGGTGGGCGCGGGGCATGACTATCGTCGCCGCACTTATGACTGTCTTCTTTATCATGCAACTCGTAGGACAGGTGCCGGCAGCGCTCTGGGTCATTTTCGGCGAGGACCGCTTTCGCTGGAGCGCGACGATGATCGGCCTGTCGCTTGCGGTATTCGGAATCTTGCACGCCCTCGCTCAAGCCTTCGTCACTGGTCCCGCCACCAAACGTTTCGGCGAGAAGCAGGCCATTATCGCCGGCATGGCGGCCGACGCGCTGGGCTACGTCTTGCTGGCGTTCGCGACGCGAGGCTGGATGGCCTTCCCCATTATGATTCTTCTCGCTTCCGGCGGCATCGGGATGCCCGCGTTGCAGGCCATGCTGTCCAGGCAGGTAGATGACGACCATCAGGGACAGCTTCAAGGATCGCTCGCGGCTCTTACCAGCCTAACTTCGATCACTGGACCGCTGATCGTCACGGCGATTTATGCCGCCTCGGCGAGCACATGGAACGGGTTGGCATGGATTGTAGGCGCCGCCCTATACCTTGTCTGCCTCCCCGCGTTGCGTCGCGGTGCATGGAGCCGGGCCACCTCGACCTGAATGGAAGCCGGCGGCACCTCGCTAACGGATTCACCACTCCAAGAATTGGAGCCAATCAATTCTTGCGGAGAACTGTGAATGCGCAAACCAACCCTTGGCAGAACATATCCATCGCGTCCGCCATCTCCAGCAGCCGCACGCGGCGCATCTCGGGCAGCGTTGGGTCCTGGCCACGGGTGCGCATGATCGTGCTCCTGTCGTTGAGGACCCGGCTAGGCTGGCGGGGTTGCCTTACTGGTTAGCAGAATGAATCACCGATACGCGAGCGAACGTGAAGCGACTGCTGCTGCAAAACGTCTGCGACCTGAGCAACAACATGAATGGTCTTCGGTTTCCGTGTTTCGTAAAGTCTGGAAACGCGGAAGTCAGCGCCCTGCACCATTATGTTCCGGATCTGCATCGCAGGATGCTGCTGGCTACCCTGTGGAACACCTACATCTGTATTAACGAAGCGCTGGCATTGACCCTGAGTGATTTTTCTCTGGTCCCGCCGCATCCATACCGCCAGTTGTTTACCCTCACAACGTTCCAGTAACCGGGCATGTTCATCATCAGTAACCCGTATCGTGAGCATCCTCTCTCGTTTCATCGGTATCATTACCCCCATGAACAGAAATCCCCCTTACACGGAGGCATCAGTGACCAAACAGGAAAAAACCGCCCTTAACATGGCCCGCTTTATCAGAAGCCAGACATTAACGCTTCTGGAGAAACTCAACGAGC
TGGACGCGGATGAACAGGCAGACATCTGTGAATCGCTTCACGACCACGCTGATGAGCTTTACCGCAGCTGCCTCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGCAGACAAGCCCGTCAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCGCAGCCATGACCCAGTCACGTAGCGATAGCGGAGTGTATACTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCGGTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTGCAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAACACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTAC
CGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTATAAAAATAGGCGTATCACGAGGCCCTTTCGTCTTCAAGAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pBR322_test2.fna	Mon Jul 09 13:22:58 2018 -0400
@@ -0,0 +1,65 @@
+>J01749.1 Cloning vector pBR322, complete sequence
+TTCTCATGTTTGACAGCTTATCATCGATAAGCTTTAATGCGGTAGTTTATCACAGTTAAATTGCTAACGC
+AGTCAGGCACCGTGTATGAAATCTAACAATGCGCTCATCGTCATCCTCGGCACCGTCACCCTGGATGCTG
+TAGGCATAGGCTTGGTTATGCCGGTACTGCCGGGCCTCTTGCGGGATATCGTCCATTCCGACAGCATCGC
+CAGTCACTATGGCGTGCTGCTAGCGCTATATGCGTTGATGCAATTTCTATGCGCACCCGTTCTCGGAGCA
+CTGTCCGACCGCTTTGGCCGCCGCCCAGTCCTGCTCGCTTCGCTACTTGGAGCCACTATCGACTACGCGA
+TCATGGCGACCACACCCGTCCTGTGGATCCTCTACGCCGGACGCATCGTGGCCGGCATCACCGGCGCCAC
+AGGTGCGGTTGCTGGCGCCTATATCGCCGACATCACCGATGGGGAAGATCGGGCTCGCCACTTCGGGCTC
+ATGAGCGCTTGTTTCGGCGTGGGTATGGTGGCAGGCCCCGTGGCCGGGGGACTGTTGGGCGCCATCTCCT
+TGCATGCACCATTCCTTGCGGCGGCGGTGCTCAACGGCCTCAACCTACTACTGGGCTGCTTCCTAATGCA
+GGAGTCGCATAAGGGAGAGCGTCGACCGATGCCCTTGAGAGCCTTCAACCCAGTCAGCTCCTTCCGGTGG
+GCGCGGGGCATGACTATCGTCGCCGCACTTATGACTGTCTTCTTTATCATGCAACTCGTAGGACAGGTGC
+CGGCAGCGCTCTGGGTCATTTTCGGCGAGGACCGCTTTCGCTGGAGCGCGACGATGATCGGCCTGTCGCT
+TGCGGTATTCGGAATCTTGCACGCCCTCGCTCAAGCCTTCGTCACTGGTCCCGCCACCAAACGTTTCGGC
+GAGAAGCAGGCCATTATCGCCGGCATGGCGGCCGACGCGCTGGGCTACGTCTTGCTGGCGTTCGCGACGC
+GAGGCTGGATGGCCTTCCCCATTATGATTCTTCTCGCTTCCGGCGGCATCGGGATGCCCGCGTTGCAGGC
+CATGCTGTCCAGGCAGGTAGATGACGACCATCAGGGACAGCTTCAAGGATCGCTCGCGGCTCTTACCAGC
+CTAACTTCGATCACTGGACCGCTGATCGTCACGGCGATTTATGCCGCCTCGGCGAGCACATGGAACGGGT
+TGGCATGGATTGTAGGCGCCGCCCTATACCTTGTCTGCCTCCCCGCGTTGCGTCGCGGTGCATGGAGCCG
+GGCCACCTCGACCTGAATGGAAGCCGGCGGCACCTCGCTAACGGATTCACCACTCCAAGAATTGGAGCCA
+ATCAATTCTTGCGGAGAACTGTGAATGCGCAAACCAACCCTTGGCAGAACATATCCATCGCGTCCGCCAT
+CTCCAGCAGCCGCACGCGGCGCATCTCGGGCAGCGTTGGGTCCTGGCCACGGGTGCGCATGATCGTGCTC
+CTGTCGTTGAGGACCCGGCTAGGCTGGCGGGGTTGCCTTACTGGTTAGCAGAATGAATCACCGATACGCG
+AGCGAACGTGAAGCGACTGCTGCTGCAAAACGTCTGCGACCTGAGCAACAACATGAATGGTCTTCGGTTT
+CCGTGTTTCGTAAAGTCTGGAAACGCGGAAGTCAGCGCCCTGCACCATTATGTTCCGGATCTGCATCGCA
+GGATGCTGCTGGCTACCCTGTGGAACACCTACATCTGTATTAACGAAGCGCTGGCATTGACCCTGAGTGA
+TTTTTCTCTGGTCCCGCCGCATCCATACCGCCAGTTGTTTACCCTCACAACGTTCCAGTAACCGGGCATG
+TTCATCATCAGTAACCCGTATCGTGAGCATCCTCTCTCGTTTCATCGGTATCATTACCCCCATGAACAGA
+AATCCCCCTTACACGGAGGCATCAGTGACCAAACAGGAAAAAACCGCCCTTAACATGGCCCGCTTTATCA
+GAAGCCAGACATTAACGCTTCTGGAGAAACTCAACGAGCTGGACGCGGATGAACAGGCAGACATCTGTGA
+ATCGCTTCACGACCACGCTGATGAGCTTTACCGCAGCTGCCTCGCGCGTTTCGGTGATGACGGTGAAAAC
+CTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGCAGACAAGCCC
+GTCAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCGCAGCCATGACCCAGTCACGTAGCGATAGCGG
+AGTGTATACTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCGGTGTGAAA
+TACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCT
+GCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCACAGAA
+TCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCG
+CGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAG
+GTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCT
+GTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATA
+GCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCC
+CGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTA
+TCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCT
+TGAAGTGGTGGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGT
+TACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTT
+GTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGT
+CTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCAC
+CTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGAC
+AGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCT
+GACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACC
+GCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGA
+AGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTT
+CGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTGCAGGCATCGTGGTGTCACGCTCGTCGTTTGG
+TATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAA
+GCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTA
+TGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTC
+AACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAACACGGGATAAT
+ACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAA
+GGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTT
+TACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCG
+ACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTC
+TCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCG
+AAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTATAAAAATAGGCGTATCACG
+AGGCCCTTTCGTCTTCAAGAA
+
Binary file test-data/pBR322_test2.fna.gz has changed