changeset 0:e02e9af2743f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/roary commit 1fbe605d5ac8fd5d91fec6fb24fcfc51788c62b6
author iuc
date Fri, 23 Jun 2017 07:53:29 -0400
parents
children a9098353ae22
files roary.xml test-data/ex1.gff test-data/ex2.gff test-data/out/summary_statistics.txt test-data/test2/summary_statistics.txt
diffstat 5 files changed, 725 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/roary.xml	Fri Jun 23 07:53:29 2017 -0400
@@ -0,0 +1,173 @@
+<tool id="roary" name="Roary" version="0.1.0">
+    <description>the pangenome pipeline - Quickly generate a core gene alignment from gff3 files</description>
+    
+    <requirements>
+      <requirement type="package" version="3.8.2">roary</requirement>
+    </requirements>
+    
+    <command detect_errors="exit_code"><![CDATA[
+        ## Stage each selected dataset as temp<N>.gff in the job directory so roary can be given one glob.
+        #for $counter, $gff in enumerate($gff_input.gffs)
+          cp '$gff' temp${counter}.gff &&
+        #end for
+        ## Per the Roary usage text: -e builds the core gene alignment, -n makes it the fast MAFFT variant.
+        roary
+          -f out
+          -p \${GALAXY_SLOTS:-1}
+          -e
+          -n
+          -i '$percent_ident'
+          -cd '$core_diff'
+          -g '$advanced.maxclust'
+          $advanced.split_para
+          -t '$advanced.trans_tab'
+          -iv '$advanced.mcl'
+          ## The glob is left unquoted on purpose: the shell must expand it to the staged copies.
+          temp*.gff
+          
+    ]]></command>
+    
+    <inputs>
+      <conditional name="gff_input">
+        <param name="gff_input_selector" type="select" label="Individual gff files or a dataset collection" help="Select between individual gff files or a collection of gff files">
+          <option value="individual">Individual</option>
+          <option value="collection">Collection</option>
+        </param>
+        <when value="individual">
+          <param name="gffs" type="data" multiple="true" format="gff3" label="select gff inputs to Roary" help="Select the files you wish to send to Roary, must be in gff3 format with the sequence data at the end of the file." />
+        </when>
+        <when value="collection">
+          <param name="gffs" type="data_collection" collection_type="list" format="gff3" label="Dataset collection to submit to Roary" help="A dataset list collection of gff3 files to send to Roary for analysis." />
+        </when>
+      </conditional>
+      <param name="percent_ident" type="integer" min="0" max="100" label="minimum percentage identity for blastp" help="Sets the minimum percentage identity for protein matches" value="95"/>
+      <param name="core_diff" type="float" min="0" max="100" label="percentage of isolates a gene must be in to be core" help="The total percentage of the isolates that must have the gene for it to be considered a core gene." value="99.0"/>
+      <!-- Optional result files; each option value is tested by a filter in the outputs section. -->
+      <param name="outputs" type="select" multiple="true" display="checkboxes" label="Additional outputs" help="Summary statistics, core gene alignments and Gene Presence/Absence tables are always produced.">
+        <option value="abg_fa">Accessory binary genes in fasta format</option>
+        <option value="accgraph">Accessory graph in dot format</option>
+        <option value="acchead_embl">Accessory header file in embl format</option>
+        <option value="acctab">Accessory gene table in tabular format</option>
+        <option value="blastfreq">Blast Identity Frequency Rtab file</option>
+        <option value="clust">Clustered proteins file</option>
+        <option value="coreaccgraph">Core Accessory Graph in dot format</option>
+        <option value="coreaccembl">Core Accessory table in embl format</option>
+        <option value="coreacctab">Core Accessory table in tabular format</option>
+        <option value="genepa_rtab">Gene Presence Absence file in Rtab format</option>
+        <option value="numcons_rtab">Number of Conserved Genes in Rtab format</option>
+        <option value="numpangene_rtab">Number of Genes in Pan Genome in Rtab format</option>
+        <option value="numnew_rtab">Number of New Genes in Rtab format</option>
+        <option value="numuniq_rtab">Number of Unique Genes in Rtab format</option>
+      </param>
+      <!-- Tuning options forwarded to roary as -g, -s, -t and -iv by the command block. -->
+      <section name="advanced" title="Advanced options" expanded="false">
+        <param name="maxclust" type="integer" value="50000" label="Maximum number of clusters" help="The maximum number of clusters to assign proteins" />
+        <param name="split_para" type="boolean" checked="False" truevalue="-s" falsevalue="" label="Don't split paralogs" help="Click yes so that paralogs aren't split."/>
+        <param name="trans_tab" type="integer" value="11" label="Translation table to use [1 or 4 or 11]" help="Genetic code (codon translation table) to use. Roary is designed for prokaryotes and so tables 1, 4 or 11 will work, others could be problematic." />
+        <param name="mcl" type="float" value="1.5" label="MCL inflation value" help="This is an advanced setting, change only if you know what you are doing and then at your own risk!" />
+      </section>
+
+    </inputs>
+    
+    <outputs><!-- The first three datasets are always produced; every other one is created only when its value is ticked in the "outputs" select. -->
+      <data format="tabular" name="sumstats" label="${tool.name} on ${on_string} Summary statistics" from_work_dir="out/summary_statistics.txt" />
+      <data format="fasta" name="core_gene_aln" label="${tool.name} on ${on_string} Core Gene Alignment" from_work_dir="out/core_gene_alignment.aln" />
+      <data format="csv" name="gene_p_a" label="${tool.name} on ${on_string} Gene Presence Absence" from_work_dir="out/gene_presence_absence.csv" />
+      <data format="fasta" name="acc_bin" label="${tool.name} on ${on_string} Accessory Binary Genes" from_work_dir="out/accessory_binary_genes.fa" >
+        <filter>outputs and 'abg_fa' in outputs</filter>
+      </data>
+      <data format="dot" name="acc_graph" label="${tool.name} on ${on_string} Accessory Graph" from_work_dir="out/accessory_graph.dot">
+        <filter>outputs and 'accgraph' in outputs</filter>
+      </data>
+      <data format="embl" name="acc_head_embl" label="${tool.name} on ${on_string} Accessory Header" from_work_dir="out/accessory.header.embl" >
+        <filter>outputs and 'acchead_embl' in outputs</filter>
+      </data>
+      <data format="tabular" name="acc_tab" label="${tool.name} on ${on_string} Accessory Gene Table" from_work_dir="out/accessory.tab" >
+        <filter>outputs and 'acctab' in outputs</filter>
+      </data>
+      <data format="txt" name="blast_freq" label="${tool.name} on ${on_string} Blast Identity Frequencies" from_work_dir="out/blast_identity_frequency.Rtab" >
+        <filter>outputs and 'blastfreq' in outputs</filter>
+      </data>
+      <data format="txt" name="clust_file" label="${tool.name} on ${on_string} Clustered Proteins" from_work_dir="out/clustered_proteins" >
+        <filter>outputs and 'clust' in outputs</filter>
+      </data>
+      <data format="dot" name="core_acc_graph" label="${tool.name} on ${on_string} Core Accessory Graph" from_work_dir="out/core_accessory_graph.dot" >
+        <filter>outputs and 'coreaccgraph' in outputs</filter>
+      </data>
+      <data format="embl" name="core_acc_embl" label="${tool.name} on ${on_string} Core Accessory EMBL" from_work_dir="out/core_accessory.header.embl" >
+        <filter>outputs and 'coreaccembl' in outputs</filter>
+      </data>
+      <data format="tabular" name="core_acc_tab" label="${tool.name} on ${on_string} Core Accessory Table" from_work_dir="out/core_accessory.tab" >
+        <filter>outputs and 'coreacctab' in outputs</filter>
+      </data>
+      <data format="txt" name="gene_p_a_rtab" label="${tool.name} on ${on_string} Gene Presence Absence Rtab" from_work_dir="out/gene_presence_absence.Rtab" >
+        <filter>outputs and 'genepa_rtab' in outputs</filter>
+      </data>
+      <data format="txt" name="num_cons_rtab" label="${tool.name} on ${on_string} Number of Conserved Genes" from_work_dir="out/number_of_conserved_genes.Rtab" >
+        <filter>outputs and 'numcons_rtab' in outputs</filter>
+      </data>
+      <data format="txt" name="num_pangene_rtab" label="${tool.name} on ${on_string} Number of Genes in Pan Genome" from_work_dir="out/number_of_genes_in_pan_genome.Rtab" >
+        <filter>outputs and 'numpangene_rtab' in outputs</filter>
+      </data>
+      <data format="txt" name="num_new_rtab" label="${tool.name} on ${on_string} Number of New Genes" from_work_dir="out/number_of_new_genes.Rtab" >
+        <filter>outputs and 'numnew_rtab' in outputs</filter>
+      </data>
+      <data format="txt" name="num_uniq_rtab" label="${tool.name} on ${on_string} Number of Unique Genes" from_work_dir="out/number_of_unique_genes.Rtab" >
+        <filter>outputs and 'numuniq_rtab' in outputs</filter>
+      </data>
+    </outputs>
+    
+    <tests>
+      <test><!-- Default thresholds on two individually selected GFF3 files. -->
+        <param value="individual" name="gff_input_selector" />
+        <param ftype="gff3" name="gffs" value="ex1.gff,ex2.gff" />
+        <output ftype="tabular" name="sumstats" file="out/summary_statistics.txt" />
+      </test>
+      <test><!-- Same inputs with identity and core thresholds both lowered to 50. -->
+        <param value="individual" name="gff_input_selector" />
+        <param ftype="gff3" name="gffs" value="ex1.gff,ex2.gff" />
+        <param value="50" name="percent_ident" />
+        <param value="50.0" name="core_diff" />
+        <output ftype="tabular" name="sumstats" file="test2/summary_statistics.txt" />
+      </test>
+    </tests>
+    
+    <help><![CDATA[
+**Roary**
+
+Roary is a high speed stand alone pan genome pipeline, which takes annotated assemblies in GFF3 format (produced by Prokka) and calculates the pan genome. Using a standard desktop PC, it can analyse datasets with thousands of samples, something which is computationally infeasible with existing methods, without compromising the quality of the results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a single processor. To perform this analysis using existing methods would take weeks and hundreds of GB of RAM.
+
+To use Roary, select two or more gff3 files OR a collection of gff3 files.
+
+**Options**: 
+
+
+       - Minimum percentage identity for blastp - an integer, default is [95]
+
+       
+       - Percentage of isolates a gene must be in to be core - a float, default is [99.0]
+       
+       
+**Advanced Options**:
+       
+       - Maximum number of clusters - integer, default is [50000]
+
+       
+       - Don't split paralogs - check box
+
+       
+       - Translation table - which translation table to use, an integer, default is [11]
+
+       
+       - Change the MCL inflation value - a float, default is [1.5]
+       
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+    
+    ]]></help>
+    
+    <citations><!-- A doi-type citation holds the bare DOI, not a resolver URL. -->
+        <citation type="doi">10.1093/bioinformatics/btv421</citation>
+    </citations>
+    
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ex1.gff	Fri Jun 23 07:53:29 2017 -0400
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=abc_00001;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=abc_00005;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=abc_00007;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C Asp/Orn binding domain;protein_id=gnl|SC|abc_00007
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=abc_00015;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=abc_00016;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ex2.gff	Fri Jun 23 07:53:29 2017 -0400
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=abc_00001;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=abc_00005;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=abc_00007;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C Asp/Orn binding domain;protein_id=gnl|SC|abc_00007
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=abc_00015;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=abc_00016;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out/summary_statistics.txt	Fri Jun 23 07:53:29 2017 -0400
@@ -0,0 +1,5 @@
+Core genes	(99% <= strains <= 100%)	15
+Soft core genes	(95% <= strains < 99%)	0
+Shell genes	(15% <= strains < 95%)	0
+Cloud genes	(0% <= strains < 15%)	0
+Total genes	(0% <= strains <= 100%)	15
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2/summary_statistics.txt	Fri Jun 23 07:53:29 2017 -0400
@@ -0,0 +1,5 @@
+Core genes	(50% <= strains <= 100%)	15
+Soft core genes	(49% <= strains < 50%)	0
+Shell genes	(15% <= strains < 49%)	0
+Cloud genes	(0% <= strains < 15%)	0
+Total genes	(0% <= strains <= 100%)	15