Repository 'metagene_annotator'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/metagene_annotator

Changeset 0:b04960a7abf5 (2018-03-21)
Next changeset 1:17c7ab82bfbc (2024-03-18)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/metagene_annotator commit 6d8b6e0fa2f1b47b337dbf21f5bc320586ccbd4c
added:
convert_mga.py
metagene_annotator.xml
static/images/Sixgill_MetaGeneAnnotator_Workflow.png
test-data/m_mga.bed
test-data/m_mga.tsv
test-data/m_mga.txt
test-data/metasequences.fasta
test-data/metasequences1.fasta
test-data/metasequences2.fasta
test-data/s_mga.bed
test-data/s_mga.tsv
test-data/s_mga.txt
b
diff -r 000000000000 -r b04960a7abf5 convert_mga.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_mga.py Wed Mar 21 17:15:25 2018 -0400
[
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import math
+import re
+import sys
+
+
+def __main__():
+    parser = argparse.ArgumentParser(
+        description='Convert mga output to bed and tsv')
+    parser.add_argument(
+        'input_mga',
+        help="mga output to convert,  '-' for stdin")
+    parser.add_argument(
+        '-t', '--tsv', default=None,
+        help='Path to output mga.tsv')
+    parser.add_argument(
+        '-b', '--bed', default=None,
+        help='Path to output mga.bed')
+    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose')
+    args = parser.parse_args()
+
+    input_rdr = open(args.input_mga, 'r')\
+        if args.input_mga != '-' else sys.stdin
+
+    bed_wtr = open(args.bed, 'w') if args.bed is not None else None
+    tsv_wtr = open(args.tsv, 'w') if args.bed is not None else None
+    if tsv_wtr:
+        tsv_wtr.write('#%s\n' % '\t'.join([
+            'seq_id', 'seq_model', 'seq_gc', 'seq_rbs',
+            'gene ID', 'start pos', 'end pos', 'strand', 'frame',
+            'complete/partial', 'gene score', 'used model',
+            'rbs start', 'rbs end', 'rbs score']))
+
+    gc_rbs_pat = '# gc = (-?[0-9]*[.]?[0-9]+), rbs = (-?[0-9]*[.]?[0-9]+)' 
+    seq_count = 0
+    gene_count = 0
+    for i, line in enumerate(input_rdr):
+        # 1317/1
+        # gc = 0.272955, rbs = -1
+        # self: -
+        if line.startswith('# gc'):
+            try:
+                m = re.match(gc_rbs_pat, line.strip())
+                seq_gc, seq_rbs = m.groups()
+            except:
+                seq_gc = seq_rbs = ''
+        elif line.startswith('# self:'):
+            seq_type = re.sub('# self:', '', line.rstrip())
+        elif line.startswith('# '):
+            seq_name = re.sub('# (\S+).*$', '\\1', line.rstrip())
+            seq_count += 1
+        else:
+            fields = line.split('\t')
+            if len(fields) == 11:
+                gene_count += 1
+                start = int(fields[1]) - 1
+                end = int(fields[2])
+                if tsv_wtr:
+                    tsv_wtr.write('%s\t%s\t%s\t%s\t%s' % (
+                        seq_name,
+                        seq_type,
+                        seq_gc,
+                        seq_rbs,
+                        line))
+                if bed_wtr:
+                    bed_wtr.write(
+                        '%s\t%d\t%d\t%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n' % (
+                            seq_name,
+                            start,
+                            end,
+                            '%s:%s' % (seq_name, fields[0]),
+                            int(math.ceil(float(fields[6]))),
+                            fields[3],
+                            start,
+                            end,
+                            0,
+                            1,
+                            abs(end - start),
+                            0))
+
+    if args.verbose:
+        print("sequences: %d\tgenes: %d"
+              % (seq_count, gene_count), file=sys.stdout)
+
+
+if __name__ == "__main__":  # only run the converter when executed as a script
+    __main__()
b
diff -r 000000000000 -r b04960a7abf5 metagene_annotator.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/metagene_annotator.xml Wed Mar 21 17:15:25 2018 -0400
[
b'@@ -0,0 +1,186 @@\n+<tool id="metagene_annotator" name="MetaGeneAnnotator" version="1.0.0">\n+    <description>gene-finding program for prokaryote and phage (used by sixgill)</description>\n+    <requirements>\n+        <requirement type="package">metagene_annotator</requirement>\n+        <requirement type="package">python</requirement>\n+    </requirements>\n+    <command detect_errors="exit_code"><![CDATA[\n+        #set $output_list = str($output_formats).split(\',\')\n+        touch mga_output\n+        #for $input in $inputs:\n+            && mga ${input} $multiple_species >> mga_output\n+        #end for\n+        #if \'tsv\' in $output_list or \'bed\' in $output_list:\n+            && python \'$__tool_directory__/convert_mga.py\' mga_output -v \n+            #if \'tsv\' in $output_list\n+                --tsv \'$mga_tsv\'\n+            #end if\n+            #if \'bed\' in $output_list\n+                --bed \'$mga_bed\'\n+            #end if\n+        #end if\n+    ]]></command>\n+    <inputs>\n+        <param name="inputs" type="data" format="fasta" multiple="true" label="prokaryote DNA sequences"/>\n+        <param name="multiple_species" type="boolean" truevalue="-m" falsevalue="-s" checked="true" \n+               label="MetaGenomic - Sequences are from multiple organisms" />\n+        <param name="output_formats" type="select" multiple="true" display="checkboxes" label="output formats">\n+            <option value="txt" selected="true">MetaGeneAnnotator text report</option>\n+            <option value="tsv">MetaGeneAnnotator tabular report with sequence columns</option>\n+            <option value="bed">MetaGeneAnnotator in BED format</option>\n+        </param>\n+    </inputs>\n+    <outputs>\n+        <data name="mga_txt" format="txt" from_work_dir="mga_output" label="${tool.name} on ${on_string} metagenefile">\n+            <filter>\'txt\' in output_formats</filter>\n+        </data>\n+        <data name="mga_tsv" format="tabular" 
label="${tool.name} on ${on_string} mga table">\n+            <filter>\'tsv\' in output_formats</filter>\n+            <actions>\n+                <action name="column_names" type="metadata" \n+                 default="seq_ID,seq_model,seq_gc,seq_rbs,gene ID,start pos,end pos,strand,frame,complete/partial,gene score,used model,rbs start,rbs end,rbs score"/>\n+            </actions>\n+        </data>\n+        <data name="mga_bed" format="bed" label="${tool.name} on ${on_string} mga bed">\n+            <filter>\'bed\' in output_formats</filter>\n+            <actions>\n+                <action name="column_names" type="metadata" \n+                 default="chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts"/>\n+            </actions>\n+        </data>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="inputs" value="metasequences.fasta" ftype="fasta"/>\n+            <param name="multiple_species" value="True"/>\n+            <param name="output_formats" value="txt"/>\n+            <output name="mga_txt"> \n+                <assert_contents>\n+                    <has_text_matching expression="# 1/1\\s# gc = 0.275862, rbs = -1\\s# self: -" />\n+                    <has_text_matching expression="gene_1\\t1812\\t1994\\t-\\t0\\t11\\t14.10\\d+\\tb\\t2002\\t2007\\t2.11\\d+" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test>\n+            <param name="inputs" value="metasequences.fasta" ftype="fasta"/>\n+            <param name="multiple_species" value="False"/>\n+            <param name="output_formats" value="txt"/>\n+            <output name="mga_txt"> \n+                <assert_contents>\n+                    <has_text_matching expression="# 1/1\\s# gc = 0.275862, rbs = 0.428571\\s# self: b" />\n+                    <has_text_matching expression="gene_1\\t1812\\t1994\\t-\\t0\\t11\\t12.48\\d+\\tb\\t2002\\t2007\\t0.49\\d+" />\n+                
</assert_contents>\n+            </output>\n+        </test>\n+        <!-- Try these later\n+        <test>\n+            <param name="inputs" v'..b'           <has_text_matching expression="gene_1\\t1812\\t1994\\t-\\t0\\t11\\t14.10\\d+\\tb\\t2002\\t2007\\t2.11\\d+" />\n+                </assert_contents>\n+            </output>\n+            <output name="mga_tsv"> \n+                <assert_contents>\n+                    <has_text_matching expression="#seq_id\\tseq_model\\tseq_gc\\tseq_rbs" />\n+                    <has_text_matching expression="1/1\\t-\\t0.27\\d+\\t-1\\tgene_1\\t1812\\t1994\\t-\\t0\\t11\\t14.1035\\tb\\t2002\\t2007\\t2.11\\d+" />\n+                </assert_contents>\n+            </output>\n+            <output name="mga_bed"> \n+                <assert_contents>\n+                    <has_text_matching expression="1/1\\t1811\\t1994\\t1/1:gene_1\\t15\\t-\\t1811\\t1994\\t0\\t1\\t183\\t0" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        -->\n+    </tests>\n+    <help><![CDATA[\n+**MetaGeneAnnotator (mga)**\n+\n+A gene-finding program for prokaryote and phage.\n+\n+The gene annotations can be used by sixgill_ when generating metapeptides from metagenomics shotgun sequencing.\n+\n+.. 
image:: Sixgill_MetaGeneAnnotator_Workflow.png\n+  :height: 213\n+  :width: 625\n+\n+usage:\n+    mga [multi-fasta] <-m/-s>\n+\n+         -m    (multiple species sequences are individually treated)\n+         -s    (single species sequences are treated as a unit)\n+\n+**Input:**\n+    *A fasta file of metagenomic sequences*\n+\n+\n+**Outputs:**\n+\n+    *MetaGeneAnnotator text report*\n+        Output from the MetaGeneAnnotator mga application::\n+\n+            # 1/1\n+            # gc = 0.275862, rbs = -1\n+            # self: -\n+            gene_1\t1812\t1994\t-\t0\t11\t14.1035\tb\t2002\t2007\t2.11797\n+            # 2/1\n+            # gc = 0.338877, rbs = -1\n+            # self: -\n+            gene_1\t1\t414\t+\t0\t01\t25.748\tb\t.\t.\t.\n+            gene_2\t614\t790\t+\t0\t11\t0.774142\tb\t.\t.\t.\n+            gene_3\t822\t1079\t+\t0\t11\t20.6507\tb\t.\t.\t.\n+\n+        output format description::\n+\n+            # [sequence name]\n+            # gc = [gc%], rbs = [rbs%]\n+            # self: [(b)acteria/(a)rchaea/(p)hage/unused(-)]\n+            [gene ID] [start pos.] [end pos.] [strand] [frame] [complete/partial] [gene score] [used model] [rbs start] [rbs end] [rbs score]\n+\n+            explanations of output column:\n+                *The value of [frame] (0/1/2) indicates the number of surplus (untranslated) nucleotides at the 5\'-end of the predicted ORF.\n+                *The value of [score] indicates the estimated score of predicted gene. All predicted genes are more than 0.\n+                *The value of [complete/partial] indicates that the predicted gene structure is whether complete (contains both of start and stop codons[11]) or partial (lacks start[01] or stop[10] or both of them[00]).\n+                *The value of [model] indicates a selected model ((s)elf/(b)acteria/(a)rchaea/(p)hage) for predicting the gene. 
\n+\n+\n+    *MetaGeneAnnotator tabular report with sequence columns*\n+        The mga output reformated as a tabular file::\n+\n+            #seq_id\tseq_model\tseq_gc\tseq_rbs\tgene ID\tstart pos\tend pos\tstrand\tframe\tcomplete/partial\tgene score\tused model\trbs start\trbs end\trbs score\n+            1/1\t -\t0.275862\t-1\tgene_1\t1812\t1994\t-\t0\t11\t14.1035\tb\t2002\t2007\t2.11797\n+            2/1\t -\t0.338877\t-1\tgene_1\t1\t414\t+\t0\t01\t25.748\tb\t.\t.\t.\n+            2/1\t -\t0.338877\t-1\tgene_2\t614\t790\t+\t0\t11\t0.774142\tb\t.\t.\t.\n+            2/1\t -\t0.338877\t-1\tgene_3\t822\t1079\t+\t0\t11\t20.6507\tb\t.\t.\t.\n+\n+\n+    *MetaGeneAnnotator in BED format*\n+        The mga output reformatted as a BED file which can be used to extract the DNA sequences for each gene from the fasta file::\n+\n+            1/1\t1811\t1994\t1/1:gene_1\t15\t-\t1811\t1994\t0\t1\t183\t0\n+            2/1\t0\t414\t2/1:gene_1\t26\t+\t0\t414\t0\t1\t414\t0\n+            2/1\t613\t790\t2/1:gene_2\t1\t+\t613\t790\t0\t1\t177\t0\n+            2/1\t821\t1079\t2/1:gene_3\t21\t+\t821\t1079\t0\t1\t258\t0\n+\n+\n+.. _sixgill: https://github.com/dhmay/sixgill\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1093/dnares/dsn027</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r b04960a7abf5 static/images/Sixgill_MetaGeneAnnotator_Workflow.png
b
Binary file static/images/Sixgill_MetaGeneAnnotator_Workflow.png has changed
b
diff -r 000000000000 -r b04960a7abf5 test-data/m_mga.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/m_mga.bed Wed Mar 21 17:15:25 2018 -0400
b
@@ -0,0 +1,40 @@
+1/1 1811 1994 1/1:gene_1 15 - 1811 1994 0 1 183 0
+2/1 0 414 2/1:gene_1 26 + 0 414 0 1 414 0
+2/1 613 790 2/1:gene_2 1 + 613 790 0 1 177 0
+2/1 821 1079 2/1:gene_3 21 + 821 1079 0 1 258 0
+2/1 1960 2140 2/1:gene_4 11 + 1960 2140 0 1 180 0
+2/1 2327 2405 2/1:gene_5 3 - 2327 2405 0 1 78 0
+3/1 61 220 3/1:gene_1 3 + 61 220 0 1 159 0
+3/1 559 754 3/1:gene_2 30 + 559 754 0 1 195 0
+3/1 1238 1403 3/1:gene_3 4 + 1238 1403 0 1 165 0
+3/1 1581 1798 3/1:gene_4 14 - 1581 1798 0 1 217 0
+4/1 2 431 4/1:gene_1 9 - 2 431 0 1 429 0
+4/1 581 743 4/1:gene_2 8 + 581 743 0 1 162 0
+4/1 1458 1746 4/1:gene_3 17 + 1458 1746 0 1 288 0
+4/1 2281 2372 4/1:gene_4 5 - 2281 2372 0 1 91 0
+5/1 0 191 5/1:gene_1 25 + 0 191 0 1 191 0
+5/1 397 619 5/1:gene_2 14 + 397 619 0 1 222 0
+5/1 780 1143 5/1:gene_3 27 + 780 1143 0 1 363 0
+5/1 1163 1412 5/1:gene_4 10 + 1163 1412 0 1 249 0
+5/1 1811 1871 5/1:gene_5 4 - 1811 1871 0 1 60 0
+6/1 149 344 6/1:gene_1 4 - 149 344 0 1 195 0
+6/1 401 791 6/1:gene_2 16 - 401 791 0 1 390 0
+6/1 768 945 6/1:gene_3 2 - 768 945 0 1 177 0
+6/1 1626 1941 6/1:gene_4 31 - 1626 1941 0 1 315 0
+7/1 0 330 7/1:gene_1 33 - 0 330 0 1 330 0
+7/1 358 997 7/1:gene_2 47 - 358 997 0 1 639 0
+7/1 1076 1349 7/1:gene_3 30 - 1076 1349 0 1 273 0
+7/1 1481 1703 7/1:gene_4 12 - 1481 1703 0 1 222 0
+7/1 1747 1857 7/1:gene_5 2 - 1747 1857 0 1 110 0
+8/1 0 193 8/1:gene_1 5 + 0 193 0 1 193 0
+8/1 589 745 8/1:gene_2 12 + 589 745 0 1 156 0
+8/1 1821 1903 8/1:gene_3 4 + 1821 1903 0 1 82 0
+9/1 117 543 9/1:gene_1 5 - 117 543 0 1 426 0
+9/1 622 901 9/1:gene_2 9 - 622 901 0 1 279 0
+9/1 946 1105 9/1:gene_3 5 - 946 1105 0 1 159 0
+9/1 1313 1505 9/1:gene_4 9 - 1313 1505 0 1 192 0
+9/1 1548 1727 9/1:gene_5 9 - 1548 1727 0 1 179 0
+10/1 0 66 10/1:gene_1 4 - 0 66 0 1 66 0
+10/1 256 715 10/1:gene_2 44 - 256 715 0 1 459 0
+10/1 1136 1310 10/1:gene_3 6 - 1136 1310 0 1 174 0
+10/1 1626 1824 10/1:gene_4 10 - 1626 1824 0 1 198 0
b
diff -r 000000000000 -r b04960a7abf5 test-data/m_mga.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/m_mga.tsv Wed Mar 21 17:15:25 2018 -0400
b
@@ -0,0 +1,41 @@
+#seq_id seq_model seq_gc seq_rbs gene ID start pos end pos strand frame complete/partial gene score used model rbs start rbs end rbs score
+1/1  - 0.275862 -1 gene_1 1812 1994 - 0 11 14.1035 b 2002 2007 2.11797
+2/1  - 0.338877 -1 gene_1 1 414 + 0 01 25.748 b . . .
+2/1  - 0.338877 -1 gene_2 614 790 + 0 11 0.774142 b . . .
+2/1  - 0.338877 -1 gene_3 822 1079 + 0 11 20.6507 b . . .
+2/1  - 0.338877 -1 gene_4 1961 2140 + 0 11 10.3813 b . . .
+2/1  - 0.338877 -1 gene_5 2328 2405 - 0 01 2.71033 b . . .
+3/1  - 0.269188 -1 gene_1 62 220 + 0 11 2.43434 p 52 57 0.162572
+3/1  - 0.269188 -1 gene_2 560 754 + 0 11 29.2723 p . . .
+3/1  - 0.269188 -1 gene_3 1239 1403 + 0 11 3.28908 p . . .
+3/1  - 0.269188 -1 gene_4 1582 1798 - 1 01 13.4314 p . . .
+4/1  - 0.550169 -1 gene_1 3 431 - 0 11 8.00428 p 440 445 -0.537592
+4/1  - 0.550169 -1 gene_2 582 743 + 0 11 7.07558 p 569 574 -2.00256
+4/1  - 0.550169 -1 gene_3 1459 1746 + 0 11 16.2536 p . . .
+4/1  - 0.550169 -1 gene_4 2282 2372 - 1 01 4.40418 p . . .
+5/1  - 0.256547 -1 gene_1 1 191 + 2 01 24.4289 b . . .
+5/1  - 0.256547 -1 gene_2 398 619 + 0 11 13.8688 b . . .
+5/1  - 0.256547 -1 gene_3 781 1143 + 0 11 26.4034 b . . .
+5/1  - 0.256547 -1 gene_4 1164 1412 + 0 11 9.28281 b 1152 1157 -2.19822
+5/1  - 0.256547 -1 gene_5 1812 1871 - 0 01 3.07971 b . . .
+6/1  - 0.329727 -1 gene_1 150 344 - 0 11 3.09009 b . . .
+6/1  - 0.329727 -1 gene_2 402 791 - 0 11 15.7382 b . . .
+6/1  - 0.329727 -1 gene_3 769 945 - 0 11 1.97601 b 952 957 -2.75668
+6/1  - 0.329727 -1 gene_4 1627 1941 - 0 01 30.867 b . . .
+7/1  - 0.316101 -1 gene_1 1 330 - 0 10 32.7357 b . . .
+7/1  - 0.316101 -1 gene_2 359 997 - 0 11 46.468 b . . .
+7/1  - 0.316101 -1 gene_3 1077 1349 - 0 11 29.3212 b 1355 1360 -1.77606
+7/1  - 0.316101 -1 gene_4 1482 1703 - 0 11 11.6788 b 1716 1721 0.607519
+7/1  - 0.316101 -1 gene_5 1748 1857 - 2 01 1.33924 b . . .
+8/1  - 0.35155 -1 gene_1 1 193 + 1 01 4.66209 b . . .
+8/1  - 0.35155 -1 gene_2 590 745 + 0 11 11.9555 b . . .
+8/1  - 0.35155 -1 gene_3 1822 1903 + 0 10 3.61015 b . . .
+9/1  - 0.57209 -1 gene_1 118 543 - 0 11 4.22762 p . . .
+9/1  - 0.57209 -1 gene_2 623 901 - 0 11 8.51219 p . . .
+9/1  - 0.57209 -1 gene_3 947 1105 - 0 11 4.01029 p . . .
+9/1  - 0.57209 -1 gene_4 1314 1505 - 0 11 8.25466 p . . .
+9/1  - 0.57209 -1 gene_5 1549 1727 - 2 01 8.49456 p . . .
+10/1  - 0.314145 -1 gene_1 1 66 - 0 10 3.31418 b 80 85 -0.655414
+10/1  - 0.314145 -1 gene_2 257 715 - 0 11 43.0773 b 722 727 -2.59179
+10/1  - 0.314145 -1 gene_3 1137 1310 - 0 11 5.91709 b 1322 1327 -2.35659
+10/1  - 0.314145 -1 gene_4 1627 1824 - 0 01 9.87622 b . . .
b
diff -r 000000000000 -r b04960a7abf5 test-data/m_mga.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/m_mga.txt Wed Mar 21 17:15:25 2018 -0400
b
@@ -0,0 +1,70 @@
+# 1/1
+# gc = 0.275862, rbs = -1
+# self: -
+gene_1 1812 1994 - 0 11 14.1035 b 2002 2007 2.11797
+# 2/1
+# gc = 0.338877, rbs = -1
+# self: -
+gene_1 1 414 + 0 01 25.748 b . . .
+gene_2 614 790 + 0 11 0.774142 b . . .
+gene_3 822 1079 + 0 11 20.6507 b . . .
+gene_4 1961 2140 + 0 11 10.3813 b . . .
+gene_5 2328 2405 - 0 01 2.71033 b . . .
+# 3/1
+# gc = 0.269188, rbs = -1
+# self: -
+gene_1 62 220 + 0 11 2.43434 p 52 57 0.162572
+gene_2 560 754 + 0 11 29.2723 p . . .
+gene_3 1239 1403 + 0 11 3.28908 p . . .
+gene_4 1582 1798 - 1 01 13.4314 p . . .
+# 4/1
+# gc = 0.550169, rbs = -1
+# self: -
+gene_1 3 431 - 0 11 8.00428 p 440 445 -0.537592
+gene_2 582 743 + 0 11 7.07558 p 569 574 -2.00256
+gene_3 1459 1746 + 0 11 16.2536 p . . .
+gene_4 2282 2372 - 1 01 4.40418 p . . .
+# 5/1
+# gc = 0.256547, rbs = -1
+# self: -
+gene_1 1 191 + 2 01 24.4289 b . . .
+gene_2 398 619 + 0 11 13.8688 b . . .
+gene_3 781 1143 + 0 11 26.4034 b . . .
+gene_4 1164 1412 + 0 11 9.28281 b 1152 1157 -2.19822
+gene_5 1812 1871 - 0 01 3.07971 b . . .
+# 6/1
+# gc = 0.329727, rbs = -1
+# self: -
+gene_1 150 344 - 0 11 3.09009 b . . .
+gene_2 402 791 - 0 11 15.7382 b . . .
+gene_3 769 945 - 0 11 1.97601 b 952 957 -2.75668
+gene_4 1627 1941 - 0 01 30.867 b . . .
+# 7/1
+# gc = 0.316101, rbs = -1
+# self: -
+gene_1 1 330 - 0 10 32.7357 b . . .
+gene_2 359 997 - 0 11 46.468 b . . .
+gene_3 1077 1349 - 0 11 29.3212 b 1355 1360 -1.77606
+gene_4 1482 1703 - 0 11 11.6788 b 1716 1721 0.607519
+gene_5 1748 1857 - 2 01 1.33924 b . . .
+# 8/1
+# gc = 0.35155, rbs = -1
+# self: -
+gene_1 1 193 + 1 01 4.66209 b . . .
+gene_2 590 745 + 0 11 11.9555 b . . .
+gene_3 1822 1903 + 0 10 3.61015 b . . .
+# 9/1
+# gc = 0.57209, rbs = -1
+# self: -
+gene_1 118 543 - 0 11 4.22762 p . . .
+gene_2 623 901 - 0 11 8.51219 p . . .
+gene_3 947 1105 - 0 11 4.01029 p . . .
+gene_4 1314 1505 - 0 11 8.25466 p . . .
+gene_5 1549 1727 - 2 01 8.49456 p . . .
+# 10/1
+# gc = 0.314145, rbs = -1
+# self: -
+gene_1 1 66 - 0 10 3.31418 b 80 85 -0.655414
+gene_2 257 715 - 0 11 43.0773 b 722 727 -2.59179
+gene_3 1137 1310 - 0 11 5.91709 b 1322 1327 -2.35659
+gene_4 1627 1824 - 0 01 9.87622 b . . .
b
diff -r 000000000000 -r b04960a7abf5 test-data/metasequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metasequences.fasta Wed Mar 21 17:15:25 2018 -0400
b
b'@@ -0,0 +1,20 @@\n+>1/1\n+CTGGGAATGGGTGTCTTGAGATAATTTCTTTATTCAATTAAGCTCTAAACCTAAATTTCTTACTTCATCTTTAAATAAGAATTTAAGAGGTTCGACTAATTTTAATCTCATTTTTTTTGGTAATCCGCCTACATTATGATGAGATTTGATTTTTGACGTTTGGCTTCCTGTTACTGATCTGCTTTCAATAAGATCTGGATAAAGAGTTCCTTGAGCTAAAAATTTTACATTCTTAATTTTTTTTGCATATCTTTCAAAATTTTAATAATAAATTTCCAATTATTTTTCTTTTCTTTCTGGATCTGAAACGTTTGTTAACTTTTTTAAAAATTCTTTTTCAGCATTAACGTATATTAAATTCATCTTTAGCCTTTCTTAAAAGTATTAACTACTTGTTTTCCTCATTTTTCTAAGAAGACCAGTGTTAACAAAAATACAATATAGCTTTTTACCATAGCTTTATTTAATAATGAGCACTACACTACTATCAACTCCTCCTGAAAGGGCACAAATAACTTATTTTCACCAACTTGATTTCTAACCTCTTTAATTAATTTCATCTTTTGATCTTTTGATGACCAGTTTTTTTAATTTTAATATTAAGGAAATAAAATTTTTAATAATTTTTTACCATTTTCTGTATGTGGTAACTTCAGGATGAAACTGAACACCATAAAATTTATTTGTATTGTCTCCAACAATTGCCAAATTTTGAATTTTGACTTGATGCAATTACTCCTAAAATTTTAGGTAATTTTGAAACTTGATTCAGCATGACTCATCCATACTTTATGAATTTTTTTTTGATTAAAGAAATTTTTTAATTAAAAGACTATCTTTTTTTTATAGACATTAGCTAAACCAAACTCCCGATGCTTTGATTGTTTGACCTTTCCACCATTTAATTTTGACAAGATTTGGTGACCAAACAAATTCCTAATATTGGTATATTGTTTTTAAGTATTTCTTTATCAAGAAATATCTATTAATTTCATAAACGTTAAGGGGACCTCCTGATAAAATGATACCTTTAATTACTTGTATCAATATTTTTATTTTTAATCTTTTTATGACTAATAATTTCTGAGAATACCCTAGTTCTCTTATTCTTCTGGCAATCAATTGTGTAAATTGAGACCAAATCTATGATTAATATTTTATTTAAACTTTGATCAAGAATCATTTTTTGGCTCTATATTCGCTAGAGTCTCCAAAATATCTTTATTTATCATCACATAGTTTTTTACAAACTTTTGTAAATGTTATTTTGATAGATCTTTTACTTTTGAATAATTTGATTTTCTAAAGCAATTTTCATTAACATTAATATTGGCTTCTTCGTTATCAGGTTCGATTAATAATGTTGTTTCTAAATTTTTTTCTTTCTTTTTTTGATCCTCTTCAAAATTATAATCTTTGCCAAGTACAAATATGAATTTGAGTCTTTAGGATTAAAAACATACTTCTTTCAACATAAATCTTGCATCGTCATATTTTTCTGTCTTTAAAAAGTTCTAAGCCTTTATCAAAAAAAATTCTCATTACCAAAAGATGTTGCTTGCTAAATTTATGAAAAATAAATTAATACAGTTTTTTTTAATTTTATCATCATTATCAATCATCATCTTTTTACTATATCAACGTTATGTACCATACTTTCGTAAATCCAGCTTTTGTAATCTTTACAAATTTTGGTTTATTTCTTAAATATTTAATTGTGTCTTAGAACCAGATAACCATCGACGATTTCAAACCGACCAATAAGTTGGAAAATTATTTTTCAACCTTACTTTTATATTTAGCAAATCCTTCTACACCTTCAGGTACATATTTGAGATATCTTTTTGTTTTGATTGAAAATATCTATCAGCTGAACCTTTATTCATAGCACCCCACTGATCCATACCTCTAAAACTTTTAATAACTTTCCCACCTCTTCTAATTAATTTACCTGGGGTCTCATCGGTGCCTG
CAAATAAATGAACCACCATCACTGCATCTGCTCCTGCATCAAAAGCTTTGGCTAAATCACCAGTGTATTTAATGCCTCCATCAGAGATAAGATCTTAACATTTTTATTTTTAACTCGTTTCTTAACTGTTAAAATTGCACTTAACTGAGGAACTCCAATACCTGCCACTAATCGTGTGGATACAGATAGACCCAGGGCCTATACCCACTTTAATTATATCTACCCCTAATTTTAAAAAATGTTGCAGCTTCTGGTGGTAGCTATTATTACATGCACACTAAAGCTGTCTTTTATTTTTCTTTTTTTAATGAATTTAATTATCTCTGAAACTTTTTGTATGACCATGGG\n+>2/1\n+GCAAGATCTGATTTATTATTGCCAGAATCACAATATGAATTCTTTAAAGTTAAGTTAGAACAAACAAGCATCATTTCACCATTTAATGGAGTTATACAGAATAGGTTTTTAGATACAGGTACAGTAATTAACTCAGGGATCCCTATACTTGAGATCATTGATTCGAATTATGTTGAAGCACATATTTCTGTACCTGTTATCTATTTAGAAGATATGCAAATCAATGAAGAATATAATTTTGAGTTTGATGGTGAGATAACAAAAGCAACTTTCGCCAAGCTAGCACCAATGTCTCCGGGCGTTCAAATAGTAGACTGGCGATATTTAAGTTTAGTAAATTTTTTAATCCAGGCCTCAATAGCAAACCTCCAATTAAAAATTTCCAACAAGCAAAAGGTACCTGGGTTCCTTTAAAGTCTCTAATCTCAATCAGATCAGGGCTTATGGACAATATATACAATCGACAATAGCAATACTGTTGTTAGAGATATAGTAGAAATAGTCCATTTTGAAGATGATTATGCATTCGTTAGTGGCACCATAAAAAATGGTGATTTAGTTGTCTTAGGCGGAGCATCTAAGATTATTGAAGGCAAAAACTTAATTAGAAGAAATGAAATTTATAAGTATATTTCTTGATAGACCAAGAATACTCTTTTTAACTTTAGCTTTCATATTACTTTCTGGCATTTCTTCAATATATACATTACCAATTCAAGAGAATCCAGAGTTAGCTGAGAGATGGGCGACTGTTACCATTTCATATCCTGGAGCCGCACAGAGAGAATAGAAACACAAGTTGTAGATATCTTGAAAATAAATTGAGAGAAATTGTTGAGCTTGATATCGAAGATTTAGCGTCAGTAATTACTCAAGGCTTTTCTGAAACACTAGTAGAATTGCAACAAAGTGTTCCACCCTCATTAATAGAAGAAGTTTGGTCAAAAGTTCAAAATAAAATTGATCAAATAGAAACACCTGATGGAGTAACCATGTTGCTTGAAAGATCATCAGGACCACCTATAACTGTTGAGTACATTATTGATTGGAGGGTGAAGGAAATGCGCCAATTATTATGATGTCCAGATTGGCACAACAACTACAGAAAAGTTGAGTTCTGTTCCAGGGACAGAAAAGACAGCTATATATGGGAGAGGCAGAAGAAGAAATAGTTGTTGAAGTTGATTCTGCAAAAATGTCATCCCTAGGATTAACATATCAAGAAATCAAGTTCAGCCATAAGATCATATGACAATAAAAAACCTGTTGGGGTGGTCTCTGATGAATATTCTGAATTTTTAATACGATTAAAAGATAACATAACGAGTCCTCAAAAGATTGGCGAAATACCAGTTAACGGTAATAAATCAATCCGAAATTATAAGGCTTCAAGATATAGCCGAGGTCTCGATGCAACCCGCTAATCCAATCGAAGACATATTTTTATACAATGGTCGAAGAGTTTTATCGGTATCTGCTACAGGATCATTTTCACAGAGGTATTTGAGTATGTAGAAAATGTAGATGTTCAAGTCGATAAAATGCGATTAACATTACCTGAAGAATTCCAAATTGAAAGAATATACGATGAGTCGATATATGTATCCAGTAAATTTGGAGAACTAATTAAAAGCTTTGCATT
AGCAA'..b'TGCGTTACAACTTCGATGGTATTTATTATTCTATTTAGGTGCTGCAATGCTTACATCAGGATTTAGAGCATATTGGTGGTGAAGAATAGTAAGAGACTTCCTTCAAGATTTACCCGGGGGCTTGGACTCAGTTTGTAGTTGTAATGATTGTAATATTCTTACTTGGGTTCTTTTTAGAATTTTATTGAAATCGCTGTTGTGGTTGTTCCAATAATTGCACCTATATTATTAGCTGAGACAGGAGCAAACCGTTACCAGCGGTCTGGTTATGGTGTTATGATTGGTGTCATATTTACAAACCCTCTTTTTTAACTCCCCCCTTTGGTTTTGCCTTATTTTATTTAGAAAATGGTGTTGGTAAAAGTGGTACAAACATTAGATATTTGGAAAGGTGTTGTTCCATTTATAATTTTACAACTCATTGGCCTAG\n+>9/1\n+AAGAAGGCTATCCATTCGGCGGGCCGCCTCGCCAAGCGCCGACGAGCGCCGCAATTCATCAACCAGCGTTCGAGCCGTCGGGAGACCCGCTGGCGTACCAGCTGTCAAAATCGGCCCTCATCCCGGCACCAAGCGACTGACGGAACAGTACGACAACGCCGTTCGGTGCGCGCCGTCCAAGATCAGAGCCACAGGTGTTGGCCCCGTCAGCACCAGAGAAATTGAGAATGTCATTGGCAATCTGATAGGCATTCCCGAGCGCCCGGAAACAATCACCGATGGTCGGGCCGCGCCGCCATGAAGCGCCATCGCCGCGACGCCCTCAAGCGGGGCTGTTAGCAGCGGAGCTGTCTTGTCGCCAGCCCCCTGAAGATAATGGTCCCAGTCCTTGACCGGCCGGACGTCGAATTCCCGTGCTTCGCCGATCGTCGTCGTCTTCATGTGCGTTGCCAGGATTTTCACCAGCATCGGTGTCTGTGATCGCTGCGCCGCCTCAGCGGCGAGTTCGAATGACAGGGCGACCAGCCAGTCACCAAGGGTCAACGCGACATCGCGGCCATAGACCGACAGACAGCCGGACGGCCGCGGCGCAGACGGTCACCATCACAGATGTCGTCATGGATCAGCGAGGCGTTGTGAAGCACCTCGATGGCCACCGCCCAGTGAAGGCCGCCGTCCTGTCGACTTTCAGGAAGTCGGCGGCGCGAAGGGCCATCTTGGCGCGCAGCATCTTGCCAGGGGCGGCGAAATGGTGAAGCGCCGCATCGGCCAGCGGCTGACCAGGCTGCGGCATGCGCTCGATAACGAATTCATGGATGATTTCCGAAATGACTGCGAAACCGGAACTCCCCGTACCGGAGTCGGAGAAGATCTCGTTCACTGGGGTGGACAGGTCAGTCATGCTTACCGTGCTTCAACTATTCCACGAGTGTAGCACGTAGTAATTTCAATATATTATGTGAACACGGGCGACGATTTGGTGCCGCCCCGTTTCTCTTGATTTGTTATGCTTCTTTGCTGTCGCGTTGGCGGCTGCCCAGATTACACCGACGAAGGCGATCTTATTAACAACGTCAGCGACGTTGTAGATCACATTCAATGTCACCGCGTCAGCAGCGCCATTTAGGTAACCGAGGAAGTAGCCTAGTGGGTAGATCGCCCAGCCAACGTAACGATTAACCGCATCGTACCGAAAGACTTCTGGACGGCCTCCGGAGCGGACTCCGCAGATACCCTTCCTGCTTCACCGGCAAAAATCTCATACAAGATGTATGCCCATCCCGCCATGCCAACGACAAAGCCAAGCCAAGCGTTCACGAGACCTGCTTCGCCGAGGTAACCAGCGACAGCATTACCAGCGACCCAATCATCAGGCGCCAAAAAATGCCAGCGCTGACAACTGCGACTGCCGCTAGAATGAAGTAGAATTCAACCATTTGGAGAGGCACGGTTATTAGCCAGTCAACATACCTGTAAACTGTTGGCGTCTCACCTGTGCTGCCCATACGTCGCGCATGTAAAAGTAATGAACTGCCGCAACGATCTGCACT
AGGCACCGATCGTCATAGCAGTCTTCCATTTGCCTGGCAGGCGAGCGGCTTCCATCAGGAAGAAAATTGCCGCGGCCATCATCGCCATTGAAATGGTCCAGAAAGATATGCCCACAAAGTCGGTAGCGGCAAGCATTGTCGTTTCTGCATTTGCGACTGATGGGAGAATGGCAAGTC\n+>10/1\n+AAACTCAACAAACTTAGCGTTGTGCTTTTTATGTAGTTCGTATAGTGCAGTTTTTTTTATATTCATATTAACTCTCTAACCCCCTCTGTCGTGGTGCCTGAGAGATTTTAGCTCCTTCGGCGAGAATTTGTCTCTTTCCAGAGTTAATATGTACGGTCCTTTTGCCTGAGAGTTTCCGGGGTGGTTGCTCCTTCGGCGCTACTCAATGTAGTCTCTCCCGTATAAAATTATATTAGCATATTTTTGACAATCGTCATCAAGAAATAGTTTTTTTTTCTAGTTTAATAACATTGTAAAACTGTAGTAATACTGCGAATAATACAATTGACCCCCCAATTAAAACAACAAATGGTGGATTTTCACTCACAAATAACCATGCCCAAAGAGGTCCAAGAATAGCTTCAGTGAGCATTATGATACCAACAATTGCCGAGGGCGTAGATCTTGCTCCAATTGTTATAAAAATAAAACCAAAACCTAATTGAAAAAAACCTGCTAGAAAACCTAAAAATATATCATATGAAGAAATATTAATTTTACCTGCAACCAAATAACCAACAGACATTGCAACTATTCCAGCAATTAATTGTAACGGTATCATGTCTACATCTGGATATTTTCTAATAATAATAATTAAGATTGCAAATGACACAGGCATAATAAATGCTGCAATATTTCCAGACATTTGTCCTGGAGACAAAGATCCCCCAACCATTAAAATAATACCAGAGATAGCTAGTATAATTGATATTAAAGTTAATTTTGAAATTTTTCTTTTAAAAATAAATATCCAAAGATTGCAAAAAATTGTTTGAGTTTGAATAATAAATTAGCATTGGCTACTGTTGTATTATACATTGCAAAACATAGCCACAAAACCCTGAAGATAAAACTATACCACCGATAAGACCGGGAAAGCCTGATTTCTTAAATGCATGAAAAAACATTTTTTTTATAAGTAACGAGTAAAAAATTGATACAACTAAAATAAAAAAAGAGATCTCCAAAAAAGTATCTGCCATAATGTGGCGCCCTCGAAAGATTTTACAATCATCCTCCAAAACTTAAACTAGTGGCACCAAAAAAACTAAAAGTGGACCCGGTAATCTTGTATAAAATTCATCTTATTTTATTAATTATATTCTTAGTTTCCATAGTATAAATTTTGTATAAATTTTCTGCCTCGTAAGACTTATAATTTTGAATTTAATTTTTTCTCCAGGTGGGCATTGTGCTAACTTGTCATAATCTGCACTTATTACAACAGCAATTTTTGGATAACCACCTATTGTTCCATGATCTGAAAGCATTACTATAGGGTCTCCGTCAGCTGGAACTTGTATTACTCCTTTTCCTAATCCTTCAGACTTGATGTTTGTTTTCTTAAATTCTTAATTTTAGGTCCACTTAATCTCATTCCCATCCTATCGGTAAGTTTTGAGACAATAAATCTTTACTTGTGAAATCTTTGATTGAGTCTTCTGAGAAGTAATCAAAGTTTGTACCTTTATCACTCTTATAAATTCTATTTTTGAGTTTAAGTGTTCTAAACTTTTTTCAATTGAGCTTTCAGAAGCATTTAAAATTATTGTTTGTCCAATTTCTATTTTTTTCCCTCATTAGGACCTACTTCAGCTCTAACAGTTGTCGAATAACTATTACAAAATTTATTAATCTGAAACCACCTTTTACCGAAAGGTAGCCATAGACTGACTTTTCAGTTGATATAATATCAAGACAGTCTTTATCATTTAGATTGTAAGTTCTATAACATTCACCTTTTATTTCAGAGGTGTCTTTTTTAATTATCTT
AAAATTAACATT\n'
b
diff -r 000000000000 -r b04960a7abf5 test-data/metasequences1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metasequences1.fasta Wed Mar 21 17:15:25 2018 -0400
b
b'@@ -0,0 +1,12 @@\n+>1/1\n+CTGGGAATGGGTGTCTTGAGATAATTTCTTTATTCAATTAAGCTCTAAACCTAAATTTCTTACTTCATCTTTAAATAAGAATTTAAGAGGTTCGACTAATTTTAATCTCATTTTTTTTGGTAATCCGCCTACATTATGATGAGATTTGATTTTTGACGTTTGGCTTCCTGTTACTGATCTGCTTTCAATAAGATCTGGATAAAGAGTTCCTTGAGCTAAAAATTTTACATTCTTAATTTTTTTTGCATATCTTTCAAAATTTTAATAATAAATTTCCAATTATTTTTCTTTTCTTTCTGGATCTGAAACGTTTGTTAACTTTTTTAAAAATTCTTTTTCAGCATTAACGTATATTAAATTCATCTTTAGCCTTTCTTAAAAGTATTAACTACTTGTTTTCCTCATTTTTCTAAGAAGACCAGTGTTAACAAAAATACAATATAGCTTTTTACCATAGCTTTATTTAATAATGAGCACTACACTACTATCAACTCCTCCTGAAAGGGCACAAATAACTTATTTTCACCAACTTGATTTCTAACCTCTTTAATTAATTTCATCTTTTGATCTTTTGATGACCAGTTTTTTTAATTTTAATATTAAGGAAATAAAATTTTTAATAATTTTTTACCATTTTCTGTATGTGGTAACTTCAGGATGAAACTGAACACCATAAAATTTATTTGTATTGTCTCCAACAATTGCCAAATTTTGAATTTTGACTTGATGCAATTACTCCTAAAATTTTAGGTAATTTTGAAACTTGATTCAGCATGACTCATCCATACTTTATGAATTTTTTTTTGATTAAAGAAATTTTTTAATTAAAAGACTATCTTTTTTTTATAGACATTAGCTAAACCAAACTCCCGATGCTTTGATTGTTTGACCTTTCCACCATTTAATTTTGACAAGATTTGGTGACCAAACAAATTCCTAATATTGGTATATTGTTTTTAAGTATTTCTTTATCAAGAAATATCTATTAATTTCATAAACGTTAAGGGGACCTCCTGATAAAATGATACCTTTAATTACTTGTATCAATATTTTTATTTTTAATCTTTTTATGACTAATAATTTCTGAGAATACCCTAGTTCTCTTATTCTTCTGGCAATCAATTGTGTAAATTGAGACCAAATCTATGATTAATATTTTATTTAAACTTTGATCAAGAATCATTTTTTGGCTCTATATTCGCTAGAGTCTCCAAAATATCTTTATTTATCATCACATAGTTTTTTACAAACTTTTGTAAATGTTATTTTGATAGATCTTTTACTTTTGAATAATTTGATTTTCTAAAGCAATTTTCATTAACATTAATATTGGCTTCTTCGTTATCAGGTTCGATTAATAATGTTGTTTCTAAATTTTTTTCTTTCTTTTTTTGATCCTCTTCAAAATTATAATCTTTGCCAAGTACAAATATGAATTTGAGTCTTTAGGATTAAAAACATACTTCTTTCAACATAAATCTTGCATCGTCATATTTTTCTGTCTTTAAAAAGTTCTAAGCCTTTATCAAAAAAAATTCTCATTACCAAAAGATGTTGCTTGCTAAATTTATGAAAAATAAATTAATACAGTTTTTTTTAATTTTATCATCATTATCAATCATCATCTTTTTACTATATCAACGTTATGTACCATACTTTCGTAAATCCAGCTTTTGTAATCTTTACAAATTTTGGTTTATTTCTTAAATATTTAATTGTGTCTTAGAACCAGATAACCATCGACGATTTCAAACCGACCAATAAGTTGGAAAATTATTTTTCAACCTTACTTTTATATTTAGCAAATCCTTCTACACCTTCAGGTACATATTTGAGATATCTTTTTGTTTTGATTGAAAATATCTATCAGCTGAACCTTTATTCATAGCACCCCACTGATCCATACCTCTAAAACTTTTAATAACTTTCCCACCTCTTCTAATTAATTTACCTGGGGTCTCATCGGTGCCTG
CAAATAAATGAACCACCATCACTGCATCTGCTCCTGCATCAAAAGCTTTGGCTAAATCACCAGTGTATTTAATGCCTCCATCAGAGATAAGATCTTAACATTTTTATTTTTAACTCGTTTCTTAACTGTTAAAATTGCACTTAACTGAGGAACTCCAATACCTGCCACTAATCGTGTGGATACAGATAGACCCAGGGCCTATACCCACTTTAATTATATCTACCCCTAATTTTAAAAAATGTTGCAGCTTCTGGTGGTAGCTATTATTACATGCACACTAAAGCTGTCTTTTATTTTTCTTTTTTTAATGAATTTAATTATCTCTGAAACTTTTTGTATGACCATGGG\n+>2/1\n+GCAAGATCTGATTTATTATTGCCAGAATCACAATATGAATTCTTTAAAGTTAAGTTAGAACAAACAAGCATCATTTCACCATTTAATGGAGTTATACAGAATAGGTTTTTAGATACAGGTACAGTAATTAACTCAGGGATCCCTATACTTGAGATCATTGATTCGAATTATGTTGAAGCACATATTTCTGTACCTGTTATCTATTTAGAAGATATGCAAATCAATGAAGAATATAATTTTGAGTTTGATGGTGAGATAACAAAAGCAACTTTCGCCAAGCTAGCACCAATGTCTCCGGGCGTTCAAATAGTAGACTGGCGATATTTAAGTTTAGTAAATTTTTTAATCCAGGCCTCAATAGCAAACCTCCAATTAAAAATTTCCAACAAGCAAAAGGTACCTGGGTTCCTTTAAAGTCTCTAATCTCAATCAGATCAGGGCTTATGGACAATATATACAATCGACAATAGCAATACTGTTGTTAGAGATATAGTAGAAATAGTCCATTTTGAAGATGATTATGCATTCGTTAGTGGCACCATAAAAAATGGTGATTTAGTTGTCTTAGGCGGAGCATCTAAGATTATTGAAGGCAAAAACTTAATTAGAAGAAATGAAATTTATAAGTATATTTCTTGATAGACCAAGAATACTCTTTTTAACTTTAGCTTTCATATTACTTTCTGGCATTTCTTCAATATATACATTACCAATTCAAGAGAATCCAGAGTTAGCTGAGAGATGGGCGACTGTTACCATTTCATATCCTGGAGCCGCACAGAGAGAATAGAAACACAAGTTGTAGATATCTTGAAAATAAATTGAGAGAAATTGTTGAGCTTGATATCGAAGATTTAGCGTCAGTAATTACTCAAGGCTTTTCTGAAACACTAGTAGAATTGCAACAAAGTGTTCCACCCTCATTAATAGAAGAAGTTTGGTCAAAAGTTCAAAATAAAATTGATCAAATAGAAACACCTGATGGAGTAACCATGTTGCTTGAAAGATCATCAGGACCACCTATAACTGTTGAGTACATTATTGATTGGAGGGTGAAGGAAATGCGCCAATTATTATGATGTCCAGATTGGCACAACAACTACAGAAAAGTTGAGTTCTGTTCCAGGGACAGAAAAGACAGCTATATATGGGAGAGGCAGAAGAAGAAATAGTTGTTGAAGTTGATTCTGCAAAAATGTCATCCCTAGGATTAACATATCAAGAAATCAAGTTCAGCCATAAGATCATATGACAATAAAAAACCTGTTGGGGTGGTCTCTGATGAATATTCTGAATTTTTAATACGATTAAAAGATAACATAACGAGTCCTCAAAAGATTGGCGAAATACCAGTTAACGGTAATAAATCAATCCGAAATTATAAGGCTTCAAGATATAGCCGAGGTCTCGATGCAACCCGCTAATCCAATCGAAGACATATTTTTATACAATGGTCGAAGAGTTTTATCGGTATCTGCTACAGGATCATTTTCACAGAGGTATTTGAGTATGTAGAAAATGTAGATGTTCAAGTCGATAAAATGCGATTAACATTACCTGAAGAATTCCAAATTGAAAGAATATACGATGAGTCGATATATGTATCCAGTAAATTTGGAGAACTAATTAAAAGCTTTGCATT
AGCAA'..b'TCCTGATTCCATTCGAGCGTTTCTTCCGAGACAGATCGGGTGCTGTCTGAGGACAGGCTCTGCACGTCAGTTTCGATAGTCAGATCCCGAACTTTAGTGTCCGGCGGACCAAGGCACTACCATTGAGACGGAACCAGTAGCCGTCGCTACGCTCTGCGCTGAGGCGCCAG\n+>5/1\n+TTTTAGATGATACAGACGAAAATTTTTCAGGAAAGTTAAAAAAATTTAATTTAATTGGAATTCCATATCAAATTCTAATTGGAAAAAATCCTGATAAAAATAAAGTTGAGTTTAAAGAAGTGGGGAACGATAGTAAGATGATAAGTCTTGAAGAAGCCCTTAATTTCATTAAATCCAAAAAAATAAATTGATTTCAACTTTAGAAAAAAAAATAATTTTTAGATATCTTAAAACTAGAAAAAAAGATGGTTTTTTAAACATCATCACTTTATTCATTCTTAGGCATAAGTTTAGGCGTGGCAGTTTTAATTATAGTTATGTCTGTAATGAATGGATTTAGGTCAGAGGCTGATAAACAAGATAACTAATTTCAACGCACATGTAATCGTGAAACCCTATGAAAAAAAATAGAACAAAAAAAATAGGTAACGAGTTTTTACAAAATATATCTTCAAATTTGATATTAAGTAATAATGGAGAGGGAATACTTTTAAATGATGAAATTACTAAAGGTATTTTAGTTAGAGGCTATTCTGAAAATGACTTTCAAAAACTAAATATTGTAAATAATAAATATTTTAGAGGTAATAAAAAGATTTTAAAGACAATATATCGATAGGGAGCGATTTAAGTTATGACCTTGAATTAAAAATTGGTGACCAGATTTCAATAATTTCACCTTCTGGTGAAAATACACTTATTGGATCTATACCTCGACAAAAAACTTTTAAAATTGATTCTATATTTGATAGTAGGTTTGCTGAGTTTAATAGTTCTGTTGTGTTTATTAATCTTAAAGATCTACAAGGTTTGTTTGATTTAAAAAAAGAAAATAATTTCTTAGAGGTTTATTTATATAAACCTGACAAAATTGAGATTTATCGTGAAAAACTGTTGTCGATTTTTACTGACGAATATATATTTACTTGGTCAGATCTAAACAAACCATTATTTTCTGCTTTAAAAGTAGAGAGAAATGTTATGTTTATAATATTATCCTTAATAATTATAGTCGCTGCTTTTAACATTATTTCGGGTCTTACTATTTTAGTAAAAAATAAAACTAGGGAAATTGCTATATTAAAATCAATAGGTGTTTCTAACTTTTCCATTCGAAAAATATTTTTTTTATTGGATTTTTAATAGGATTTTTAGCAACAATTATGGGAGTTGTTATAGGCGTAACTTTTTCTCTTTATGTTGAGGAAATTAGAATGCTAATTAGCAATGTCTTTAATATAAGTTTATTTCCAGAGGAAATATATTTTTTAAGCACGATACCATATCAGATAGATTTTTGGTCAATATTTTTGATTTCAAGTTGTTCAATTATAATGACCTGTTTAGTCTCAATATACCCTGCAACTAAAGCAGCAAAATTAGATACAATTAAATCATTAAAATATGAATAACATTATAGAACTAAATAATATTTCAAAGATATTTAATAATCAAAAAAGACCACTGTTCTTAAAAATTTAAATTCAAATTCAAAAAAGGAAAAATTTATTCATTATCAGGGCCTTCAGGATCTGGAAAATCCACATTACTCAATTTATTATCTTTAATAGATAGACCATCATCCGGAAATATCAAAATTGATAGTCAAAATATAAATCACAACGAAATTGAGATTAATGATAAAATTAGATCTAATAATATTGGAATTGGCTATCAAGAGAAAAATTTACTAACTGATTTTACAGCTATCGAAAATGTTTGCTTAGCTAGCTTAGCTGCTAATAACAATCAGAAAATAGCAGAAAAAGAATCATTAAAAATTATTCAAAAAGTTGGCTTAAAAGATAGG
ACTAATCATTACCATCAGAACTATCAGGAGGAGAGTTGCAAAGAATTGCTTCTATCGAGAG\n+>6/1\n+CATATCAAAATAATCATCGCCTTTTTCAGATATGTTAAGGTTAGTTATATTGCCACCTGCACTTCCAACAATGGTTGCTAATTTACCTAAAGATCCAGGTTCATTTTTAACTGAGACTTTTATTCTTGACGAAAAATACTGCTTTGGATCTACATCTTTCCATGAGGCAGTGATCCAATCTTCTTTTTTGTTTCTTTATCTAGCTCCTTACATAAACTGGAGTGAATGATTACACCTGAATCTTCTGATGTAAGTCCCAAAATATTTTCATTGGGTAAAGGGAAACAGCATTCTGCATAATGAATTGCGATTCCAGGTTGGAAGTTACTTATCTCTAAAGGCATCTTATCGTTTGTGTCTCTCTTCTGTATCGAGAAAAAAATGATAAAGTTGAACGGGGCTTAAGCTTTGATAATTGATGTGGCCTTTATACTTCCTTTACCAATTGCAACAAGAAACTCCTCAGGACTTTTCCTATTAAAAATTTTTGCAAAATTACTAATTTCTGTGTTTGAAATATCTTCATTTCTACCTGTGACTTTGTGTAATATTTTTAACCCTAGACTTGTAAATTCATCTTTTTCTTGCTTTTTAAAAAATCTTTTTATCGATGACTGCGCCTTTGCAGTTGTAACAATATTTTCCCAAGTTTCAGATGGAGATGGTTTTTTTGATGTTAGTACTTCAATCTCATCTCCATTTTGAAGTTTTGTAAATAAGGGTCTAGGTTTTCCATTAATCTTACATCCAACGCAACGATTTCCAATTTCAGAATGAAGGGCATAAAGCAAAATCGATAGAGTTAGAATTTGCAGGCAAATGAATTAAGTCACCTTTCGGTGTAAAACAAAAAACTTGATCCTGAAACATTTCTAATTTTGTAGCTTCAAGTAAGTCCTCAGAGCTTCCACCGCTGTCCAATATTTCTACTAATCTCTCAACCAAAGTAACATTAACTGGATCTGAAGAAGAGTTATCTACTAAATTATAATTCTCTTTATAGCTCCAGTGTGCCGCTATACCCTTATCTGCTATTAGTGTTCATTTCATTTGTTCGAATTTGAAGTTCAACTCTCTGTCTTTCAGGGCCAACAATTGTTGTATGAATTGACTGATAATTATTAATTTTTGGTGTCGATATATAATCCTTAAATGAGCCTGGTACAGCAGACCACCGGTTATGAATGATGCCAATATACTATAACAATCTTCAATGCTATTAGTGATAATCCTATAAGCAAAAAATATCCGACAATTGTCTAAAAGCAATTTGGCTTATTCTGCATTTTATTCCATACAGAGAAGATTTTTTTTTTTCACGTCCTATTATTTGACAATCAATTTCATTTTTTTCTAAAAGACTAAAGAATCCTTGTCTTTATTTTATCAGTTAAATTAGATTTATCTTTCTTTAAATACTCAATTCTTGTCATCAATGAGGAATATGCAGATGGATTTAAAATTTGAAATGATAAATCTTCCAACTCGTCCTTGAATTCATGCATCCCCAATCTTCCAGCAAGGGGGCATAAATTTCTAATGTTTCCGATGCAATACGTTGTCGTCTTTTTTCCTGTGGAATAAATTGAATCGTTCTCATGTTGTGTAAACGGTCAGCTAACTTTACTAACAAAACTCTTGATATCCTTCGACATTGCTAAAATTAATTTTTCTGAAATTTTCAGCCTGGAATTTATTTTCAGTATACGTTTCAATTTTTGACAGTTTTGTAACGCCGTTTACTAATTGAAGGACTTCTTTTCCAAATTTTTCTTCAATTTCATTTTCTGTAGCAAGAGTATCCTCAAGTGTGTCATGTAAAAGGCCCGTTATAATTGTGGAAGTATCTAACTTTAAGTTTAATAGAATATCTGCAACTGCAACTGGGTGAGATATATAAGGATCACCTGAATGCCTTTTTGTGACTTG
TGACACTCGACT\n'
b
diff -r 000000000000 -r b04960a7abf5 test-data/metasequences2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metasequences2.fasta Wed Mar 21 17:15:25 2018 -0400
b
@@ -0,0 +1,8 @@
+>7/1
+TAAACTATAAATACCTTTTGTATATTTTTTGTAAGTCTCGTAAGAATTTGATCCAACAGCAGCTTGTAAAAGATGAATTAATTTTCCTTGGTATTGGTGTGTTTCACCATTTTTTCTGTATCTATAAATTCCACCTATAGGTAAAATATCTGAGTGAACATCAAAAGCATCTTTATGAATAGCTCTAATTTTTTTTTCAATACCAGTTAAACCAATTCCTGAAATTTTTGAAACTACCCCTGGAAAATAATCTTTAACAATTGTTCTGCTTAAACCAACAGTTTCAAAGTTGCACCCACCTCTGTAAGAGCTAAGTACTGAAATTCCCATCTCGACATTATTTTTAACAAACCAAGATTTACTGATTTAATATATCTAGCAACGCACTCATCATAGGAAAAATTGCCAAAAGTTTTTTGGAATGTCTTTGATATAAACTATCAAATGCAAGGTATGGATTTACTGTTGTGGCACCGACACCTATAAGAGTAGCAAAAGAATGTGTATCAAGAGCATCTCCTGTTTGAACATTAATAGAAACATATCCCCTTAAGCCTAATTTTATTAAATGAGTATTTATTGCACCAATACATAAAAGCATTGGCATAGGAAGTTTTGTTTCAGATGTTTTTTTATCAGATAGAATAAGTTGAGTAACACCTTCTCTAACTGCTTTTTCAGCATCTTTTCTCAATCTTTCAATTGACTCTTCAAGGGTTTGATCATCATTGAATGTACACTCTATAGTTTTATAGTTATCACCAAAGTATTTTACGAATTTGTCAAATTGGGTATTTGATAGAATTGGACTATCTAATACATAAATATTTTCTTCAGTAAGATTTGAAAAATCAAGGATATTTCCAAGATTTCCAAATCTAGTTTTAAGACTCATTACTTTATTTTCTCTTAACGAGTCGATAGGAGGATTGGTTACTTGACTAAAATTTTGTCTAAAAAGTGGTAAAGAGGCTATATTTATCTGATAATACAGCAAGTGGTGTATCATCACCCATCGAGCCAGTTGCCTCTTTTGCATCTTCTGCCATTGGATGCCAAGATTAATTCCAAATCCTCTAGACTATATCCAAAGCAATGTTGTATCTTTTTAAATCAGTTCCAGAAAACTCATTTTTTTCATTTTCAATAGTCAAAGTCTTATCCAAATCAATAATTTGATTATTAAAGTGCTTATATTCCTTGGCTAAATAATTTTTAATTTCTTTATTTGAGTAAACTTTTCCTTTTTCAATTCTAACGCCTAATATTTCCCCTGGTCCTAATCTTCCTTTTGAAACAATTTTTTTTTCATTTAGGTCAATCATTCCAGTTTCAGACCCAGCAAACAATAGTTTATCTCTTGTTACCGTATATCTAAAGGTCTAAGTCCATTTCTATCGCTTGCCACAATTACCCATTCATTATCTGTTGCTGCTATAGCTGCAGGACCATCCCATGGTTCCATAGTGCTATTCAAAAAGTTAATAATTGTTGATGATCTCGTGAAAGAACTTTACTTTTTTTAGACCAAGCATCGGAATTAGTATTAATTTTGCTAATGGTGCTGAATGACCAGAAATATTTAAGAGTTCAAAGACATTATCTAACGAAGCAGAATCCGAATTACCCGCTGGAATTACAGGTTTTAAATTTTCCATATCTTCAAATAGAGGACTAAACATTTCTTCTTCATGAATTCTCATCCAATTAATATTTCCTTTTAAAGTATTTATTTCTCCATTGTGTGCTATTGACCTAAATGGTTGAGCCAAGTCCCAACTGGGAGCTGTGTTAGTTGAAAACCTTTGATGAAAATTGCGTATCTTGATATAAAACGTTCATCTTTTAAGTCCTGTA
+>8/1
+CTTTTATGGTTTAAACGGTAAAGCAAGTATTATTAACGGACCCGTAGTTGCCTTTGAAGTAACTCAGCAAGGTTCTAACGGACTATATCTACTTTATTTAATGGCTGTTTATTTAGGAGTTTTGCAGTTACGATGTTGCTGCAATTCACAAGTTATTTTTATGAGCAGTAGCCACAAACTTCTGCAAAATTGATGAAGAGGTAAATATTTCAAAGGTCGATAAAACAAAAATAAAGTTTAGTATCGAAAATATGGAAAGCCAATAATGGAATTATTTTTTTTAGCTATATTAGTTTTAATTATGATTGCAGCTCTAGCTTCAGGGTATCCAGTTGCCTTTGCTCTTCCTGGTTCAAGCTATCATATCTAATCGGTTTAGCAGCTTTTTTTGGTTATATTTTTTGCTGGCGATGTTGATGCTTACTTCGCTGTCGATGGTCCAACCGAATGGCTTGTTGCTGGAATTACAAAATTTTAGAAGTAATTACTGGGACGTCGAGACTGACACTTTAATTGCAATTCCCTTATTTATTTTTATGGGAATTATGCTTCAAAAAATCAAAAAATTGCTGAGGATTTATTAATAACTATGGGTCCAATTATTTGGCCCCCGATACCTGGTGGTTTAGGTATTTCGGTAATATTTGTTGGTGCATTATTAGCTGCAACTACAGGTATTGTCGGAGCAACAGTCATTGCAATGGGATTAATTTCATTACCTACAATGTTTAAACAACAATTATGACAGAAAACTTGCAAGTGGAATTGTTTGCTCTTCTGGAACTCTAGGACAGATTATTCCGCCATCAATTGTATTAATTATTATAGCGGACCAATTAGGCGAGGCGCATCAGATGTTGCCAATAACATGAGGCAGAACGATTATAAAGCTTTAACAGCGCGAATTTAACATGCCAGGTGAATTTAGAGTAGGTTTCATCCAGTGCCGGTGACATGTTCCTGGGAGCACTTTTACCAGGGAATGTAGTACTTGTAGCTTTATATATGATCTATGTATTGTTTCTATGCGAAGAATAAAAAAAGGGGTTGCCCCACCTGTTCCATTAAAGGAAACTTGATTTCAAATTTTGGATGAAAGTTATTGTTTTAATTATTCCTCCACTTGCATTAATTTTTGCTGTTTTAGGATCGATTCTTATGTGGTATTGCAAGCTGTGAACCAAGCTGGTTCTAATAGGAGCAATTGGAGCAACATTAATGGCGGGTTACCGTCTATTTCAGGTAAGAAAAGTGCCTCTTCTATCACTTATAATGATAATTGGATCATTAATACCCATTACATTTCTTTTTTGCATCAAATTATGACTTAACATAAAAAATATAGAAGAAGAGATTTAGGAGCAATTTATATAACTGCTATTTTTGTAATCACTTGGTTTATGGCAATAGCATGGAGTTTTTGGAGAACATATAAAACTGAAAATGCTTCTTAAAGAAGCAGTAACCGAAACGTGCGTTACAACTTCGATGGTATTTATTATTCTATTTAGGTGCTGCAATGCTTACATCAGGATTTAGAGCATATTGGTGGTGAAGAATAGTAAGAGACTTCCTTCAAGATTTACCCGGGGGCTTGGACTCAGTTTGTAGTTGTAATGATTGTAATATTCTTACTTGGGTTCTTTTTAGAATTTTATTGAAATCGCTGTTGTGGTTGTTCCAATAATTGCACCTATATTATTAGCTGAGACAGGAGCAAACCGTTACCAGCGGTCTGGTTATGGTGTTATGATTGGTGTCATATTTACAAACCCTCTTTTTTAACTCCCCCCTTTGGTTTTGCCTTATTTTATTTAGAAAATGGTGTTGGTAAAAGTGGTACAAACATTAGATATTTGGAAAGGTGTTGTTCCATTTATAATTTTACAACTCATTGGCCTAG
+>9/1
+AAGAAGGCTATCCATTCGGCGGGCCGCCTCGCCAAGCGCCGACGAGCGCCGCAATTCATCAACCAGCGTTCGAGCCGTCGGGAGACCCGCTGGCGTACCAGCTGTCAAAATCGGCCCTCATCCCGGCACCAAGCGACTGACGGAACAGTACGACAACGCCGTTCGGTGCGCGCCGTCCAAGATCAGAGCCACAGGTGTTGGCCCCGTCAGCACCAGAGAAATTGAGAATGTCATTGGCAATCTGATAGGCATTCCCGAGCGCCCGGAAACAATCACCGATGGTCGGGCCGCGCCGCCATGAAGCGCCATCGCCGCGACGCCCTCAAGCGGGGCTGTTAGCAGCGGAGCTGTCTTGTCGCCAGCCCCCTGAAGATAATGGTCCCAGTCCTTGACCGGCCGGACGTCGAATTCCCGTGCTTCGCCGATCGTCGTCGTCTTCATGTGCGTTGCCAGGATTTTCACCAGCATCGGTGTCTGTGATCGCTGCGCCGCCTCAGCGGCGAGTTCGAATGACAGGGCGACCAGCCAGTCACCAAGGGTCAACGCGACATCGCGGCCATAGACCGACAGACAGCCGGACGGCCGCGGCGCAGACGGTCACCATCACAGATGTCGTCATGGATCAGCGAGGCGTTGTGAAGCACCTCGATGGCCACCGCCCAGTGAAGGCCGCCGTCCTGTCGACTTTCAGGAAGTCGGCGGCGCGAAGGGCCATCTTGGCGCGCAGCATCTTGCCAGGGGCGGCGAAATGGTGAAGCGCCGCATCGGCCAGCGGCTGACCAGGCTGCGGCATGCGCTCGATAACGAATTCATGGATGATTTCCGAAATGACTGCGAAACCGGAACTCCCCGTACCGGAGTCGGAGAAGATCTCGTTCACTGGGGTGGACAGGTCAGTCATGCTTACCGTGCTTCAACTATTCCACGAGTGTAGCACGTAGTAATTTCAATATATTATGTGAACACGGGCGACGATTTGGTGCCGCCCCGTTTCTCTTGATTTGTTATGCTTCTTTGCTGTCGCGTTGGCGGCTGCCCAGATTACACCGACGAAGGCGATCTTATTAACAACGTCAGCGACGTTGTAGATCACATTCAATGTCACCGCGTCAGCAGCGCCATTTAGGTAACCGAGGAAGTAGCCTAGTGGGTAGATCGCCCAGCCAACGTAACGATTAACCGCATCGTACCGAAAGACTTCTGGACGGCCTCCGGAGCGGACTCCGCAGATACCCTTCCTGCTTCACCGGCAAAAATCTCATACAAGATGTATGCCCATCCCGCCATGCCAACGACAAAGCCAAGCCAAGCGTTCACGAGACCTGCTTCGCCGAGGTAACCAGCGACAGCATTACCAGCGACCCAATCATCAGGCGCCAAAAAATGCCAGCGCTGACAACTGCGACTGCCGCTAGAATGAAGTAGAATTCAACCATTTGGAGAGGCACGGTTATTAGCCAGTCAACATACCTGTAAACTGTTGGCGTCTCACCTGTGCTGCCCATACGTCGCGCATGTAAAAGTAATGAACTGCCGCAACGATCTGCACTAGGCACCGATCGTCATAGCAGTCTTCCATTTGCCTGGCAGGCGAGCGGCTTCCATCAGGAAGAAAATTGCCGCGGCCATCATCGCCATTGAAATGGTCCAGAAAGATATGCCCACAAAGTCGGTAGCGGCAAGCATTGTCGTTTCTGCATTTGCGACTGATGGGAGAATGGCAAGTC
+>10/1
+AAACTCAACAAACTTAGCGTTGTGCTTTTTATGTAGTTCGTATAGTGCAGTTTTTTTTATATTCATATTAACTCTCTAACCCCCTCTGTCGTGGTGCCTGAGAGATTTTAGCTCCTTCGGCGAGAATTTGTCTCTTTCCAGAGTTAATATGTACGGTCCTTTTGCCTGAGAGTTTCCGGGGTGGTTGCTCCTTCGGCGCTACTCAATGTAGTCTCTCCCGTATAAAATTATATTAGCATATTTTTGACAATCGTCATCAAGAAATAGTTTTTTTTTCTAGTTTAATAACATTGTAAAACTGTAGTAATACTGCGAATAATACAATTGACCCCCCAATTAAAACAACAAATGGTGGATTTTCACTCACAAATAACCATGCCCAAAGAGGTCCAAGAATAGCTTCAGTGAGCATTATGATACCAACAATTGCCGAGGGCGTAGATCTTGCTCCAATTGTTATAAAAATAAAACCAAAACCTAATTGAAAAAAACCTGCTAGAAAACCTAAAAATATATCATATGAAGAAATATTAATTTTACCTGCAACCAAATAACCAACAGACATTGCAACTATTCCAGCAATTAATTGTAACGGTATCATGTCTACATCTGGATATTTTCTAATAATAATAATTAAGATTGCAAATGACACAGGCATAATAAATGCTGCAATATTTCCAGACATTTGTCCTGGAGACAAAGATCCCCCAACCATTAAAATAATACCAGAGATAGCTAGTATAATTGATATTAAAGTTAATTTTGAAATTTTTCTTTTAAAAATAAATATCCAAAGATTGCAAAAAATTGTTTGAGTTTGAATAATAAATTAGCATTGGCTACTGTTGTATTATACATTGCAAAACATAGCCACAAAACCCTGAAGATAAAACTATACCACCGATAAGACCGGGAAAGCCTGATTTCTTAAATGCATGAAAAAACATTTTTTTTATAAGTAACGAGTAAAAAATTGATACAACTAAAATAAAAAAAGAGATCTCCAAAAAAGTATCTGCCATAATGTGGCGCCCTCGAAAGATTTTACAATCATCCTCCAAAACTTAAACTAGTGGCACCAAAAAAACTAAAAGTGGACCCGGTAATCTTGTATAAAATTCATCTTATTTTATTAATTATATTCTTAGTTTCCATAGTATAAATTTTGTATAAATTTTCTGCCTCGTAAGACTTATAATTTTGAATTTAATTTTTTCTCCAGGTGGGCATTGTGCTAACTTGTCATAATCTGCACTTATTACAACAGCAATTTTTGGATAACCACCTATTGTTCCATGATCTGAAAGCATTACTATAGGGTCTCCGTCAGCTGGAACTTGTATTACTCCTTTTCCTAATCCTTCAGACTTGATGTTTGTTTTCTTAAATTCTTAATTTTAGGTCCACTTAATCTCATTCCCATCCTATCGGTAAGTTTTGAGACAATAAATCTTTACTTGTGAAATCTTTGATTGAGTCTTCTGAGAAGTAATCAAAGTTTGTACCTTTATCACTCTTATAAATTCTATTTTTGAGTTTAAGTGTTCTAAACTTTTTTCAATTGAGCTTTCAGAAGCATTTAAAATTATTGTTTGTCCAATTTCTATTTTTTTCCCTCATTAGGACCTACTTCAGCTCTAACAGTTGTCGAATAACTATTACAAAATTTATTAATCTGAAACCACCTTTTACCGAAAGGTAGCCATAGACTGACTTTTCAGTTGATATAATATCAAGACAGTCTTTATCATTTAGATTGTAAGTTCTATAACATTCACCTTTTATTTCAGAGGTGTCTTTTTTAATTATCTTAAAATTAACATT
b
diff -r 000000000000 -r b04960a7abf5 test-data/s_mga.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/s_mga.bed Wed Mar 21 17:15:25 2018 -0400
b
@@ -0,0 +1,35 @@
+1/1 1811 1994 1/1:gene_1 13 - 1811 1994 0 1 183 0
+2/1 0 414 2/1:gene_1 35 + 0 414 0 1 414 0
+2/1 821 1079 2/1:gene_2 22 + 821 1079 0 1 258 0
+2/1 1960 2140 2/1:gene_3 12 + 1960 2140 0 1 180 0
+3/1 61 220 3/1:gene_1 7 + 61 220 0 1 159 0
+3/1 559 754 3/1:gene_2 29 + 559 754 0 1 195 0
+3/1 1238 1403 3/1:gene_3 3 + 1238 1403 0 1 165 0
+3/1 1581 1798 3/1:gene_4 14 - 1581 1798 0 1 217 0
+4/1 2 431 4/1:gene_1 18 - 2 431 0 1 429 0
+4/1 1464 1746 4/1:gene_2 11 + 1464 1746 0 1 282 0
+5/1 0 191 5/1:gene_1 25 + 0 191 0 1 191 0
+5/1 397 619 5/1:gene_2 13 + 397 619 0 1 222 0
+5/1 780 1143 5/1:gene_3 26 + 780 1143 0 1 363 0
+5/1 1163 1412 5/1:gene_4 8 + 1163 1412 0 1 249 0
+5/1 1811 1871 5/1:gene_5 4 - 1811 1871 0 1 60 0
+6/1 149 344 6/1:gene_1 6 - 149 344 0 1 195 0
+6/1 401 791 6/1:gene_2 19 - 401 791 0 1 390 0
+6/1 1626 1941 6/1:gene_3 35 - 1626 1941 0 1 315 0
+7/1 0 330 7/1:gene_1 32 - 0 330 0 1 330 0
+7/1 358 997 7/1:gene_2 52 - 358 997 0 1 639 0
+7/1 1076 1349 7/1:gene_3 32 - 1076 1349 0 1 273 0
+7/1 1481 1703 7/1:gene_4 14 - 1481 1703 0 1 222 0
+7/1 1747 1857 7/1:gene_5 2 - 1747 1857 0 1 110 0
+8/1 0 193 8/1:gene_1 5 + 0 193 0 1 193 0
+8/1 589 745 8/1:gene_2 19 + 589 745 0 1 156 0
+8/1 1836 1903 8/1:gene_3 6 + 1836 1903 0 1 67 0
+9/1 117 543 9/1:gene_1 3 - 117 543 0 1 426 0
+9/1 622 901 9/1:gene_2 12 - 622 901 0 1 279 0
+9/1 946 1105 9/1:gene_3 3 - 946 1105 0 1 159 0
+9/1 1143 1317 9/1:gene_4 3 - 1143 1317 0 1 174 0
+9/1 1313 1505 9/1:gene_5 8 - 1313 1505 0 1 192 0
+9/1 1548 1727 9/1:gene_6 9 - 1548 1727 0 1 179 0
+10/1 256 715 10/1:gene_1 48 - 256 715 0 1 459 0
+10/1 1136 1310 10/1:gene_2 12 - 1136 1310 0 1 174 0
+10/1 1626 1824 10/1:gene_3 10 - 1626 1824 0 1 198 0
b
diff -r 000000000000 -r b04960a7abf5 test-data/s_mga.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/s_mga.tsv Wed Mar 21 17:15:25 2018 -0400
b
@@ -0,0 +1,36 @@
+#seq_id seq_model seq_gc seq_rbs gene ID start pos end pos strand frame complete/partial gene score used model rbs start rbs end rbs score
+1/1  b 0.275862 0.428571 gene_1 1812 1994 - 0 11 12.4805 b 2002 2007 0.494927
+2/1  b 0.338877 0.428571 gene_1 1 414 + 0 01 34.5932 s . . .
+2/1  b 0.338877 0.428571 gene_2 822 1079 + 0 11 21.7601 s . . .
+2/1  b 0.338877 0.428571 gene_3 1961 2140 + 0 11 11.9821 s . . .
+3/1  b 0.269188 0.428571 gene_1 62 220 + 0 11 6.91386 p 53 58 4.6421
+3/1  b 0.269188 0.428571 gene_2 560 754 + 0 11 28.0223 p . . .
+3/1  b 0.269188 0.428571 gene_3 1239 1403 + 0 11 2.03908 p . . .
+3/1  b 0.269188 0.428571 gene_4 1582 1798 - 1 01 13.4314 p . . .
+4/1  b 0.550169 0.428571 gene_1 3 431 - 0 11 17.2128 s 439 444 -2.92195
+4/1  b 0.550169 0.428571 gene_2 1465 1746 + 0 11 10.92 s . . .
+5/1  b 0.256547 0.428571 gene_1 1 191 + 2 01 24.4289 b . . .
+5/1  b 0.256547 0.428571 gene_2 398 619 + 0 11 12.6188 b . . .
+5/1  b 0.256547 0.428571 gene_3 781 1143 + 0 11 25.1534 b . . .
+5/1  b 0.256547 0.428571 gene_4 1164 1412 + 0 11 7.23104 b . . .
+5/1  b 0.256547 0.428571 gene_5 1812 1871 - 0 01 3.07971 b . . .
+6/1  b 0.329727 0.428571 gene_1 150 344 - 0 11 5.73124 s . . .
+6/1  b 0.329727 0.428571 gene_2 402 791 - 0 11 18.3505 s . . .
+6/1  b 0.329727 0.428571 gene_3 1627 1941 - 0 01 34.5233 s . . .
+7/1  b 0.316101 0.428571 gene_1 1 330 - 0 10 31.4857 b . . .
+7/1  b 0.316101 0.428571 gene_2 359 997 - 0 11 51.7886 b 1005 1010 2.3206
+7/1  b 0.316101 0.428571 gene_3 1077 1349 - 0 11 31.1612 b 1355 1360 0.0639296
+7/1  b 0.316101 0.428571 gene_4 1482 1703 - 0 11 13.9061 b 1716 1721 2.83485
+7/1  b 0.316101 0.428571 gene_5 1748 1857 - 2 01 1.33924 b . . .
+8/1  b 0.35155 0.428571 gene_1 1 193 + 1 01 4.66209 b . . .
+8/1  b 0.35155 0.428571 gene_2 590 745 + 0 11 18.2213 b 571 576 3.26585
+8/1  b 0.35155 0.428571 gene_3 1837 1903 + 0 10 5.66021 b 1824 1829 2.7516
+9/1  b 0.57209 0.428571 gene_1 118 543 - 0 11 2.97762 p . . .
+9/1  b 0.57209 0.428571 gene_2 623 901 - 0 11 11.7916 p 904 909 0.279428
+9/1  b 0.57209 0.428571 gene_3 947 1105 - 0 11 2.76029 p . . .
+9/1  b 0.57209 0.428571 gene_4 1144 1317 - 0 11 2.08585 p 1321 1326 4.2111
+9/1  b 0.57209 0.428571 gene_5 1314 1505 - 0 11 7.00466 p . . .
+9/1  b 0.57209 0.428571 gene_6 1549 1727 - 2 01 8.49456 p . . .
+10/1  b 0.314145 0.428571 gene_1 257 715 - 0 11 47.9897 b 722 727 2.3206
+10/1  b 0.314145 0.428571 gene_2 1137 1310 - 0 11 11.4563 b 1322 1327 3.1826
+10/1  b 0.314145 0.428571 gene_3 1627 1824 - 0 01 9.87622 b . . .
b
diff -r 000000000000 -r b04960a7abf5 test-data/s_mga.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/s_mga.txt Wed Mar 21 17:15:25 2018 -0400
b
@@ -0,0 +1,65 @@
+# 1/1
+# gc = 0.275862, rbs = 0.428571
+# self: b
+gene_1 1812 1994 - 0 11 12.4805 b 2002 2007 0.494927
+# 2/1
+# gc = 0.338877, rbs = 0.428571
+# self: b
+gene_1 1 414 + 0 01 34.5932 s . . .
+gene_2 822 1079 + 0 11 21.7601 s . . .
+gene_3 1961 2140 + 0 11 11.9821 s . . .
+# 3/1
+# gc = 0.269188, rbs = 0.428571
+# self: b
+gene_1 62 220 + 0 11 6.91386 p 53 58 4.6421
+gene_2 560 754 + 0 11 28.0223 p . . .
+gene_3 1239 1403 + 0 11 2.03908 p . . .
+gene_4 1582 1798 - 1 01 13.4314 p . . .
+# 4/1
+# gc = 0.550169, rbs = 0.428571
+# self: b
+gene_1 3 431 - 0 11 17.2128 s 439 444 -2.92195
+gene_2 1465 1746 + 0 11 10.92 s . . .
+# 5/1
+# gc = 0.256547, rbs = 0.428571
+# self: b
+gene_1 1 191 + 2 01 24.4289 b . . .
+gene_2 398 619 + 0 11 12.6188 b . . .
+gene_3 781 1143 + 0 11 25.1534 b . . .
+gene_4 1164 1412 + 0 11 7.23104 b . . .
+gene_5 1812 1871 - 0 01 3.07971 b . . .
+# 6/1
+# gc = 0.329727, rbs = 0.428571
+# self: b
+gene_1 150 344 - 0 11 5.73124 s . . .
+gene_2 402 791 - 0 11 18.3505 s . . .
+gene_3 1627 1941 - 0 01 34.5233 s . . .
+# 7/1
+# gc = 0.316101, rbs = 0.428571
+# self: b
+gene_1 1 330 - 0 10 31.4857 b . . .
+gene_2 359 997 - 0 11 51.7886 b 1005 1010 2.3206
+gene_3 1077 1349 - 0 11 31.1612 b 1355 1360 0.0639296
+gene_4 1482 1703 - 0 11 13.9061 b 1716 1721 2.83485
+gene_5 1748 1857 - 2 01 1.33924 b . . .
+# 8/1
+# gc = 0.35155, rbs = 0.428571
+# self: b
+gene_1 1 193 + 1 01 4.66209 b . . .
+gene_2 590 745 + 0 11 18.2213 b 571 576 3.26585
+gene_3 1837 1903 + 0 10 5.66021 b 1824 1829 2.7516
+# 9/1
+# gc = 0.57209, rbs = 0.428571
+# self: b
+gene_1 118 543 - 0 11 2.97762 p . . .
+gene_2 623 901 - 0 11 11.7916 p 904 909 0.279428
+gene_3 947 1105 - 0 11 2.76029 p . . .
+gene_4 1144 1317 - 0 11 2.08585 p 1321 1326 4.2111
+gene_5 1314 1505 - 0 11 7.00466 p . . .
+gene_6 1549 1727 - 2 01 8.49456 p . . .
+# 10/1
+# gc = 0.314145, rbs = 0.428571
+# self: b
+gene_1 257 715 - 0 11 47.9897 b 722 727 2.3206
+gene_2 1137 1310 - 0 11 11.4563 b 1322 1327 3.1826
+gene_3 1627 1824 - 0 01 9.87622 b . . .