Repository 'cpt_convert_mga'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_convert_mga

Changeset 0:d71dee3fdc80 (2022-05-13)
Next changeset 1:ecbddc0c76d2 (2022-05-19)
Commit message:
Uploaded
added:
cpt_convert_mga/cpt-macros.xml
cpt_convert_mga/cpt_convert_mga_to_gff3.py
cpt_convert_mga/cpt_convert_mga_to_gff3.xml
cpt_convert_mga/macros.xml
cpt_convert_mga/test-data/ConvMga_In.fa
cpt_convert_mga/test-data/ConvMga_In.out
cpt_convert_mga/test-data/ConvMga_Out.gff3
b
diff -r 000000000000 -r d71dee3fdc80 cpt_convert_mga/cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_convert_mga/cpt-macros.xml Fri May 13 17:57:10 2022 +0000
[
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<macros>
+ <xml name="gff_requirements">
+ <requirements>
+ <requirement type="package" version="2.7">python</requirement>
+ <requirement type="package" version="1.65">biopython</requirement>
+ <requirement type="package" version="2.12.1">requests</requirement>
+ <yield/>
+ </requirements>
+ <version_command>
+ <![CDATA[
+ cd $__tool_directory__ && git rev-parse HEAD
+ ]]>
+ </version_command>
+ </xml>
+ <xml name="citation/mijalisrasche">
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+ </xml>
+ <xml name="citations">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation> 
+ <yield/>
+ </citations>
+ </xml>
+     <xml name="citations-crr">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+ <yield/>
+ </citations>
+ </xml>
+        <xml name="citations-2020">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+                        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+                        <yield/>
+ </citations>
+ </xml>
+        <xml name="citations-2020-AJC-solo">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+                        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+                        <yield/>
+ </citations>
+ </xml>
+        <xml name="citations-clm">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+                        <yield/>
+ </citations>
+ </xml>
+        <xml name="sl-citations-clm">
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+                        <yield/>
+ </xml>
+</macros>
b
diff -r 000000000000 -r d71dee3fdc80 cpt_convert_mga/cpt_convert_mga_to_gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_convert_mga/cpt_convert_mga_to_gff3.py Fri May 13 17:57:10 2022 +0000
[
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+import sys
+import argparse
+from Bio import SeqIO
+from Bio.SeqFeature import SeqFeature
+from Bio.SeqFeature import FeatureLocation
+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+
+def mga_to_gff3(mga_output, genome):
+    """Convert MetaGeneAnnotator (MGA) predictions into annotated records.
+
+    Args:
+        mga_output: Iterable of text lines in MGA's tabular output format
+            (``# <sequence id>`` headers followed by 11-column gene rows).
+        genome: FASTA handle (or path) that ``SeqIO.parse`` can read.
+
+    Yields:
+        One record per sequence header found in ``mga_output``. Each gene
+        row adds a ``gene`` feature to the current record, holding a ``CDS``
+        sub-feature and, when MGA reports one, a
+        ``Shine_Dalgarno_sequence`` sub-feature. Nothing is yielded when
+        ``mga_output`` contains no sequence header.
+
+    Raises:
+        Exception: A header names a sequence that is absent from ``genome``.
+        ValueError: A gene row appears before any sequence header, or does
+            not have exactly 11 tab-separated columns.
+    """
+    seq_dict = SeqIO.to_dict(SeqIO.parse(genome, "fasta"))
+
+    current_record = None
+    for line_number, line in enumerate(mga_output, start=1):
+        if not line.strip():
+            # Tolerate blank lines (e.g. a stray newline at end of file).
+            continue
+
+        if line.startswith("#"):
+            # "# gc = ..." and "# self: ..." are metadata lines that carry
+            # no sequence identifier.
+            if line.startswith(("# gc = ", "# self:")):
+                continue
+
+            # Header looks like "# <id> [description]"; keep only the id.
+            chromId = line.strip().replace("# ", "").split(" ", 1)[0]
+
+            if chromId not in seq_dict:
+                raise Exception(
+                    "Found results for sequence %s which was not in fasta file sequences (%s)"
+                    % (chromId, ", ".join(seq_dict.keys()))
+                )
+
+            # A new header closes the previous record, so emit it first.
+            if current_record is not None:
+                yield current_record
+            current_record = seq_dict[chromId]
+            continue
+
+        if current_record is None:
+            raise ValueError(
+                "Line %d: gene row found before any '# <sequence id>' header"
+                % line_number
+            )
+
+        fields = line.strip().split("\t")
+        if len(fields) != 11:
+            raise ValueError(
+                "Line %d: expected 11 tab-separated columns, found %d"
+                % (line_number, len(fields))
+            )
+        (
+            gene_id,
+            start,
+            end,
+            strand,
+            phase,
+            _complete,
+            _score,
+            _model,
+            rbs_start,
+            rbs_end,
+            _rbs_score,
+        ) = fields
+        start = int(start)
+        end = int(end)
+        strand = +1 if strand == "+" else -1
+
+        # Correct for gff3: FeatureLocation is 0-based and end-exclusive, so
+        # the start reported by MGA is shifted down by one.
+        start -= 1
+
+        rbs_feat = None
+        if rbs_start != "-":
+            # NOTE(review): rbs_start is not shifted like `start` above, so
+            # the written RBS (and a forward-strand gene) begins one base
+            # after the MGA-reported RBS start (615 -> 616 in the bundled
+            # test data). Left unchanged because the expected test output
+            # relies on it -- confirm whether this is intended.
+            rbs_start = int(rbs_start)
+            rbs_end = int(rbs_end)
+            rbs_feat = gffSeqFeature(
+                FeatureLocation(rbs_start, rbs_end),
+                type="Shine_Dalgarno_sequence",
+                strand=strand,
+                qualifiers={
+                    "ID": "%s.rbs_%s" % (current_record.id, gene_id),
+                    "Source": "MGA",
+                },
+                phase=phase,
+                source="MGA"
+            )
+
+        cds_feat = gffSeqFeature(
+            FeatureLocation(start, end),
+            type="CDS",
+            strand=strand,
+            qualifiers={
+                "Source": "MGA",
+                "ID": "%s.cds_%s" % (current_record.id, gene_id),
+            },
+            phase=phase,
+            source="MGA"
+        )
+
+        # The gene spans the CDS plus the RBS, which sits upstream of the
+        # CDS: before it on the forward strand, after it on the reverse.
+        gene_start = start
+        gene_end = end
+        if rbs_feat is not None:
+            if strand > 0:
+                gene_start = rbs_start
+            else:
+                gene_end = rbs_end
+
+        gene = gffSeqFeature(
+            FeatureLocation(gene_start, gene_end),
+            type="gene",
+            strand=strand,
+            id="%s.%s" % (current_record.id, gene_id),
+            qualifiers={
+                "Source": "MGA",
+                "ID": "%s.%s" % (current_record.id, gene_id),
+            },
+            phase=phase,
+            source="MGA"
+        )
+
+        gene.sub_features = [cds_feat]
+        if rbs_feat is not None:
+            gene.sub_features.append(rbs_feat)
+        current_record.features.append(gene)
+
+    # Emit the last record; input without any header yields nothing rather
+    # than handing None to the caller.
+    if current_record is not None:
+        yield current_record
+
+
+if __name__ == "__main__":
+    # Command-line entry point: convert the MGA table and write each
+    # resulting record to stdout as GFF3.
+    cli = argparse.ArgumentParser(description="Convert MGA to GFF3", epilog="")
+    cli.add_argument(
+        "mga_output",
+        type=argparse.FileType("r"),
+        help="MetaGeneAnnotator Output",
+    )
+    cli.add_argument(
+        "genome",
+        type=argparse.FileType("r"),
+        help="Fasta Genome",
+    )
+    options = vars(cli.parse_args())
+
+    # Argument names match the parameters of mga_to_gff3, so the parsed
+    # namespace can be passed straight through as keyword arguments.
+    for record in mga_to_gff3(**options):
+        gffWrite([record], sys.stdout)
b
diff -r 000000000000 -r d71dee3fdc80 cpt_convert_mga/cpt_convert_mga_to_gff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_convert_mga/cpt_convert_mga_to_gff3.xml Fri May 13 17:57:10 2022 +0000
b
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<tool id="edu.tamu.cpt2.util.mga_to_gff3" name="MGA to GFF3" version="19.1.0.0">
+ <description></description>
+ <macros>
+ <import>cpt-macros.xml</import>
+ <import>macros.xml</import>
+ </macros>
+ <expand macro="requirements"/>
+ <command detect_errors="aggressive">
+@GENOME_SELECTOR_PRE@
+
+python '$__tool_directory__/cpt_convert_mga_to_gff3.py'
+'$mga'
+@GENOME_SELECTOR@
+> '$data'
+
+</command>
+ <inputs>
+ <param label="MGA Output" name="mga" type="data" format="tabular"/>
+ <expand macro="genome_selector" />
+ </inputs>
+ <outputs>
+ <data format="gff3" name="data">
+ </data>
+ </outputs>
+ <tests>
+ <test>
+ <param name="reference_genome_source" value="history" />
+ <param name="genome_fasta" value="ConvMga_In.fa" />
+ <param name="mga" value="ConvMga_In.out" />
+ <output name="data" file="ConvMga_Out.gff3" />
+ </test>
+ </tests>
+ <help>
+**What it does**
+
+Converts an input MetaGeneAnnotator table to the GFF3 format.
+ </help>
+ <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r d71dee3fdc80 cpt_convert_mga/macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_convert_mga/macros.xml Fri May 13 17:57:10 2022 +0000
b
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<macros>
+ <xml name="requirements">
+ <requirements>
+ <requirement type="package" version="3.7">python</requirement>
+ <requirement type="package" version="1.77">biopython</requirement>
+ <requirement type="package" version="1.1.3">cpt_gffparser</requirement>  
+ <yield/>
+ </requirements>
+ </xml>
+ <xml name="genome_selector">
+ <conditional name="reference_genome">
+ <param name="reference_genome_source" type="select" label="Reference Genome">
+ <option value="history" selected="True">From History</option>
+ <option value="cached">Locally Cached</option>
+ </param>
+ <when value="cached">
+ <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+ <options from_data_table="all_fasta"/>
+ </param>
+ </when>
+ <when value="history">
+ <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+ </when>
+ </conditional>
+ </xml>
+ <xml name="gff3_input">
+ <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+ </xml>
+ <xml name="input/gff3+fasta">
+ <expand macro="gff3_input" />
+ <expand macro="genome_selector" />
+ </xml>
+ <token name="@INPUT_GFF@">
+ "$gff3_data"
+ </token>
+ <token name="@INPUT_FASTA@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+ "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+#end if
+ </token>
+ <token name="@GENOME_SELECTOR_PRE@">
+#if str($reference_genome.reference_genome_source) == 'history':
+ ln -s '$reference_genome.genome_fasta' genomeref.fa;
+#end if
+ </token>
+ <token name="@GENOME_SELECTOR@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+ "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+#end if
+ </token>
+        <xml name="input/fasta">
+ <param label="Fasta file" name="sequences" type="data" format="fasta"/>
+ </xml>
+
+ <token name="@SEQUENCE@">
+ "$sequences"
+ </token>
+ <xml name="input/fasta/protein">
+ <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
+ </xml>
+</macros>
b
diff -r 000000000000 -r d71dee3fdc80 cpt_convert_mga/test-data/ConvMga_In.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_convert_mga/test-data/ConvMga_In.fa Fri May 13 17:57:10 2022 +0000
b
b'@@ -0,0 +1,2300 @@\n+>01\n+TACTACTATTACTACTACTAAGTACCTTTGTTATGTACTACTATTACTACTACTATTACT\n+ACTACTATTACTATTACTACTACTATTACTACTACTATTACTACTACTATTACTACTACT\n+ATTACTACTACTACTAAGTACCTGGGAATTCTTTTACCTCTCTCACTCAGCCTATTACTT\n+ATTACCGACTTCCCTAACTACTTATTCTATAGTTATAATATTCATTTATTATACAATACT\n+TAAACTATAGTATTCTACTGTTAATCTATGCTGAAGCGGTCTTAATCTATGGTTATTATA\n+TAATAATCTTATATAATGGTACATTAATCTAGTATATTACATTAGAATCATTCTAATCTA\n+GGATTTTAATCTTTAGACCCTAGGAAAAGTGGTACTAAAATATAAAACCCTATAGGTATG\n+GGATTCTTATTTTTAAAATTACTAAAAAGTATTAGGTTTTCCCTAGGGCAAAGTTTTAAT\n+GTACTTAAAATAGTAAGTAGCTACTTATCATTTAGGGTTCTATAATTGAGAATATTGAGA\n+GATAATCCGCTTCAATTGTAATTAATTGTTGACAACTATGAAGCGGGTATGCTATAATTA\n+GGTATAGTCAAATTTAGGAGATGAAATAGATGATTGATATATACTTAGGAGAAGGTTATA\n+ATAAAGAATACTTGTCTAAAGCACTCAGATTAATCAATGACCATGCTCCTAGGGAGTTAA\n+GTTATGATTTTAATAATGTAGAAGCGGATGTTAATATTCACACAATGTTATATGTTAAAC\n+CTGAAGATAGATTTATATATAAGGATATATCCTATTACTTCCCGGGTGATTTAATTATTT\n+GTATAGTTGATGATGATGCTATTGTATACCACCAAGGTGAGCAGATTTCAGGTATTAGTA\n+TTTTAAGAATACTAGAAGAGATATTTTAAGGAGGATAAGTAATCATGATAGGAATAACAA\n+TATTAATTACGATAATGAGTATATCAACTATCTCTATGTATATTTATTTTTTAGTAGACT\n+TGATTCAGTCAATCAGATATAATAGTTTTGATAAGGTAATTAACGTCATAACATTTGTAC\n+TTATGACAGTTATAATAGCATCAGGTATTTTAGCTATACTTGGAATATAGAGCTCATTTA\n+AGAAGCGGTTAAGTAGTTAGAGGGGATTTGTCCTAAAATAGTATACCGCTTCTATATGGA\n+AGGCTGAGAGGTCTTAGAATTGAAAGGAGAGATATAATGATTCATATATTTGTAAAAGAG\n+GATTATAATAAAGAAACATTAAGGAGTTTACTTGAGTATATTAATGATACTGTAGGTAGG\n+GAATTAACTTATGGTATTAATACAGACTATGATAAGGATGTCGTGATTGAAACCGATGAC\n+CCTATAGATGAGGAGGATACAATTGAGTTATCAGGTACAAACATGTTCAAGGATGACTTA\n+TGTATTCTTATAGAAGAGCTATACTGTAAGGCATTTGTTAATGGTGAACCTGTTATTATA\n+CGTAAGTATGTAGAGGAGATGTTATAATGATTATAATATTTTTAACTGAAAAATATGATG\n+CCAAGGCTTTAAAGAAAGTATTAGAACATATTGATAATTGTAGTAGTAGAGGTCTTAGCT\n+ATTTAATGGGAAAAGGAGAAGCGGATGTATGTATAGAGAAAAATGTATTTAGAGAAAGAG\n+ATGATGTAAGGATTAACTCAAACATTATTGATGAAGGTAAACTTTGTATACTAATAAATA\n+GACATGGTTTAGAATGTAGCTACTATAGAGGTATATCATGTAATATTGGTTCCTTCGTAA\n+AGGAGAGATTATAATGATAGAGATATACCTTAGTGAAAATTATGATAAGAATTTACTAAA\n+AGCAGAATTAAAATGGAT
TAAAGAGACCGCTTCAAGAGAACTAACTTATGATGTTAATAG\n+AAGTCCAGGATTGGATGTTTATGTTAATCCCTATAGGTGTACTAAAGACGAAGTTGAAGA\n+ATGGAGTACACTTCCTCCATTTGAAGATGATATACTTGTATTTATAGCGGAGACGTGGAT\n+ACATGAATATCTTAAGGGTGAATCAATAGGTGTAGATAGTATGGAAGAGTATGTAAAGGA\n+GATGTAACTAATGTTTAAGGTATATTATACAGTCTACCATAGAGGTAGTATGAAAACTAT\n+TAAGGATAAGCTAGATAGAAGTAGTTTAATATACTTCTTGTATGATACTTGGTATAAAGA\n+TATTAGTAACGTATTCCCTAATCACTATAATAAAGAGTTTGGGAGTAAGAGTGATGATAT\n+AGATATAGATAAACTTATTGAAGCGGTTAATGAGGAAGGTATATTACTTATCAATAGAGG\n+TAATTATGTTACAATAAGAGAATGGTAGGATAGGATAAACTTAGGATAGAAAATAATTTA\n+GGATGAGTTACAATAGGATAGGATAGGATAGGGGGTTAAGTTAGGATGGATACTTTAACA\n+TACACTATTATTCATAAAGAATCTGATAGGGTAATAGCTAGCGGTTTAAATGAGACAGAA\n+ACTATGAACTTAGTTCAAAGGATGATAAATACTAATCTAGTTACTGATATATCATTAGAT\n+GATTATAAACGCAGACCACATGGAAAGATAGATGTAGTCAATTTACTAGTAGATATTAGA\n+AGACAAGGCGTATTTGATTTCAATCACATTTGGCACGTAGGATAGGAGGGATAGGATGAT\n+AGTTATATATACAGATGTTTCTAAGGATTATTTAAAAGACGAGTTCTTACCTTGGCTTAA\n+TGAAAGGGATAGATACTTAGAATACTATAAAGATGAATTACCTGAGGATATAGATTCCTC\n+TTATATTGTATCAGTTGTATACTGTAAGGATATGGAAGGTCTATTAGAAAGAAAAGACAT\n+TGTTCTTGATAATAGTTATAATGAACCTGTAGCTTTATTAGGTGTTCCTGAGTTTTTTGG\n+TAATTATAGTAATTATTTCTATTATAGAGGAGAAAGTATTAGTAAACATGACCTAGGAGA\n+AATTGTTAGGTTAAAAGCTTGGCAACGTATGGGTGGGGATTGACTAAGTAGCTCTCCCTA\n+ATTTCACTAAGTAGCTCCCTAGGAATTGCCTAAGTAGCTCGGTATGATTTTACCCTAAGT\n+AGCTCCCTCTGTTTTCTACTAGTTTATTTTAACCGCTTCAGGTGTCTATATATATATAGA\n+CGGTTGGAATAATATCAGACCGCAAAAATAAATACACTAGGATATTATTCCTAGTGTATT\n+ATATAATTTTTTTATAGAATATTTATAACATTGTATTCAAATTCATTTACTTCATGTTGT\n+GATTTAATTAAATTTTTAATTAATCCGTTTTGTGTTTTATACTCTTTTATTAGTTTTTCA\n+TTTTCTATAATTAAATTATTAAATTCTTCTTTTGTTGTTTCCTCATCTACATAAAATTTA\n+CTTTCATATATTTCATAATATTTTTTATCTGTTCCGCCATCTAAATCATCTGATATTTGA\n+TAATTTTTGAATATAATTTCTTTTGTTTCTAATTCATTTACTAATAATTGTGATTTTGCA\n+TATTGTAATACATCTTCATTGTCCCACATTGGAATATAGTTTATTTTCATTTAAATCAAA\n+TCCTTTTCTTATAATTTTTTTATATAATATTTGTAGAAGCGGTTGGGGTTTGTCCCTTGC\n+CTTACTACACTTTATATATTACAGTATAGTTATTCAGAAGTCAATACTTTTGAGTAACTT\n+TTTTTAAATTCTTTTTTCTTCTATATAATAGTAGTTTTTAGCCCTAAAAATGTTTTTAAA\n+AG
AATTTGCATTTTCTTATTGACTTTATTATCATATGGTAGTAATATAAAGGTACAGCAA\n+GGGAAC'..b'ACTTTTGGGGT\n+AATATGACTAAAACTTTACCTAGATTAAAGGATATTATTATGGAACGTAATGGTAAAGTA\n+GTAATCAGACCTGATAGTGGAGACCCTGTTAAAATTATTTGCGGAGACCCTGATGCAGAC\n+ACTGAATATGAACGTAAAGGTGCAGTAGAAGTGCTTTGGGATACATTTGGAGGTACTGAA\n+ACTGAAAAAGGGTACAAAGTATTAGATGAACATGTAGGATTAATTTATGGAGACTCTATT\n+AACTATGAACGTGCTCAACAAATTTGTGAAGGATTAAAAGAAAAAGGTTTTGCAAGTATT\n+AATGTTGTATTAGGTGTAGGTAGTTTCTCTTACCAATTTAATACTCGTGATACCCACGGG\n+TTTGCAATCAAAGCAACGTATGCTAAGATTAAAAATGAAGAAAAACTTATCTATAAAAAT\n+CCTAAAACAGATAGTGGTAAACGTTCACATAAAGGTCGAGTAGCTGTATATAAAGACGGT\n+TCATGGGAAGATAACTTAACCTTACATCAATGGCTAAACAAACAAAATGTTAATCAATTA\n+GAAAGAGTATTTGAAGATGGTAAACTTTATAGAGACCAGTCGTTAAGTGAAATTAGAGAA\n+ATAATTAAAAATAATTAATAAATATTTAAACTCCCTATTGACAAAGGGAGTTTTTTATTA\n+TATAGTAGGGCTATAGTAAATAAAGGAGTGAAAGAAATGATTTATAAAATATCAAAACAT\n+AATTACTATAGTAGATTTGAGCATTCCACTTATCCTCCTGATGAGGGGTTTGCGTATGTA\n+GATTATGTAGATGTGATTCTTATTGGTGTAGATAATCCTAGGAAAAGAAAGATTATTACC\n+TTAAAAGTAAATGAGTTCAACCCGGATGACTACAGAGTAGGTCATAAGTACAATATTATA\n+AAAATACTATGGTTTGAAAAATGGGAATGGTTAAAGCCATAAGTAAAAGGAGAGAAATAA\n+AATGATTATAGATAAATTAAATGGAGTTAAATTAGAAATAGGTGGGCATGTCGTATCATT\n+TAGTGTAAGAAAGTTTAATACAATTAATGGTGAGAGACAATTAATAGACTACCATCATAT\n+TAAAAGAAATAGACAACAGTACTTTAGAACTACTGAAGAATTTTATAATGAATATAAAGA\n+AATTAAGCCTGACAAAAATGAAATAGATGAAATGTTTGAATCTCTAGGTTATGTAGATAC\n+TGAGTTAGATGATGTAGTAAGAAACCAGGAAAAGGTTACTGAAATATTAGGAGTTAGTGA\n+ACAATATTTAAATCAGTTATCTTATAAAGCTATAGAGGAGTATGTAGATAAAGTAGTTAC\n+ACTTGAAATTAAAGAGTTGAAAGGAGAGAAATAGCATGAATAATAACTGGGAAAAAGAAG\n+GAGTTAACTATTGGGAAAACGAAGACTGTCCTAGGGAATACTTAGAGAAAGCATTCATTG\n+ACCTGGTAGAATATGTTGAAGGAGTTACAGTACCACCTAAAGATGTTAAGCAGTTAAGAG\n+AAGATAAACTTAGAGAAGATATTGGGTTTTATGAGTACGTAGCTGATAAATAAATTAGTA\n+TCTACCTATTGACTTAGGTAGGTATCTATTATATAATAGTATACAAGGAGATGAAAATAT\n+GAAAAAGTTAATAGTATTACTTACAATTACTATTTCTCTATTACTAGGGGGTTGCTCTCC\n+TGATAACCATGAAGGTAAAGTAGTAGGAGTAGGTGAATACAGAGAACCAACTACTTATAT\n+AAAATCAGGTAGCGTTACTGTACCAGTCATTGGTGAAATGAAATACTATGTAGATTTAGA\n+GACAGATAAAGGAGAAGACCGTGT
ATATCTTAATAAAGAGGTCTATCATAAGTTTGATAA\n+AGGTGATGATTTCTCTAATGTAGGTGAGAAAGTGTATAAGAATGATGAATTAATATATAA\n+AGGAGACTAACTATGTATTTAAATGATTATGTAGGTAAATTTATAAAGGAAGATAACTAT\n+TATGGATATCAATCTACAGACTTAGTATCTAATTATGTTCAACGATTAACTCTAGGTAGG\n+TACAAAACTAAGTTAAATGCTAATAAAATGAAATACGAAAGATTACCTAGTTCTTGGAAA\n+ATAATTAAAGCCAAAGATTTGTTAAGAACAGATGATTATAGAGAAGGAGATATATTTGTA\n+TCAGAAAGAATCTCCGTATTCGGTTTTAATGGTATTATTGTATATAACCATGATTTTAAC\n+AATGTAACTGTTATTACTCAAAATAGAGATGGTAAAGCTACTAATCCTGTAGAGGAGCAT\n+TTATATCCAAAGAAAGATATTGATTATATTATTAGACCTATCGAGAGGGACTACAGGGAA\n+TACTTTAAAAAATCAGATTCAAAAGAAAAAGTTACTCTTTCGAAGCAAGAATATAAAAAA\n+TTATTAGAGGCTTATAATAAAATGAAGGAAGTGTTTAAGTAATATGAATAGTACAAAATT\n+AGTAGAGTACTTTACAAATAAACAAGGTAAATCTCTAATATTACCTGATGAAAATAAAGT\n+TGAGTTATATAGAGTTGATGTAACACCTTATACTATGAGACTTAATTTCACTTACAATAC\n+AGAAGTTGTAGCTATAGATATTGATAAGTTACACTCAGATTCTATAGAAATGCATATACC\n+ACAAGGTCTTTATATAACAACTGTTGTTAAAATTACTAGTACGCAGAGTATTAGTTCAGT\n+TCTTCATAAGGTATTAGAGGAATGGGTAAGACAAGTACAAAATGATGGTATATTCGGATT\n+CGTATGGGAGTAATTATAATGATAAGTATAGAACATGATTATACAATAAGAACTGTAGAT\n+AATAGAAAATATACTTATTATAGTAAATACGAATCACTAGTTACTTTGTATGAAAATATT\n+ATGAGTAAAGATTGTATTGAAGTAACTAAATATGGGAAAGATAAAAAAGTTATTATTGAT\n+ACTAGACATATTGTATCTATTGAACGATGGTAAATAATAAGGAGGAGTAAACTATTATGA\n+TAAATGCAGGGCATGCTAAGTACCTATCAGAAATTTATGAAGATGATGTACATTATGAAA\n+CTATAGATAGTATTGTAGAAGATATACTAGATAATATTAATGATGGTATTATTGAAGAAG\n+CTATGAAAGGTAATACAAGTTATCAATATGTTCTTAGAGACTTAAGAGTAGATAATGAAG\n+TAGAATATAGAGTTATAGAAGAACTTACTAACCAAGGATATAGTGTAAACCACATTAGTA\n+ATGATATAGAGTACCCTTCTATATCTACAAATAATTTAGCAGGGTTAGATTACTTAAATA\n+TTAAATGGTAAGGGAGGGAATTAATATGATAAATAAATATAAAAAGTTATGGGATGAAAT\n+AACTCAACAAATTGTTAATGTAGAAATTATTAACTTTAAAAATGAAACAGTAACAATAGA\n+ATCTACAGATGATTCAGGATTATCAGAGATAAGAGGTTTTGAAGAAGTAGAGTTTATAGA\n+TTACTATGGATAAGATGTTTAAAGTATATAATTTATAAGGAGGAAACATATGGACTTGTT\n+TGCAAAAATAATTATTATGTCTATAGGAGTTGTTCCCTTGTTAACTATTATTGTTGCACA\n+GCTAATTACAGATTACCATGATAATCATTAAGTATTATAGTAATAGGAAGGACAATATTT\n+AGAGTGAGAGTATGTTGACTAATGAGGAAGATATAGAATGAGAACCTAACCAAGTAAAAC\n+TAAGTACC
TTTGTTATGTACTACTATTACTACTACTACTATTACTACTACTATTACTACT\n+ACTACTACTACTACTATTACTATTACTACTACTATTACTACTACTAAGTACCTTTGTTAT\n+GTACTACTATTACTA\n+\n'
b
diff -r 000000000000 -r d71dee3fdc80 cpt_convert_mga/test-data/ConvMga_In.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_convert_mga/test-data/ConvMga_In.out Fri May 13 17:57:10 2022 +0000
b
b'@@ -0,0 +1,211 @@\n+# 01\n+# gc = 0.303936, rbs = 0.9375\n+# self: b\n+gene_1\t28\t243\t+\t0\t11\t6.36358\tp\t-\t-\t-\n+gene_2\t630\t929\t+\t0\t11\t57.1705\tp\t615\t620\t12.2467\n+gene_3\t945\t1130\t+\t0\t11\t26.4954\ta\t929\t934\t16.2941\n+gene_4\t1237\t1527\t+\t0\t11\t63.5811\tp\t1224\t1229\t14.3502\n+gene_5\t1527\t1814\t+\t0\t11\t33.8744\tp\t1513\t1518\t13.2667\n+gene_6\t1814\t2107\t+\t0\t11\t52.2562\tp\t1800\t1805\t15.8863\n+gene_7\t2111\t2368\t+\t0\t11\t50.8798\tp\t2096\t2101\t15.9853\n+gene_8\t2446\t2685\t+\t0\t11\t36.2735\ts\t2430\t2435\t10.7912\n+gene_9\t2696\t3043\t+\t0\t11\t69.4745\ts\t2684\t2689\t15.7116\n+gene_10\t3252\t3590\t-\t0\t11\t61.7674\tp\t3601\t3606\t0.728011\n+gene_11\t3901\t4209\t+\t0\t11\t34.7395\ts\t3888\t3893\t-0.104741\n+gene_12\t4415\t4702\t+\t0\t11\t46.3413\tp\t4399\t4404\t-0.522925\n+gene_13\t4752\t4943\t+\t0\t11\t25.5946\tp\t4737\t4742\t-0.222928\n+gene_14\t5260\t5748\t-\t0\t11\t60.0468\tp\t5758\t5763\t15.7116\n+gene_15\t5916\t6074\t+\t0\t11\t7.34623\tp\t5899\t5904\t0.766797\n+gene_16\t6144\t6275\t+\t0\t11\t10.8152\tp\t6128\t6133\t0.728011\n+gene_17\t6443\t6766\t+\t0\t11\t47.907\tp\t6427\t6432\t0.728011\n+gene_18\t6866\t7102\t+\t0\t11\t28.7246\tp\t6850\t6855\t0.728011\n+gene_19\t7182\t7652\t+\t0\t11\t100.397\ts\t7166\t7171\t0.728011\n+gene_20\t7892\t8071\t+\t0\t11\t32.3826\ts\t7876\t7881\t0.728011\n+gene_21\t8405\t8641\t-\t0\t11\t56.3171\tp\t8652\t8657\t15.4219\n+gene_22\t8643\t9128\t-\t0\t11\t103.956\ts\t9136\t9141\t18.4273\n+gene_23\t9141\t9548\t-\t0\t11\t80.6226\tp\t9556\t9561\t18.4273\n+gene_24\t9548\t9979\t-\t0\t11\t93.5448\ts\t9991\t9996\t15.3253\n+gene_25\t9982\t10173\t-\t0\t11\t27.542\tp\t10181\t10186\t18.4273\n+gene_26\t10170\t10655\t-\t0\t11\t49.9923\tp\t10662\t10667\t15.7116\n+gene_27\t10648\t11079\t-\t0\t11\t79.9932\ts\t11087\t11092\t14.3502\n+gene_28\t11093\t11635\t-\t0\t11\t66.0846\tp\t11642\t11647\t-2.78976\n+gene_29\t11647\t12135\t-\t0\t11\t89.8718\ts\t12143\t12148\t14.3502\n+gene_30\t12148\t12546\t-\t0\t11\t63.7164\t
p\t12554\t12559\t12.7215\n+gene_31\t12543\t13250\t-\t0\t11\t111.002\ts\t13258\t13263\t18.4273\n+gene_32\t13350\t13901\t-\t0\t11\t111.317\tp\t13911\t13916\t13.3656\n+gene_33\t13920\t14237\t-\t0\t11\t51.6242\ts\t14247\t14252\t15.9853\n+gene_34\t15223\t15771\t-\t0\t11\t87.2276\ts\t15778\t15783\t4.63439\n+gene_35\t15775\t15993\t-\t0\t11\t50.2172\ts\t16005\t16010\t10.6116\n+gene_36\t15994\t16188\t-\t0\t11\t44.105\tp\t16195\t16200\t10.2405\n+gene_37\t16178\t16915\t-\t0\t11\t127.839\ts\t16924\t16929\t17.0766\n+gene_38\t17094\t17333\t-\t0\t11\t36.3308\tp\t17342\t17347\t0.377039\n+gene_39\t17335\t17724\t-\t0\t11\t89.7706\ts\t17735\t17740\t15.4219\n+gene_40\t17823\t17996\t-\t0\t11\t37.5907\tp\t18007\t18012\t16.2941\n+gene_41\t18037\t18519\t-\t0\t11\t94.8623\tp\t18527\t18532\t18.4273\n+gene_42\t18569\t19111\t-\t0\t11\t133.042\ts\t19122\t19127\t15.4219\n+gene_43\t19111\t19644\t-\t0\t11\t109.849\ts\t19656\t19661\t14.3502\n+gene_44\t19647\t19811\t-\t0\t11\t24.1269\tp\t19823\t19828\t14.3502\n+gene_45\t19814\t20089\t-\t0\t11\t57.1618\ts\t20099\t20104\t10.2405\n+gene_46\t20089\t20934\t-\t0\t11\t133.788\ts\t20942\t20947\t18.4273\n+gene_47\t20946\t22064\t-\t0\t11\t225.008\ts\t22075\t22080\t15.4219\n+gene_48\t22218\t22544\t-\t0\t11\t45.7619\tp\t22554\t22559\t10.2749\n+gene_49\t22537\t22953\t-\t0\t11\t93.1622\tb\t22961\t22966\t18.4273\n+gene_50\t23087\t23389\t-\t0\t11\t82.8253\ts\t23400\t23405\t15.4219\n+gene_51\t23389\t23577\t-\t0\t11\t45.7175\tp\t23586\t23591\t0.377039\n+gene_52\t23621\t23782\t-\t0\t11\t42.3377\tp\t23790\t23795\t18.4273\n+gene_53\t23782\t25830\t-\t0\t11\t332.763\ts\t25840\t25845\t15.9853\n+gene_54\t25908\t26171\t-\t0\t11\t63.8\tp\t26179\t26184\t18.4273\n+gene_55\t26188\t26361\t-\t0\t11\t26.0644\tp\t26372\t26377\t7.89094\n+gene_56\t26368\t26946\t-\t0\t11\t40.4747\tb\t26953\t26958\t-3.90026\n+gene_57\t26939\t27565\t-\t0\t11\t150.252\ts\t27575\t27580\t15.9853\n+gene_58\t27558\t28454\t-\t0\t11\t177.87\ts\t28462\t28467\t18.4273\n+gene_59\t28454\t28678\t-\t0\t11\t27.7492\tb
\t28687\t28692\t17.0766\n+gene_60\t28747\t29487\t-\t0\t11\t105.788\ts\t29501\t29506\t-0.296479\n+gene_61\t29539\t30153\t-\t0\t11\t137.064\ts\t30162\t30167\t15.8863\n+gene_62\t30169\t30678\t-\t0\t11\t30.7749\tp\t-\t-\t-\n+gene_63\t30584\t30775\t-\t0\t11\t31.0006\tp\t30786\t30791\t10.5884\n+gene_64\t30798\t31439\t-\t0\t11\t115.669\tb\t31446\t31451\t10.0059\n+gene_65\t31429\t31659\t-\t0\t11\t42.8517\ts\t31668\t31673\t13.2667\n+gene_66\t31662\t31889\t-\t0\t11\t41.7783\tp\t31897\t31902\t18.4273\n+gene_67\t31999\t32691\t-\t0\t11\t173.713\ts\t32702\t32707\t15.4219\n+gene_68\t32878\t33513\t-\t0\t11\t89.781\tp\t33521\t33526\t18.4273\n+gene_69\t33580\t34371\t-\t0\t11\t170.68\ts\t34379\t34384\t14.3502\n+gene_70\t34371\t34679\t-\t0\t11\t51.9248\ts\t34686\t34691\t15.7116\n+gene'..b'44\t106252\t106959\t+\t0\t11\t99.5882\ts\t106240\t106245\t12.8833\n+gene_145\t107193\t108053\t+\t0\t11\t158.859\ts\t-\t-\t-\n+gene_146\t108122\t108364\t+\t0\t11\t51.6683\ts\t108109\t108114\t18.4273\n+gene_147\t108381\t108863\t+\t0\t11\t94.3678\ts\t108366\t108371\t15.7116\n+gene_148\t108950\t110221\t+\t0\t11\t253.303\ts\t108933\t108938\t14.3502\n+gene_149\t110281\t111537\t+\t0\t11\t279.46\ts\t110268\t110273\t18.4273\n+gene_150\t111541\t111894\t+\t0\t11\t55.5227\ts\t111522\t111527\t-0.222928\n+gene_151\t111881\t112543\t+\t0\t11\t128.504\ts\t111866\t111871\t10.2087\n+gene_152\t112671\t113303\t+\t0\t11\t151.415\ts\t112658\t112663\t-0.104741\n+gene_153\t113326\t113838\t+\t0\t11\t93.6518\ts\t113312\t113317\t-0.54918\n+gene_154\t113853\t114080\t+\t0\t11\t70.6974\ts\t113837\t113842\t16.2941\n+gene_155\t114176\t114436\t+\t0\t11\t54.9897\ts\t114162\t114167\t17.0766\n+gene_156\t114440\t115195\t+\t0\t11\t98.9334\ts\t-\t-\t-\n+gene_157\t115188\t116438\t+\t0\t11\t235.397\ts\t115174\t115179\t0.524595\n+gene_158\t116452\t116820\t+\t0\t11\t31.9307\ts\t116436\t116441\t-0.198208\n+gene_159\t116807\t117118\t+\t0\t11\t65.8932\tp\t116795\t116800\t15.7116\n+gene_160\t117182\t117718\t+\t0\t11\t84.4833\ts\t117166\t117171\t2.1
8516\n+gene_161\t117711\t118478\t+\t0\t11\t94.9641\tp\t117696\t117701\t15.9853\n+gene_162\t118456\t118902\t+\t0\t11\t50.1785\ts\t-\t-\t-\n+gene_163\t118902\t119765\t+\t0\t11\t127.299\tp\t118885\t118890\t0.766797\n+gene_164\t120137\t120868\t+\t0\t11\t162.867\ts\t120122\t120127\t15.9853\n+gene_165\t120886\t121344\t+\t0\t11\t84.6329\tp\t120872\t120877\t-0.830994\n+gene_166\t121409\t121852\t+\t0\t11\t86.4653\tp\t121394\t121399\t15.9853\n+gene_167\t121869\t122573\t+\t0\t11\t128.068\ts\t121856\t121861\t10.0241\n+gene_168\t122635\t123033\t+\t0\t11\t38.5561\tp\t122621\t122626\t2.96764\n+gene_169\t123180\t123422\t+\t0\t11\t53.8202\ts\t123165\t123170\t15.9853\n+gene_170\t123427\t123591\t+\t0\t11\t38.5845\tp\t123413\t123418\t15.8863\n+gene_171\t123578\t123757\t+\t0\t11\t19.8686\tp\t-\t-\t-\n+gene_172\t123793\t123969\t+\t0\t11\t46.266\tp\t123778\t123783\t15.7116\n+gene_173\t123962\t124126\t+\t0\t11\t16.4271\tb\t123949\t123954\t11.7305\n+gene_174\t124375\t124494\t+\t0\t11\t12.445\tp\t-\t-\t-\n+gene_175\t124509\t124628\t+\t0\t11\t13.2651\tb\t124497\t124502\t8.77418\n+gene_176\t124640\t124816\t+\t0\t11\t33.7203\tp\t124627\t124632\t14.3502\n+gene_177\t124809\t125105\t+\t0\t11\t73.7223\tp\t124795\t124800\t15.8863\n+gene_178\t125153\t125335\t+\t0\t11\t21.0171\ts\t125137\t125142\t7.89094\n+gene_179\t125348\t125716\t+\t0\t11\t75.8528\tb\t125334\t125339\t17.0766\n+gene_180\t125729\t126076\t+\t0\t11\t89.2576\ts\t125716\t125721\t18.4273\n+gene_181\t126076\t126354\t+\t0\t11\t16.0608\tb\t-\t-\t-\n+gene_182\t126424\t126729\t+\t0\t11\t52.4215\ts\t126409\t126414\t11.7794\n+gene_183\t126744\t127094\t+\t0\t11\t73.8799\tp\t126731\t126736\t18.4273\n+gene_184\t127094\t127696\t+\t0\t11\t97.8033\tp\t127081\t127086\t18.4273\n+gene_185\t127710\t127889\t+\t0\t11\t44.5511\tp\t127696\t127701\t10.1392\n+gene_186\t128116\t128517\t+\t0\t11\t69.0757\tp\t128102\t128107\t15.8863\n+gene_187\t128519\t128779\t+\t0\t11\t32.5682\ts\t128505\t128510\t17.0766\n+gene_188\t128831\t129118\t+\t0\t11\t56.3442\ts\t128817\t12
8822\t17.0766\n+gene_189\t129235\t129498\t+\t0\t11\t52.4949\tp\t129218\t129223\t15.3253\n+gene_190\t129575\t129754\t+\t0\t11\t31.8781\tp\t129562\t129567\t18.4273\n+gene_191\t129769\t130032\t+\t0\t11\t59.5097\ts\t129755\t129760\t17.0766\n+gene_192\t130035\t130352\t+\t0\t11\t72.9264\tp\t130019\t130024\t16.2941\n+gene_193\t130353\t130910\t+\t0\t11\t97.954\tp\t130339\t130344\t17.0766\n+gene_194\t131122\t131280\t+\t0\t11\t31.9551\ta\t131109\t131114\t18.4273\n+gene_195\t131315\t131515\t+\t0\t11\t39.2297\ts\t131301\t131306\t15.8863\n+gene_196\t131516\t131806\t+\t0\t11\t41.8281\tp\t131502\t131507\t11.6055\n+gene_197\t131898\t132206\t+\t0\t11\t64.8526\tp\t131882\t131887\t15.4219\n+gene_198\t132203\t133111\t+\t0\t11\t160.639\ts\t132186\t132191\t14.3502\n+gene_199\t133129\t134598\t+\t0\t11\t295.577\ts\t133112\t133117\t14.3502\n+gene_200\t134677\t134922\t+\t0\t11\t42.7425\tp\t134663\t134668\t15.8863\n+gene_201\t134942\t135334\t+\t0\t11\t88.8849\ts\t134927\t134932\t15.9853\n+gene_202\t135336\t135533\t+\t0\t11\t55.0569\ts\t135321\t135326\t15.9853\n+gene_203\t135599\t135910\t+\t0\t11\t70.4734\tp\t135585\t135590\t15.8863\n+gene_204\t135913\t136422\t+\t0\t11\t81.4458\tp\t135900\t135905\t14.3502\n+gene_205\t136424\t136753\t+\t0\t11\t44.945\tp\t136407\t136412\t-0.442378\n+gene_206\t136759\t136953\t+\t0\t11\t31.0427\tp\t136745\t136750\t7.63734\n+gene_207\t136977\t137291\t+\t0\t11\t79.026\ts\t136960\t136965\t15.3253\n+gene_208\t137306\t137473\t+\t0\t11\t41.9391\tp\t137291\t137296\t11.4748\n'
b
diff -r 000000000000 -r d71dee3fdc80 cpt_convert_mga/test-data/ConvMga_Out.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_convert_mga/test-data/ConvMga_Out.gff3 Fri May 13 17:57:10 2022 +0000
b
b'@@ -0,0 +1,608 @@\n+##gff-version 3\n+01\tMGA\tgene\t28\t243\t.\t+\t0\tSource=MGA;ID=01.gene_1;\n+01\tMGA\tCDS\t28\t243\t.\t+\t0\tSource=MGA;ID=01.cds_gene_1;Parent=01.gene_1;\n+01\tMGA\tgene\t616\t929\t.\t+\t0\tSource=MGA;ID=01.gene_2;\n+01\tMGA\tCDS\t630\t929\t.\t+\t0\tSource=MGA;ID=01.cds_gene_2;Parent=01.gene_2;\n+01\tMGA\tShine_Dalgarno_sequence\t616\t620\t.\t+\t0\tID=01.rbs_gene_2;Source=MGA;Parent=01.gene_2;\n+01\tMGA\tgene\t930\t1130\t.\t+\t0\tSource=MGA;ID=01.gene_3;\n+01\tMGA\tCDS\t945\t1130\t.\t+\t0\tSource=MGA;ID=01.cds_gene_3;Parent=01.gene_3;\n+01\tMGA\tShine_Dalgarno_sequence\t930\t934\t.\t+\t0\tID=01.rbs_gene_3;Source=MGA;Parent=01.gene_3;\n+01\tMGA\tgene\t1225\t1527\t.\t+\t0\tSource=MGA;ID=01.gene_4;\n+01\tMGA\tCDS\t1237\t1527\t.\t+\t0\tSource=MGA;ID=01.cds_gene_4;Parent=01.gene_4;\n+01\tMGA\tShine_Dalgarno_sequence\t1225\t1229\t.\t+\t0\tID=01.rbs_gene_4;Source=MGA;Parent=01.gene_4;\n+01\tMGA\tgene\t1514\t1814\t.\t+\t0\tSource=MGA;ID=01.gene_5;\n+01\tMGA\tCDS\t1527\t1814\t.\t+\t0\tSource=MGA;ID=01.cds_gene_5;Parent=01.gene_5;\n+01\tMGA\tShine_Dalgarno_sequence\t1514\t1518\t.\t+\t0\tID=01.rbs_gene_5;Source=MGA;Parent=01.gene_5;\n+01\tMGA\tgene\t1801\t2107\t.\t+\t0\tSource=MGA;ID=01.gene_6;\n+01\tMGA\tCDS\t1814\t2107\t.\t+\t0\tSource=MGA;ID=01.cds_gene_6;Parent=01.gene_6;\n+01\tMGA\tShine_Dalgarno_sequence\t1801\t1805\t.\t+\t0\tID=01.rbs_gene_6;Source=MGA;Parent=01.gene_6;\n+01\tMGA\tgene\t2097\t2368\t.\t+\t0\tSource=MGA;ID=01.gene_7;\n+01\tMGA\tCDS\t2111\t2368\t.\t+\t0\tSource=MGA;ID=01.cds_gene_7;Parent=01.gene_7;\n+01\tMGA\tShine_Dalgarno_sequence\t2097\t2101\t.\t+\t0\tID=01.rbs_gene_7;Source=MGA;Parent=01.gene_7;\n+01\tMGA\tgene\t2431\t2685\t.\t+\t0\tSource=MGA;ID=01.gene_8;\n+01\tMGA\tCDS\t2446\t2685\t.\t+\t0\tSource=MGA;ID=01.cds_gene_8;Parent=01.gene_8;\n+01\tMGA\tShine_Dalgarno_sequence\t2431\t2435\t.\t+\t0\tID=01.rbs_gene_8;Source=MGA;Parent=01.gene_8;\n+01\tMGA\tgene\t2685\t3043\t.\t+\t0\tSource=MGA;ID=01.gene_9;\n+01\tMGA\tCDS\t2696\t3043
\t.\t+\t0\tSource=MGA;ID=01.cds_gene_9;Parent=01.gene_9;\n+01\tMGA\tShine_Dalgarno_sequence\t2685\t2689\t.\t+\t0\tID=01.rbs_gene_9;Source=MGA;Parent=01.gene_9;\n+01\tMGA\tgene\t3252\t3606\t.\t-\t0\tSource=MGA;ID=01.gene_10;\n+01\tMGA\tCDS\t3252\t3590\t.\t-\t0\tSource=MGA;ID=01.cds_gene_10;Parent=01.gene_10;\n+01\tMGA\tShine_Dalgarno_sequence\t3602\t3606\t.\t-\t0\tID=01.rbs_gene_10;Source=MGA;Parent=01.gene_10;\n+01\tMGA\tgene\t3889\t4209\t.\t+\t0\tSource=MGA;ID=01.gene_11;\n+01\tMGA\tCDS\t3901\t4209\t.\t+\t0\tSource=MGA;ID=01.cds_gene_11;Parent=01.gene_11;\n+01\tMGA\tShine_Dalgarno_sequence\t3889\t3893\t.\t+\t0\tID=01.rbs_gene_11;Source=MGA;Parent=01.gene_11;\n+01\tMGA\tgene\t4400\t4702\t.\t+\t0\tSource=MGA;ID=01.gene_12;\n+01\tMGA\tCDS\t4415\t4702\t.\t+\t0\tSource=MGA;ID=01.cds_gene_12;Parent=01.gene_12;\n+01\tMGA\tShine_Dalgarno_sequence\t4400\t4404\t.\t+\t0\tID=01.rbs_gene_12;Source=MGA;Parent=01.gene_12;\n+01\tMGA\tgene\t4738\t4943\t.\t+\t0\tSource=MGA;ID=01.gene_13;\n+01\tMGA\tCDS\t4752\t4943\t.\t+\t0\tSource=MGA;ID=01.cds_gene_13;Parent=01.gene_13;\n+01\tMGA\tShine_Dalgarno_sequence\t4738\t4742\t.\t+\t0\tID=01.rbs_gene_13;Source=MGA;Parent=01.gene_13;\n+01\tMGA\tgene\t5260\t5763\t.\t-\t0\tSource=MGA;ID=01.gene_14;\n+01\tMGA\tCDS\t5260\t5748\t.\t-\t0\tSource=MGA;ID=01.cds_gene_14;Parent=01.gene_14;\n+01\tMGA\tShine_Dalgarno_sequence\t5759\t5763\t.\t-\t0\tID=01.rbs_gene_14;Source=MGA;Parent=01.gene_14;\n+01\tMGA\tgene\t5900\t6074\t.\t+\t0\tSource=MGA;ID=01.gene_15;\n+01\tMGA\tCDS\t5916\t6074\t.\t+\t0\tSource=MGA;ID=01.cds_gene_15;Parent=01.gene_15;\n+01\tMGA\tShine_Dalgarno_sequence\t5900\t5904\t.\t+\t0\tID=01.rbs_gene_15;Source=MGA;Parent=01.gene_15;\n+01\tMGA\tgene\t6129\t6275\t.\t+\t0\tSource=MGA;ID=01.gene_16;\n+01\tMGA\tCDS\t6144\t6275\t.\t+\t0\tSource=MGA;ID=01.cds_gene_16;Parent=01.gene_16;\n+01\tMGA\tShine_Dalgarno_sequence\t6129\t6133\t.\t+\t0\tID=01.rbs_gene_16;Source=MGA;Parent=01.gene_16;\n+01\tMGA\tgene\t6428\t6766\t.\t+\t0\tSource=MGA;ID=01.gene_17
;\n+01\tMGA\tCDS\t6443\t6766\t.\t+\t0\tSource=MGA;ID=01.cds_gene_17;Parent=01.gene_17;\n+01\tMGA\tShine_Dalgarno_sequence\t6428\t6432\t.\t+\t0\tID=01.rbs_gene_17;Source=MGA;Parent=01.gene_17;\n+01\tMGA\tgene\t6851\t7102\t.\t+\t0\tSource=MGA;ID=01.gene_18;\n+01\tMGA\tCDS\t6866\t7102\t.\t+\t0\tSource=MGA;ID=01.cds_gene_18;Parent=01.gene_18;\n+01\tMGA\tShine_Dalgarno_sequence\t6851\t6855\t.\t+\t0\tID=01.rbs_gene_18;Source=MGA;Parent=01.gene_18;\n+01\tMGA\tgene\t7167\t7652\t.\t+\t0\tSour'..b'\tShine_Dalgarno_sequence\t130020\t130024\t.\t+\t0\tID=01.rbs_gene_192;Source=MGA;Parent=01.gene_192;\n+01\tMGA\tgene\t130340\t130910\t.\t+\t0\tSource=MGA;ID=01.gene_193;\n+01\tMGA\tCDS\t130353\t130910\t.\t+\t0\tSource=MGA;ID=01.cds_gene_193;Parent=01.gene_193;\n+01\tMGA\tShine_Dalgarno_sequence\t130340\t130344\t.\t+\t0\tID=01.rbs_gene_193;Source=MGA;Parent=01.gene_193;\n+01\tMGA\tgene\t131110\t131280\t.\t+\t0\tSource=MGA;ID=01.gene_194;\n+01\tMGA\tCDS\t131122\t131280\t.\t+\t0\tSource=MGA;ID=01.cds_gene_194;Parent=01.gene_194;\n+01\tMGA\tShine_Dalgarno_sequence\t131110\t131114\t.\t+\t0\tID=01.rbs_gene_194;Source=MGA;Parent=01.gene_194;\n+01\tMGA\tgene\t131302\t131515\t.\t+\t0\tSource=MGA;ID=01.gene_195;\n+01\tMGA\tCDS\t131315\t131515\t.\t+\t0\tSource=MGA;ID=01.cds_gene_195;Parent=01.gene_195;\n+01\tMGA\tShine_Dalgarno_sequence\t131302\t131306\t.\t+\t0\tID=01.rbs_gene_195;Source=MGA;Parent=01.gene_195;\n+01\tMGA\tgene\t131503\t131806\t.\t+\t0\tSource=MGA;ID=01.gene_196;\n+01\tMGA\tCDS\t131516\t131806\t.\t+\t0\tSource=MGA;ID=01.cds_gene_196;Parent=01.gene_196;\n+01\tMGA\tShine_Dalgarno_sequence\t131503\t131507\t.\t+\t0\tID=01.rbs_gene_196;Source=MGA;Parent=01.gene_196;\n+01\tMGA\tgene\t131883\t132206\t.\t+\t0\tSource=MGA;ID=01.gene_197;\n+01\tMGA\tCDS\t131898\t132206\t.\t+\t0\tSource=MGA;ID=01.cds_gene_197;Parent=01.gene_197;\n+01\tMGA\tShine_Dalgarno_sequence\t131883\t131887\t.\t+\t0\tID=01.rbs_gene_197;Source=MGA;Parent=01.gene_197;\n+01\tMGA\tgene\t132187\t133111\t.\t+\t0\tSource=M
GA;ID=01.gene_198;\n+01\tMGA\tCDS\t132203\t133111\t.\t+\t0\tSource=MGA;ID=01.cds_gene_198;Parent=01.gene_198;\n+01\tMGA\tShine_Dalgarno_sequence\t132187\t132191\t.\t+\t0\tID=01.rbs_gene_198;Source=MGA;Parent=01.gene_198;\n+01\tMGA\tgene\t133113\t134598\t.\t+\t0\tSource=MGA;ID=01.gene_199;\n+01\tMGA\tCDS\t133129\t134598\t.\t+\t0\tSource=MGA;ID=01.cds_gene_199;Parent=01.gene_199;\n+01\tMGA\tShine_Dalgarno_sequence\t133113\t133117\t.\t+\t0\tID=01.rbs_gene_199;Source=MGA;Parent=01.gene_199;\n+01\tMGA\tgene\t134664\t134922\t.\t+\t0\tSource=MGA;ID=01.gene_200;\n+01\tMGA\tCDS\t134677\t134922\t.\t+\t0\tSource=MGA;ID=01.cds_gene_200;Parent=01.gene_200;\n+01\tMGA\tShine_Dalgarno_sequence\t134664\t134668\t.\t+\t0\tID=01.rbs_gene_200;Source=MGA;Parent=01.gene_200;\n+01\tMGA\tgene\t134928\t135334\t.\t+\t0\tSource=MGA;ID=01.gene_201;\n+01\tMGA\tCDS\t134942\t135334\t.\t+\t0\tSource=MGA;ID=01.cds_gene_201;Parent=01.gene_201;\n+01\tMGA\tShine_Dalgarno_sequence\t134928\t134932\t.\t+\t0\tID=01.rbs_gene_201;Source=MGA;Parent=01.gene_201;\n+01\tMGA\tgene\t135322\t135533\t.\t+\t0\tSource=MGA;ID=01.gene_202;\n+01\tMGA\tCDS\t135336\t135533\t.\t+\t0\tSource=MGA;ID=01.cds_gene_202;Parent=01.gene_202;\n+01\tMGA\tShine_Dalgarno_sequence\t135322\t135326\t.\t+\t0\tID=01.rbs_gene_202;Source=MGA;Parent=01.gene_202;\n+01\tMGA\tgene\t135586\t135910\t.\t+\t0\tSource=MGA;ID=01.gene_203;\n+01\tMGA\tCDS\t135599\t135910\t.\t+\t0\tSource=MGA;ID=01.cds_gene_203;Parent=01.gene_203;\n+01\tMGA\tShine_Dalgarno_sequence\t135586\t135590\t.\t+\t0\tID=01.rbs_gene_203;Source=MGA;Parent=01.gene_203;\n+01\tMGA\tgene\t135901\t136422\t.\t+\t0\tSource=MGA;ID=01.gene_204;\n+01\tMGA\tCDS\t135913\t136422\t.\t+\t0\tSource=MGA;ID=01.cds_gene_204;Parent=01.gene_204;\n+01\tMGA\tShine_Dalgarno_sequence\t135901\t135905\t.\t+\t0\tID=01.rbs_gene_204;Source=MGA;Parent=01.gene_204;\n+01\tMGA\tgene\t136408\t136753\t.\t+\t0\tSource=MGA;ID=01.gene_205;\n+01\tMGA\tCDS\t136424\t136753\t.\t+\t0\tSource=MGA;ID=01.cds_gene_205;Parent=01.gen
e_205;\n+01\tMGA\tShine_Dalgarno_sequence\t136408\t136412\t.\t+\t0\tID=01.rbs_gene_205;Source=MGA;Parent=01.gene_205;\n+01\tMGA\tgene\t136746\t136953\t.\t+\t0\tSource=MGA;ID=01.gene_206;\n+01\tMGA\tCDS\t136759\t136953\t.\t+\t0\tSource=MGA;ID=01.cds_gene_206;Parent=01.gene_206;\n+01\tMGA\tShine_Dalgarno_sequence\t136746\t136750\t.\t+\t0\tID=01.rbs_gene_206;Source=MGA;Parent=01.gene_206;\n+01\tMGA\tgene\t136961\t137291\t.\t+\t0\tSource=MGA;ID=01.gene_207;\n+01\tMGA\tCDS\t136977\t137291\t.\t+\t0\tSource=MGA;ID=01.cds_gene_207;Parent=01.gene_207;\n+01\tMGA\tShine_Dalgarno_sequence\t136961\t136965\t.\t+\t0\tID=01.rbs_gene_207;Source=MGA;Parent=01.gene_207;\n+01\tMGA\tgene\t137292\t137473\t.\t+\t0\tSource=MGA;ID=01.gene_208;\n+01\tMGA\tCDS\t137306\t137473\t.\t+\t0\tSource=MGA;ID=01.cds_gene_208;Parent=01.gene_208;\n+01\tMGA\tShine_Dalgarno_sequence\t137292\t137296\t.\t+\t0\tID=01.rbs_gene_208;Source=MGA;Parent=01.gene_208;\n'