Repository 'cpt_gbk_renumber'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_gbk_renumber

Changeset 0:8cac332dbc77 (2022-06-17)
Commit message:
Uploaded
added:
cpt_renumber_gbk/BIO_FIX_TOPO.py
cpt_renumber_gbk/cpt-macros.xml
cpt_renumber_gbk/macros.xml
cpt_renumber_gbk/renumber.py
cpt_renumber_gbk/renumber.xml
cpt_renumber_gbk/test-data/MS105.genbank
cpt_renumber_gbk/test-data/renumbered.gbk
cpt_renumber_gbk/test-data/renumbered.tsv
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/BIO_FIX_TOPO.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/BIO_FIX_TOPO.py Fri Jun 17 13:13:47 2022 +0000
[
@@ -0,0 +1,85 @@
+import Bio.GenBank
+
+
+def record_end(self, content):
+    """Clean up when we've finished the record.
+    """
+    #from Bio import Alphabet
+    #from Bio.Alphabet import IUPAC
+    from Bio.Seq import Seq, UnknownSeq
+
+    # Try and append the version number to the accession for the full id
+    if not self.data.id:
+        assert "accessions" not in self.data.annotations, self.data.annotations[
+            "accessions"
+        ]
+        self.data.id = self.data.name  # Good fall back?
+    elif self.data.id.count(".") == 0:
+        try:
+            self.data.id += ".%i" % self.data.annotations["sequence_version"]
+        except KeyError:
+            pass
+
+    # add the sequence information
+    # first, determine the alphabet
+    # we default to an generic alphabet if we don't have a
+    # seq type or have strange sequence information.
+    
+    #seq_alphabet = Alphabet.generic_alphabet
+
+    # now set the sequence
+    sequence = "".join(self._seq_data)
+
+    if (
+        self._expected_size is not None
+        and len(sequence) != 0
+        and self._expected_size != len(sequence)
+    ):
+        import warnings
+        from Bio import BiopythonParserWarning
+
+        warnings.warn(
+            "Expected sequence length %i, found %i (%s)."
+            % (self._expected_size, len(sequence), self.data.id),
+            BiopythonParserWarning,
+        )
+    """
+    if self._seq_type:
+        # mRNA is really also DNA, since it is actually cDNA
+        if "DNA" in self._seq_type.upper() or "MRNA" in self._seq_type.upper():
+            seq_alphabet = IUPAC.ambiguous_dna
+        # are there ever really RNA sequences in GenBank?
+        elif "RNA" in self._seq_type.upper():
+            # Even for data which was from RNA, the sequence string
+            # is usually given as DNA (T not U).  Bug 2408
+            if "T" in sequence and "U" not in sequence:
+                seq_alphabet = IUPAC.ambiguous_dna
+            else:
+                seq_alphabet = IUPAC.ambiguous_rna
+        elif (
+            "PROTEIN" in self._seq_type.upper() or self._seq_type == "PRT"
+        ):  # PRT is used in EMBL-bank for patents
+            seq_alphabet = IUPAC.protein  # or extended protein?
+        # work around ugly GenBank records which have circular or
+        # linear but no indication of sequence type
+        elif self._seq_type in ["circular", "linear", "unspecified"]:
+            pass
+        # we have a bug if we get here
+        else:
+            raise ValueError(
+                "Could not determine alphabet for seq_type %s" % self._seq_type
+            )
+
+        # Also save the chomosome layout
+        if "circular" in self._seq_type.lower():
+            self.data.annotations["topology"] = "circular"
+        elif "linear" in self._seq_type.lower():
+            self.data.annotations["topology"] = "linear"
+    """
+    if not sequence and self.__expected_size:
+        self.data.seq = UnknownSeq(self._expected_size)#, seq_alphabet)
+    else:
+        self.data.seq = Seq(sequence)#, seq_alphabet)
+
+
+# Monkey-patch: rebind the record_end attribute of Bio.GenBank._FeatureConsumer
+# to the function defined above. The assignment runs when this module is
+# imported.
+Bio.GenBank._FeatureConsumer.record_end = record_end
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/cpt-macros.xml Fri Jun 17 13:13:47 2022 +0000
[
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<macros>
+ <xml name="gff_requirements">
+ <requirements>
+ <requirement type="package" version="2.7">python</requirement>
+ <requirement type="package" version="1.65">biopython</requirement>
+ <requirement type="package" version="2.12.1">requests</requirement>
+ <yield/>
+ </requirements>
+ <version_command>
+ <![CDATA[
+ cd $__tool_directory__ && git rev-parse HEAD
+ ]]>
+ </version_command>
+ </xml>
+ <xml name="citation/mijalisrasche">
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+ </xml>
+ <xml name="citations">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation> 
+ <yield/>
+ </citations>
+ </xml>
+     <xml name="citations-crr">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+ <yield/>
+ </citations>
+ </xml>
+        <xml name="citations-2020">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+                        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+                        <yield/>
+ </citations>
+ </xml>
+        <xml name="citations-2020-AJC-solo">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+                        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+                        <yield/>
+ </citations>
+ </xml>
+        <xml name="citations-clm">
+ <citations>
+ <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+                        <yield/>
+ </citations>
+ </xml>
+        <xml name="sl-citations-clm">
+ <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+                        <yield/>
+ </xml>
+</macros>
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/macros.xml Fri Jun 17 13:13:47 2022 +0000
b
@@ -0,0 +1,105 @@
+<?xml version="1.0"?>
+<macros>
+  <xml name="requirements">
+    <requirements>
+ <requirement type="package" version="3.8.13">python</requirement>
+ <requirement type="package" version="1.79">biopython</requirement>
+ <requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
+ <yield/>
+    </requirements>
+  </xml>
+  <xml name="ldap_ref"
+    token_name="dn_ref"
+    token_label="Pick a DN"
+    token_fromfile="ldap_people.loc">
+        <repeat name="repeat_@NAME@" title="@LABEL@">
+          <param name="@NAME@" label="Select a @LABEL@" type="select">
+            <options from_file="@FROMFILE@">
+                <column name="name" index="0"/>
+                <column name="value" index="1"/>
+            </options>
+          </param>
+        </repeat>
+    </xml>
+  <xml name="ldap_ref_single"
+    token_name="dn_ref"
+    token_label="Pick a DN"
+    token_fromfile="ldap_people.loc">
+          <param name="@NAME@" label="Select a @LABEL@" type="select">
+            <options from_file="@FROMFILE@">
+                <column name="name" index="0"/>
+                <column name="value" index="1"/>
+            </options>
+          </param>
+    </xml>
+ <xml name="gbk_feature_type"
+ token_label="Feature type to remove"
+ token_multiple="True"
+ token_optional="False"
+ token_name="positional_2">
+    <param label="@LABEL@" optional="@OPTIONAL@" multiple="@MULTIPLE@" name="feature_type" type="select">
+      <option value="-10_signal">-10_signal</option>
+      <option value="-35_signal">-35_signal</option>
+      <option value="3'UTR">3'UTR</option>
+      <option value="5'UTR">5'UTR</option>
+      <option value="CAAT_signal">CAAT_signal</option>
+      <option selected="true" value="CDS">CDS</option>
+      <option value="C_region">C_region</option>
+      <option value="D-loop">D-loop</option>
+      <option value="D_segment">D_segment</option>
+      <option value="GC_signal">GC_signal</option>
+      <option value="J_segment">J_segment</option>
+      <option value="LTR">LTR</option>
+      <option value="N_region">N_region</option>
+      <option value="RBS">RBS</option>
+      <option value="STS">STS</option>
+      <option value="S_region">S_region</option>
+      <option value="TATA_signal">TATA_signal</option>
+      <option value="V_region">V_region</option>
+      <option value="V_segment">V_segment</option>
+      <option value="all">all</option>
+      <option value="assembly_gap">assembly_gap</option>
+      <option value="attenuator">attenuator</option>
+      <option value="enhancer">enhancer</option>
+      <option value="exon">exon</option>
+      <option value="gap">gap</option>
+      <option value="gene">gene</option>
+      <option value="iDNA">iDNA</option>
+      <option value="intron">intron</option>
+      <option value="mRNA">mRNA</option>
+      <option value="mat_peptide">mat_peptide</option>
+      <option value="misc_RNA">misc_RNA</option>
+      <option value="misc_binding">misc_binding</option>
+      <option value="misc_difference">misc_difference</option>
+      <option value="misc_feature">misc_feature</option>
+      <option value="misc_recomb">misc_recomb</option>
+      <option value="misc_signal">misc_signal</option>
+      <option value="misc_structure">misc_structure</option>
+      <option value="mobile_element">mobile_element</option>
+      <option value="modified_base">modified_base</option>
+      <option value="ncRNA">ncRNA</option>
+      <option value="old_sequence">old_sequence</option>
+      <option value="operon">operon</option>
+      <option value="oriT">oriT</option>
+      <option value="polyA_signal">polyA_signal</option>
+      <option value="polyA_site">polyA_site</option>
+      <option value="precursor_RNA">precursor_RNA</option>
+      <option value="prim_transcript">prim_transcript</option>
+      <option value="primer_bind">primer_bind</option>
+      <option value="promoter">promoter</option>
+      <option value="protein_bind">protein_bind</option>
+      <option value="rRNA">rRNA</option>
+      <option value="rep_origin">rep_origin</option>
+      <option value="repeat_region">repeat_region</option>
+      <option value="sig_peptide">sig_peptide</option>
+      <option value="source">source</option>
+      <option value="stem_loop">stem_loop</option>
+      <option value="tRNA">tRNA</option>
+      <option value="terminator">terminator</option>
+      <option value="tmRNA">tmRNA</option>
+      <option value="transit_peptide">transit_peptide</option>
+      <option value="unsure">unsure</option>
+      <option value="variation">variation</option>
+    </param>
+ </xml>
+</macros>
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/renumber.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/renumber.py Fri Jun 17 13:13:47 2022 +0000
[
b'@@ -0,0 +1,397 @@\n+#!/usr/bin/env python\n+import BIO_FIX_TOPO  # NOQA\n+import argparse\n+import sys  # noqa\n+from Bio import SeqIO\n+\n+import logging\n+\n+logging.basicConfig(level=logging.INFO)\n+log = logging.getLogger()\n+\n+# gene and RBS features are also included in the tagged features list, but are dealt with specifically elsewhere.\n+# This is used to filter out just valid "in gene" features\n+TAGGED_FEATURES = ["CDS", "tRNA", "intron", "mat_peptide"]\n+\n+\n+def renumber_genes(\n+    gbk_files,\n+    tag_to_update="locus_tag",\n+    string_prefix="display_id",\n+    leading_zeros=3,\n+    forceTagMatch=False,\n+    change_table=None,\n+):\n+\n+    for gbk_file in gbk_files:\n+        for record in SeqIO.parse(gbk_file, "genbank"):\n+            if string_prefix == "display_id":\n+                format_string = record.id + "_%0" + str(leading_zeros) + "d"\n+            else:\n+                format_string = string_prefix + "%0" + str(leading_zeros) + "d"\n+\n+            # f_cds = [f for f in record.features if f.type == \'CDS\']\n+            # f_rbs = [f for f in record.features if f.type == \'RBS\']\n+            # f_gene = [f for f in record.features if f.type == \'gene\']\n+            # f_intron = [f for f in record.features if f.type == \'intron\']\n+            # f_trna = [f for f in record.features if f.type == \'tRNA\']\n+            # f_pep = [f for f in record.features if f.type == \'mat_peptide\']\n+            # f_oth = [f for f in record.features if f.type not in [\'CDS\', \'RBS\',\n+            #                                                      \'gene\', \'intron\',\n+            #                                                      \'tRNA\', \'mat_peptide\']]\n+            # Apparently we\'re numbering tRNAs now, thanks for telling me.\n+            # f_oth2 = []\n+            # for q in sorted(f_oth, key=lambda x: x.location.start):\n+            #    if q.type == \'tRNA\':\n+            #        q.qualifiers[\'locus_tag\'] = 
format_string_t % tRNA_count\n+            #        tRNA_count += 1\n+            #        f_oth2.append(q)\n+            #    else:\n+            #        f_oth2.append(q)\n+            # f_oth = f_oth2\n+\n+            # f_care_about = []\n+\n+            # Make sure we\'ve hit every RBS and gene\n+            # for cds in f_cds:\n+            # If there\'s an associated gene feature, it will share a stop codon\n+            #    if cds.location.strand > 0:\n+            #        associated_genes = [f for f in f_gene if f.location.end ==\n+            #                            cds.location.end]\n+            #    else:\n+            #        associated_genes = [f for f in f_gene if f.location.start ==\n+            #                            cds.location.start]\n+\n+            #    # If there\'s an RBS it\'ll be upstream a bit.\n+            #    if cds.location.strand > 0:\n+            #        associated_rbss = [f for f in f_rbs if f.location.end <\n+            #                           cds.location.start and f.location.end >\n+            #                           cds.location.start - 24]\n+            #    else:\n+            #        associated_rbss = [f for f in f_rbs if f.location.start >\n+            #                           cds.location.end and f.location.start <\n+            #                           cds.location.end + 24]\n+            #    tmp_result = [cds]\n+            #    if len(associated_genes) > 0:\n+            #        tmp_result.append(associated_genes[0])\n+\n+            #   if len(associated_rbss) == 1:\n+            #       tmp_result.append(associated_rbss[0])\n+            #   else:\n+            #       log.warning("%s RBSs found for %s", len(associated_rbss), cds.location)\n+            # We choose to append to f_other as that has all features not\n+            # already accessed. 
It may mean that some gene/RBS features are\n+            # missed if they aren\'t detected here, which we\'ll need to handle.\n+            #    f_care_about.append(tmp_result)\n+\n+            #####-----------------------------------------------------------'..b'    record.id\n+                        + "\\t"\n+                        + feature.type\n+                        + ":"\n+                        + (feature.qualifiers[tag_to_update][0])\n+                        + "\\t[Removed: Feature not within boundary of a gene]\\n"\n+                      )\n+                  elif tag_to_update in feature.qualifiers.keys():\n+                    change_table.write(\n+                        record.id\n+                        + "\\t"\n+                        + feature.type\n+                        + ":"\n+                        + (feature.qualifiers[tag_to_update][0])\n+                        + "\\t[Removed: Feature not within boundary of a gene]\\n"\n+                    )\n+                  else:\n+                    change_table.write(\n+                        record.id\n+                        + "\\t"\n+                        + feature.type\n+                        + ": (has no "\n+                        + tag_to_update\n+                        + ")\\t[Removed: Feature not within boundary of a gene]\\n"\n+                    )\n+            change_table.write("\\n".join(delta) + "\\n")\n+\n+            # Output\n+            yield record\n+\n+\n+def delta_old(feature, tag_to_update):\n+    # First part of delta entry, old name\n+    if tag_to_update in feature.qualifiers:\n+        return feature.qualifiers[tag_to_update][0]\n+    else:\n+        return "%s %s %s" % (\n+            feature.location.start,\n+            feature.location.end,\n+            feature.location.strand,\n+        )\n+\n+\n+def is_within(query, feature):\n+    # checks if the query item is within the bounds of the given feature\n+    sortedList = sorted(query.location.parts, 
key=lambda x: x.start)\n+    for x in sortedList:\n+      if (\n+          feature.location.start <= x.start\n+          and feature.location.end >= x.end\n+      ):\n+        if x.strand < 0 and x == sortedList[-1]:\n+          return True\n+        elif x.strand >= 0 and x == sortedList[0]:\n+          return True\n+    #else:\n+    return False\n+\n+\n+# def fix_frameshift(a, b):\n+#    #checks if gene a and gene b are a frameshifted gene (either shares a start or an end and an RBS)\n+#    if a[0].location.start == b[0].location.start or a[0].location.end == b[0].location.end:\n+#        # It is likely a frameshift. Treat is as such. Find shared RBS, determine which CDS is which\n+#        big_gene = a if (a[0].location.end - a[0].location.start) > (b[0].location.end - b[0].location.start) else b\n+#        small_gene = a if big_gene==b else b\n+#        rbs = [f for f in a if f.type == \'RBS\']\n+#        # In the way that the tag lists are generated, the larger gene should contain both CDS features.\n+#        # Retrieve and dermine big/small CDS\n+#        cdss = [f for f in big_gene if f.type == \'CDS\']\n+#        big_cds = cdss[0] if (cdss[0].location.end - cdss[0].location.start) > (cdss[1].location.end - cdss[1].location.start) else cdss[1]\n+#        small_cds = cdss[0] if big_cds==cdss[1] else cdss[1]\n+\n+\n+if __name__ == "__main__":\n+    parser = argparse.ArgumentParser(description="Renumber genbank files")\n+    parser.add_argument(\n+        "gbk_files", type=argparse.FileType("r"), nargs="+", help="Genbank files"\n+    )\n+    parser.add_argument(\n+        "--tag_to_update", type=str, help="Tag to update", default="locus_tag"\n+    )\n+    parser.add_argument(\n+        "--string_prefix", type=str, help="Prefix string", default="display_id"\n+    )\n+    parser.add_argument(\n+        "--leading_zeros", type=int, help="# of leading zeroes", default=3\n+    )\n+\n+    parser.add_argument(\n+        "--forceTagMatch", action="store_true", 
help="Make non-CDS features match tag initially"\n+    )\n+\n+    parser.add_argument(\n+        "--change_table",\n+        type=argparse.FileType("w"),\n+        help="Location to store change table in",\n+        default="renumber.tsv",\n+    )\n+\n+    args = parser.parse_args()\n+    for record in renumber_genes(**vars(args)):\n+        SeqIO.write(record, sys.stdout, "genbank")\n'
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/renumber.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/renumber.xml Fri Jun 17 13:13:47 2022 +0000
[
@@ -0,0 +1,58 @@
+<?xml version="1.0"?>
+<tool id="edu.tamu.cpt.genbank.RelabelTags" name="Renumber GenBank Genes" version="0.4" profile="16.04">
+    <description>relabels/renumbers GenBank tags according to rules</description>
+  <macros>
+    <import>macros.xml</import>
+ <import>cpt-macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <command detect_errors="aggressive"><![CDATA[
+python $__tool_directory__/renumber.py
+  $file
+--tag_to_update "${tag_to_update}"
+--string_prefix "${string_prefix}"
+--leading_zeros "${leading_zeros}"
+$forceMatch
+--change_table $change_table
+
+> $output
+
+]]></command>
+  <inputs>
+    <param label="GenBank file" name="file" type="data" format="genbank" />
+    <param help="Which tag is used to store gene numbers" label="Tag to update"
+        name="tag_to_update" type="text" value="locus_tag"/>
+    <param help="A string to use as a prefix for the numbering. Will be used as XXXXXXNNN where XXXXXX is the string and NNN is a numerical identifier. Using &quot;display_id&quot; has special meaning, it will use the genome's name/accession number"
+        label="String prefix" name="string_prefix" type="text" value="display_id"/>
+    <param label="Number of leading zeros/padding" name="leading_zeros"
+        type="integer" value="3"/>
+    <param name="forceMatch" label="Force Updated Tags to initially match in addition to location checks. " help="If tag is not present, only location and type checks will be used to infer renumber" type="boolean" truevalue="--forceTagMatch" falsevalue="" checked="True" />
+  </inputs>
+  <outputs>
+    <data format="genbank" name="output">
+    </data>
+    <data format="tabular" name="change_table">
+    </data>
+  </outputs>
+  <tests>
+      <test>
+          <param name="file" value="MS105.genbank" />
+          <param name="leading_zeros" value="10" />
+          <param name="forceMatch" value="" />
+          <param name="string_prefix" value="MS105_" />
+          <output name="output" value="renumbered.gbk" />
+          <output name="change_table" value="renumbered.tsv" />
+      </test>
+  </tests>
+  <help>
+Gene Renumbering Tool
+=====================
+
+Renumber genes in a genome.
+
+Subfeatures, such as CDS or intron, will attempt to be grouped with their associated gene, based on location. CDSs and RBSs must share either a start or an end boundary with their parent gene, and also fall entirely within the boundary of said gene. All other features only need to fall within the boundary of a gene. 
+
+If the tag check is enabled, subfeatures must also carry the same value for the qualifier selected for updating (such as locus_tag) as their candidate parent gene. This is useful for subfeatures such as introns, which may be inside more than one gene and could potentially get renumbered to the wrong parent in a location-only check. However, if your dataset does not already have a consistent naming convention, otherwise valid hierarchies could get dropped. The log file lists which features were dropped based on tag checks, so be sure to use it to verify that all data made it through.
+</help>
+ <expand macro="citations-2020" />
+</tool>
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/test-data/MS105.genbank
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/test-data/MS105.genbank Fri Jun 17 13:13:47 2022 +0000
b
b'@@ -0,0 +1,1069 @@\n+LOCUS       NODE_2_length_39       39186 bp    DNA              UNK 01-JAN-1980\n+DEFINITION  NODE_2_length_39186_cov_113.152335\n+ACCESSION   NODE_2_length_39186_cov_113\n+VERSION     NODE_2_length_39186_cov_113.152335\n+KEYWORDS    .\n+SOURCE      .\n+  ORGANISM  .\n+            .\n+FEATURES             Location/Qualifiers\n+     gene            57..345\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_001"\n+     RBS             57..60\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_001"\n+     CDS             70..345\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_001"\n+                     /product="hypothetical protein"\n+     gene            complement(408..723)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_002"\n+     CDS             complement(408..713)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_002"\n+                     /product="hypothetical protein"\n+     RBS             complement(719..723)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_002"\n+     gene            complement(888..1173)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_003"\n+     CDS             complement(888..1160)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_003"\n+                     /product="hypothetical protein"\n+     RBS             complement(1170..1173)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_003"\n+     gene            complement(1180..2369)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_004"\n+     CDS             complement(1180..2358)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_004"\n+                     /product="hypothetical protein"\n+     RBS             complement(2367..2369)\n+                     
/locus_tag="CPT_NODE_2_length_39186_cov_113.152335_004"\n+     gene            complement(2399..2975)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_005"\n+     CDS             complement(2399..2965)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_005"\n+                     /product="hypothetical protein"\n+     RBS             complement(2973..2975)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_005"\n+     gene            complement(2984..3146)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_006"\n+     CDS             complement(2984..3136)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_006"\n+                     /product="hypothetical protein"\n+     gene            complement(3137..3265)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_007"\n+     CDS             complement(3137..3250)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_007"\n+                     /product="hypothetical protein"\n+     RBS             complement(3144..3146)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_006"\n+     RBS             complement(3263..3265)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_007"\n+     gene            complement(3269..3507)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_008"\n+     CDS             complement(3269..3490)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_008"\n+                     /product="hypothetical protein"\n+     RBS             complement(3505..3507)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_008"\n+     gene            complement(3638..4310)\n+                     /locus_tag="CPT_NODE_2_length_39186_cov_113.152335_009"\n+     CDS             complement(3638..4300)\n+                     
/locus_tag="CPT_NODE_2_length_39186_cov_113.152335_009"\n+                     /product="endonuclease"\n+     gene            complement(4297.'..b'gcgtg atcgcgcgcg cgcgtcgccc tacgcttgcc gcatgctcga\n+    36121 aacgacaaat ccaattcaac gcgaagcgaa cgtgcgacag atcgcgcgct cgctctactg\n+    36181 gcaaggctgg cgcatctcgt cgatcgcacg gcatctcgaa ctgaagcccg cgaccgtggc\n+    36241 gtcgtggtgc cgtcgcgaca agtggaaaga cgcaacgccg atcgagcgca tcgaggcggc\n+    36301 ggccgaaacg cgcctgatgg tcctgattgc gaaggacaag aaggacggcg cggactacaa\n+    36361 ggaaatcgac ctgctcggcc ggcagattga gcggctcgcg cgcgtgcaga aatacgggga\n+    36421 gacggggaag gaaggcgacc tgaaccccaa catcgccgcg cgcaatgccg ggccgaagcg\n+    36481 caagccgccg cgcaacgaaa tcagcgagga acagcacgag cggatcgtgg cggcgttccg\n+    36541 cgaatcgctg ttcgactacc agaaggtctg gtatcgcaat ggcgatcaac gcacgcgcaa\n+    36601 catcctgaag tcacggcaga tcggcgcgac ttggtatttc tcgcgcgaag cgttcgtcga\n+    36661 cgcgctcgaa accggccgca atcagatttt tctgtcggcc agcaaggcgc aggcgcacgt\n+    36721 cttcaaacag tacatcgcgc agttcgcgcg tgacgcggcc gacgtggaac tgaccggcga\n+    36781 tccgatcatc ctgccgaacg gcgcgattct gtatttcctg gggacgaacg cgcgcacggc\n+    36841 gcagtcgtat cacggcaatt tctatttcga cgaatacttc tgggttccga agtttcgcga\n+    36901 gctgaacaag gtcgcgtcgg gcatggcgat gcacaagcgc tggcgcaaga cctacttcag\n+    36961 cacgccgtcg agcatcacgc atgaggcgta tgcgttctgg agcggcgcgc acgcgaaccg\n+    37021 cggccgcgca gccgccgatc gtatccagat cgacacgagc cacgaagcgc tcgtgcgcgg\n+    37081 catgctgggc gaggacgcac agtggcgcca gatcgtgacg attctggatg cgatggcggg\n+    37141 cggctgcgac ctgttcgaca tcgacgagct gcgccgcgaa tacagcgccg aggaattcgc\n+    37201 caatctgctg atgtgcgcgt tcatcgatga ttcgctgtcg gtgttcaagc tggccgagct\n+    37261 gcagcgctgc atggtcgact cgtgggagga atgggccgac gacttctcgc cgctgctgct\n+    37321 gcgcccgttc ggctatcgcg aggtatgggt tggctacgat ccggcgctga ctggcgactc\n+    37381 ggccggcctg gtcgtcgtgg cgccgccgcg ggtcgagggt ggggcgtttc gcgtgctcga\n+    37441 acgtcatcag ttccgcggta acgacttcga ggaacaggcc gcggcgatcg agcagatcac\n+    
37501 gcagcgctac aacgtcggct acatcgcgat cgacacgaca ggcatggggc agggggtcta\n+    37561 tcagctcgtg cgcaagttct acccggccgt cgtcgcgttg aactactcgc ccgaggtgaa\n+    37621 aactcgcctc gtgctgaaag ggcaatccgt tatccgcaat ggccgcctgc aattcgacgc\n+    37681 gggctggacc gacctggccg ctgccttcat ggcgatcaaa cagaccatga cgccgagcgg\n+    37741 tcgacagacg acgttcacgg ctgaccggaa cgacgagacc ggtcacgcgg atctagcgtg\n+    37801 ggcctgcctg cacgcgatcg accgcgaacc gctcgccggc ggcgacatca attcttcatc\n+    37861 tttcacggag ttctattcat gagcaagcgc cgatcgcgcg cgccgcgcac gttcgcggcc\n+    37921 gcgccggatt cgggcgccgc cggcgccgcg ccggcgcgcg ccgaggtctt caccttcgac\n+    37981 gatcccacgc cggtcatgaa ccgggcggag attctcgatt acgtcgaatg ctggtcgaac\n+    38041 ggcgattggt tcgagccgcc tgtcagcttc gccggcctgg cgaaatcgtt tcgcgcgagc\n+    38101 acgcaccaca gctcggcgct gtacttcaag gcgaacgtgc tggcgtcgac attccggccg\n+    38161 cacaagtggc tgtcgcggca cgcgttcgaa cggtgggcgc tcgattttct gacgttcggc\n+    38221 aacggctacc tggaacgccg ccgcaatcag ctcggcgaca cgctgcgact cgaaccagcc\n+    38281 ctggcgaaat acacgcggcg caaggcagat ttcagcggct tcgtgtacgt gaacggctgg\n+    38341 caggacaagc acgagttcga gccgggcagc gtgttccagc tcatgcgacc ggacatcaac\n+    38401 caggaggtgt acggcctgcc cgaatatctc agctcgcttc actcggcgtg gctgaacgaa\n+    38461 tcgtcgacgt tgttccggcg gaagtactac gaaaacggga gccacgccgg cttcatcctc\n+    38521 tacatgaccg acgcggcgca gaaacaggag gatgtcgaca acatgcgcac ggcgttgaag\n+    38581 aacgcaaagg ggccgggcaa tttccgcaac gtgttcatgt acgcgccggg cgggaagaag\n+    38641 gacggcatcc agctcattcc cgtgtcggag gtcgcggcga aggacgagtt cttcaacatc\n+    38701 aagaacgtga cgcgcgacga cctgctcgcc gcgcatcgcg tgccgccgca actgctcggc\n+    38761 atcgtgccga gcaactcggg gggcttcggc acgccggaca ccgcggcgcg cgtgttcggc\n+    38821 cggaacgaaa tcaagccact gcaggcgcgc ttcgccgagc tgaatgactg gctcggcgaa\n+    38881 gaggtcgtgt cgttcgacga ttacgagatt ccgccggttc cggcggccga gtagcgcacg\n+    38941 cggcgattcg aagtcatgcg gcagggccgc gcaccgggca accgggcgcg gccctttttg\n+    39001 cgtttggggc cggcgcgatt agaggggcta cagcggcttg 
gccgtccgca gggtgcgcaa\n+    39061 gggtcggacg ccgcaaggcg ggagccgcag cgagcctgtc gtccggccgt gcagggtgtc\n+    39121 ccgcggggtg gggagggggc agggaggccc gcgccgcgcc cgccgctgcg cggtcccctc\n+    39181 cccgcc\n+//\n'
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/test-data/renumbered.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/test-data/renumbered.gbk Fri Jun 17 13:13:47 2022 +0000
b
b'@@ -0,0 +1,1063 @@\n+LOCUS       NODE_2_length_39       39186 bp    DNA              UNK 01-JAN-1980\n+DEFINITION  NODE_2_length_39186_cov_113.152335.\n+ACCESSION   NODE_2_length_39186_cov_113\n+VERSION     NODE_2_length_39186_cov_113.152335\n+KEYWORDS    .\n+SOURCE      \n+  ORGANISM  .\n+            .\n+FEATURES             Location/Qualifiers\n+     gene            57..345\n+                     /locus_tag="MS105_0000000001"\n+     RBS             57..60\n+                     /locus_tag="MS105_0000000001"\n+     CDS             70..345\n+                     /locus_tag="MS105_0000000001"\n+                     /product="hypothetical protein"\n+     gene            complement(408..723)\n+                     /locus_tag="MS105_0000000002"\n+     CDS             complement(408..713)\n+                     /locus_tag="MS105_0000000002"\n+                     /product="hypothetical protein"\n+     RBS             complement(719..723)\n+                     /locus_tag="MS105_0000000002"\n+     gene            complement(888..1173)\n+                     /locus_tag="MS105_0000000003"\n+     CDS             complement(888..1160)\n+                     /locus_tag="MS105_0000000003"\n+                     /product="hypothetical protein"\n+     RBS             complement(1170..1173)\n+                     /locus_tag="MS105_0000000003"\n+     gene            complement(1180..2369)\n+                     /locus_tag="MS105_0000000004"\n+     CDS             complement(1180..2358)\n+                     /locus_tag="MS105_0000000004"\n+                     /product="hypothetical protein"\n+     RBS             complement(2367..2369)\n+                     /locus_tag="MS105_0000000004"\n+     gene            complement(2399..2975)\n+                     /locus_tag="MS105_0000000005"\n+     CDS             complement(2399..2965)\n+                     /locus_tag="MS105_0000000005"\n+                     /product="hypothetical protein"\n+     RBS             
complement(2973..2975)\n+                     /locus_tag="MS105_0000000005"\n+     gene            complement(2984..3146)\n+                     /locus_tag="MS105_0000000006"\n+     CDS             complement(2984..3136)\n+                     /locus_tag="MS105_0000000006"\n+                     /product="hypothetical protein"\n+     gene            complement(3137..3265)\n+                     /locus_tag="MS105_0000000007"\n+     CDS             complement(3137..3250)\n+                     /locus_tag="MS105_0000000007"\n+                     /product="hypothetical protein"\n+     RBS             complement(3144..3146)\n+                     /locus_tag="MS105_0000000006"\n+     RBS             complement(3263..3265)\n+                     /locus_tag="MS105_0000000007"\n+     gene            complement(3269..3507)\n+                     /locus_tag="MS105_0000000008"\n+     CDS             complement(3269..3490)\n+                     /locus_tag="MS105_0000000008"\n+                     /product="hypothetical protein"\n+     RBS             complement(3505..3507)\n+                     /locus_tag="MS105_0000000008"\n+     gene            complement(3638..4310)\n+                     /locus_tag="MS105_0000000009"\n+     CDS             complement(3638..4300)\n+                     /locus_tag="MS105_0000000009"\n+                     /product="endonuclease"\n+     gene            complement(4297..5531)\n+                     /locus_tag="MS105_0000000010"\n+     CDS             complement(4297..5520)\n+                     /locus_tag="MS105_0000000010"\n+                     /product="methylase"\n+     RBS             complement(4307..4310)\n+                     /locus_tag="MS105_0000000009"\n+     gene            complement(5507..5956)\n+                     /locus_tag="MS105_0000000011"\n+     CDS             complement(5507..5938)\n+                     /locus_tag="MS105_0000000011"\n+                     /product="Vsr endonuclease"\n+     RBS             
complement(5528..5531)\n+                     /locus_tag="MS105_0000000010"\n+     gene            complement(6206..'..b'gcgtg atcgcgcgcg cgcgtcgccc tacgcttgcc gcatgctcga\n+    36121 aacgacaaat ccaattcaac gcgaagcgaa cgtgcgacag atcgcgcgct cgctctactg\n+    36181 gcaaggctgg cgcatctcgt cgatcgcacg gcatctcgaa ctgaagcccg cgaccgtggc\n+    36241 gtcgtggtgc cgtcgcgaca agtggaaaga cgcaacgccg atcgagcgca tcgaggcggc\n+    36301 ggccgaaacg cgcctgatgg tcctgattgc gaaggacaag aaggacggcg cggactacaa\n+    36361 ggaaatcgac ctgctcggcc ggcagattga gcggctcgcg cgcgtgcaga aatacgggga\n+    36421 gacggggaag gaaggcgacc tgaaccccaa catcgccgcg cgcaatgccg ggccgaagcg\n+    36481 caagccgccg cgcaacgaaa tcagcgagga acagcacgag cggatcgtgg cggcgttccg\n+    36541 cgaatcgctg ttcgactacc agaaggtctg gtatcgcaat ggcgatcaac gcacgcgcaa\n+    36601 catcctgaag tcacggcaga tcggcgcgac ttggtatttc tcgcgcgaag cgttcgtcga\n+    36661 cgcgctcgaa accggccgca atcagatttt tctgtcggcc agcaaggcgc aggcgcacgt\n+    36721 cttcaaacag tacatcgcgc agttcgcgcg tgacgcggcc gacgtggaac tgaccggcga\n+    36781 tccgatcatc ctgccgaacg gcgcgattct gtatttcctg gggacgaacg cgcgcacggc\n+    36841 gcagtcgtat cacggcaatt tctatttcga cgaatacttc tgggttccga agtttcgcga\n+    36901 gctgaacaag gtcgcgtcgg gcatggcgat gcacaagcgc tggcgcaaga cctacttcag\n+    36961 cacgccgtcg agcatcacgc atgaggcgta tgcgttctgg agcggcgcgc acgcgaaccg\n+    37021 cggccgcgca gccgccgatc gtatccagat cgacacgagc cacgaagcgc tcgtgcgcgg\n+    37081 catgctgggc gaggacgcac agtggcgcca gatcgtgacg attctggatg cgatggcggg\n+    37141 cggctgcgac ctgttcgaca tcgacgagct gcgccgcgaa tacagcgccg aggaattcgc\n+    37201 caatctgctg atgtgcgcgt tcatcgatga ttcgctgtcg gtgttcaagc tggccgagct\n+    37261 gcagcgctgc atggtcgact cgtgggagga atgggccgac gacttctcgc cgctgctgct\n+    37321 gcgcccgttc ggctatcgcg aggtatgggt tggctacgat ccggcgctga ctggcgactc\n+    37381 ggccggcctg gtcgtcgtgg cgccgccgcg ggtcgagggt ggggcgtttc gcgtgctcga\n+    37441 acgtcatcag ttccgcggta acgacttcga ggaacaggcc gcggcgatcg agcagatcac\n+    37501 gcagcgctac aacgtcggct 
acatcgcgat cgacacgaca ggcatggggc agggggtcta\n+    37561 tcagctcgtg cgcaagttct acccggccgt cgtcgcgttg aactactcgc ccgaggtgaa\n+    37621 aactcgcctc gtgctgaaag ggcaatccgt tatccgcaat ggccgcctgc aattcgacgc\n+    37681 gggctggacc gacctggccg ctgccttcat ggcgatcaaa cagaccatga cgccgagcgg\n+    37741 tcgacagacg acgttcacgg ctgaccggaa cgacgagacc ggtcacgcgg atctagcgtg\n+    37801 ggcctgcctg cacgcgatcg accgcgaacc gctcgccggc ggcgacatca attcttcatc\n+    37861 tttcacggag ttctattcat gagcaagcgc cgatcgcgcg cgccgcgcac gttcgcggcc\n+    37921 gcgccggatt cgggcgccgc cggcgccgcg ccggcgcgcg ccgaggtctt caccttcgac\n+    37981 gatcccacgc cggtcatgaa ccgggcggag attctcgatt acgtcgaatg ctggtcgaac\n+    38041 ggcgattggt tcgagccgcc tgtcagcttc gccggcctgg cgaaatcgtt tcgcgcgagc\n+    38101 acgcaccaca gctcggcgct gtacttcaag gcgaacgtgc tggcgtcgac attccggccg\n+    38161 cacaagtggc tgtcgcggca cgcgttcgaa cggtgggcgc tcgattttct gacgttcggc\n+    38221 aacggctacc tggaacgccg ccgcaatcag ctcggcgaca cgctgcgact cgaaccagcc\n+    38281 ctggcgaaat acacgcggcg caaggcagat ttcagcggct tcgtgtacgt gaacggctgg\n+    38341 caggacaagc acgagttcga gccgggcagc gtgttccagc tcatgcgacc ggacatcaac\n+    38401 caggaggtgt acggcctgcc cgaatatctc agctcgcttc actcggcgtg gctgaacgaa\n+    38461 tcgtcgacgt tgttccggcg gaagtactac gaaaacggga gccacgccgg cttcatcctc\n+    38521 tacatgaccg acgcggcgca gaaacaggag gatgtcgaca acatgcgcac ggcgttgaag\n+    38581 aacgcaaagg ggccgggcaa tttccgcaac gtgttcatgt acgcgccggg cgggaagaag\n+    38641 gacggcatcc agctcattcc cgtgtcggag gtcgcggcga aggacgagtt cttcaacatc\n+    38701 aagaacgtga cgcgcgacga cctgctcgcc gcgcatcgcg tgccgccgca actgctcggc\n+    38761 atcgtgccga gcaactcggg gggcttcggc acgccggaca ccgcggcgcg cgtgttcggc\n+    38821 cggaacgaaa tcaagccact gcaggcgcgc ttcgccgagc tgaatgactg gctcggcgaa\n+    38881 gaggtcgtgt cgttcgacga ttacgagatt ccgccggttc cggcggccga gtagcgcacg\n+    38941 cggcgattcg aagtcatgcg gcagggccgc gcaccgggca accgggcgcg gccctttttg\n+    39001 cgtttggggc cggcgcgatt agaggggcta cagcggcttg gccgtccgca gggtgcgcaa\n+    
39061 gggtcggacg ccgcaaggcg ggagccgcag cgagcctgtc gtccggccgt gcagggtgtc\n+    39121 ccgcggggtg gggagggggc agggaggccc gcgccgcgcc cgccgctgcg cggtcccctc\n+    39181 cccgcc\n+//\n'
b
diff -r 000000000000 -r 8cac332dbc77 cpt_renumber_gbk/test-data/renumbered.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_renumber_gbk/test-data/renumbered.tsv Fri Jun 17 13:13:47 2022 +0000
[
b'@@ -0,0 +1,166 @@\n+NODE_2_length_39186_cov_113.152335\tRBS:CPT_NODE_2_length_39186_cov_113.152335_011\t[Removed: RBS did not both fall within boundary of gene and share a boundary with a gene]\n+NODE_2_length_39186_cov_113.152335\tRBS:CPT_NODE_2_length_39186_cov_113.152335_027\t[Removed: RBS did not both fall within boundary of gene and share a boundary with a gene]\n+NODE_2_length_39186_cov_113.152335\tRBS:CPT_NODE_2_length_39186_cov_113.152335_027\t[Removed: RBS did not both fall within boundary of gene and share a boundary with a gene]\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_001\tMS105_0000000001\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_001\tMS105_0000000001\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_001\tMS105_0000000001\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_002\tMS105_0000000002\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_002\tMS105_0000000002\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_002\tMS105_0000000002\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_003\tMS105_0000000003\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_003\tMS105_0000000003\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_003\tMS105_0000000003\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_004\tMS105_0000000004\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_004\tMS105_0000000004\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_004\tMS105_0000000004\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_005\tMS105_0000000005\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_005\tMS105_0000000005\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_
113.152335_005\tMS105_0000000005\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_006\tMS105_0000000006\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_006\tMS105_0000000006\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_006\tMS105_0000000006\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_007\tMS105_0000000007\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_007\tMS105_0000000007\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_007\tMS105_0000000007\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_008\tMS105_0000000008\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_008\tMS105_0000000008\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_008\tMS105_0000000008\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_009\tMS105_0000000009\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_009\tMS105_0000000009\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_009\tMS105_0000000009\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_010\tMS105_0000000010\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_010\tMS105_0000000010\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_010\tMS105_0000000010\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_011\tMS105_0000000011\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_011\tMS105_0000000011\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_012\tMS105_0000000012\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_012\tMS105_0000000012\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_012\tMS10
5_0000000012\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_013\tMS105_0000000013\n+NODE_'..b'35\tCPT_NODE_2_length_39186_cov_113.152335_041\tMS105_0000000041\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_042\tMS105_0000000042\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_042\tMS105_0000000042\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_042\tMS105_0000000042\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_043\tMS105_0000000043\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_043\tMS105_0000000043\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_043\tMS105_0000000043\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_044\tMS105_0000000044\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_044\tMS105_0000000044\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_044\tMS105_0000000044\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_045\tMS105_0000000045\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_045\tMS105_0000000045\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_045\tMS105_0000000045\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_046\tMS105_0000000046\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_046\tMS105_0000000046\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_046\tMS105_0000000046\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_047\tMS105_0000000047\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_047\tMS105_0000000047\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_047\tMS105_0000000047\n+NODE_2_length_39186_cov_113
.152335\tCPT_NODE_2_length_39186_cov_113.152335_048\tMS105_0000000048\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_048\tMS105_0000000048\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_048\tMS105_0000000048\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_049\tMS105_0000000049\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_049\tMS105_0000000049\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_049\tMS105_0000000049\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_050\tMS105_0000000050\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_050\tMS105_0000000050\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_051\tMS105_0000000051\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_051\tMS105_0000000051\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_051\tMS105_0000000051\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_052\tMS105_0000000052\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_052\tMS105_0000000052\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_052\tMS105_0000000052\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_053\tMS105_0000000053\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_053\tMS105_0000000053\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_053\tMS105_0000000053\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_054\tMS105_0000000054\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_054\tMS105_0000000054\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_054\tMS105_0000000054\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_
length_39186_cov_113.152335_055\tMS105_0000000055\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_055\tMS105_0000000055\n+NODE_2_length_39186_cov_113.152335\tCPT_NODE_2_length_39186_cov_113.152335_055\tMS105_0000000055\n'