Repository 'cpt_genome_editor'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_genome_editor

Changeset 3:134bb2d7cdfd (2023-06-05)
Previous changeset 2:787ce84e8d16 (2022-06-17) Next changeset 4:78ce8a1a8fd1 (2023-07-23)
Commit message:
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
added:
cpt-macros.xml
genome_editor.py
genome_editor.xml
gff3.py
macros.xml
test-data/genome_editor.mirov2.chain
test-data/genome_editor.mirov2.fa
test-data/genome_editor.mirov2.gff3
test-data/genome_editor.simple.fa
test-data/genome_editor.simple.gff3
test-data/genome_editor.simple.out.chain
test-data/genome_editor.simple.out.fa
test-data/genome_editor.simple.out.gff3
test-data/miro.2.gff3
test-data/miro.fa
tsv.py
removed:
cpt_genome_editor/cpt-macros.xml
cpt_genome_editor/genome_editor.py
cpt_genome_editor/genome_editor.xml
cpt_genome_editor/gff3.py
cpt_genome_editor/macros.xml
cpt_genome_editor/test-data/genome_editor.mirov2.chain
cpt_genome_editor/test-data/genome_editor.mirov2.fa
cpt_genome_editor/test-data/genome_editor.mirov2.gff3
cpt_genome_editor/test-data/genome_editor.simple.fa
cpt_genome_editor/test-data/genome_editor.simple.gff3
cpt_genome_editor/test-data/genome_editor.simple.out.chain
cpt_genome_editor/test-data/genome_editor.simple.out.fa
cpt_genome_editor/test-data/genome_editor.simple.out.gff3
cpt_genome_editor/test-data/miro.2.gff3
cpt_genome_editor/test-data/miro.fa
cpt_genome_editor/tsv.py
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:43:21 2023 +0000
[
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+ <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+ <![CDATA[
+ cd '$__tool_directory__' && git rev-parse HEAD
+ ]]>
+ </version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+        <yield/>
+    </xml>
+</macros>
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/cpt-macros.xml
--- a/cpt_genome_editor/cpt-macros.xml Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="gff_requirements">
- <requirements>
- <requirement type="package" version="2.7">python</requirement>
- <requirement type="package" version="1.65">biopython</requirement>
- <requirement type="package" version="2.12.1">requests</requirement>
- <yield/>
- </requirements>
- <version_command>
- <![CDATA[
- cd $__tool_directory__ && git rev-parse HEAD
- ]]>
- </version_command>
- </xml>
- <xml name="citation/mijalisrasche">
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">@unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- </xml>
- <xml name="citations">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation> 
- <yield/>
- </citations>
- </xml>
-     <xml name="citations-crr">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020-AJC-solo">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-clm">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="sl-citations-clm">
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </xml>
-</macros>
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/genome_editor.py
--- a/cpt_genome_editor/genome_editor.py Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,166 +0,0 @@
-#!/usr/bin/env python
-import logging
-import copy
-import argparse
-import tsv
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.SeqFeature import FeatureLocation
-from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature, convertSeqRec
-from gff3 import feature_lambda, feature_test_contains
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger(__name__)
-
-
-def mutate(gff3, fasta, changes, customSeqs, new_id):
-    # Change Language
-    # - we can only accept ONE genome as an input. (TODO: support multiple?)
-    # - we can only build ONE genome as an output. (TODO: support multiple?)
-    # - must allow selection of various regions
-    # '1,1000,+   40,100,-    custom_seq_1'
-    try:
-        custom_seqs = SeqIO.to_dict(SeqIO.parse(customSeqs, "fasta"))
-    except:
-        custom_seqs = {}
-    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
-    # Pull first and only record
-    rec = list(gffParse(gff3, base_dict=seq_dict))[0]
-    # Create a "clean" record
-    new_record = copy.deepcopy(rec)
-    new_record.id = new_id
-    new_record.seq = Seq("")
-    new_record.features = []
-    new_record.annotations = {}
-    # Process changes.
-    chain = []
-    topFeats = {}
-    covered = 0
-    for feat in rec.features:
-        if "ID" in feat.qualifiers.keys():
-          topFeats[feat.qualifiers["ID"][0]] = feat.location.start
-    for change in changes:
-        if "," in change:
-            (start, end, strand) = change.split(",")
-            start = int(start) - 1
-            end = int(end)
-
-            # Make any complaints
-            broken_feature_start = list(
-                feature_lambda(
-                    rec.features,
-                    feature_test_contains,
-                    {"index": start},
-                    subfeatures=False,
-                )
-            )
-            if len(broken_feature_start) > 0:
-                pass
-                # log.info("DANGER: Start index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output", start, broken_feature_start[0].id, broken_feature_start[0].location)
-            broken_feature_end = list(
-                feature_lambda(
-                    rec.features,
-                    feature_test_contains,
-                    {"index": end},
-                    subfeatures=False,
-                )
-            )
-            if len(broken_feature_end) > 0:
-                pass
-                # log.info("DANGER: End index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output", end, broken_feature_end[0].id, broken_feature_end[0].location)
-
-            # Ok, fetch features
-            if strand == "+":
-                tmp_req = rec[start:end]
-            else:
-                tmp_req = rec[start:end].reverse_complement(
-                    id=True,
-                    name=True,
-                    description=True,
-                    features=True,
-                    annotations=True,
-                    letter_annotations=True,
-                    dbxrefs=True,
-                )
-            tmp_req = convertSeqRec(tmp_req)[0]
-            def update_location(feature, shiftS):
-                feature.location = FeatureLocation(feature.location.start + shiftS, feature.location.end + shiftS, feature.strand)
-                for i in feature.sub_features:
-                  i = update_location(i, shiftS)
-                return feature
-                
-            
-
-            #for feature in tmp_req.features:
-            
-                  
-                
-
-            chain.append(
-                [
-                    rec.id,
-                    start + 1,
-                    end,
-                    strand,
-                    new_record.id,
-                    len(new_record) + 1,
-                    len(new_record) + (end - start),
-                    "+",
-                ]
-            )
-
-            covered += len(new_record.seq)
-            print(covered)
-            new_record.seq += tmp_req.seq
-            # NB: THIS MUST USE BIOPYTHON 1.67. 1.68 Removes access to
-            # subfeatures, which means you will only get top-level features.
-            startInd = len(new_record.features)
-            new_record.features += tmp_req.features
-            
-            for i in new_record.features[startInd:]:
-                i.location = FeatureLocation(i.location.start + covered, i.location.end + covered, i.location.strand)
-                if "ID" not in i.qualifiers.keys():
-                  continue
-                diffS = i.location.start - topFeats[i.qualifiers["ID"][0]]
-                subFeats = i.sub_features
-                for j in subFeats:
-                  j = update_location(j, diffS)
-        else:
-            new_record.seq += custom_seqs[change].seq
-    yield new_record, chain
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("fasta", type=argparse.FileType("r"), help="Sequence")
-    parser.add_argument("gff3", type=argparse.FileType("r"), help="Annotations")
-    parser.add_argument("new_id", help="Append to ID", default="_v2")
-    parser.add_argument(
-        "--out_fasta",
-        type=argparse.FileType("w"),
-        help="Output fasta",
-        default="out.fa",
-    )
-    parser.add_argument(
-        "--out_gff3",
-        type=argparse.FileType("w"),
-        help="Output gff3",
-        default="out.gff3",
-    )
-    parser.add_argument(
-        "--out_simpleChain",
-        type=argparse.FileType("w"),
-        help="Output simple chain (i.e. not a real UCSC chain file)",
-        default="out.chain",
-    )
-    parser.add_argument("--changes", nargs="+")
-    parser.add_argument("--customSeqs", type=argparse.FileType("r"))
-    args = parser.parse_args()
-
-    for rec, chain in mutate(
-        args.gff3, args.fasta, args.changes, args.customSeqs, args.new_id
-    ):
-        # TODO: Check that this appends and doesn't overwrite
-        gffWrite([rec], args.out_gff3)
-        SeqIO.write([rec], args.out_fasta, "fasta")
-        tsv.dump(chain, args.out_simpleChain)
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/genome_editor.xml
--- a/cpt_genome_editor/genome_editor.xml Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,169 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.gff3.genome_editor" name="Genome Editor" version="2.1">
-  <description>allows you to re-arrange a genome</description>
-  <macros>
-    <import>macros.xml</import>
-    <import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <command><![CDATA[
-@GENOME_SELECTOR_PRE@
-python $__tool_directory__/genome_editor.py
-
-@GENOME_SELECTOR@
-@INPUT_GFF@
-"$new_id"
-
---out_fasta "$out_fasta"
---out_gff3 "$out_gff3"
---out_simpleChain "$out_chain"
---customSeqs "$custom_seqs"
---changes
-#for $idx, $change in enumerate($changes):
- #if $change.input_type.input_type_select == "region":
- ${change.input_type.start},${change.input_type.end},${change.input_type.revcom}
- #else
- custom${idx}
- #end if
-#end for
-]]></command>
- <configfiles>
- <configfile name="custom_seqs">
- <![CDATA[
-#for $idx, $change in enumerate($changes):
- #if $change.input_type.input_type_select == "custom":
->custom${idx}
-${change.input_type.seq}
- #end if
-#end for
- ]]>
- </configfile>
- </configfiles>
-  <inputs>
-      <expand macro="input/gff3+fasta" />
-      <param label="New ID" name="new_id" type="text" help="New ID for the sequence to uniquely identify it from the previous build of the sequence. E.g. append Miro could become Miro.2 or Miro.v2 ">
-   <validator type="expression" message="You must specify a new ID"><![CDATA[value and len(value) > 0]]></validator>
-   </param>
-      <repeat name="changes" title="Sequence Component Selections">
- <conditional name="input_type">
- <param name="input_type_select" type="select" label="Data Source">
- <option value="region" selected="True">Region from FASTA file</option>
- <option value="custom">Custom Additional Sequence</option>
- </param>
- <when value="region">
- <param label="Start" name="start" type="integer" min="1" value="1" help="Inclusive range, 1-indexed genome. (I.e. specifying 1-2000 will include base number 1)"/>
- <param label="End" name="end" type="integer" min="1" value="1" help="Inclusive range. (I.e. specifying 1-2000 will include base number 2000)"/>
- <param label="Reverse + Complement" name="revcom" type="boolean" truevalue="-" falsevalue="+" />
- </when>
- <when value="custom">
- <param label="Custom Sequence" name="seq" type="text" help="Enter the sequence, e.g. 'ACTG'. No FASTA definition line."/>
- </when>
- </conditional>
-      </repeat>
-  </inputs>
-  <outputs>
- <data format="gff3" name="out_gff3" label="${new_id} Features"/>
-    <data format="fasta" name="out_fasta" label="${new_id}"/>
-    <data format="tabular" name="out_chain" label="${new_id} Change Table"/>
-  </outputs>
-  <tests>
-      <test>
- <param name="reference_genome_source" value="history" />
- <param name="genome_fasta" value="genome_editor.simple.fa" />
- <param name="gff3_data" value="genome_editor.simple.gff3" />
- <param name="new_id" value="test2" />
- <repeat name="changes">
- <conditional name="input_type">
- <param name="input_type_select" value="region" />
- <param name="start" value="1"/>
- <param name="end" value="4"/>
- <param name="revcom" value="+"/>
- </conditional>
- </repeat>
- <repeat name="changes">
- <conditional name="input_type">
- <param name="input_type_select" value="custom" />
- <param name="seq" value="cccggg"/>
- </conditional>
- </repeat>
- <repeat name="changes">
- <conditional name="input_type">
- <param name="input_type_select" value="region" />
- <param name="start" value="5"/>
- <param name="end" value="8"/>
- <param name="revcom" value="-"/>
- </conditional>
- </repeat>
- <output name="out_gff3" file="genome_editor.simple.out.gff3" />
- <output name="out_fasta" file="genome_editor.simple.out.fa" />
- <output name="out_chain" file="genome_editor.simple.out.chain" />
-      </test>
-      <test>
-
- <param name="reference_genome_source" value="history" />
- <param name="genome_fasta" value="miro.fa" />
- <param name="gff3_data" value="miro.2.gff3" />
- <param name="new_id" value="Miro.v2" />
- <repeat name="changes">
- <conditional name="input_type">
- <param name="input_type_select" value="region" />
- <param name="start" value="1"/>
- <param name="end" value="950"/>
- <param name="revcom" value="+"/>
- </conditional>
- </repeat>
- <repeat name="changes">
- <conditional name="input_type">
- <param name="input_type_select" value="custom" />
- <param name="seq" value="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"/>
- </conditional>
- </repeat>
- <repeat name="changes">
- <conditional name="input_type">
- <param name="input_type_select" value="region" />
- <param name="start" value="3170"/>
- <param name="end" value="3450"/>
- <param name="revcom" value="+"/>
- </conditional>
- </repeat>
- <output name="out_gff3" file="genome_editor.mirov2.gff3" />
- <output name="out_fasta" file="genome_editor.mirov2.fa" />
- <output name="out_chain" file="genome_editor.mirov2.chain" />
-      </test>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-Allows for re-arranging a FASTA genomic sequence, and remaps the associated features 
-from a gff3 file with the new coordinates. Segments of the genome are moved around 
-and stitched back together according to user-specified positions. 
-
-**Example FASTA input** (spaces added for clarity)::
- >Miro
- TTA GTA ATG GCT AAA
-
-With user-specified *sequence component selections*:
-
-- start: 1, end: 10, strand: +
-- start: 6, end: 10, strand: +
-
-the first ten bases will be listed, followed by a duplication of bases 6-10. 
-Bases 11-15 are not part of the sequence component selection parameters and 
-are therefore not in the output:: 
-
- >Miro.v2
- TTA GTA ATG GAA TGG
-
-Alternatively, with user-specified *sequence component selections*:
-
-- start: 1, end: 10, strand: +
-- start: 6, end: 10, strand: -
-
-the last section will be reverse-complemented and give the following output::
-
- >Miro.v2
- TTA GTA ATG GCC ATT
-
-]]></help>
-  <expand macro="citations" />
-</tool>
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/gff3.py
--- a/cpt_genome_editor/gff3.py Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,346 +0,0 @@\n-import copy\n-import logging\n-\n-log = logging.getLogger()\n-log.setLevel(logging.WARN)\n-\n-\n-def feature_lambda(\n-    feature_list,\n-    test,\n-    test_kwargs,\n-    subfeatures=True,\n-    parent=None,\n-    invert=False,\n-    recurse=True,\n-):\n-    """Recursively search through features, testing each with a test function, yielding matches.\n-\n-    GFF3 is a hierachical data structure, so we need to be able to recursively\n-    search through features. E.g. if you\'re looking for a feature with\n-    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n-    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n-\n-    :type feature_list: list\n-    :param feature_list: an iterable of features\n-\n-    :type test: function reference\n-    :param test: a closure with the method signature (feature, **kwargs) where\n-                 the kwargs are those passed in the next argument. This\n-                 function should return True or False, True if the feature is\n-                 to be yielded as part of the main feature_lambda function, or\n-                 False if it is to be ignored. This function CAN mutate the\n-                 features passed to it (think "apply").\n-\n-    :type test_kwargs: dictionary\n-    :param test_kwargs: kwargs to pass to your closure when it is called.\n-\n-    :type subfeatures: boolean\n-    :param subfeatures: when a feature is matched, should just that feature be\n-                        yielded to the caller, or should the entire sub_feature\n-                        tree for that feature be included? 
subfeatures=True is\n-                        useful in cases such as searching for a gene feature,\n-                        and wanting to know what RBS/Shine_Dalgarno_sequences\n-                        are in the sub_feature tree (which can be accomplished\n-                        with two feature_lambda calls). subfeatures=False is\n-                        useful in cases when you want to process (and possibly\n-                        return) the entire feature tree, such as applying a\n-                        qualifier to every single feature.\n-\n-    :type invert: boolean\n-    :param invert: Negate/invert the result of the filter.\n-\n-    :rtype: yielded list\n-    :return: Yields a list of matching features.\n-    """\n-    # Either the top level set of [features] or the subfeature attribute\n-    for feature in feature_list:\n-        feature._parent = parent\n-        if not parent:\n-            # Set to self so we cannot go above root.\n-            feature._parent = feature\n-        test_result = test(feature, **test_kwargs)\n-        # if (not invert and test_result) or (invert and not test_result):\n-        if invert ^ test_result:\n-            if not subfeatures:\n-                feature_copy = copy.deepcopy(feature)\n-                feature_copy.sub_features = list()\n-                yield feature_copy\n-            else:\n-                yield feature\n-\n-        if recurse and hasattr(feature, "sub_features"):\n-            for x in feature_lambda(\n-                feature.sub_features,\n-                test,\n-                test_kwargs,\n-                subfeatures=subfeatures,\n-                parent=feature,\n-                invert=invert,\n-                recurse=recurse,\n-            ):\n-                yield x\n-\n-\n-def fetchParent(feature):\n-    if not hasattr(feature, "_parent") or feature._parent is None:\n-        return feature\n-    else:\n-        return fetchParent(feature._parent)\n-\n-\n-def 
feature_test_true(feature, **kwargs):\n-    return True\n-\n-\n-def feature_test_type(feature, **kwargs):\n-    if "type" in kwargs:\n-        return str(feature.type).upper() == str(kwargs["type"]).upper()\n-    elif "types" in kwargs:\n-      for x in kwargs["types"]:\n-        if str(feature.type).upper() == str(x).upper():\n-          return True\n-      return False\n-    raise Exception("Incorrect feature_test_type call, ne'..b'feature.location.start,\n-        # feature.location.end,\n-        # feature.location.strand\n-        # )\n-    return result\n-\n-\n-def get_gff3_id(gene):\n-    return gene.qualifiers.get("Name", [gene.id])[0]\n-\n-\n-def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n-    # This prevents frameshift errors\n-    while start < 0:\n-        start += 3\n-    while end < 0:\n-        end += 3\n-    while start > parent_length:\n-        start -= 3\n-    while end > parent_length:\n-        end -= 3\n-    return (start, end)\n-\n-\n-def coding_genes(feature_list):\n-    for x in genes(feature_list):\n-        if (\n-            len(\n-                list(\n-                    feature_lambda(\n-                        x.sub_features,\n-                        feature_test_type,\n-                        {"type": "CDS"},\n-                        subfeatures=False,\n-                    )\n-                )\n-            )\n-            > 0\n-        ):\n-            yield x\n-\n-\n-def genes(feature_list, feature_type="gene", sort=False):\n-    """\n-    Simple filter to extract gene features from the feature set.\n-    """\n-\n-    if not sort:\n-        for x in feature_lambda(\n-            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n-        ):\n-            yield x\n-    else:\n-        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n-        data = sorted(data, key=lambda feature: feature.location.start)\n-        for x in data:\n-            yield 
x\n-\n-\n-def wa_unified_product_name(feature):\n-    """\n-    Try and figure out a name. We gave conflicting instructions, so\n-    this isn\'t as trivial as it should be. Sometimes it will be in\n-    \'product\' or \'Product\', othertimes in \'Name\'\n-    """\n-    # Manually applied tags.\n-    protein_product = feature.qualifiers.get(\n-        "product", feature.qualifiers.get("Product", [None])\n-    )[0]\n-\n-    # If neither of those are available ...\n-    if protein_product is None:\n-        # And there\'s a name...\n-        if "Name" in feature.qualifiers:\n-            if not is_uuid(feature.qualifiers["Name"][0]):\n-                protein_product = feature.qualifiers["Name"][0]\n-\n-    return protein_product\n-\n-\n-def is_uuid(name):\n-    return name.count("-") == 4 and len(name) == 36\n-\n-\n-def get_rbs_from(gene):\n-    # Normal RBS annotation types\n-    rbs_rbs = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n-        )\n-    )\n-    rbs_sds = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "Shine_Dalgarno_sequence"},\n-            subfeatures=False,\n-        )\n-    )\n-    # Fraking apollo\n-    apollo_exons = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n-        )\n-    )\n-    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n-    # These are more NCBI\'s style\n-    regulatory_elements = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "regulatory"},\n-            subfeatures=False,\n-        )\n-    )\n-    rbs_regulatory = list(\n-        feature_lambda(\n-            regulatory_elements,\n-            feature_test_quals,\n-            {"regulatory_class": ["ribosome_binding_site"]},\n-            subfeatures=False,\n-        )\n-    
)\n-    # Here\'s hoping you find just one ;)\n-    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n-\n-\n-def nice_name(record):\n-    """\n-    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n-    """\n-    name = record.id\n-    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n-    if len(likely_parental_contig) == 1:\n-        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n-    return name\n-\n-\n-def fsort(it):\n-    for i in sorted(it, key=lambda x: int(x.location.start)):\n-        yield i\n'
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/macros.xml
--- a/cpt_genome_editor/macros.xml Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,62 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="requirements">
- <requirements>
- <requirement type="package" version="3.8.13">python</requirement>
- <requirement type="package" version="1.79">biopython</requirement>
- <requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
- <yield/>
- </requirements>
- </xml>
- <token name="@BLAST_TSV@">
- "$blast_tsv"
- </token>
- <xml name="blast_tsv">
- <param label="Blast Results" help="TSV/tabular (25 Column)"
- name="blast_tsv" type="data" format="tabular" />
- </xml>
-
- <token name="@BLAST_XML@">
- "$blast_xml"
- </token>
- <xml name="blast_xml">
- <param label="Blast Results" help="XML format"
- name="blast_xml" type="data" format="blastxml" />
- </xml>
- <xml name="gff3_with_fasta">
- <param label="Genome Sequences" name="fasta" type="data" format="fasta" />
- <param label="Genome Annotations" name="gff3" type="data" format="gff3" />
- </xml>
- <xml name="genome_selector">
-     <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
- </xml>
- <xml name="gff3_input">
- <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
- </xml>
- <xml name="input/gff3+fasta">
- <expand macro="gff3_input" />
- <expand macro="genome_selector" />
- </xml>
- <token name="@INPUT_GFF@">
- "$gff3_data"
- </token>
- <token name="@INPUT_FASTA@">
- genomeref.fa
- </token>
- <token name="@GENOME_SELECTOR_PRE@">
- ln -s $genome_fasta genomeref.fa;
- </token>
- <token name="@GENOME_SELECTOR@">
- genomeref.fa
- </token>
-        <xml name="input/fasta">
- <param label="Fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-
- <token name="@SEQUENCE@">
- "$sequences"
- </token>
- <xml name="input/fasta/protein">
- <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-</macros>
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.mirov2.chain
--- a/cpt_genome_editor/test-data/genome_editor.mirov2.chain Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-Miro 1 950 + Miro.v2 1 950 +
-Miro 3170 3450 + Miro.v2 1017 1297 +
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.mirov2.fa
--- a/cpt_genome_editor/test-data/genome_editor.mirov2.fa Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,23 +0,0 @@
->Miro.v2 <unknown description>
-TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT
-TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG
-AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG
-GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT
-CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG
-TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC
-ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC
-CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA
-ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT
-ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC
-TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG
-CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA
-TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT
-AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC
-CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT
-TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTaaaaaaaaaa
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaTCTC
-ACTTAACATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCAT
-ATTCTACTACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCT
-GTTTCTTCATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCAC
-AAAATCCGAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTT
-ACCCCTCTTTGCGAATGTATGCAAGTTCTTCATGGGT
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.mirov2.gff3
--- a/cpt_genome_editor/test-data/genome_editor.mirov2.gff3 Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-##gff-version 3
-Miro.v2 gffSeqFeature gene 1 910 . - . ID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu;
-Miro.v2 gffSeqFeature gene 1019 1264 . - . ID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu;
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.fa
--- a/cpt_genome_editor/test-data/genome_editor.simple.fa Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
->test
-AAAATTTT
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.gff3
--- a/cpt_genome_editor/test-data/genome_editor.simple.gff3 Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-##gff-version 3
-test feature gene 1 4 . + . ID=1
-test GenBank CDS 1 4 . + 1 ID=1.cds
-test feature gene 5 8 . - . ID=2
-test GenBank CDS 5 8 . - 1 ID=2.cds
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.out.chain
--- a/cpt_genome_editor/test-data/genome_editor.simple.out.chain Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-test 1 4 + test2 1 4 +
-test 5 8 - test2 11 14 +
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.out.fa
--- a/cpt_genome_editor/test-data/genome_editor.simple.out.fa Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
->test2 <unknown description>
-AAAAcccgggAAAA
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.out.gff3
--- a/cpt_genome_editor/test-data/genome_editor.simple.out.gff3 Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-##gff-version 3
-test2 gffSeqFeature gene 1 4 . + . ID=1;
-test2 gffSeqFeature CDS 1 4 . + 0 ID=1.cds;
-test2 gffSeqFeature gene 11 14 . + . ID=2;
-test2 gffSeqFeature CDS 11 14 . + 0 ID=2.cds;
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/miro.2.gff3
--- a/cpt_genome_editor/test-data/miro.2.gff3 Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1371 +0,0 @@\n-##gff-version 3\n-##sequence-region Miro 1 176055\n-Miro\tCPT\tgene\t1\t910\t.\t-\t.\tID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t1\t910\t.\t-\t.\tID=1d8680f9-d6bc-4ef1-a535-83fe555cd097;Name=Miro.gene_1-00001;Parent=8861dd01-d426-40d4-96b5-f8e4b81c93d2;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tCDS\t1\t900\t.\t-\t0\tID=cd13f44c-b958-4dcc-a6b1-b901ad465268;Name=cd13f44c-b958-4dcc-a6b1-b901ad465268-CDS;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t908\t910\t.\t-\t.\tID=532024dc-47d1-40e1-9255-9ec5ee018cc0;Name=532024dc-47d1-40e1-9255-9ec5ee018cc0-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t1\t900\t.\t-\t.\tID=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93;Name=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tgene\t900\t3173\t.\t-\t.\tID=93e09f06-6e7f-40ce-9308-bfe491cf0a24;Name=fdsa;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t900\t3173\t.\t-\t.\tID=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;Name=fdsa;Parent=93e09f06-6e7f-40ce-9308-bfe491cf0a24;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tCDS\t900\t3161\t.\t-\t0\tID=afd28aa9-d1d9-4125-923d-f62a8cbf8307;Name=afd28aa9-d1d9-4125-923d-f62a8cbf8307-CDS;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t3171\t3173\t.\t-\t.\tID=c452f5b8-598e-4122-961c-302e28fdd0a4;Name=c452f5b8-598e-4122-961c-302e28fdd0a4-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\te
xon\t900\t3161\t.\t-\t.\tID=4350b621-93f7-45fb-85b8-a60ce93f2944;Name=4350b621-93f7-45fb-85b8-a60ce93f2944-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tgene\t3172\t3417\t.\t-\t.\tID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t3172\t3417\t.\t-\t.\tID=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;Name=Miroa-00001;Parent=b8da12d7-fe5c-42bd-b020-6a8ab205a133;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t3414\t3417\t.\t-\t.\tID=6bc4ea79-a43e-43e2-90fe-6af95e31e214;Name=6bc4ea79-a43e-43e2-90fe-6af95e31e214-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t3172\t3408\t.\t-\t.\tID=27847554-35ea-48a8-b5cd-11e64a233d41;Name=27847554-35ea-48a8-b5cd-11e64a233d41-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tCDS\t3172\t3408\t.\t-\t0\tID=a508e82a-5e35-4ac5-8082-e52d06b2edad;Name=a508e82a-5e35-4ac5-8082-e52d06b2edad-CDS;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tgene\t3412\t3979\t.\t-\t.\tID=47402bc1-9d3d-4881-8456-cd85ed73e3d9;Name=Miro.gene_4;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t3412\t3979\t.\t-\t.\tID=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;Name=Miro.gene_4-00001;Parent=47402bc1-9d3d-4881-8456-cd85ed73e3d9;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tCDS\t3412\t3966\t.\t-\t0\tID=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9;Name=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9-CDS;Note=Manually set translation 
start;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t3975\t3979\t.\t-\t.\tID=a77e978d-fba5-4782-9d07-0172c8fa6df9;Name=a77e978d-fba5-4782-9d07-0172c8fa6df9-exon;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modif'..b'fea64;Name=Miro%C4%8B-00001;Parent=1e943ae8-2418-4212-a066-65a568705f1c;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t174760\t174894\t.\t-\t.\tID=51429ca4-a6d5-4a29-8a3a-7151ab119c36;Name=51429ca4-a6d5-4a29-8a3a-7151ab119c36-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t174900\t174903\t.\t-\t.\tID=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255;Name=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tCDS\t174760\t174894\t.\t-\t0\tID=4b13974a-0ee4-4e88-bdbe-659aa2b204ba;Name=4b13974a-0ee4-4e88-bdbe-659aa2b204ba-CDS;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tgene\t174939\t175197\t.\t-\t.\tID=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;Name=gene_274;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t174939\t175197\t.\t-\t.\tID=872415a0-d11f-469c-bb74-0dfc428d105f;Name=gene_274-00001;Parent=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t175195\t175197\t.\t-\t.\tID=6d561d73-b40e-4be2-91b8-0c11508c6e03;Name=6d561d73-b40e-4be2-91b8-0c11508c6e03-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tCDS\t174939\t175187\t.\t-\t0\tID=d5367d3a-56eb-417f-8056-36c5a82d618a;Name=d5367d3a-56eb-417f-8056-36c5a82d618a-CDS;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2
016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t174939\t175187\t.\t-\t.\tID=e0bd32bc-9332-4d12-b92e-358ae5432569;Name=e0bd32bc-9332-4d12-b92e-358ae5432569-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tgene\t175224\t175675\t.\t-\t.\tID=4596ff39-c162-4c8d-b40f-1403a578f3cd;Name=gene_275;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t175224\t175675\t.\t-\t.\tID=71e48bbd-f449-4964-a088-b216bdcc36b6;Name=gene_275-00001;Parent=4596ff39-c162-4c8d-b40f-1403a578f3cd;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t175673\t175675\t.\t-\t.\tID=87fb1a55-1430-444a-95cc-0e40cfcde490;Name=87fb1a55-1430-444a-95cc-0e40cfcde490-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t175224\t175661\t.\t-\t.\tID=484e9b53-43f7-4a20-8ffe-aa7298d0c95c;Name=484e9b53-43f7-4a20-8ffe-aa7298d0c95c-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tCDS\t175224\t175661\t.\t-\t0\tID=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8;Name=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8-CDS;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tgene\t175636\t176021\t.\t-\t.\tID=9cbd3255-806e-4d6f-9e7e-b974495fe09e;Name=Miro%C4%8C;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t175636\t176021\t.\t-\t.\tID=5c9e6321-2990-4cc9-b58b-69d43d1304ff;Name=Miro%C4%8C-00001;Parent=9cbd3255-806e-4d6f-9e7e-b974495fe09e;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t176017\t176021\t.\t-\t.\tID=91dd7574-88be-4c6b-a9a1-48cb6f600abb;Name=91dd7574-88be-4c6b-a9a1-48cb6f600abb-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_l
ast_modified=2016-09-28\n-Miro\tCPT\tCDS\t175636\t176010\t.\t-\t0\tID=94a131f5-9235-4efc-afe1-3e33c7b2ddb6;Name=94a131f5-9235-4efc-afe1-3e33c7b2ddb6-CDS;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t175636\t176010\t.\t-\t.\tID=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a;Name=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n'
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/miro.fa
--- a/cpt_genome_editor/test-data/miro.fa Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2936 +0,0 @@\n->Miro\n-TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n-TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n-AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n-GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n-CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n-TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n-ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n-CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n-ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n-ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n-TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n-CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n-TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n-AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n-CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n-TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n-CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n-AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n-GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n-TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n-TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n-TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n-GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n-CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n-CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n-AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n-AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n-TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n-TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n-AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n-CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n-GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n-TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n-TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n-TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n-CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n-TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n-TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n-TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n-AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n-ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n-ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n-GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n-GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n-ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n-GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n-ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n-AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n-TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n-AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n-GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n-TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n-ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n-ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n-TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n-CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n-GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n-TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n-ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n-GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n-GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n-ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n-ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n-
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n-ACGG'..b'TCGGGTAATATCG\n-TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n-ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n-GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n-GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n-GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n-CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n-TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n-ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n-TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n-AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n-TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n-CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n-AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n-GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n-TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n-ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n-TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n-TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n-ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n-TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n-CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n-TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n-AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n-TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n-CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n-CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n-ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n-GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n-ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n-AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n-CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n-CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n-TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n-TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n-ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n-ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n-TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n-TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n-AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n-ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n-TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n-GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n-GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n-TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n-ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n-CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n-TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n-AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n-GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n-GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n-TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n-AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n-TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n-ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n-CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n-TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n-TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n-GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n-GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n-GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n-TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n-TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n-GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n-GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n-GGCGCGGTTGTTTAG\n'
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/tsv.py
--- a/cpt_genome_editor/tsv.py Fri Jun 17 12:52:23 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,30 +0,0 @@
-import sys
-
-
-# Like 'import json' / 'import yaml', except.. tab data.
-def loads(str_data):
-    return NotImplementedError()
-
-
-def load(handle):
-    return NotImplementedError()
-
-
-def dump(data, handle=sys.stdout):
-    for row in data:
-        handle.write("%s\n" % "\t".join(map(str, row)))
-
-
-def dumps(data):
-    output = ""
-    for row in data:
-        output += "%s\n" % "\t".join(map(str, row))
-    return output
-
-
-def dump_line(row, handle=sys.stdout):
-    dump([row], handle=handle)
-
-
-def dumps_line(row):
-    return dumps([row])
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd genome_editor.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_editor.py Mon Jun 05 02:43:21 2023 +0000
[
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+import logging
+import copy
+import argparse
+import tsv
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqFeature import FeatureLocation
+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature, convertSeqRec
+from gff3 import feature_lambda, feature_test_contains
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def mutate(gff3, fasta, changes, customSeqs, new_id):
+    # Change Language
+    # - we can only accept ONE genome as an input. (TODO: support multiple?)
+    # - we can only build ONE genome as an output. (TODO: support multiple?)
+    # - must allow selection of various regions
+    # '1,1000,+   40,100,-    custom_seq_1'
+    try:
+        custom_seqs = SeqIO.to_dict(SeqIO.parse(customSeqs, "fasta"))
+    except:
+        custom_seqs = {}
+    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
+    # Pull first and onl record
+    rec = list(gffParse(gff3, base_dict=seq_dict))[0]
+    # Create a "clean" record
+    new_record = copy.deepcopy(rec)
+    new_record.id = new_id
+    new_record.seq = Seq("")
+    new_record.features = []
+    new_record.annotations = {}
+    # Process changes.
+    chain = []
+    topFeats = {}
+    covered = 0
+    for feat in rec.features:
+        if "ID" in feat.qualifiers.keys():
+            topFeats[feat.qualifiers["ID"][0]] = feat.location.start
+    for change in changes:
+        if "," in change:
+            (start, end, strand) = change.split(",")
+            start = int(start) - 1
+            end = int(end)
+
+            # Make any complaints
+            broken_feature_start = list(
+                feature_lambda(
+                    rec.features,
+                    feature_test_contains,
+                    {"index": start},
+                    subfeatures=False,
+                )
+            )
+            if len(broken_feature_start) > 0:
+                pass
+                # log.info("DANGER: Start index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output", start, broken_feature_start[0].id, broken_feature_start[0].location)
+            broken_feature_end = list(
+                feature_lambda(
+                    rec.features,
+                    feature_test_contains,
+                    {"index": end},
+                    subfeatures=False,
+                )
+            )
+            if len(broken_feature_end) > 0:
+                pass
+                # log.info("DANGER: End index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output", end, broken_feature_end[0].id, broken_feature_end[0].location)
+
+            # Ok, fetch features
+            if strand == "+":
+                tmp_req = rec[start:end]
+            else:
+                tmp_req = rec[start:end].reverse_complement(
+                    id=True,
+                    name=True,
+                    description=True,
+                    features=True,
+                    annotations=True,
+                    letter_annotations=True,
+                    dbxrefs=True,
+                )
+            tmp_req = convertSeqRec(tmp_req)[0]
+
+            def update_location(feature, shiftS):
+                feature.location = FeatureLocation(
+                    feature.location.start + shiftS,
+                    feature.location.end + shiftS,
+                    feature.strand,
+                )
+                for i in feature.sub_features:
+                    i = update_location(i, shiftS)
+                return feature
+
+            # for feature in tmp_req.features:
+
+            chain.append(
+                [
+                    rec.id,
+                    start + 1,
+                    end,
+                    strand,
+                    new_record.id,
+                    len(new_record) + 1,
+                    len(new_record) + (end - start),
+                    "+",
+                ]
+            )
+
+            covered += len(new_record.seq)
+            print(covered)
+            new_record.seq += tmp_req.seq
+            # NB: THIS MUST USE BIOPYTHON 1.67. 1.68 Removes access to
+            # subfeatures, which means you will only get top-level features.
+            startInd = len(new_record.features)
+            new_record.features += tmp_req.features
+
+            for i in new_record.features[startInd:]:
+                i.location = FeatureLocation(
+                    i.location.start + covered,
+                    i.location.end + covered,
+                    i.location.strand,
+                )
+                if "ID" not in i.qualifiers.keys():
+                    continue
+                diffS = i.location.start - topFeats[i.qualifiers["ID"][0]]
+                subFeats = i.sub_features
+                for j in subFeats:
+                    j = update_location(j, diffS)
+        else:
+            new_record.seq += custom_seqs[change].seq
+    yield new_record, chain
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("fasta", type=argparse.FileType("r"), help="Sequence")
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="Annotations")
+    parser.add_argument("new_id", help="Append to ID", default="_v2")
+    parser.add_argument(
+        "--out_fasta",
+        type=argparse.FileType("w"),
+        help="Output fasta",
+        default="out.fa",
+    )
+    parser.add_argument(
+        "--out_gff3",
+        type=argparse.FileType("w"),
+        help="Output gff3",
+        default="out.gff3",
+    )
+    parser.add_argument(
+        "--out_simpleChain",
+        type=argparse.FileType("w"),
+        help="Output simple chain (i.e. not a real UCSC chain file)",
+        default="out.chain",
+    )
+    parser.add_argument("--changes", nargs="+")
+    parser.add_argument("--customSeqs", type=argparse.FileType("r"))
+    args = parser.parse_args()
+
+    for rec, chain in mutate(
+        args.gff3, args.fasta, args.changes, args.customSeqs, args.new_id
+    ):
+        # TODO: Check that this appends and doesn't overwirte
+        gffWrite([rec], args.out_gff3)
+        SeqIO.write([rec], args.out_fasta, "fasta")
+        tsv.dump(chain, args.out_simpleChain)
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd genome_editor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_editor.xml Mon Jun 05 02:43:21 2023 +0000
[
@@ -0,0 +1,167 @@
+<tool id="edu.tamu.cpt.gff3.genome_editor" name="Genome Editor" version="2.1">
+    <description>allows you to re-arrange a genome</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Only the script path is quoted: quoting "python <path>" as a single
+         shell word would make the shell look for one executable with that
+         whole name instead of running the interpreter on the script. -->
+    <command><![CDATA[
+@GENOME_SELECTOR_PRE@
+python '$__tool_directory__/genome_editor.py'
+
+@GENOME_SELECTOR@
+@INPUT_GFF@
+"$new_id"
+
+--out_fasta "$out_fasta"
+--out_gff3 "$out_gff3"
+--out_simpleChain "$out_chain"
+--customSeqs "$custom_seqs"
+--changes
+#for $idx, $change in enumerate($changes):
+ #if $change.input_type.input_type_select == "region":
+ ${change.input_type.start},${change.input_type.end},${change.input_type.revcom}
+ #else
+ custom${idx}
+ #end if
+#end for
+]]></command>
+    <configfiles>
+        <configfile name="custom_seqs">
+ <![CDATA[
+#for $idx, $change in enumerate($changes):
+ #if $change.input_type.input_type_select == "custom":
+>custom${idx}
+${change.input_type.seq}
+ #end if
+#end for
+ ]]>
+ </configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="input/gff3+fasta"/>
+        <param label="New ID" name="new_id" type="text" help="New ID for the sequence to uniquely identify it from the previous build of the sequence. E.g. append Miro could become Miro.2 or Miro.v2 ">
+            <validator type="expression" message="You must specify a new ID"><![CDATA[value and len(value) > 0]]></validator>
+        </param>
+        <repeat name="changes" title="Sequence Component Selections">
+            <conditional name="input_type">
+                <param name="input_type_select" type="select" label="Data Source">
+                    <option value="region" selected="True">Region from FASTA file</option>
+                    <option value="custom">Custom Additional Sequence</option>
+                </param>
+                <when value="region">
+                    <param label="Start" name="start" type="integer" min="1" value="1" help="Inclusive range, 1-indexed genome. (I.e. specifying 1-2000 will include base number 1)"/>
+                    <param label="End" name="end" type="integer" min="1" value="1" help="Inclusive range. (I.e. specifying 1-2000 will include base number 2000)"/>
+                    <param label="Reverse + Complement" name="revcom" type="boolean" truevalue="-" falsevalue="+"/>
+                </when>
+                <when value="custom">
+                    <param label="Custom Sequence" name="seq" type="text" help="Enter the sequence, e.g. 'ACTG'. No FASTA definition line."/>
+                </when>
+            </conditional>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data format="gff3" name="out_gff3" label="${new_id} Features"/>
+        <data format="fasta" name="out_fasta" label="${new_id}"/>
+        <data format="tabular" name="out_chain" label="${new_id} Change Table"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="genome_editor.simple.fa"/>
+            <param name="gff3_data" value="genome_editor.simple.gff3"/>
+            <param name="new_id" value="test2"/>
+            <repeat name="changes">
+                <conditional name="input_type">
+                    <param name="input_type_select" value="region"/>
+                    <param name="start" value="1"/>
+                    <param name="end" value="4"/>
+                    <param name="revcom" value="+"/>
+                </conditional>
+            </repeat>
+            <repeat name="changes">
+                <conditional name="input_type">
+                    <param name="input_type_select" value="custom"/>
+                    <param name="seq" value="cccggg"/>
+                </conditional>
+            </repeat>
+            <repeat name="changes">
+                <conditional name="input_type">
+                    <param name="input_type_select" value="region"/>
+                    <param name="start" value="5"/>
+                    <param name="end" value="8"/>
+                    <param name="revcom" value="-"/>
+                </conditional>
+            </repeat>
+            <output name="out_gff3" file="genome_editor.simple.out.gff3"/>
+            <output name="out_fasta" file="genome_editor.simple.out.fa"/>
+            <output name="out_chain" file="genome_editor.simple.out.chain"/>
+        </test>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="miro.fa"/>
+            <param name="gff3_data" value="miro.2.gff3"/>
+            <param name="new_id" value="Miro.v2"/>
+            <repeat name="changes">
+                <conditional name="input_type">
+                    <param name="input_type_select" value="region"/>
+                    <param name="start" value="1"/>
+                    <param name="end" value="950"/>
+                    <param name="revcom" value="+"/>
+                </conditional>
+            </repeat>
+            <repeat name="changes">
+                <conditional name="input_type">
+                    <param name="input_type_select" value="custom"/>
+                    <param name="seq" value="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"/>
+                </conditional>
+            </repeat>
+            <repeat name="changes">
+                <conditional name="input_type">
+                    <param name="input_type_select" value="region"/>
+                    <param name="start" value="3170"/>
+                    <param name="end" value="3450"/>
+                    <param name="revcom" value="+"/>
+                </conditional>
+            </repeat>
+            <output name="out_gff3" file="genome_editor.mirov2.gff3"/>
+            <output name="out_fasta" file="genome_editor.mirov2.fa"/>
+            <output name="out_chain" file="genome_editor.mirov2.chain"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Allows for re-arranging a FASTA genomic sequence, and remaps the associated features 
+from a gff3 file with the new coordinates. Segments of the genome are moved around 
+and stitched back together according to user-specified positions. 
+
+**Example FASTA input** (spaces added for clarity)::
+ >Miro
+ TTA GTA ATG GCT AAA
+
+With user-specified *sequence component selections*:
+
+- start: 1, end: 10, strand: +
+- start: 6, end: 10, strand: +
+
+the first ten bases will be listed, followed by a duplication of bases 6-10. 
+Bases 11-15 are not part of the sequence component selection parameters and 
+are therefore not in the output:: 
+
+ >Miro.v2
+ TTA GTA ATG GAA TGG
+
+Alternatively, with user-specified *sequence component selections*:
+
+- start: 1, end: 10, strand: +
+- start: 6, end: 10, strand: -
+
+the last section will be reverse-complemented and give the following output::
+
+ >Miro.v2
+ TTA GTA ATG GCC ATT
+
+]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3.py Mon Jun 05 02:43:21 2023 +0000
[
b'@@ -0,0 +1,346 @@\n+import copy\n+import logging\n+\n+log = logging.getLogger()\n+log.setLevel(logging.WARN)\n+\n+\n+def feature_lambda(\n+    feature_list,\n+    test,\n+    test_kwargs,\n+    subfeatures=True,\n+    parent=None,\n+    invert=False,\n+    recurse=True,\n+):\n+    """Recursively search through features, testing each with a test function, yielding matches.\n+\n+    GFF3 is a hierachical data structure, so we need to be able to recursively\n+    search through features. E.g. if you\'re looking for a feature with\n+    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n+    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n+\n+    :type feature_list: list\n+    :param feature_list: an iterable of features\n+\n+    :type test: function reference\n+    :param test: a closure with the method signature (feature, **kwargs) where\n+                 the kwargs are those passed in the next argument. This\n+                 function should return True or False, True if the feature is\n+                 to be yielded as part of the main feature_lambda function, or\n+                 False if it is to be ignored. This function CAN mutate the\n+                 features passed to it (think "apply").\n+\n+    :type test_kwargs: dictionary\n+    :param test_kwargs: kwargs to pass to your closure when it is called.\n+\n+    :type subfeatures: boolean\n+    :param subfeatures: when a feature is matched, should just that feature be\n+                        yielded to the caller, or should the entire sub_feature\n+                        tree for that feature be included? 
subfeatures=True is\n+                        useful in cases such as searching for a gene feature,\n+                        and wanting to know what RBS/Shine_Dalgarno_sequences\n+                        are in the sub_feature tree (which can be accomplished\n+                        with two feature_lambda calls). subfeatures=False is\n+                        useful in cases when you want to process (and possibly\n+                        return) the entire feature tree, such as applying a\n+                        qualifier to every single feature.\n+\n+    :type invert: boolean\n+    :param invert: Negate/invert the result of the filter.\n+\n+    :rtype: yielded list\n+    :return: Yields a list of matching features.\n+    """\n+    # Either the top level set of [features] or the subfeature attribute\n+    for feature in feature_list:\n+        feature._parent = parent\n+        if not parent:\n+            # Set to self so we cannot go above root.\n+            feature._parent = feature\n+        test_result = test(feature, **test_kwargs)\n+        # if (not invert and test_result) or (invert and not test_result):\n+        if invert ^ test_result:\n+            if not subfeatures:\n+                feature_copy = copy.deepcopy(feature)\n+                feature_copy.sub_features = list()\n+                yield feature_copy\n+            else:\n+                yield feature\n+\n+        if recurse and hasattr(feature, "sub_features"):\n+            for x in feature_lambda(\n+                feature.sub_features,\n+                test,\n+                test_kwargs,\n+                subfeatures=subfeatures,\n+                parent=feature,\n+                invert=invert,\n+                recurse=recurse,\n+            ):\n+                yield x\n+\n+\n+def fetchParent(feature):\n+    if not hasattr(feature, "_parent") or feature._parent is None:\n+        return feature\n+    else:\n+        return fetchParent(feature._parent)\n+\n+\n+def 
feature_test_true(feature, **kwargs):\n+    return True\n+\n+\n+def feature_test_type(feature, **kwargs):\n+    if "type" in kwargs:\n+        return str(feature.type).upper() == str(kwargs["type"]).upper()\n+    elif "types" in kwargs:\n+        for x in kwargs["types"]:\n+            if str(feature.type).upper() == str(x).upper():\n+                return True\n+        return False\n+    raise Exception("Incorrect feature_test'..b'feature.location.start,\n+        # feature.location.end,\n+        # feature.location.strand\n+        # )\n+    return result\n+\n+\n+def get_gff3_id(gene):\n+    return gene.qualifiers.get("Name", [gene.id])[0]\n+\n+\n+def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n+    # This prevents frameshift errors\n+    while start < 0:\n+        start += 3\n+    while end < 0:\n+        end += 3\n+    while start > parent_length:\n+        start -= 3\n+    while end > parent_length:\n+        end -= 3\n+    return (start, end)\n+\n+\n+def coding_genes(feature_list):\n+    for x in genes(feature_list):\n+        if (\n+            len(\n+                list(\n+                    feature_lambda(\n+                        x.sub_features,\n+                        feature_test_type,\n+                        {"type": "CDS"},\n+                        subfeatures=False,\n+                    )\n+                )\n+            )\n+            > 0\n+        ):\n+            yield x\n+\n+\n+def genes(feature_list, feature_type="gene", sort=False):\n+    """\n+    Simple filter to extract gene features from the feature set.\n+    """\n+\n+    if not sort:\n+        for x in feature_lambda(\n+            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n+        ):\n+            yield x\n+    else:\n+        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n+        data = sorted(data, key=lambda feature: feature.location.start)\n+        for x in data:\n+            yield 
x\n+\n+\n+def wa_unified_product_name(feature):\n+    """\n+    Try and figure out a name. We gave conflicting instructions, so\n+    this isn\'t as trivial as it should be. Sometimes it will be in\n+    \'product\' or \'Product\', othertimes in \'Name\'\n+    """\n+    # Manually applied tags.\n+    protein_product = feature.qualifiers.get(\n+        "product", feature.qualifiers.get("Product", [None])\n+    )[0]\n+\n+    # If neither of those are available ...\n+    if protein_product is None:\n+        # And there\'s a name...\n+        if "Name" in feature.qualifiers:\n+            if not is_uuid(feature.qualifiers["Name"][0]):\n+                protein_product = feature.qualifiers["Name"][0]\n+\n+    return protein_product\n+\n+\n+def is_uuid(name):\n+    return name.count("-") == 4 and len(name) == 36\n+\n+\n+def get_rbs_from(gene):\n+    # Normal RBS annotation types\n+    rbs_rbs = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n+        )\n+    )\n+    rbs_sds = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "Shine_Dalgarno_sequence"},\n+            subfeatures=False,\n+        )\n+    )\n+    # Fraking apollo\n+    apollo_exons = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n+        )\n+    )\n+    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n+    # These are more NCBI\'s style\n+    regulatory_elements = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "regulatory"},\n+            subfeatures=False,\n+        )\n+    )\n+    rbs_regulatory = list(\n+        feature_lambda(\n+            regulatory_elements,\n+            feature_test_quals,\n+            {"regulatory_class": ["ribosome_binding_site"]},\n+            subfeatures=False,\n+        )\n+    
)\n+    # Here\'s hoping you find just one ;)\n+    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n+\n+\n+def nice_name(record):\n+    """\n+    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n+    """\n+    name = record.id\n+    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n+    if len(likely_parental_contig) == 1:\n+        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n+    return name\n+\n+\n+def fsort(it):\n+    for i in sorted(it, key=lambda x: int(x.location.start)):\n+        yield i\n'
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,74 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+ '$xmfa'
+ </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+ '$sequences'
+ </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+     '$gff3_data'
+ </token>
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+    <!-- Pre-command step: when the reference comes from the history, link the
+         dataset to genomeref.fa in the working directory. The select value is
+         compared via str(), matching @INPUT_FASTA@ and @GENOME_SELECTOR@. -->
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if str($reference_genome.reference_genome_source) == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+ </token>
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+</macros>
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.mirov2.chain
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.mirov2.chain Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,2 @@
+Miro 1 950 + Miro.v2 1 950 +
+Miro 3170 3450 + Miro.v2 1017 1297 +
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.mirov2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.mirov2.fa Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,23 @@
+>Miro.v2 <unknown description>
+TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT
+TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG
+AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG
+GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT
+CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG
+TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC
+ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC
+CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA
+ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT
+ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC
+TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG
+CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA
+TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT
+AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC
+CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT
+TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaTCTC
+ACTTAACATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCAT
+ATTCTACTACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCT
+GTTTCTTCATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCAC
+AAAATCCGAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTT
+ACCCCTCTTTGCGAATGTATGCAAGTTCTTCATGGGT
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.mirov2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.mirov2.gff3 Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,3 @@
+##gff-version 3
+Miro.v2 gffSeqFeature gene 1 910 . - . ID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu;
+Miro.v2 gffSeqFeature gene 1019 1264 . - . ID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu;
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.simple.fa Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,2 @@
+>test
+AAAATTTT
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.simple.gff3 Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,5 @@
+##gff-version 3
+test feature gene 1 4 . + . ID=1
+test GenBank CDS 1 4 . + 1 ID=1.cds
+test feature gene 5 8 . - . ID=2
+test GenBank CDS 5 8 . - 1 ID=2.cds
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.out.chain
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.simple.out.chain Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,2 @@
+test 1 4 + test2 1 4 +
+test 5 8 - test2 11 14 +
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.out.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.simple.out.fa Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,2 @@
+>test2 <unknown description>
+AAAAcccgggAAAA
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.out.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_editor.simple.out.gff3 Mon Jun 05 02:43:21 2023 +0000
b
@@ -0,0 +1,5 @@
+##gff-version 3
+test2 gffSeqFeature gene 1 4 . + . ID=1;
+test2 gffSeqFeature CDS 1 4 . + 0 ID=1.cds;
+test2 gffSeqFeature gene 11 14 . + . ID=2;
+test2 gffSeqFeature CDS 11 14 . + 0 ID=2.cds;
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/miro.2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/miro.2.gff3 Mon Jun 05 02:43:21 2023 +0000
b
b'@@ -0,0 +1,1371 @@\n+##gff-version 3\n+##sequence-region Miro 1 176055\n+Miro\tCPT\tgene\t1\t910\t.\t-\t.\tID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t1\t910\t.\t-\t.\tID=1d8680f9-d6bc-4ef1-a535-83fe555cd097;Name=Miro.gene_1-00001;Parent=8861dd01-d426-40d4-96b5-f8e4b81c93d2;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tCDS\t1\t900\t.\t-\t0\tID=cd13f44c-b958-4dcc-a6b1-b901ad465268;Name=cd13f44c-b958-4dcc-a6b1-b901ad465268-CDS;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t908\t910\t.\t-\t.\tID=532024dc-47d1-40e1-9255-9ec5ee018cc0;Name=532024dc-47d1-40e1-9255-9ec5ee018cc0-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t1\t900\t.\t-\t.\tID=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93;Name=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tgene\t900\t3173\t.\t-\t.\tID=93e09f06-6e7f-40ce-9308-bfe491cf0a24;Name=fdsa;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t900\t3173\t.\t-\t.\tID=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;Name=fdsa;Parent=93e09f06-6e7f-40ce-9308-bfe491cf0a24;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tCDS\t900\t3161\t.\t-\t0\tID=afd28aa9-d1d9-4125-923d-f62a8cbf8307;Name=afd28aa9-d1d9-4125-923d-f62a8cbf8307-CDS;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t3171\t3173\t.\t-\t.\tID=c452f5b8-598e-4122-961c-302e28fdd0a4;Name=c452f5b8-598e-4122-961c-302e28fdd0a4-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\te
xon\t900\t3161\t.\t-\t.\tID=4350b621-93f7-45fb-85b8-a60ce93f2944;Name=4350b621-93f7-45fb-85b8-a60ce93f2944-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tgene\t3172\t3417\t.\t-\t.\tID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t3172\t3417\t.\t-\t.\tID=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;Name=Miroa-00001;Parent=b8da12d7-fe5c-42bd-b020-6a8ab205a133;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t3414\t3417\t.\t-\t.\tID=6bc4ea79-a43e-43e2-90fe-6af95e31e214;Name=6bc4ea79-a43e-43e2-90fe-6af95e31e214-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t3172\t3408\t.\t-\t.\tID=27847554-35ea-48a8-b5cd-11e64a233d41;Name=27847554-35ea-48a8-b5cd-11e64a233d41-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tCDS\t3172\t3408\t.\t-\t0\tID=a508e82a-5e35-4ac5-8082-e52d06b2edad;Name=a508e82a-5e35-4ac5-8082-e52d06b2edad-CDS;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tgene\t3412\t3979\t.\t-\t.\tID=47402bc1-9d3d-4881-8456-cd85ed73e3d9;Name=Miro.gene_4;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t3412\t3979\t.\t-\t.\tID=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;Name=Miro.gene_4-00001;Parent=47402bc1-9d3d-4881-8456-cd85ed73e3d9;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tCDS\t3412\t3966\t.\t-\t0\tID=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9;Name=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9-CDS;Note=Manually set translation 
start;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t3975\t3979\t.\t-\t.\tID=a77e978d-fba5-4782-9d07-0172c8fa6df9;Name=a77e978d-fba5-4782-9d07-0172c8fa6df9-exon;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modif'..b'fea64;Name=Miro%C4%8B-00001;Parent=1e943ae8-2418-4212-a066-65a568705f1c;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t174760\t174894\t.\t-\t.\tID=51429ca4-a6d5-4a29-8a3a-7151ab119c36;Name=51429ca4-a6d5-4a29-8a3a-7151ab119c36-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t174900\t174903\t.\t-\t.\tID=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255;Name=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tCDS\t174760\t174894\t.\t-\t0\tID=4b13974a-0ee4-4e88-bdbe-659aa2b204ba;Name=4b13974a-0ee4-4e88-bdbe-659aa2b204ba-CDS;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tgene\t174939\t175197\t.\t-\t.\tID=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;Name=gene_274;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t174939\t175197\t.\t-\t.\tID=872415a0-d11f-469c-bb74-0dfc428d105f;Name=gene_274-00001;Parent=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t175195\t175197\t.\t-\t.\tID=6d561d73-b40e-4be2-91b8-0c11508c6e03;Name=6d561d73-b40e-4be2-91b8-0c11508c6e03-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tCDS\t174939\t175187\t.\t-\t0\tID=d5367d3a-56eb-417f-8056-36c5a82d618a;Name=d5367d3a-56eb-417f-8056-36c5a82d618a-CDS;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2
016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t174939\t175187\t.\t-\t.\tID=e0bd32bc-9332-4d12-b92e-358ae5432569;Name=e0bd32bc-9332-4d12-b92e-358ae5432569-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tgene\t175224\t175675\t.\t-\t.\tID=4596ff39-c162-4c8d-b40f-1403a578f3cd;Name=gene_275;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t175224\t175675\t.\t-\t.\tID=71e48bbd-f449-4964-a088-b216bdcc36b6;Name=gene_275-00001;Parent=4596ff39-c162-4c8d-b40f-1403a578f3cd;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t175673\t175675\t.\t-\t.\tID=87fb1a55-1430-444a-95cc-0e40cfcde490;Name=87fb1a55-1430-444a-95cc-0e40cfcde490-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t175224\t175661\t.\t-\t.\tID=484e9b53-43f7-4a20-8ffe-aa7298d0c95c;Name=484e9b53-43f7-4a20-8ffe-aa7298d0c95c-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tCDS\t175224\t175661\t.\t-\t0\tID=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8;Name=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8-CDS;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tgene\t175636\t176021\t.\t-\t.\tID=9cbd3255-806e-4d6f-9e7e-b974495fe09e;Name=Miro%C4%8C;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t175636\t176021\t.\t-\t.\tID=5c9e6321-2990-4cc9-b58b-69d43d1304ff;Name=Miro%C4%8C-00001;Parent=9cbd3255-806e-4d6f-9e7e-b974495fe09e;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t176017\t176021\t.\t-\t.\tID=91dd7574-88be-4c6b-a9a1-48cb6f600abb;Name=91dd7574-88be-4c6b-a9a1-48cb6f600abb-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_l
ast_modified=2016-09-28\n+Miro\tCPT\tCDS\t175636\t176010\t.\t-\t0\tID=94a131f5-9235-4efc-afe1-3e33c7b2ddb6;Name=94a131f5-9235-4efc-afe1-3e33c7b2ddb6-CDS;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t175636\t176010\t.\t-\t.\tID=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a;Name=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n'
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/miro.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/miro.fa Mon Jun 05 02:43:21 2023 +0000
b
b'@@ -0,0 +1,2936 @@\n+>Miro\n+TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n+TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n+AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n+GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n+CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n+TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n+ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n+CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n+ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n+ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n+TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n+CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n+TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n+AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n+CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n+TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n+CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n+AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n+GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n+TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n+TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n+TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n+GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n+CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n+CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n+AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n+AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n+TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n+TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n+AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n+CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n+GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n+TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n+TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n+TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n+CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n+TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n+TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n+TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n+AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n+ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n+ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n+GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n+GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n+ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n+GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n+ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n+AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n+TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n+AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n+GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n+TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n+ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n+ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n+TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n+CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n+GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n+TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n+ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n+GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n+GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n+ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n+ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n+
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n+ACGG'..b'TCGGGTAATATCG\n+TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n+ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n+GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n+GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n+GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n+CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n+TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n+ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n+TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n+AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n+TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n+CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n+AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n+GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n+TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n+ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n+TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n+TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n+ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n+TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n+CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n+TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n+AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n+TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n+CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n+CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n+ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n+GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n+ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n+AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n+CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n+CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n+TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n+TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n+ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n+ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n+TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n+TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n+AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n+ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n+TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n+GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n+GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n+TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n+ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n+CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n+TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n+AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n+GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n+GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n+TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n+AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n+TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n+ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n+CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n+TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n+TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n+GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n+GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n+GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n+TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n+TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n+GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n+GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n+GGCGCGGTTGTTTAG\n'
b
diff -r 787ce84e8d16 -r 134bb2d7cdfd tsv.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tsv.py Mon Jun 05 02:43:21 2023 +0000
[
@@ -0,0 +1,30 @@
+import sys
+
+
+# Like 'import json' / 'import yaml', but for tab-separated data.
+def loads(str_data):
+    """Parse tab-separated text (not implemented).
+
+    Placeholder: the function exists so the module exposes a json-style
+    ``loads`` name, but no parsing is implemented.
+
+    :param str_data: text to parse; currently unused.
+    :raises NotImplementedError: always.
+    """
+    # Raise instead of returning the exception: a returned exception instance
+    # is truthy and could silently be mistaken for parsed data.
+    raise NotImplementedError()
+
+
+def load(handle):
+    """Parse tab-separated data from a file-like object (not implemented).
+
+    Placeholder: the function exists so the module exposes a json-style
+    ``load`` name, but no parsing is implemented.
+
+    :param handle: input handle; currently unused.
+    :raises NotImplementedError: always.
+    """
+    # Raise instead of returning the exception: a returned exception instance
+    # is truthy and could silently be mistaken for parsed data.
+    raise NotImplementedError()
+
+
+def dump(data, handle=sys.stdout):
+    """Write rows to ``handle`` as tab-separated lines.
+
+    Every item of a row is converted with ``str``; the items are joined by
+    tabs and the line is terminated with a newline. One ``handle.write``
+    call is issued per row.
+
+    :param data: iterable of rows, each an iterable of values.
+    :param handle: object with a ``write`` method. The default is the
+        ``sys.stdout`` object that was current when this function was
+        defined.
+    """
+    for fields in data:
+        line = "\t".join(str(cell) for cell in fields)
+        handle.write(line + "\n")
+
+
+def dumps(data):
+    """Serialize rows to a tab-separated string.
+
+    Every item of a row is converted with ``str``; the items are joined by
+    tabs and each row is terminated with a newline.
+
+    :param data: iterable of rows, each an iterable of values.
+    :returns: the serialized text; ``""`` when ``data`` yields no rows.
+    """
+    # A single join replaces repeated ``+=`` on a growing string, which can
+    # be quadratic in the number of rows.
+    return "".join("%s\n" % "\t".join(map(str, row)) for row in data)
+
+
+def dump_line(row, handle=sys.stdout):
+    """Write a single row to ``handle`` via ``dump``.
+
+    :param row: iterable of values making up the one line to write.
+    :param handle: passed through to ``dump``. The default is the
+        ``sys.stdout`` object that was current when this function was
+        defined.
+    """
+    # ``dump`` expects an iterable of rows, so wrap the row in a list.
+    single_row_table = [row]
+    dump(single_row_table, handle=handle)
+
+
+def dumps_line(row):
+    """Return a single row serialized via ``dumps``.
+
+    :param row: iterable of values making up the one line.
+    :returns: the result of ``dumps`` for a one-row table containing ``row``.
+    """
+    # ``dumps`` expects an iterable of rows, so wrap the row in a list.
+    single_row_table = [row]
+    return dumps(single_row_table)