Previous changeset 2:787ce84e8d16 (2022-06-17) Next changeset 4:78ce8a1a8fd1 (2023-07-23) |
Commit message:
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c |
added:
cpt-macros.xml genome_editor.py genome_editor.xml gff3.py macros.xml test-data/genome_editor.mirov2.chain test-data/genome_editor.mirov2.fa test-data/genome_editor.mirov2.gff3 test-data/genome_editor.simple.fa test-data/genome_editor.simple.gff3 test-data/genome_editor.simple.out.chain test-data/genome_editor.simple.out.fa test-data/genome_editor.simple.out.gff3 test-data/miro.2.gff3 test-data/miro.fa tsv.py |
removed:
cpt_genome_editor/cpt-macros.xml cpt_genome_editor/genome_editor.py cpt_genome_editor/genome_editor.xml cpt_genome_editor/gff3.py cpt_genome_editor/macros.xml cpt_genome_editor/test-data/genome_editor.mirov2.chain cpt_genome_editor/test-data/genome_editor.mirov2.fa cpt_genome_editor/test-data/genome_editor.mirov2.gff3 cpt_genome_editor/test-data/genome_editor.simple.fa cpt_genome_editor/test-data/genome_editor.simple.gff3 cpt_genome_editor/test-data/genome_editor.simple.out.chain cpt_genome_editor/test-data/genome_editor.simple.out.fa cpt_genome_editor/test-data/genome_editor.simple.out.gff3 cpt_genome_editor/test-data/miro.2.gff3 cpt_genome_editor/test-data/miro.fa cpt_genome_editor/tsv.py |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt-macros.xml Mon Jun 05 02:43:21 2023 +0000 |
[ |
@@ -0,0 +1,115 @@ +<macros> + <xml name="gff_requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="2.12.1">requests</requirement> + <requirement type="package" version="1.2.2">cpt_gffparser</requirement> + <yield/> + </requirements> + <version_command> + <![CDATA[ + cd '$__tool_directory__' && git rev-parse HEAD + ]]> + </version_command> + </xml> + <xml name="citation/mijalisrasche"> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex">@unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-crr"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. 
Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020-AJC-solo"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-clm"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="sl-citations-clm"> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </xml> +</macros> |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/cpt-macros.xml --- a/cpt_genome_editor/cpt-macros.xml Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,115 +0,0 @@ -<?xml version="1.0"?> -<macros> - <xml name="gff_requirements"> - <requirements> - <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.65">biopython</requirement> - <requirement type="package" version="2.12.1">requests</requirement> - <yield/> - </requirements> - <version_command> - <![CDATA[ - cd $__tool_directory__ && git rev-parse HEAD - ]]> - </version_command> - </xml> - <xml name="citation/mijalisrasche"> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex">@unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - </xml> - <xml name="citations"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-crr"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Ross}, - title = {CPT Galaxy Tools}, - year = {2020-}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-2020"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {A. 
Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-2020-AJC-solo"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-clm"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="sl-citations-clm"> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </xml> -</macros> |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/genome_editor.py --- a/cpt_genome_editor/genome_editor.py Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,166 +0,0 @@ -#!/usr/bin/env python -import logging -import copy -import argparse -import tsv -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqFeature import FeatureLocation -from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature, convertSeqRec -from gff3 import feature_lambda, feature_test_contains - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - - -def mutate(gff3, fasta, changes, customSeqs, new_id): - # Change Language - # - we can only accept ONE genome as an input. (TODO: support multiple?) - # - we can only build ONE genome as an output. (TODO: support multiple?) - # - must allow selection of various regions - # '1,1000,+ 40,100,- custom_seq_1' - try: - custom_seqs = SeqIO.to_dict(SeqIO.parse(customSeqs, "fasta")) - except: - custom_seqs = {} - seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta")) - # Pull first and onl record - rec = list(gffParse(gff3, base_dict=seq_dict))[0] - # Create a "clean" record - new_record = copy.deepcopy(rec) - new_record.id = new_id - new_record.seq = Seq("") - new_record.features = [] - new_record.annotations = {} - # Process changes. - chain = [] - topFeats = {} - covered = 0 - for feat in rec.features: - if "ID" in feat.qualifiers.keys(): - topFeats[feat.qualifiers["ID"][0]] = feat.location.start - for change in changes: - if "," in change: - (start, end, strand) = change.split(",") - start = int(start) - 1 - end = int(end) - - # Make any complaints - broken_feature_start = list( - feature_lambda( - rec.features, - feature_test_contains, - {"index": start}, - subfeatures=False, - ) - ) - if len(broken_feature_start) > 0: - pass - # log.info("DANGER: Start index chosen (%s) is in the middle of a feature (%s %s). 
This feature will disappear from the output", start, broken_feature_start[0].id, broken_feature_start[0].location) - broken_feature_end = list( - feature_lambda( - rec.features, - feature_test_contains, - {"index": end}, - subfeatures=False, - ) - ) - if len(broken_feature_end) > 0: - pass - # log.info("DANGER: End index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output", end, broken_feature_end[0].id, broken_feature_end[0].location) - - # Ok, fetch features - if strand == "+": - tmp_req = rec[start:end] - else: - tmp_req = rec[start:end].reverse_complement( - id=True, - name=True, - description=True, - features=True, - annotations=True, - letter_annotations=True, - dbxrefs=True, - ) - tmp_req = convertSeqRec(tmp_req)[0] - def update_location(feature, shiftS): - feature.location = FeatureLocation(feature.location.start + shiftS, feature.location.end + shiftS, feature.strand) - for i in feature.sub_features: - i = update_location(i, shiftS) - return feature - - - - #for feature in tmp_req.features: - - - - - chain.append( - [ - rec.id, - start + 1, - end, - strand, - new_record.id, - len(new_record) + 1, - len(new_record) + (end - start), - "+", - ] - ) - - covered += len(new_record.seq) - print(covered) - new_record.seq += tmp_req.seq - # NB: THIS MUST USE BIOPYTHON 1.67. 1.68 Removes access to - # subfeatures, which means you will only get top-level features. 
- startInd = len(new_record.features) - new_record.features += tmp_req.features - - for i in new_record.features[startInd:]: - i.location = FeatureLocation(i.location.start + covered, i.location.end + covered, i.location.strand) - if "ID" not in i.qualifiers.keys(): - continue - diffS = i.location.start - topFeats[i.qualifiers["ID"][0]] - subFeats = i.sub_features - for j in subFeats: - j = update_location(j, diffS) - else: - new_record.seq += custom_seqs[change].seq - yield new_record, chain - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("fasta", type=argparse.FileType("r"), help="Sequence") - parser.add_argument("gff3", type=argparse.FileType("r"), help="Annotations") - parser.add_argument("new_id", help="Append to ID", default="_v2") - parser.add_argument( - "--out_fasta", - type=argparse.FileType("w"), - help="Output fasta", - default="out.fa", - ) - parser.add_argument( - "--out_gff3", - type=argparse.FileType("w"), - help="Output gff3", - default="out.gff3", - ) - parser.add_argument( - "--out_simpleChain", - type=argparse.FileType("w"), - help="Output simple chain (i.e. not a real UCSC chain file)", - default="out.chain", - ) - parser.add_argument("--changes", nargs="+") - parser.add_argument("--customSeqs", type=argparse.FileType("r")) - args = parser.parse_args() - - for rec, chain in mutate( - args.gff3, args.fasta, args.changes, args.customSeqs, args.new_id - ): - # TODO: Check that this appends and doesn't overwirte - gffWrite([rec], args.out_gff3) - SeqIO.write([rec], args.out_fasta, "fasta") - tsv.dump(chain, args.out_simpleChain) |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/genome_editor.xml --- a/cpt_genome_editor/genome_editor.xml Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,169 +0,0 @@ -<?xml version="1.0"?> -<tool id="edu.tamu.cpt.gff3.genome_editor" name="Genome Editor" version="2.1"> - <description>allows you to re-arrange a genome</description> - <macros> - <import>macros.xml</import> - <import>cpt-macros.xml</import> - </macros> - <expand macro="requirements"/> - <command><![CDATA[ -@GENOME_SELECTOR_PRE@ -python $__tool_directory__/genome_editor.py - -@GENOME_SELECTOR@ -@INPUT_GFF@ -"$new_id" - ---out_fasta "$out_fasta" ---out_gff3 "$out_gff3" ---out_simpleChain "$out_chain" ---customSeqs "$custom_seqs" ---changes -#for $idx, $change in enumerate($changes): - #if $change.input_type.input_type_select == "region": - ${change.input_type.start},${change.input_type.end},${change.input_type.revcom} - #else - custom${idx} - #end if -#end for -]]></command> - <configfiles> - <configfile name="custom_seqs"> - <![CDATA[ -#for $idx, $change in enumerate($changes): - #if $change.input_type.input_type_select == "custom": ->custom${idx} -${change.input_type.seq} - #end if -#end for - ]]> - </configfile> - </configfiles> - <inputs> - <expand macro="input/gff3+fasta" /> - <param label="New ID" name="new_id" type="text" help="New ID for the sequence to uniquely identify it from the previous build of the sequence. E.g. append Miro could become Miro.2 or Miro.v2 "> - <validator type="expression" message="You must specify a new ID"><![CDATA[value and len(value) > 0]]></validator> - </param> - <repeat name="changes" title="Sequence Component Selections"> - <conditional name="input_type"> - <param name="input_type_select" type="select" label="Data Source"> - <option value="region" selected="True">Region from FASTA file</option> - <option value="custom">Custom Additional Sequence</option> - </param> - <when value="region"> - <param label="Start" name="start" type="integer" min="1" value="1" help="Inclusive range, 1-indexed genome. (I.e. 
specifying 1-2000 will include base number 1)"/> - <param label="End" name="end" type="integer" min="1" value="1" help="Inclusive range. (I.e. specifying 1-2000 will include base number 2000)"/> - <param label="Reverse + Complement" name="revcom" type="boolean" truevalue="-" falsevalue="+" /> - </when> - <when value="custom"> - <param label="Custom Sequence" name="seq" type="text" help="Enter the sequence, e.g. 'ACTG'. No FASTA definition line."/> - </when> - </conditional> - </repeat> - </inputs> - <outputs> - <data format="gff3" name="out_gff3" label="${new_id} Features"/> - <data format="fasta" name="out_fasta" label="${new_id}"/> - <data format="tabular" name="out_chain" label="${new_id} Change Table"/> - </outputs> - <tests> - <test> - <param name="reference_genome_source" value="history" /> - <param name="genome_fasta" value="genome_editor.simple.fa" /> - <param name="gff3_data" value="genome_editor.simple.gff3" /> - <param name="new_id" value="test2" /> - <repeat name="changes"> - <conditional name="input_type"> - <param name="input_type_select" value="region" /> - <param name="start" value="1"/> - <param name="end" value="4"/> - <param name="revcom" value="+"/> - </conditional> - </repeat> - <repeat name="changes"> - <conditional name="input_type"> - <param name="input_type_select" value="custom" /> - <param name="seq" value="cccggg"/> - </conditional> - </repeat> - <repeat name="changes"> - <conditional name="input_type"> - <param name="input_type_select" value="region" /> - <param name="start" value="5"/> - <param name="end" value="8"/> - <param name="revcom" value="-"/> - </conditional> - </repeat> - <output name="out_gff3" file="genome_editor.simple.out.gff3" /> - <output name="out_fasta" file="genome_editor.simple.out.fa" /> - <output name="out_chain" file="genome_editor.simple.out.chain" /> - </test> - <test> - - <param name="reference_genome_source" value="history" /> - <param name="genome_fasta" value="miro.fa" /> - <param name="gff3_data" 
value="miro.2.gff3" /> - <param name="new_id" value="Miro.v2" /> - <repeat name="changes"> - <conditional name="input_type"> - <param name="input_type_select" value="region" /> - <param name="start" value="1"/> - <param name="end" value="950"/> - <param name="revcom" value="+"/> - </conditional> - </repeat> - <repeat name="changes"> - <conditional name="input_type"> - <param name="input_type_select" value="custom" /> - <param name="seq" value="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"/> - </conditional> - </repeat> - <repeat name="changes"> - <conditional name="input_type"> - <param name="input_type_select" value="region" /> - <param name="start" value="3170"/> - <param name="end" value="3450"/> - <param name="revcom" value="+"/> - </conditional> - </repeat> - <output name="out_gff3" file="genome_editor.mirov2.gff3" /> - <output name="out_fasta" file="genome_editor.mirov2.fa" /> - <output name="out_chain" file="genome_editor.mirov2.chain" /> - </test> - </tests> - <help><![CDATA[ -**What it does** - -Allows for re-arranging a FASTA genomic sequence, and remaps the associated features -from a gff3 file with the new coordinates. Segments of the genome are moved around -and stitched back together according to user-specified positions. - -**Example FASTA input** (spaces added for clarity):: - >Miro - TTA GTA ATG GCT AAA - -With user-specified *sequence component selections*: - -- start: 1, end: 10, strand: + -- start: 6, end: 10, strand: + - -the first ten bases will be listed, followed by a duplication of bases 6-10. 
-Bases 11-15 are not part of the sequence component selection parameters and -are therefore not in the output:: - - >Miro.v2 - TTA GTA ATG GAA TGG - -Alternatively, with user-specified *sequence component selections*:: - -- start: 1, end: 10, strand: + -- start: 6, end: 10, strand: - - -the last section with be reverse-complemented and give the following output:: - - >Miro.v2 - TTA GTA ATG GCC ATT - -]]></help> - <expand macro="citations" /> -</tool> |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/gff3.py --- a/cpt_genome_editor/gff3.py Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,346 +0,0 @@\n-import copy\n-import logging\n-\n-log = logging.getLogger()\n-log.setLevel(logging.WARN)\n-\n-\n-def feature_lambda(\n- feature_list,\n- test,\n- test_kwargs,\n- subfeatures=True,\n- parent=None,\n- invert=False,\n- recurse=True,\n-):\n- """Recursively search through features, testing each with a test function, yielding matches.\n-\n- GFF3 is a hierachical data structure, so we need to be able to recursively\n- search through features. E.g. if you\'re looking for a feature with\n- ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n- case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n-\n- :type feature_list: list\n- :param feature_list: an iterable of features\n-\n- :type test: function reference\n- :param test: a closure with the method signature (feature, **kwargs) where\n- the kwargs are those passed in the next argument. This\n- function should return True or False, True if the feature is\n- to be yielded as part of the main feature_lambda function, or\n- False if it is to be ignored. This function CAN mutate the\n- features passed to it (think "apply").\n-\n- :type test_kwargs: dictionary\n- :param test_kwargs: kwargs to pass to your closure when it is called.\n-\n- :type subfeatures: boolean\n- :param subfeatures: when a feature is matched, should just that feature be\n- yielded to the caller, or should the entire sub_feature\n- tree for that feature be included? subfeatures=True is\n- useful in cases such as searching for a gene feature,\n- and wanting to know what RBS/Shine_Dalgarno_sequences\n- are in the sub_feature tree (which can be accomplished\n- with two feature_lambda calls). 
subfeatures=False is\n- useful in cases when you want to process (and possibly\n- return) the entire feature tree, such as applying a\n- qualifier to every single feature.\n-\n- :type invert: boolean\n- :param invert: Negate/invert the result of the filter.\n-\n- :rtype: yielded list\n- :return: Yields a list of matching features.\n- """\n- # Either the top level set of [features] or the subfeature attribute\n- for feature in feature_list:\n- feature._parent = parent\n- if not parent:\n- # Set to self so we cannot go above root.\n- feature._parent = feature\n- test_result = test(feature, **test_kwargs)\n- # if (not invert and test_result) or (invert and not test_result):\n- if invert ^ test_result:\n- if not subfeatures:\n- feature_copy = copy.deepcopy(feature)\n- feature_copy.sub_features = list()\n- yield feature_copy\n- else:\n- yield feature\n-\n- if recurse and hasattr(feature, "sub_features"):\n- for x in feature_lambda(\n- feature.sub_features,\n- test,\n- test_kwargs,\n- subfeatures=subfeatures,\n- parent=feature,\n- invert=invert,\n- recurse=recurse,\n- ):\n- yield x\n-\n-\n-def fetchParent(feature):\n- if not hasattr(feature, "_parent") or feature._parent is None:\n- return feature\n- else:\n- return fetchParent(feature._parent)\n-\n-\n-def feature_test_true(feature, **kwargs):\n- return True\n-\n-\n-def feature_test_type(feature, **kwargs):\n- if "type" in kwargs:\n- return str(feature.type).upper() == str(kwargs["type"]).upper()\n- elif "types" in kwargs:\n- for x in kwargs["types"]:\n- if str(feature.type).upper() == str(x).upper():\n- return True\n- return False\n- raise Exception("Incorrect feature_test_type call, ne'..b'feature.location.start,\n- # feature.location.end,\n- # feature.location.strand\n- # )\n- return result\n-\n-\n-def get_gff3_id(gene):\n- return gene.qualifiers.get("Name", [gene.id])[0]\n-\n-\n-def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n- # This prevents frameshift errors\n- while start < 0:\n- start += 3\n- 
while end < 0:\n- end += 3\n- while start > parent_length:\n- start -= 3\n- while end > parent_length:\n- end -= 3\n- return (start, end)\n-\n-\n-def coding_genes(feature_list):\n- for x in genes(feature_list):\n- if (\n- len(\n- list(\n- feature_lambda(\n- x.sub_features,\n- feature_test_type,\n- {"type": "CDS"},\n- subfeatures=False,\n- )\n- )\n- )\n- > 0\n- ):\n- yield x\n-\n-\n-def genes(feature_list, feature_type="gene", sort=False):\n- """\n- Simple filter to extract gene features from the feature set.\n- """\n-\n- if not sort:\n- for x in feature_lambda(\n- feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n- ):\n- yield x\n- else:\n- data = list(genes(feature_list, feature_type=feature_type, sort=False))\n- data = sorted(data, key=lambda feature: feature.location.start)\n- for x in data:\n- yield x\n-\n-\n-def wa_unified_product_name(feature):\n- """\n- Try and figure out a name. We gave conflicting instructions, so\n- this isn\'t as trivial as it should be. 
Sometimes it will be in\n- \'product\' or \'Product\', othertimes in \'Name\'\n- """\n- # Manually applied tags.\n- protein_product = feature.qualifiers.get(\n- "product", feature.qualifiers.get("Product", [None])\n- )[0]\n-\n- # If neither of those are available ...\n- if protein_product is None:\n- # And there\'s a name...\n- if "Name" in feature.qualifiers:\n- if not is_uuid(feature.qualifiers["Name"][0]):\n- protein_product = feature.qualifiers["Name"][0]\n-\n- return protein_product\n-\n-\n-def is_uuid(name):\n- return name.count("-") == 4 and len(name) == 36\n-\n-\n-def get_rbs_from(gene):\n- # Normal RBS annotation types\n- rbs_rbs = list(\n- feature_lambda(\n- gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n- )\n- )\n- rbs_sds = list(\n- feature_lambda(\n- gene.sub_features,\n- feature_test_type,\n- {"type": "Shine_Dalgarno_sequence"},\n- subfeatures=False,\n- )\n- )\n- # Fraking apollo\n- apollo_exons = list(\n- feature_lambda(\n- gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n- )\n- )\n- apollo_exons = [x for x in apollo_exons if len(x) < 10]\n- # These are more NCBI\'s style\n- regulatory_elements = list(\n- feature_lambda(\n- gene.sub_features,\n- feature_test_type,\n- {"type": "regulatory"},\n- subfeatures=False,\n- )\n- )\n- rbs_regulatory = list(\n- feature_lambda(\n- regulatory_elements,\n- feature_test_quals,\n- {"regulatory_class": ["ribosome_binding_site"]},\n- subfeatures=False,\n- )\n- )\n- # Here\'s hoping you find just one ;)\n- return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n-\n-\n-def nice_name(record):\n- """\n- get the real name rather than NCBI IDs and so on. 
If fails, will return record.id\n- """\n- name = record.id\n- likely_parental_contig = list(genes(record.features, feature_type="contig"))\n- if len(likely_parental_contig) == 1:\n- name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n- return name\n-\n-\n-def fsort(it):\n- for i in sorted(it, key=lambda x: int(x.location.start)):\n- yield i\n' |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/macros.xml --- a/cpt_genome_editor/macros.xml Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,62 +0,0 @@ -<?xml version="1.0"?> -<macros> - <xml name="requirements"> - <requirements> - <requirement type="package" version="3.8.13">python</requirement> - <requirement type="package" version="1.79">biopython</requirement> - <requirement type="package" version="1.2.2">cpt_gffparser</requirement> - <yield/> - </requirements> - </xml> - <token name="@BLAST_TSV@"> - "$blast_tsv" - </token> - <xml name="blast_tsv"> - <param label="Blast Results" help="TSV/tabular (25 Column)" - name="blast_tsv" type="data" format="tabular" /> - </xml> - - <token name="@BLAST_XML@"> - "$blast_xml" - </token> - <xml name="blast_xml"> - <param label="Blast Results" help="XML format" - name="blast_xml" type="data" format="blastxml" /> - </xml> - <xml name="gff3_with_fasta"> - <param label="Genome Sequences" name="fasta" type="data" format="fasta" /> - <param label="Genome Annotations" name="gff3" type="data" format="gff3" /> - </xml> - <xml name="genome_selector"> - <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/> - </xml> - <xml name="gff3_input"> - <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> - </xml> - <xml name="input/gff3+fasta"> - <expand macro="gff3_input" /> - <expand macro="genome_selector" /> - </xml> - <token name="@INPUT_GFF@"> - "$gff3_data" - </token> - <token name="@INPUT_FASTA@"> - genomeref.fa - </token> - <token name="@GENOME_SELECTOR_PRE@"> - ln -s $genome_fasta genomeref.fa; - </token> - <token name="@GENOME_SELECTOR@"> - genomeref.fa - </token> - <xml name="input/fasta"> - <param label="Fasta file" name="sequences" type="data" format="fasta"/> - </xml> - - <token name="@SEQUENCE@"> - "$sequences" - </token> - <xml name="input/fasta/protein"> - <param label="Protein fasta file" name="sequences" type="data" format="fasta"/> - </xml> -</macros> |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.mirov2.chain --- a/cpt_genome_editor/test-data/genome_editor.mirov2.chain Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -Miro 1 950 + Miro.v2 1 950 + -Miro 3170 3450 + Miro.v2 1017 1297 + |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.mirov2.fa --- a/cpt_genome_editor/test-data/genome_editor.mirov2.fa Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,23 +0,0 @@ ->Miro.v2 <unknown description> -TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT -TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG -AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG -GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT -CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG -TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC -ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC -CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA -ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT -ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC -TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG -CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA -TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT -AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC -CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT -TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaTCTC -ACTTAACATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCAT -ATTCTACTACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCT -GTTTCTTCATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCAC -AAAATCCGAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTT -ACCCCTCTTTGCGAATGTATGCAAGTTCTTCATGGGT |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.mirov2.gff3 --- a/cpt_genome_editor/test-data/genome_editor.mirov2.gff3 Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ -##gff-version 3 -Miro.v2 gffSeqFeature gene 1 910 . - . ID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu; -Miro.v2 gffSeqFeature gene 1019 1264 . - . ID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu; |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.fa --- a/cpt_genome_editor/test-data/genome_editor.simple.fa Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->test -AAAATTTT |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.gff3 --- a/cpt_genome_editor/test-data/genome_editor.simple.gff3 Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,5 +0,0 @@ -##gff-version 3 -test feature gene 1 4 . + . ID=1 -test GenBank CDS 1 4 . + 1 ID=1.cds -test feature gene 5 8 . - . ID=2 -test GenBank CDS 5 8 . - 1 ID=2.cds |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.out.chain --- a/cpt_genome_editor/test-data/genome_editor.simple.out.chain Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -test 1 4 + test2 1 4 + -test 5 8 - test2 11 14 + |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.out.fa --- a/cpt_genome_editor/test-data/genome_editor.simple.out.fa Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->test2 <unknown description> -AAAAcccgggAAAA |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/genome_editor.simple.out.gff3 --- a/cpt_genome_editor/test-data/genome_editor.simple.out.gff3 Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,5 +0,0 @@ -##gff-version 3 -test2 gffSeqFeature gene 1 4 . + . ID=1; -test2 gffSeqFeature CDS 1 4 . + 0 ID=1.cds; -test2 gffSeqFeature gene 11 14 . + . ID=2; -test2 gffSeqFeature CDS 11 14 . + 0 ID=2.cds; |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/miro.2.gff3 --- a/cpt_genome_editor/test-data/miro.2.gff3 Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,1371 +0,0 @@\n-##gff-version 3\n-##sequence-region Miro 1 176055\n-Miro\tCPT\tgene\t1\t910\t.\t-\t.\tID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t1\t910\t.\t-\t.\tID=1d8680f9-d6bc-4ef1-a535-83fe555cd097;Name=Miro.gene_1-00001;Parent=8861dd01-d426-40d4-96b5-f8e4b81c93d2;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tCDS\t1\t900\t.\t-\t0\tID=cd13f44c-b958-4dcc-a6b1-b901ad465268;Name=cd13f44c-b958-4dcc-a6b1-b901ad465268-CDS;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t908\t910\t.\t-\t.\tID=532024dc-47d1-40e1-9255-9ec5ee018cc0;Name=532024dc-47d1-40e1-9255-9ec5ee018cc0-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t1\t900\t.\t-\t.\tID=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93;Name=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tgene\t900\t3173\t.\t-\t.\tID=93e09f06-6e7f-40ce-9308-bfe491cf0a24;Name=fdsa;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t900\t3173\t.\t-\t.\tID=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;Name=fdsa;Parent=93e09f06-6e7f-40ce-9308-bfe491cf0a24;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tCDS\t900\t3161\t.\t-\t0\tID=afd28aa9-d1d9-4125-923d-f62a8cbf8307;Name=afd28aa9-d1d9-4125-923d-f62a8cbf8307-CDS;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t3171\t3173\t.\t-\t.\tID=c452f5b8-598e-4122-961c-302e28fdd0a4;Name=c452f5b8-598e-4122-961c-302e28fdd0a4-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\te
xon\t900\t3161\t.\t-\t.\tID=4350b621-93f7-45fb-85b8-a60ce93f2944;Name=4350b621-93f7-45fb-85b8-a60ce93f2944-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tgene\t3172\t3417\t.\t-\t.\tID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t3172\t3417\t.\t-\t.\tID=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;Name=Miroa-00001;Parent=b8da12d7-fe5c-42bd-b020-6a8ab205a133;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t3414\t3417\t.\t-\t.\tID=6bc4ea79-a43e-43e2-90fe-6af95e31e214;Name=6bc4ea79-a43e-43e2-90fe-6af95e31e214-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\texon\t3172\t3408\t.\t-\t.\tID=27847554-35ea-48a8-b5cd-11e64a233d41;Name=27847554-35ea-48a8-b5cd-11e64a233d41-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tCDS\t3172\t3408\t.\t-\t0\tID=a508e82a-5e35-4ac5-8082-e52d06b2edad;Name=a508e82a-5e35-4ac5-8082-e52d06b2edad-CDS;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n-Miro\tCPT\tgene\t3412\t3979\t.\t-\t.\tID=47402bc1-9d3d-4881-8456-cd85ed73e3d9;Name=Miro.gene_4;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t3412\t3979\t.\t-\t.\tID=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;Name=Miro.gene_4-00001;Parent=47402bc1-9d3d-4881-8456-cd85ed73e3d9;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tCDS\t3412\t3966\t.\t-\t0\tID=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9;Name=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9-CDS;Note=Manually set translation 
start;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t3975\t3979\t.\t-\t.\tID=a77e978d-fba5-4782-9d07-0172c8fa6df9;Name=a77e978d-fba5-4782-9d07-0172c8fa6df9-exon;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modif'..b'fea64;Name=Miro%C4%8B-00001;Parent=1e943ae8-2418-4212-a066-65a568705f1c;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t174760\t174894\t.\t-\t.\tID=51429ca4-a6d5-4a29-8a3a-7151ab119c36;Name=51429ca4-a6d5-4a29-8a3a-7151ab119c36-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t174900\t174903\t.\t-\t.\tID=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255;Name=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tCDS\t174760\t174894\t.\t-\t0\tID=4b13974a-0ee4-4e88-bdbe-659aa2b204ba;Name=4b13974a-0ee4-4e88-bdbe-659aa2b204ba-CDS;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tgene\t174939\t175197\t.\t-\t.\tID=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;Name=gene_274;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t174939\t175197\t.\t-\t.\tID=872415a0-d11f-469c-bb74-0dfc428d105f;Name=gene_274-00001;Parent=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t175195\t175197\t.\t-\t.\tID=6d561d73-b40e-4be2-91b8-0c11508c6e03;Name=6d561d73-b40e-4be2-91b8-0c11508c6e03-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tCDS\t174939\t175187\t.\t-\t0\tID=d5367d3a-56eb-417f-8056-36c5a82d618a;Name=d5367d3a-56eb-417f-8056-36c5a82d618a-CDS;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2
016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t174939\t175187\t.\t-\t.\tID=e0bd32bc-9332-4d12-b92e-358ae5432569;Name=e0bd32bc-9332-4d12-b92e-358ae5432569-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tgene\t175224\t175675\t.\t-\t.\tID=4596ff39-c162-4c8d-b40f-1403a578f3cd;Name=gene_275;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t175224\t175675\t.\t-\t.\tID=71e48bbd-f449-4964-a088-b216bdcc36b6;Name=gene_275-00001;Parent=4596ff39-c162-4c8d-b40f-1403a578f3cd;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t175673\t175675\t.\t-\t.\tID=87fb1a55-1430-444a-95cc-0e40cfcde490;Name=87fb1a55-1430-444a-95cc-0e40cfcde490-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t175224\t175661\t.\t-\t.\tID=484e9b53-43f7-4a20-8ffe-aa7298d0c95c;Name=484e9b53-43f7-4a20-8ffe-aa7298d0c95c-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tCDS\t175224\t175661\t.\t-\t0\tID=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8;Name=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8-CDS;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\tgene\t175636\t176021\t.\t-\t.\tID=9cbd3255-806e-4d6f-9e7e-b974495fe09e;Name=Miro%C4%8C;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\tmRNA\t175636\t176021\t.\t-\t.\tID=5c9e6321-2990-4cc9-b58b-69d43d1304ff;Name=Miro%C4%8C-00001;Parent=9cbd3255-806e-4d6f-9e7e-b974495fe09e;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n-Miro\tCPT\texon\t176017\t176021\t.\t-\t.\tID=91dd7574-88be-4c6b-a9a1-48cb6f600abb;Name=91dd7574-88be-4c6b-a9a1-48cb6f600abb-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_l
ast_modified=2016-09-28\n-Miro\tCPT\tCDS\t175636\t176010\t.\t-\t0\tID=94a131f5-9235-4efc-afe1-3e33c7b2ddb6;Name=94a131f5-9235-4efc-afe1-3e33c7b2ddb6-CDS;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n-Miro\tCPT\texon\t175636\t176010\t.\t-\t.\tID=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a;Name=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n' |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/test-data/miro.fa --- a/cpt_genome_editor/test-data/miro.fa Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,2936 +0,0 @@\n->Miro\n-TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n-TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n-AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n-GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n-CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n-TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n-ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n-CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n-ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n-ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n-TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n-CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n-TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n-AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n-CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n-TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n-CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n-AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n-GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n-TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n-TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n-TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n-GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n-CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n-CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n-AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n-AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n-TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n-TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n-AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n-CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n-GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n-TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n-TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n-TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n-CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n-TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n-TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n-TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n-AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n-ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n-ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n-GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n-GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n-ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n-GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n-ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n-AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n-TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n-AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n-GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n-TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n-ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n-ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n-TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n-CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n-GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n-TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n-ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n-GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n-GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n-ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n-ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n-
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n-ACGG'..b'TCGGGTAATATCG\n-TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n-ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n-GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n-GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n-GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n-CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n-TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n-ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n-TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n-AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n-TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n-CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n-AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n-GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n-TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n-ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n-TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n-TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n-ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n-TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n-CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n-TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n-AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n-TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n-CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n-CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n-ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n-GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n-ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n-AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n-CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n-CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n-TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n-TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n-ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n-ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n-TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n-TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n-AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n-ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n-TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n-GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n-GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n-TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n-ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n-CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n-TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n-AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n-GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n-GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n-TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n-AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n-TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n-ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n-CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n-TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n-TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n-GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n-GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n-GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n-TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n-TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n-GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n-GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n-GGCGCGGTTGTTTAG\n' |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd cpt_genome_editor/tsv.py --- a/cpt_genome_editor/tsv.py Fri Jun 17 12:52:23 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
"""Minimal tab-separated-value writer with a json/yaml-like API.

Only the writing half (``dump``/``dumps`` and their single-row variants) is
implemented; the parsing half (``load``/``loads``) raises
``NotImplementedError``.
"""
import sys


def _format_row(row):
    """Return one row as a tab-joined, newline-terminated string."""
    return "%s\n" % "\t".join(map(str, row))


# Like 'import json' / 'import yaml', except.. tab data.
def loads(str_data):
    """Parse TSV text. Not implemented.

    :raises NotImplementedError: always. (The exception used to be
        *returned*, which let callers silently carry on with an exception
        object as their "data".)
    """
    raise NotImplementedError()


def load(handle):
    """Parse TSV data from a file handle. Not implemented.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()


def dump(data, handle=None):
    """Write every row of *data* to *handle* as tab-separated lines.

    :param data: iterable of rows; each row is an iterable of values that
        are converted with ``str``.
    :param handle: writable text handle. Defaults to ``sys.stdout``, looked
        up at call time so that a redirected stdout is honoured.
    """
    if handle is None:
        handle = sys.stdout
    for row in data:
        handle.write(_format_row(row))


def dumps(data):
    """Return *data* rendered as tab-separated, newline-terminated lines."""
    return "".join(_format_row(row) for row in data)


def dump_line(row, handle=None):
    """Write a single *row* to *handle* (defaults to ``sys.stdout``)."""
    dump([row], handle=handle)


def dumps_line(row):
    """Return a single *row* rendered as one tab-separated line."""
    return dumps([row])
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd genome_editor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_editor.py Mon Jun 05 02:43:21 2023 +0000 |
[ |
#!/usr/bin/env python
"""Re-arrange a genome from user-selected regions and custom sequences.

Regions of one annotated input genome (FASTA + GFF3) and optional custom
sequences are concatenated, in the order given, into one new record. The
features inside each selected region are carried over with shifted
coordinates, and a simple "chain" table records where each region landed.
"""
import logging
import copy
import argparse
import tsv
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import FeatureLocation
from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature, convertSeqRec
from gff3 import feature_lambda, feature_test_contains

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def _shift_feature_tree(feature, offset):
    """Shift *feature* and, recursively, its sub_features by *offset* (in place)."""
    feature.location = FeatureLocation(
        feature.location.start + offset,
        feature.location.end + offset,
        feature.location.strand,
    )
    for sub in feature.sub_features:
        _shift_feature_tree(sub, offset)
    return feature


def mutate(gff3, fasta, changes, customSeqs, new_id):
    """Build a new record from selected regions and custom sequences.

    Change Language
     - we can only accept ONE genome as an input. (TODO: support multiple?)
     - we can only build ONE genome as an output. (TODO: support multiple?)
     - must allow selection of various regions
       '1,1000,+ 40,100,- custom_seq_1'

    :param gff3: handle of the GFF3 annotations; only the first record is used.
    :param fasta: handle of the FASTA sequence the annotations refer to.
    :param changes: iterable of change strings. ``"start,end,strand"``
        (1-indexed, inclusive, strand ``+`` or anything else for
        reverse-complement) selects a region; any string without a comma is
        looked up as a record ID in *customSeqs*.
    :param customSeqs: handle of a FASTA file with the custom sequences, or
        ``None``.
    :param new_id: ID given to the new record.
    :raises KeyError: if a change names a custom sequence that is not present.
    :return: yields exactly one ``(new_record, chain)`` tuple, where each
        ``chain`` row is ``[old_id, old_start, old_end, strand, new_id,
        new_start, new_end, "+"]`` with 1-indexed inclusive coordinates.
    """
    if customSeqs is None:
        custom_seqs = {}
    else:
        try:
            custom_seqs = SeqIO.to_dict(SeqIO.parse(customSeqs, "fasta"))
        except Exception:
            # Best effort, as before: an unreadable custom-sequence file is
            # treated as "no custom sequences". A change that references one
            # will then fail with a KeyError below.
            log.debug("Could not read custom sequences", exc_info=True)
            custom_seqs = {}
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
    # Pull first and only record
    rec = list(gffParse(gff3, base_dict=seq_dict))[0]
    # Create a "clean" record
    new_record = copy.deepcopy(rec)
    new_record.id = new_id
    new_record.seq = Seq("")
    new_record.features = []
    new_record.annotations = {}
    # Process changes.
    chain = []
    # Original start of every top-level feature, keyed by its ID qualifier.
    topFeats = {}
    for feat in rec.features:
        if "ID" in feat.qualifiers:
            topFeats[feat.qualifiers["ID"][0]] = feat.location.start
    for change in changes or ():
        if "," not in change:
            new_record.seq += custom_seqs[change].seq
            continue

        (start, end, strand) = change.split(",")
        start = int(start) - 1
        end = int(end)

        # Make any complaints. These are emitted at DEBUG level only, so they
        # stay silent under the INFO level configured above.
        broken_feature_start = list(
            feature_lambda(
                rec.features,
                feature_test_contains,
                {"index": start},
                subfeatures=False,
            )
        )
        if broken_feature_start:
            log.debug(
                "DANGER: Start index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output",
                start,
                broken_feature_start[0].id,
                broken_feature_start[0].location,
            )
        broken_feature_end = list(
            feature_lambda(
                rec.features,
                feature_test_contains,
                {"index": end},
                subfeatures=False,
            )
        )
        if broken_feature_end:
            log.debug(
                "DANGER: End index chosen (%s) is in the middle of a feature (%s %s). This feature will disappear from the output",
                end,
                broken_feature_end[0].id,
                broken_feature_end[0].location,
            )

        # Ok, fetch features
        if strand == "+":
            tmp_req = rec[start:end]
        else:
            tmp_req = rec[start:end].reverse_complement(
                id=True,
                name=True,
                description=True,
                features=True,
                annotations=True,
                letter_annotations=True,
                dbxrefs=True,
            )
        tmp_req = convertSeqRec(tmp_req)[0]

        # Offset of this segment in the new record: everything appended so
        # far. This must be recomputed, not accumulated -- accumulating it
        # over-shifts features from the third region onward.
        covered = len(new_record.seq)
        log.debug("Bases already placed in the new record: %s", covered)

        chain.append(
            [
                rec.id,
                start + 1,
                end,
                strand,
                new_record.id,
                covered + 1,
                covered + (end - start),
                "+",
            ]
        )

        new_record.seq += tmp_req.seq
        # NB: THIS MUST USE BIOPYTHON 1.67. 1.68 Removes access to
        # subfeatures, which means you will only get top-level features.
        startInd = len(new_record.features)
        new_record.features += tmp_req.features

        for top in new_record.features[startInd:]:
            top.location = FeatureLocation(
                top.location.start + covered,
                top.location.end + covered,
                top.location.strand,
            )
            if "ID" not in top.qualifiers:
                continue
            # Sub-features are moved by the distance the parent travelled
            # from its original position.
            # NOTE(review): this presumes sub_features still carry their
            # original coordinates after slicing, and it looks unlikely to be
            # right for reverse-complemented regions -- confirm.
            diffS = top.location.start - topFeats[top.qualifiers["ID"][0]]
            for sub in top.sub_features:
                _shift_feature_tree(sub, diffS)
    yield new_record, chain


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("fasta", type=argparse.FileType("r"), help="Sequence")
    parser.add_argument("gff3", type=argparse.FileType("r"), help="Annotations")
    parser.add_argument("new_id", help="Append to ID", default="_v2")
    parser.add_argument(
        "--out_fasta",
        type=argparse.FileType("w"),
        help="Output fasta",
        default="out.fa",
    )
    parser.add_argument(
        "--out_gff3",
        type=argparse.FileType("w"),
        help="Output gff3",
        default="out.gff3",
    )
    parser.add_argument(
        "--out_simpleChain",
        type=argparse.FileType("w"),
        help="Output simple chain (i.e. not a real UCSC chain file)",
        default="out.chain",
    )
    parser.add_argument("--changes", nargs="+")
    parser.add_argument("--customSeqs", type=argparse.FileType("r"))
    args = parser.parse_args()

    for rec, chain in mutate(
        args.gff3, args.fasta, args.changes, args.customSeqs, args.new_id
    ):
        # TODO: Check that this appends and doesn't overwrite
        gffWrite([rec], args.out_gff3)
        SeqIO.write([rec], args.out_fasta, "fasta")
        tsv.dump(chain, args.out_simpleChain)
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd genome_editor.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_editor.xml Mon Jun 05 02:43:21 2023 +0000 |
[ |
<tool id="edu.tamu.cpt.gff3.genome_editor" name="Genome Editor" version="2.1">
    <description>allows you to re-arrange a genome</description>
    <macros>
        <import>macros.xml</import>
        <import>cpt-macros.xml</import>
    </macros>
    <!-- NOTE(review): this expands the "requirements" macro from macros.xml
         (progressivemauve, bcbiogff), while genome_editor.py imports Bio and
         CPT_GFFParser. Confirm those packages are actually provided. -->
    <expand macro="requirements"/>
    <!-- Only the script path is quoted. Quoting "python <path>" as a single
         word makes the shell look for an executable with that whole name. -->
    <command><![CDATA[
@GENOME_SELECTOR_PRE@
python '$__tool_directory__/genome_editor.py'

@GENOME_SELECTOR@
@INPUT_GFF@
"$new_id"

--out_fasta "$out_fasta"
--out_gff3 "$out_gff3"
--out_simpleChain "$out_chain"
--customSeqs "$custom_seqs"
--changes
#for $idx, $change in enumerate($changes):
    #if $change.input_type.input_type_select == "region":
        ${change.input_type.start},${change.input_type.end},${change.input_type.revcom}
    #else
        custom${idx}
    #end if
#end for
]]></command>
    <configfiles>
        <!-- FASTA file holding one record per "custom" change, named
             custom<index> to match the IDs passed via the changes option. -->
        <configfile name="custom_seqs">
        <![CDATA[
#for $idx, $change in enumerate($changes):
    #if $change.input_type.input_type_select == "custom":
>custom${idx}
${change.input_type.seq}
    #end if
#end for
        ]]>
        </configfile>
    </configfiles>
    <inputs>
        <expand macro="input/gff3+fasta"/>
        <param label="New ID" name="new_id" type="text" help="New ID for the sequence to uniquely identify it from the previous build of the sequence. E.g. Miro could become Miro.2 or Miro.v2">
            <validator type="expression" message="You must specify a new ID"><![CDATA[value and len(value) > 0]]></validator>
        </param>
        <repeat name="changes" title="Sequence Component Selections">
            <conditional name="input_type">
                <param name="input_type_select" type="select" label="Data Source">
                    <option value="region" selected="True">Region from FASTA file</option>
                    <option value="custom">Custom Additional Sequence</option>
                </param>
                <when value="region">
                    <param label="Start" name="start" type="integer" min="1" value="1" help="Inclusive range, 1-indexed genome. (I.e. specifying 1-2000 will include base number 1)"/>
                    <param label="End" name="end" type="integer" min="1" value="1" help="Inclusive range. (I.e. specifying 1-2000 will include base number 2000)"/>
                    <param label="Reverse + Complement" name="revcom" type="boolean" truevalue="-" falsevalue="+"/>
                </when>
                <when value="custom">
                    <param label="Custom Sequence" name="seq" type="text" help="Enter the sequence, e.g. 'ACTG'. No FASTA definition line."/>
                </when>
            </conditional>
        </repeat>
    </inputs>
    <outputs>
        <data format="gff3" name="out_gff3" label="${new_id} Features"/>
        <data format="fasta" name="out_fasta" label="${new_id}"/>
        <data format="tabular" name="out_chain" label="${new_id} Change Table"/>
    </outputs>
    <tests>
        <test>
            <param name="reference_genome_source" value="history"/>
            <param name="genome_fasta" value="genome_editor.simple.fa"/>
            <param name="gff3_data" value="genome_editor.simple.gff3"/>
            <param name="new_id" value="test2"/>
            <repeat name="changes">
                <conditional name="input_type">
                    <param name="input_type_select" value="region"/>
                    <param name="start" value="1"/>
                    <param name="end" value="4"/>
                    <param name="revcom" value="+"/>
                </conditional>
            </repeat>
            <repeat name="changes">
                <conditional name="input_type">
                    <param name="input_type_select" value="custom"/>
                    <param name="seq" value="cccggg"/>
                </conditional>
            </repeat>
            <repeat name="changes">
                <conditional name="input_type">
                    <param name="input_type_select" value="region"/>
                    <param name="start" value="5"/>
                    <param name="end" value="8"/>
                    <param name="revcom" value="-"/>
                </conditional>
            </repeat>
            <output name="out_gff3" file="genome_editor.simple.out.gff3"/>
            <output name="out_fasta" file="genome_editor.simple.out.fa"/>
            <output name="out_chain" file="genome_editor.simple.out.chain"/>
        </test>
        <test>
            <param name="reference_genome_source" value="history"/>
            <param name="genome_fasta" value="miro.fa"/>
            <param name="gff3_data" value="miro.2.gff3"/>
            <param name="new_id" value="Miro.v2"/>
            <repeat name="changes">
                <conditional name="input_type">
                    <param name="input_type_select" value="region"/>
                    <param name="start" value="1"/>
                    <param name="end" value="950"/>
                    <param name="revcom" value="+"/>
                </conditional>
            </repeat>
            <repeat name="changes">
                <conditional name="input_type">
                    <param name="input_type_select" value="custom"/>
                    <param name="seq" value="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"/>
                </conditional>
            </repeat>
            <repeat name="changes">
                <conditional name="input_type">
                    <param name="input_type_select" value="region"/>
                    <param name="start" value="3170"/>
                    <param name="end" value="3450"/>
                    <param name="revcom" value="+"/>
                </conditional>
            </repeat>
            <output name="out_gff3" file="genome_editor.mirov2.gff3"/>
            <output name="out_fasta" file="genome_editor.mirov2.fa"/>
            <output name="out_chain" file="genome_editor.mirov2.chain"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Allows for re-arranging a FASTA genomic sequence, and remaps the associated features
from a gff3 file with the new coordinates. Segments of the genome are moved around
and stitched back together according to user-specified positions.

**Example FASTA input** (spaces added for clarity)::

    >Miro
    TTA GTA ATG GCT AAA

With user-specified *sequence component selections*:

- start: 1, end: 10, strand: +
- start: 6, end: 10, strand: +

the first ten bases will be listed, followed by a duplication of bases 6-10.
Bases 11-15 are not part of the sequence component selection parameters and
are therefore not in the output::

    >Miro.v2
    TTA GTA ATG GAA TGG

Alternatively, with user-specified *sequence component selections*:

- start: 1, end: 10, strand: +
- start: 6, end: 10, strand: -

the last section will be reverse-complemented and give the following output::

    >Miro.v2
    TTA GTA ATG GCC ATT

]]></help>
    <expand macro="citations"/>
</tool>
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd gff3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff3.py Mon Jun 05 02:43:21 2023 +0000 |
[ |
b'@@ -0,0 +1,346 @@\n+import copy\n+import logging\n+\n+log = logging.getLogger()\n+log.setLevel(logging.WARN)\n+\n+\n+def feature_lambda(\n+ feature_list,\n+ test,\n+ test_kwargs,\n+ subfeatures=True,\n+ parent=None,\n+ invert=False,\n+ recurse=True,\n+):\n+ """Recursively search through features, testing each with a test function, yielding matches.\n+\n+ GFF3 is a hierachical data structure, so we need to be able to recursively\n+ search through features. E.g. if you\'re looking for a feature with\n+ ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n+ case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n+\n+ :type feature_list: list\n+ :param feature_list: an iterable of features\n+\n+ :type test: function reference\n+ :param test: a closure with the method signature (feature, **kwargs) where\n+ the kwargs are those passed in the next argument. This\n+ function should return True or False, True if the feature is\n+ to be yielded as part of the main feature_lambda function, or\n+ False if it is to be ignored. This function CAN mutate the\n+ features passed to it (think "apply").\n+\n+ :type test_kwargs: dictionary\n+ :param test_kwargs: kwargs to pass to your closure when it is called.\n+\n+ :type subfeatures: boolean\n+ :param subfeatures: when a feature is matched, should just that feature be\n+ yielded to the caller, or should the entire sub_feature\n+ tree for that feature be included? subfeatures=True is\n+ useful in cases such as searching for a gene feature,\n+ and wanting to know what RBS/Shine_Dalgarno_sequences\n+ are in the sub_feature tree (which can be accomplished\n+ with two feature_lambda calls). 
subfeatures=False is\n+ useful in cases when you want to process (and possibly\n+ return) the entire feature tree, such as applying a\n+ qualifier to every single feature.\n+\n+ :type invert: boolean\n+ :param invert: Negate/invert the result of the filter.\n+\n+ :rtype: yielded list\n+ :return: Yields a list of matching features.\n+ """\n+ # Either the top level set of [features] or the subfeature attribute\n+ for feature in feature_list:\n+ feature._parent = parent\n+ if not parent:\n+ # Set to self so we cannot go above root.\n+ feature._parent = feature\n+ test_result = test(feature, **test_kwargs)\n+ # if (not invert and test_result) or (invert and not test_result):\n+ if invert ^ test_result:\n+ if not subfeatures:\n+ feature_copy = copy.deepcopy(feature)\n+ feature_copy.sub_features = list()\n+ yield feature_copy\n+ else:\n+ yield feature\n+\n+ if recurse and hasattr(feature, "sub_features"):\n+ for x in feature_lambda(\n+ feature.sub_features,\n+ test,\n+ test_kwargs,\n+ subfeatures=subfeatures,\n+ parent=feature,\n+ invert=invert,\n+ recurse=recurse,\n+ ):\n+ yield x\n+\n+\n+def fetchParent(feature):\n+ if not hasattr(feature, "_parent") or feature._parent is None:\n+ return feature\n+ else:\n+ return fetchParent(feature._parent)\n+\n+\n+def feature_test_true(feature, **kwargs):\n+ return True\n+\n+\n+def feature_test_type(feature, **kwargs):\n+ if "type" in kwargs:\n+ return str(feature.type).upper() == str(kwargs["type"]).upper()\n+ elif "types" in kwargs:\n+ for x in kwargs["types"]:\n+ if str(feature.type).upper() == str(x).upper():\n+ return True\n+ return False\n+ raise Exception("Incorrect feature_test'..b'feature.location.start,\n+ # feature.location.end,\n+ # feature.location.strand\n+ # )\n+ return result\n+\n+\n+def get_gff3_id(gene):\n+ return gene.qualifiers.get("Name", [gene.id])[0]\n+\n+\n+def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n+ # This prevents frameshift errors\n+ while start < 0:\n+ start += 3\n+ while end < 
0:\n+ end += 3\n+ while start > parent_length:\n+ start -= 3\n+ while end > parent_length:\n+ end -= 3\n+ return (start, end)\n+\n+\n+def coding_genes(feature_list):\n+ for x in genes(feature_list):\n+ if (\n+ len(\n+ list(\n+ feature_lambda(\n+ x.sub_features,\n+ feature_test_type,\n+ {"type": "CDS"},\n+ subfeatures=False,\n+ )\n+ )\n+ )\n+ > 0\n+ ):\n+ yield x\n+\n+\n+def genes(feature_list, feature_type="gene", sort=False):\n+ """\n+ Simple filter to extract gene features from the feature set.\n+ """\n+\n+ if not sort:\n+ for x in feature_lambda(\n+ feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n+ ):\n+ yield x\n+ else:\n+ data = list(genes(feature_list, feature_type=feature_type, sort=False))\n+ data = sorted(data, key=lambda feature: feature.location.start)\n+ for x in data:\n+ yield x\n+\n+\n+def wa_unified_product_name(feature):\n+ """\n+ Try and figure out a name. We gave conflicting instructions, so\n+ this isn\'t as trivial as it should be. Sometimes it will be in\n+ \'product\' or \'Product\', othertimes in \'Name\'\n+ """\n+ # Manually applied tags.\n+ protein_product = feature.qualifiers.get(\n+ "product", feature.qualifiers.get("Product", [None])\n+ )[0]\n+\n+ # If neither of those are available ...\n+ if protein_product is None:\n+ # And there\'s a name...\n+ if "Name" in feature.qualifiers:\n+ if not is_uuid(feature.qualifiers["Name"][0]):\n+ protein_product = feature.qualifiers["Name"][0]\n+\n+ return protein_product\n+\n+\n+def is_uuid(name):\n+ return name.count("-") == 4 and len(name) == 36\n+\n+\n+def get_rbs_from(gene):\n+ # Normal RBS annotation types\n+ rbs_rbs = list(\n+ feature_lambda(\n+ gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n+ )\n+ )\n+ rbs_sds = list(\n+ feature_lambda(\n+ gene.sub_features,\n+ feature_test_type,\n+ {"type": "Shine_Dalgarno_sequence"},\n+ subfeatures=False,\n+ )\n+ )\n+ # Fraking apollo\n+ apollo_exons = list(\n+ feature_lambda(\n+ gene.sub_features, 
feature_test_type, {"type": "exon"}, subfeatures=False\n+ )\n+ )\n+ apollo_exons = [x for x in apollo_exons if len(x) < 10]\n+ # These are more NCBI\'s style\n+ regulatory_elements = list(\n+ feature_lambda(\n+ gene.sub_features,\n+ feature_test_type,\n+ {"type": "regulatory"},\n+ subfeatures=False,\n+ )\n+ )\n+ rbs_regulatory = list(\n+ feature_lambda(\n+ regulatory_elements,\n+ feature_test_quals,\n+ {"regulatory_class": ["ribosome_binding_site"]},\n+ subfeatures=False,\n+ )\n+ )\n+ # Here\'s hoping you find just one ;)\n+ return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n+\n+\n+def nice_name(record):\n+ """\n+ get the real name rather than NCBI IDs and so on. If fails, will return record.id\n+ """\n+ name = record.id\n+ likely_parental_contig = list(genes(record.features, feature_type="contig"))\n+ if len(likely_parental_contig) == 1:\n+ name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n+ return name\n+\n+\n+def fsort(it):\n+ for i in sorted(it, key=lambda x: int(x.location.start)):\n+ yield i\n' |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,74 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package">progressivemauve</requirement> + <!--<requirement type="package" version="2.7">python</requirement>--> + <requirement type="package" version="0.6.4">bcbiogff</requirement> + <yield/> + </requirements> + </xml> + <token name="@WRAPPER_VERSION@">2.4.0</token> + <xml name="citation/progressive_mauve"> + <citation type="doi">10.1371/journal.pone.0011147</citation> + </xml> + <xml name="citation/gepard"> + <citation type="doi">10.1093/bioinformatics/btm039</citation> + </xml> + <token name="@XMFA_INPUT@"> + '$xmfa' + </token> + <xml name="xmfa_input" token_formats="xmfa"> + <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/> + </xml> + <token name="@XMFA_FA_INPUT@"> + '$sequences' + </token> + <xml name="xmfa_fa_input"> + <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. 
Failing that, they should be provided in the same order as in original progressiveMauve run"/> + </xml> + <xml name="genome_selector"> + <conditional name="reference_genome"> + <param name="reference_genome_source" type="select" label="Reference Genome"> + <option value="history" selected="True">From History</option> + <option value="cached">Locally Cached</option> + </param> + <when value="cached"> + <param name="fasta_indexes" type="select" label="Source FASTA Sequence"> + <options from_data_table="all_fasta"/> + </param> + </when> + <when value="history"> + <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/> + </when> + </conditional> + </xml> + <xml name="gff3_input"> + <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> + </xml> + <xml name="input/gff3+fasta"> + <expand macro="gff3_input"/> + <expand macro="genome_selector"/> + </xml> + <token name="@INPUT_GFF@"> + '$gff3_data' + </token> + <token name="@INPUT_FASTA@"> + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + </token> + <token name="@GENOME_SELECTOR_PRE@"> + #if $reference_genome.reference_genome_source == 'history': + ln -s '$reference_genome.genome_fasta' genomeref.fa; + #end if + </token> + <token name="@GENOME_SELECTOR@"> + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + </token> +</macros> |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.mirov2.chain --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.mirov2.chain Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,2 @@ +Miro 1 950 + Miro.v2 1 950 + +Miro 3170 3450 + Miro.v2 1017 1297 + |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.mirov2.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.mirov2.fa Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,23 @@ +>Miro.v2 <unknown description> +TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT +TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG +AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG +GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT +CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG +TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC +ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC +CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA +ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT +ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC +TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG +CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA +TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT +AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC +CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT +TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTaaaaaaaaaa +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaTCTC +ACTTAACATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCAT +ATTCTACTACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCT +GTTTCTTCATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCAC +AAAATCCGAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTT +ACCCCTCTTTGCGAATGTATGCAAGTTCTTCATGGGT |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.mirov2.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.mirov2.gff3 Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,3 @@ +##gff-version 3 +Miro.v2 gffSeqFeature gene 1 910 . - . ID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu; +Miro.v2 gffSeqFeature gene 1019 1264 . - . ID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis@tamu.edu; |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.simple.fa Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,2 @@ +>test +AAAATTTT |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.simple.gff3 Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,5 @@ +##gff-version 3 +test feature gene 1 4 . + . ID=1 +test GenBank CDS 1 4 . + 1 ID=1.cds +test feature gene 5 8 . - . ID=2 +test GenBank CDS 5 8 . - 1 ID=2.cds |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.out.chain --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.simple.out.chain Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,2 @@ +test 1 4 + test2 1 4 + +test 5 8 - test2 11 14 + |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.out.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.simple.out.fa Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,2 @@ +>test2 <unknown description> +AAAAcccgggAAAA |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/genome_editor.simple.out.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_editor.simple.out.gff3 Mon Jun 05 02:43:21 2023 +0000 |
b |
@@ -0,0 +1,5 @@ +##gff-version 3 +test2 gffSeqFeature gene 1 4 . + . ID=1; +test2 gffSeqFeature CDS 1 4 . + 0 ID=1.cds; +test2 gffSeqFeature gene 11 14 . + . ID=2; +test2 gffSeqFeature CDS 11 14 . + 0 ID=2.cds; |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/miro.2.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/miro.2.gff3 Mon Jun 05 02:43:21 2023 +0000 |
b |
b'@@ -0,0 +1,1371 @@\n+##gff-version 3\n+##sequence-region Miro 1 176055\n+Miro\tCPT\tgene\t1\t910\t.\t-\t.\tID=8861dd01-d426-40d4-96b5-f8e4b81c93d2;Name=Miro.gene_1;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t1\t910\t.\t-\t.\tID=1d8680f9-d6bc-4ef1-a535-83fe555cd097;Name=Miro.gene_1-00001;Parent=8861dd01-d426-40d4-96b5-f8e4b81c93d2;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tCDS\t1\t900\t.\t-\t0\tID=cd13f44c-b958-4dcc-a6b1-b901ad465268;Name=cd13f44c-b958-4dcc-a6b1-b901ad465268-CDS;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t908\t910\t.\t-\t.\tID=532024dc-47d1-40e1-9255-9ec5ee018cc0;Name=532024dc-47d1-40e1-9255-9ec5ee018cc0-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t1\t900\t.\t-\t.\tID=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93;Name=c85f1713-7e6e-4cf9-8d09-1f9ffc943c93-exon;Parent=1d8680f9-d6bc-4ef1-a535-83fe555cd097;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tgene\t900\t3173\t.\t-\t.\tID=93e09f06-6e7f-40ce-9308-bfe491cf0a24;Name=fdsa;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t900\t3173\t.\t-\t.\tID=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;Name=fdsa;Parent=93e09f06-6e7f-40ce-9308-bfe491cf0a24;date_creation=2016-09-27;date_last_modified=2017-04-07;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tCDS\t900\t3161\t.\t-\t0\tID=afd28aa9-d1d9-4125-923d-f62a8cbf8307;Name=afd28aa9-d1d9-4125-923d-f62a8cbf8307-CDS;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t3171\t3173\t.\t-\t.\tID=c452f5b8-598e-4122-961c-302e28fdd0a4;Name=c452f5b8-598e-4122-961c-302e28fdd0a4-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\te
xon\t900\t3161\t.\t-\t.\tID=4350b621-93f7-45fb-85b8-a60ce93f2944;Name=4350b621-93f7-45fb-85b8-a60ce93f2944-exon;Parent=72ac3362-c7eb-48c7-b8a0-1fce24f817fc;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tgene\t3172\t3417\t.\t-\t.\tID=b8da12d7-fe5c-42bd-b020-6a8ab205a133;Name=Miroa;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t3172\t3417\t.\t-\t.\tID=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;Name=Miroa-00001;Parent=b8da12d7-fe5c-42bd-b020-6a8ab205a133;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t3414\t3417\t.\t-\t.\tID=6bc4ea79-a43e-43e2-90fe-6af95e31e214;Name=6bc4ea79-a43e-43e2-90fe-6af95e31e214-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\texon\t3172\t3408\t.\t-\t.\tID=27847554-35ea-48a8-b5cd-11e64a233d41;Name=27847554-35ea-48a8-b5cd-11e64a233d41-exon;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tCDS\t3172\t3408\t.\t-\t0\tID=a508e82a-5e35-4ac5-8082-e52d06b2edad;Name=a508e82a-5e35-4ac5-8082-e52d06b2edad-CDS;Parent=fc89ee6f-ce7d-49e1-b6a1-765a60e80825;date_creation=2016-09-27;date_last_modified=2016-09-27\n+Miro\tCPT\tgene\t3412\t3979\t.\t-\t.\tID=47402bc1-9d3d-4881-8456-cd85ed73e3d9;Name=Miro.gene_4;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t3412\t3979\t.\t-\t.\tID=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;Name=Miro.gene_4-00001;Parent=47402bc1-9d3d-4881-8456-cd85ed73e3d9;date_creation=2016-09-27;date_last_modified=2016-09-27;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tCDS\t3412\t3966\t.\t-\t0\tID=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9;Name=f21739ad-e0a2-4f1c-af3f-6cbbe58572b9-CDS;Note=Manually set translation 
start;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t3975\t3979\t.\t-\t.\tID=a77e978d-fba5-4782-9d07-0172c8fa6df9;Name=a77e978d-fba5-4782-9d07-0172c8fa6df9-exon;Parent=d350bc7f-e8b4-49d1-a0a4-2046bbb4a677;date_creation=2016-09-27;date_last_modif'..b'fea64;Name=Miro%C4%8B-00001;Parent=1e943ae8-2418-4212-a066-65a568705f1c;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t174760\t174894\t.\t-\t.\tID=51429ca4-a6d5-4a29-8a3a-7151ab119c36;Name=51429ca4-a6d5-4a29-8a3a-7151ab119c36-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t174900\t174903\t.\t-\t.\tID=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255;Name=c024b6e9-b5ae-4dd6-8e3c-6818dca7c255-exon;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tCDS\t174760\t174894\t.\t-\t0\tID=4b13974a-0ee4-4e88-bdbe-659aa2b204ba;Name=4b13974a-0ee4-4e88-bdbe-659aa2b204ba-CDS;Parent=56f6ccd6-af67-4c2a-a34b-f6331d4fea64;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tgene\t174939\t175197\t.\t-\t.\tID=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;Name=gene_274;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t174939\t175197\t.\t-\t.\tID=872415a0-d11f-469c-bb74-0dfc428d105f;Name=gene_274-00001;Parent=7ada7c78-27c7-4b75-a47e-0ecab4dfe665;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t175195\t175197\t.\t-\t.\tID=6d561d73-b40e-4be2-91b8-0c11508c6e03;Name=6d561d73-b40e-4be2-91b8-0c11508c6e03-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tCDS\t174939\t175187\t.\t-\t0\tID=d5367d3a-56eb-417f-8056-36c5a82d618a;Name=d5367d3a-56eb-417f-8056-36c5a82d618a-CDS;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2
016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t174939\t175187\t.\t-\t.\tID=e0bd32bc-9332-4d12-b92e-358ae5432569;Name=e0bd32bc-9332-4d12-b92e-358ae5432569-exon;Parent=872415a0-d11f-469c-bb74-0dfc428d105f;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tgene\t175224\t175675\t.\t-\t.\tID=4596ff39-c162-4c8d-b40f-1403a578f3cd;Name=gene_275;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t175224\t175675\t.\t-\t.\tID=71e48bbd-f449-4964-a088-b216bdcc36b6;Name=gene_275-00001;Parent=4596ff39-c162-4c8d-b40f-1403a578f3cd;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t175673\t175675\t.\t-\t.\tID=87fb1a55-1430-444a-95cc-0e40cfcde490;Name=87fb1a55-1430-444a-95cc-0e40cfcde490-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t175224\t175661\t.\t-\t.\tID=484e9b53-43f7-4a20-8ffe-aa7298d0c95c;Name=484e9b53-43f7-4a20-8ffe-aa7298d0c95c-exon;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tCDS\t175224\t175661\t.\t-\t0\tID=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8;Name=6ba184ad-ba33-49f7-8acd-d2dd30cc8dc8-CDS;Parent=71e48bbd-f449-4964-a088-b216bdcc36b6;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\tgene\t175636\t176021\t.\t-\t.\tID=9cbd3255-806e-4d6f-9e7e-b974495fe09e;Name=Miro%C4%8C;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\tmRNA\t175636\t176021\t.\t-\t.\tID=5c9e6321-2990-4cc9-b58b-69d43d1304ff;Name=Miro%C4%8C-00001;Parent=9cbd3255-806e-4d6f-9e7e-b974495fe09e;date_creation=2016-09-28;date_last_modified=2016-09-28;owner=elenimijalis%40tamu.edu\n+Miro\tCPT\texon\t176017\t176021\t.\t-\t.\tID=91dd7574-88be-4c6b-a9a1-48cb6f600abb;Name=91dd7574-88be-4c6b-a9a1-48cb6f600abb-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_l
ast_modified=2016-09-28\n+Miro\tCPT\tCDS\t175636\t176010\t.\t-\t0\tID=94a131f5-9235-4efc-afe1-3e33c7b2ddb6;Name=94a131f5-9235-4efc-afe1-3e33c7b2ddb6-CDS;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n+Miro\tCPT\texon\t175636\t176010\t.\t-\t.\tID=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a;Name=c422b844-7ef3-42ed-b3fb-f2a60cb04b6a-exon;Parent=5c9e6321-2990-4cc9-b58b-69d43d1304ff;date_creation=2016-09-28;date_last_modified=2016-09-28\n' |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd test-data/miro.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/miro.fa Mon Jun 05 02:43:21 2023 +0000 |
b |
b'@@ -0,0 +1,2936 @@\n+>Miro\n+TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n+TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n+AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n+GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n+CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n+TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n+ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n+CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n+ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n+ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n+TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n+CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n+TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n+AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n+CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n+TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n+CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n+AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n+GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n+TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n+TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n+TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n+GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n+CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n+CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n+AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n+AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n+TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n+TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n+AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n+CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n+GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n+TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n+TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n+TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n+CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n+TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n+TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n+TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n+AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n+ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n+ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n+GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n+GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n+ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n+GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n+ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n+AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n+TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n+AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n+GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n+TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n+ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n+ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n+TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n+CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n+GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n+TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n+ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n+GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n+GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n+ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n+ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n+
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n+ACGG'..b'TCGGGTAATATCG\n+TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n+ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n+GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n+GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n+GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n+CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n+TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n+ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n+TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n+AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n+TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n+CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n+AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n+GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n+TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n+ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n+TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n+TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n+ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n+TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n+CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n+TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n+AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n+TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n+CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n+CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n+ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n+GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n+ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n+AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n+CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n+CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n+TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n+TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n+ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n+ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n+TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n+TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n+AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n+ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n+TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n+GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n+GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n+TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n+ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n+CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n+TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n+AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n+GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n+GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n+TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n+AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n+TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n+ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n+CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n+TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n+TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n+GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n+GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n+GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n+TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n+TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n+GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n+GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n+GGCGCGGTTGTTTAG\n' |
b |
diff -r 787ce84e8d16 -r 134bb2d7cdfd tsv.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tsv.py Mon Jun 05 02:43:21 2023 +0000 |
[ |
"""Minimal tab-separated-value writer.

Mirrors the ``json`` / ``yaml`` module layout (``load``/``loads``/``dump``/
``dumps``), except the payload is tab-delimited rows. Only the writing half
is implemented.
"""
import sys


def _format_row(row):
    """Return *row* as one tab-joined, newline-terminated line.

    Every cell is converted with ``str`` so rows may mix types.
    """
    return "%s\n" % "\t".join(map(str, row))


def loads(str_data):
    """Parse tab-separated text from a string (not implemented).

    Raises:
        NotImplementedError: always.
    """
    # Raise instead of returning the exception instance: a returned
    # exception object could be mistaken by the caller for parsed data.
    raise NotImplementedError("tsv.loads is not implemented")


def load(handle):
    """Parse tab-separated text from a file handle (not implemented).

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("tsv.load is not implemented")


def dump(data, handle=None):
    """Write each row of *data* to *handle* as a tab-separated line.

    Args:
        data: iterable of rows; each row is an iterable of cells.
        handle: object with a ``write`` method. Defaults to the current
            ``sys.stdout``, looked up at call time so that redirected
            stdout is honoured.
    """
    if handle is None:
        handle = sys.stdout
    # Write row by row so generators are streamed rather than buffered.
    for row in data:
        handle.write(_format_row(row))


def dumps(data):
    """Return the rows of *data* as a single tab-separated string."""
    return "".join(_format_row(row) for row in data)


def dump_line(row, handle=None):
    """Write a single *row* to *handle* (default: current ``sys.stdout``)."""
    dump([row], handle=handle)


def dumps_line(row):
    """Return a single *row* as a tab-separated, newline-terminated string."""
    return dumps([row])