Next changeset 1:2e00254fefc7 (2022-05-20) |
Commit message:
Uploaded |
added:
cpt_lipory/cpt-macros.xml cpt_lipory/gff3.py cpt_lipory/lipory.py cpt_lipory/lipory.xml cpt_lipory/macros.xml cpt_lipory/test-data/T7_LiporyIn.fasta cpt_lipory/test-data/T7_LiporyIn.gff3 cpt_lipory/test-data/T7_LiporyOut.gff3 |
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/cpt-macros.xml Fri May 13 05:21:38 2022 +0000 |
[ |
@@ -0,0 +1,115 @@ +<?xml version="1.0"?> +<macros> + <xml name="gff_requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="2.12.1">requests</requirement> + <yield/> + </requirements> + <version_command> + <![CDATA[ + cd $__tool_directory__ && git rev-parse HEAD + ]]> + </version_command> + </xml> + <xml name="citation/mijalisrasche"> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex">@unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-crr"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. 
Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020-AJC-solo"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-clm"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="sl-citations-clm"> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </xml> +</macros> |
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/gff3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/gff3.py Fri May 13 05:21:38 2022 +0000 |
[ |
b'@@ -0,0 +1,346 @@\n+import copy\n+import logging\n+\n+log = logging.getLogger()\n+log.setLevel(logging.WARN)\n+\n+\n+def feature_lambda(\n+ feature_list,\n+ test,\n+ test_kwargs,\n+ subfeatures=True,\n+ parent=None,\n+ invert=False,\n+ recurse=True,\n+):\n+ """Recursively search through features, testing each with a test function, yielding matches.\n+\n+ GFF3 is a hierachical data structure, so we need to be able to recursively\n+ search through features. E.g. if you\'re looking for a feature with\n+ ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n+ case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n+\n+ :type feature_list: list\n+ :param feature_list: an iterable of features\n+\n+ :type test: function reference\n+ :param test: a closure with the method signature (feature, **kwargs) where\n+ the kwargs are those passed in the next argument. This\n+ function should return True or False, True if the feature is\n+ to be yielded as part of the main feature_lambda function, or\n+ False if it is to be ignored. This function CAN mutate the\n+ features passed to it (think "apply").\n+\n+ :type test_kwargs: dictionary\n+ :param test_kwargs: kwargs to pass to your closure when it is called.\n+\n+ :type subfeatures: boolean\n+ :param subfeatures: when a feature is matched, should just that feature be\n+ yielded to the caller, or should the entire sub_feature\n+ tree for that feature be included? subfeatures=True is\n+ useful in cases such as searching for a gene feature,\n+ and wanting to know what RBS/Shine_Dalgarno_sequences\n+ are in the sub_feature tree (which can be accomplished\n+ with two feature_lambda calls). 
subfeatures=False is\n+ useful in cases when you want to process (and possibly\n+ return) the entire feature tree, such as applying a\n+ qualifier to every single feature.\n+\n+ :type invert: boolean\n+ :param invert: Negate/invert the result of the filter.\n+\n+ :rtype: yielded list\n+ :return: Yields a list of matching features.\n+ """\n+ # Either the top level set of [features] or the subfeature attribute\n+ for feature in feature_list:\n+ feature._parent = parent\n+ if not parent:\n+ # Set to self so we cannot go above root.\n+ feature._parent = feature\n+ test_result = test(feature, **test_kwargs)\n+ # if (not invert and test_result) or (invert and not test_result):\n+ if invert ^ test_result:\n+ if not subfeatures:\n+ feature_copy = copy.deepcopy(feature)\n+ feature_copy.sub_features = list()\n+ yield feature_copy\n+ else:\n+ yield feature\n+\n+ if recurse and hasattr(feature, "sub_features"):\n+ for x in feature_lambda(\n+ feature.sub_features,\n+ test,\n+ test_kwargs,\n+ subfeatures=subfeatures,\n+ parent=feature,\n+ invert=invert,\n+ recurse=recurse,\n+ ):\n+ yield x\n+\n+\n+def fetchParent(feature):\n+ if not hasattr(feature, "_parent") or feature._parent is None:\n+ return feature\n+ else:\n+ return fetchParent(feature._parent)\n+\n+\n+def feature_test_true(feature, **kwargs):\n+ return True\n+\n+\n+def feature_test_type(feature, **kwargs):\n+ if "type" in kwargs:\n+ return str(feature.type).upper() == str(kwargs["type"]).upper()\n+ elif "types" in kwargs:\n+ for x in kwargs["types"]:\n+ if str(feature.type).upper() == str(x).upper():\n+ return True\n+ return False\n+ raise Exception("Incorrect feature_test_type call, ne'..b'feature.location.start,\n+ # feature.location.end,\n+ # feature.location.strand\n+ # )\n+ return result\n+\n+\n+def get_gff3_id(gene):\n+ return gene.qualifiers.get("Name", [gene.id])[0]\n+\n+\n+def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n+ # This prevents frameshift errors\n+ while start < 0:\n+ start += 3\n+ 
while end < 0:\n+ end += 3\n+ while start > parent_length:\n+ start -= 3\n+ while end > parent_length:\n+ end -= 3\n+ return (start, end)\n+\n+\n+def coding_genes(feature_list):\n+ for x in genes(feature_list):\n+ if (\n+ len(\n+ list(\n+ feature_lambda(\n+ x.sub_features,\n+ feature_test_type,\n+ {"type": "CDS"},\n+ subfeatures=False,\n+ )\n+ )\n+ )\n+ > 0\n+ ):\n+ yield x\n+\n+\n+def genes(feature_list, feature_type="gene", sort=False):\n+ """\n+ Simple filter to extract gene features from the feature set.\n+ """\n+\n+ if not sort:\n+ for x in feature_lambda(\n+ feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n+ ):\n+ yield x\n+ else:\n+ data = list(genes(feature_list, feature_type=feature_type, sort=False))\n+ data = sorted(data, key=lambda feature: feature.location.start)\n+ for x in data:\n+ yield x\n+\n+\n+def wa_unified_product_name(feature):\n+ """\n+ Try and figure out a name. We gave conflicting instructions, so\n+ this isn\'t as trivial as it should be. 
Sometimes it will be in\n+ \'product\' or \'Product\', othertimes in \'Name\'\n+ """\n+ # Manually applied tags.\n+ protein_product = feature.qualifiers.get(\n+ "product", feature.qualifiers.get("Product", [None])\n+ )[0]\n+\n+ # If neither of those are available ...\n+ if protein_product is None:\n+ # And there\'s a name...\n+ if "Name" in feature.qualifiers:\n+ if not is_uuid(feature.qualifiers["Name"][0]):\n+ protein_product = feature.qualifiers["Name"][0]\n+\n+ return protein_product\n+\n+\n+def is_uuid(name):\n+ return name.count("-") == 4 and len(name) == 36\n+\n+\n+def get_rbs_from(gene):\n+ # Normal RBS annotation types\n+ rbs_rbs = list(\n+ feature_lambda(\n+ gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n+ )\n+ )\n+ rbs_sds = list(\n+ feature_lambda(\n+ gene.sub_features,\n+ feature_test_type,\n+ {"type": "Shine_Dalgarno_sequence"},\n+ subfeatures=False,\n+ )\n+ )\n+ # Fraking apollo\n+ apollo_exons = list(\n+ feature_lambda(\n+ gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n+ )\n+ )\n+ apollo_exons = [x for x in apollo_exons if len(x) < 10]\n+ # These are more NCBI\'s style\n+ regulatory_elements = list(\n+ feature_lambda(\n+ gene.sub_features,\n+ feature_test_type,\n+ {"type": "regulatory"},\n+ subfeatures=False,\n+ )\n+ )\n+ rbs_regulatory = list(\n+ feature_lambda(\n+ regulatory_elements,\n+ feature_test_quals,\n+ {"regulatory_class": ["ribosome_binding_site"]},\n+ subfeatures=False,\n+ )\n+ )\n+ # Here\'s hoping you find just one ;)\n+ return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n+\n+\n+def nice_name(record):\n+ """\n+ get the real name rather than NCBI IDs and so on. 
If fails, will return record.id\n+ """\n+ name = record.id\n+ likely_parental_contig = list(genes(record.features, feature_type="contig"))\n+ if len(likely_parental_contig) == 1:\n+ name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n+ return name\n+\n+\n+def fsort(it):\n+ for i in sorted(it, key=lambda x: int(x.location.start)):\n+ yield i\n' |
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/lipory.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/lipory.py Fri May 13 05:21:38 2022 +0000 |
[ |
#!/usr/bin/env python
"""Annotate putative lipoboxes on the CDS features of a GFF3 + FASTA genome."""
import re
import sys
import argparse
import logging
from Bio import SeqIO
from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
from gff3 import feature_lambda, feature_test_type, get_id
from Bio.SeqFeature import SeqFeature, FeatureLocation

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def find_lipoprotein(gff3_file, fasta_genome, lipobox_mindist=10, lipobox_maxdist=40):
    """Yield records whose genes carry a lipobox motif near the N-terminus.

    Every CDS below a ``gene`` feature is translated (table 11, strict
    ``cds=True`` validation) and the protein is searched with two anchored
    patterns: ``[ILMFTV][^REKD][GAS]C`` and ``AW[AGS]C``, each preceded by
    ``lipobox_mindist`` to ``lipobox_maxdist`` arbitrary residues.  For each
    hit a ``Lipobox`` feature covering the four matched codons is appended
    to the gene's ``sub_features``.

    :param gff3_file: open handle (or path) of the GFF3 annotations, passed
        to ``gffParse``.
    :param fasta_genome: open handle (or path) of the genome FASTA, passed
        to ``SeqIO.parse``.
    :param lipobox_mindist: minimum number of residues before the lipobox.
    :param lipobox_maxdist: maximum number of residues before the lipobox.
    :return: generator yielding one single-element list ``[record]`` per
        parsed record; ``record.features`` is replaced by the genes that
        received at least one lipobox (each gene listed once).
    """
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta_genome, "fasta"))

    # NOTE: patterns that can both match the same protein produce several
    # Lipobox sub-features (sharing one ID) on the same gene.
    patterns = [
        re.compile(
            r"^.{%s,%s}[ILMFTV][^REKD][GAS]C" % (lipobox_mindist, lipobox_maxdist)
        ),
        re.compile(r"^.{%s,%s}AW[AGS]C" % (lipobox_mindist, lipobox_maxdist)),
    ]

    for record in gffParse(gff3_file, base_dict=seq_dict):
        good_features = []

        genes = list(
            feature_lambda(
                record.features, feature_test_type, {"type": "gene"}, subfeatures=True
            )
        )
        for gene in genes:
            cdss = list(
                feature_lambda(
                    gene.sub_features,
                    feature_test_type,
                    {"type": "CDS"},
                    subfeatures=False,
                )
            )
            if not cdss:
                continue

            # Track matches per gene so the gene is emitted exactly once even
            # when several CDSs or several patterns hit.
            gene_has_lipobox = False

            for cds in cdss:
                try:
                    protein = str(
                        cds.extract(record.seq).translate(table=11, cds=True)
                    ).replace("*", "")
                except Exception as exc:
                    # Best effort: CDSs that fail strict translation are
                    # skipped.  Logged at DEBUG only, so nothing reaches
                    # stderr under the INFO level configured above.
                    log.debug(
                        "Skipping untranslatable CDS %s: %s",
                        getattr(cds, "id", "<unknown>"),
                        exc,
                    )
                    continue

                for pattern in patterns:
                    m = pattern.search(protein)
                    if not m:
                        continue

                    # The lipobox is the last four residues of the match;
                    # convert residue offsets to nucleotide offsets (x3),
                    # counted from the CDS start on the forward strand and
                    # from the CDS end on the reverse strand.
                    box_first_nt = 3 * (m.end() - 4)
                    box_last_nt = 3 * m.end()
                    if cds.location.strand > 0:
                        start = cds.location.start + box_first_nt
                        end = cds.location.start + box_last_nt
                    else:
                        start = cds.location.end - box_first_nt
                        end = cds.location.end - box_last_nt

                    lipobox = gffSeqFeature(
                        FeatureLocation(
                            min(start, end),
                            max(start, end),
                            strand=cds.location.strand,
                        ),
                        type="Lipobox",
                        qualifiers={
                            "source": "CPT_LipoRy",
                            "ID": "%s.lipobox" % get_id(gene),
                        },
                    )
                    lipobox.qualifiers["sequence"] = str(
                        lipobox.extract(record).seq.translate()
                    )

                    gene.sub_features.append(lipobox)
                    gene_has_lipobox = True

            if gene_has_lipobox:
                good_features.append(gene)

        record.features = good_features
        yield [record]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Filter out lipoproteins", epilog="")
    parser.add_argument(
        "gff3_file", type=argparse.FileType("r"), help="Naive ORF Calls"
    )
    parser.add_argument(
        "fasta_genome", type=argparse.FileType("r"), help="Fasta genome sequence"
    )

    parser.add_argument(
        "--lipobox_mindist",
        type=int,
        help="Minimum distance in codons to start of lipobox",
        default=10,
    )
    parser.add_argument(
        "--lipobox_maxdist",
        type=int,
        help="Maximum distance in codons to start of lipobox",
        default=40,
    )

    # Parse once; the resulting namespace maps 1:1 onto find_lipoprotein's
    # keyword arguments.
    args = vars(parser.parse_args())
    for record in find_lipoprotein(**args):
        # Drop record-level annotations before writing the GFF3 output.
        record[0].annotations = {}
        gffWrite(record, sys.stdout)
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/lipory.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/lipory.xml Fri May 13 05:21:38 2022 +0000 |
[ |
@@ -0,0 +1,60 @@ +<?xml version="1.0"?> +<tool id="edu.tamu.cpt.fasta.lipory" name="Identify Lipoboxes" version="19.1.0.0"> + <description> in protein sequences</description> + <macros> + <import>macros.xml</import> + <import>cpt-macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="2019.06.08">regex</requirement> + </expand> + <command interpreter="python" detect_errors="aggressive"><![CDATA[lipory.py +$positional_1 +$positional_2 + +--lipobox_mindist $lipobox_mindist +--lipobox_maxdist $lipobox_maxdist + +> $default]]></command> + <inputs> + <param label="Naive orf calls" name="positional_1" type="data" format="gff3"/> + <param label="Genome" name="positional_2" type="data" format="fasta"/> + + <param label="Minimum distance in codons to start of lipobox" name="lipobox_mindist" type="integer" value="10"/> + <param label="Maximum distance in codons to start of lipobox" name="lipobox_maxdist" type="integer" value="40"/> + </inputs> + <outputs> + <data format="gff3" name="default" label="Lipoboxes from ${on_string}"/> + </outputs> + <tests> + <test> + <param name="positional_1" value="T7_LiporyIn.gff3" /> + <param name="positional_2" value="T7_LiporyIn.fasta" /> + <param name="lipobox_mindist" value="10" /> + <param name="lipobox_maxdist" value="60" /> + <output name="default" value="T7_LiporyOut.gff3" /> + </test> + </tests> + <help><![CDATA[ +**What it does** + +Identifies possible LipoBoxes from an input GFF3 and FASTA. + +**How it works** + +Searches in the first 10-40 amino acids of an input protein sequence using regular expressions +for a 4-amino acid motif based on the consensus sequences described in (**Babu** et al. 2006. *J +Bacteriol.* 188(8):2761-2773 and **Kongari** *et al.* 2018 *BMC Bioinformatics*. 19:326). The +amino acids allowed here are relaxed to allow for the diversity of amino acids known to occur in lipoboxes. 
+ +Position 1: I, L, M, F, T, or V (or A, only in the alternate motif A-W-[AGS]-C) + +Position 2: any residue except R, E, K, or D (W in the alternate motif) + +Position 3: G, A, or S + +Position 4: C + + ]]></help> + <expand macro="citations-2020" /> +</tool> |
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/macros.xml Fri May 13 05:21:38 2022 +0000 |
b |
@@ -0,0 +1,85 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="3.6">python</requirement> + <requirement type="package" version="1.77">biopython</requirement> + <requirement type="package" version="1.1.3">cpt_gffparser</requirement> + <yield/> + </requirements> + </xml> + <token name="@BLAST_TSV@"> + "$blast_tsv" + </token> + <xml name="blast_tsv"> + <param label="Blast Results" help="TSV/tabular (25 Column)" + name="blast_tsv" type="data" format="tabular" /> + </xml> + + <token name="@BLAST_XML@"> + "$blast_xml" + </token> + <xml name="blast_xml"> + <param label="Blast Results" help="XML format" + name="blast_xml" type="data" format="blastxml" /> + </xml> + <xml name="gff3_with_fasta"> + <param label="Genome Sequences" name="fasta" type="data" format="fasta" /> + <param label="Genome Annotations" name="gff3" type="data" format="gff3" /> + </xml> + <xml name="genome_selector"> + <conditional name="reference_genome"> + <param name="reference_genome_source" type="select" label="Reference Genome"> + <option value="history" selected="True">From History</option> + <option value="cached">Locally Cached</option> + </param> + <when value="cached"> + <param name="fasta_indexes" type="select" label="Source FASTA Sequence"> + <options from_data_table="all_fasta"/> + </param> + </when> + <when value="history"> + <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/> + </when> + </conditional> + </xml> + <xml name="gff3_input"> + <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> + </xml> + <xml name="input/gff3+fasta"> + <expand macro="gff3_input" /> + <expand macro="genome_selector" /> + </xml> + <token name="@INPUT_GFF@"> + "$gff3_data" + </token> + <token name="@INPUT_FASTA@"> +#if str($reference_genome.reference_genome_source) == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#else if str($reference_genome.reference_genome_source) == 
'history': + genomeref.fa +#end if + </token> + <token name="@GENOME_SELECTOR_PRE@"> +#if $reference_genome.reference_genome_source == 'history': + ln -s $reference_genome.genome_fasta genomeref.fa; +#end if + </token> + <token name="@GENOME_SELECTOR@"> +#if str($reference_genome.reference_genome_source) == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa +#end if + </token> + <xml name="input/fasta"> + <param label="Fasta file" name="sequences" type="data" format="fasta"/> + </xml> + + <token name="@SEQUENCE@"> + "$sequences" + </token> + <xml name="input/fasta/protein"> + <param label="Protein fasta file" name="sequences" type="data" format="fasta"/> + </xml> +</macros> |
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/test-data/T7_LiporyIn.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/test-data/T7_LiporyIn.fasta Fri May 13 05:21:38 2022 +0000 |
b |
b'@@ -0,0 +1,667 @@\n+>NC_001604\n+TCTCACAGTGTACGGACCTAAAGTTCCCCCATAGGGGGTACCTAAAGCCCAGCCAATCAC\n+CTAAAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGT\n+TTGTCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCTATCTGTTACAGTCTCCTAAA\n+GTATCCTCCTAAAGTCACCTCCTAACGTCCATCCTAAAGCCAACACCTAAAGCCTACACC\n+TAAAGACCCATCAAGTCAACGCCTATCTTAAAGTTTAAACATAAAGACCAGACCTAAAGA\n+CCAGACCTAAAGACACTACATAAAGACCAGACCTAAAGACGCCTTGTTGTTAGCCATAAA\n+GTGATAACCTTTAATCATTGTCTTTATTAATACAACTCACTATAAGGAGAGACAACTTAA\n+AGAGACTTAAAAGATTAATTTAAAATTTATCAAAAAGAGTATTGACTTAAAGTCTAACCT\n+ATAGGATACTTACAGCCATCGAGAGGGACACGGCGAATAGCCATCCCAATCGACACCGGG\n+GTCAACCGGATAAGTAGACAGCCTGATAAGTCGCACGAAAAACAGGTATTGACAACATGA\n+AGTAACATGCAGTAAGATACAAATCGCTAGGTAACACTAGCAGCGTCAACCGGGCGCACA\n+GTGCCTTCTAGGTGACTTAAGCGCACCACGGCACATAAGGTGAAACAAAACGGTTGACAA\n+CATGAAGTAAACACGGTACGATGTACCACATGAAACGACAGTGAGTCACCACACTGAAAG\n+GTGATGCGGTCTAACGAAACCTGACCTAAGACGCTCTTTAACAATCTGGTAAATAGCTCT\n+TGAGTGCATGACTAGCGGATAACTCAAGGGTATCGCAAGGTGCCCTTTATGATATTCACT\n+AATAACTGCACGAGGTAACACAAGATGGCTATGTCTAACATGACTTACAACAACGTTTTC\n+GACCACGCTTACGAAATGCTGAAAGAAAACATCCGTTATGATGACATCCGTGACACTGAT\n+GACCTGCACGATGCTATTCACATGGCTGCCGATAATGCAGTTCCGCACTACTACGCTGAC\n+ATCTTTAGCGTAATGGCAAGTGAGGGCATTGACCTTGAGTTCGAAGACTCTGGTCTGATG\n+CCTGACACCAAGGACGTAATCCGCATCCTGCAAGCGCGTATCTATGAGCAATTAACGATT\n+GACCTCTGGGAAGACGCAGAAGACTTGCTCAATGAATACTTGGAGGAAGTCGAGGAGTAC\n+GAGGAGGATGAAGAGTAATGTCTACTACCAACGTGCAATACGGTCTGACCGCTCAAACTG\n+TACTTTTCTATAGCGACATGGTGCGCTGTGGCTTTAACTGGTCACTCGCAATGGCACAGC\n+TCAAAGAACTGTACGAAAACAACAAGGCAATAGCTTTAGAATCTGCTGAGTGATAGACTC\n+AAGGTCGCTCCTAGCGAGTGGCCTTTATGATTATCACTTTACTTATGAGGGAGTAATGTA\n+TATGCTTACTATCGGTCTACTCACCGCTCTAGGTCTAGCTGTAGGTGCATCCTTTGGGAA\n+GGCTTTAGGTGTAGCTGTAGGTTCCTACTTTACCGCTTGCATCATCATAGGAATCATCAA\n+AGGGGCACTACGCAAATGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCA\n+ACCGTATGTACACCTGATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAA\n+CTCCGCATTAACCGCAAGATTAACAAGATAGGTTCCGGCTATGACAGAACGCACTGATGG\n+CTTAAAGAAAGGTTATATGCCCAATGGCACACTATACGCTGCAAATCGGCGAATAGTGAG\n+AACTTGGCGAGA
GAACAACCTCGAACGCCGCAAGGACAAGAGAGGGCGGCGTGGCATAGA\n+CGAAAGGAAAAGGTTAAAGCCAAGAAACTCGCCGCACTTGAACAGGCACTAGCCAACACA\n+CTGAACGCTATCTCATAACGAACATAAAGGACACAATGCAATGAACATTACCGACATCAT\n+GAACGCTATCGACGCAATCAAAGCACTGCCAATCTGTGAACTTGACAAGCGTCAAGGTAT\n+GCTTATCGACTTACTGGTCGAGATGGTCAACAGCGAGACGTGTGATGGCGAGCTAACCGA\n+ACTAAATCAGGCACTTGAGCATCAAGATTGGTGGACTACCTTGAAGTGTCTCACGGCTGA\n+CGCAGGGTTCAAGATGCTCGGTAATGGTCACTTCTCGGCTGCTTATAGTCACCCGCTGCT\n+ACCTAACAGAGTGATTAAGGTGGGCTTTAAGAAAGAGGATTCAGGCGCAGCCTATACCGC\n+ATTCTGCCGCATGTATCAGGGTCGTCCTGGTATCCCTAACGTCTACGATGTACAGCGCCA\n+CGCTGGATGCTATACGGTGGTACTTGACGCACTTAAGGATTGCGAGCGTTTCAACAATGA\n+TGCCCATTATAAATACGCTGAGATTGCAAGCGACATCATTGATTGCAATTCGGATGAGCA\n+TGATGAGTTAACTGGATGGGATGGTGAGTTTGTTGAAACTTGTAAACTAATCCGCAAGTT\n+CTTTGAGGGCATCGCCTCATTCGACATGCATAGCGGGAACATCATGTTCTCAAATGGAGA\n+CGTACCATACATCACCGACCCGGTATCATTCTCGCAGAAGAAAGACGGTGGCGCATTCAG\n+CATCGACCCTGAGGAACTCATCAAGGAAGTCGAGGAAGTCGCACGACAGAAAGAAATTGA\n+CCGCGCTAAGGCCCGTAAAGAACGTCACGAGGGGCGCTTAGAGGCACGCAGATTCAAACG\n+TCGCAACCGCAAGGCACGTAAAGCACACAAAGCTAAGCGCGAAAGAATGCTTGCTGCGTG\n+GCGATGGGCTGAACGTCAAGAACGGCGTAACCATGAGGTAGCTGTAGATGTACTAGGAAG\n+AACCAATAACGCTATGCTCTGGGTCAACATGTTCTCTGGGGACTTTAAGGCGCTTGAGGA\n+ACGAATCGCGCTGCACTGGCGTAATGCTGACCGGATGGCTATCGCTAATGGTCTTACGCT\n+CAACATTGATAAGCAACTTGACGCAATGTTAATGGGCTGATAGTCTTATCTTACAGGTCA\n+TCTGCGGGTGGCCTGAATAGGTACGATTTACTAACTGGAAGAGGCACTAAATGAACACGA\n+TTAACATCGCTAAGAACGACTTCTCTGACATCGAACTGGCTGCTATCCCGTTCAACACTC\n+TGGCTGACCATTACGGTGAGCGTTTAGCTCGCGAACAGTTGGCCCTTGAGCATGAGTCTT\n+ACGAGATGGGTGAAGCACGCTTCCGCAAGATGTTTGAGCGTCAACTTAAAGCTGGTGAGG\n+TTGCGGATAACGCTGCCGCCAAGCCTCTCATCACTACCCTACTCCCTAAGATGATTGCAC\n+GCATCAACGACTGGTTTGAGGAAGTGAAAGCTAAGCGCGGCAAGCGCCCGACAGCCTTCC\n+AGTTCCTGCAAGAAATCAAGCCGGAAGCCGTAGCGTACATCACCATTAAGACCACTCTGG\n+CTTGCCTAACCAGTGCTGACAATACAACCGTTCAGGCTGTAGCAAGCGCAATCGGTCGGG\n+CCATTGAGGACGAGGCTCGCTTCGGTCGTATCCGTGACCTTGAAGCTAAGCACTTCAAGA\n+AAAACGTTGAGGAACAACTCAACAAGCGCGTAGGGCACGTCTACAAGAAAGCATTTATGC\n+AAGTTGTCGAGGCTGACATGCTCTCTAAGGGTCTACTCGGTGGCGAGGCGTGGTCTTCG
T\n+GGCATAAGGAAGACTCTATTCATGTAGGAGTACGCTGCATCGAGATGCTCATTGAGTCAA\n+'..b'TAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGT\n+GCCAACAACTCTTGGAACTTCTTCCGTACTGGCCCCGATGGAATCTACTTCATAGCCTCT\n+GATGGTGGATGGTTACGATTCCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCA\n+GACAGTCGTTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAATTGGTAAATCACAAG\n+GAAAGACGTGTAGTCCACGGATGGACTCTCAAGGAGGTACAAGGTGCTATCATTAGACTT\n+TAACAACGAATTGATTAAGGCTGCTCCAATTGTTGGGACGGGTGTAGCAGATGTTAGTGC\n+TCGACTGTTCTTTGGGTTAAGCCTTAACGAATGGTTCTACGTTGCTGCTATCGCCTACAC\n+AGTGGTTCAGATTGGTGCCAAGGTAGTCGATAAGATGATTGACTGGAAGAAAGCCAATAA\n+GGAGTGATATGTATGGAAAAGGATAAGAGCCTTATTACATTCTTAGAGATGTTGGACACT\n+GCGATGGCTCAGCGTATGCTTGCGGACCTTTCGGACCATGAGCGTCGCTCTCCGCAACTC\n+TATAATGCTATTAACAAACTGTTAGACCGCCACAAGTTCCAGATTGGTAAGTTGCAGCCG\n+GATGTTCACATCTTAGGTGGCCTTGCTGGTGCTCTTGAAGAGTACAAAGAGAAAGTCGGT\n+GATAACGGTCTTACGGATGATGATATTTACACATTACAGTGATATACTCAAGGCCACTAC\n+AGATAGTGGTCTTTATGGATGTCATTGTCTATACGAGATGCTCCTACGTGAAATCTGAAA\n+GTTAACGGGAGGCATTATGCTAGAATTTTTACGTAAGCTAATCCCTTGGGTTCTCGCTGG\n+GATGCTATTCGGGTTAGGATGGCATCTAGGGTCAGACTCAATGGACGCTAAATGGAAACA\n+GGAGGTACACAATGAGTACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAAT\n+CGATGCGGTATCTGCTAAGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAG\n+GATTATTTCTGATTTGCGTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGG\n+AACCTCCGATGGTCAGTGTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGA\n+TGCTAAACGTATTCTCGCAGTGACCCAGAAGGGTGACGCATGGATTCGTGCGTTACAGGA\n+TACTATTCGTGAACTGCAACGTAAGTAGGAAATCAAGTAAGGAGGCAATGTGTCTACTCA\n+ATCCAATCGTAATGCGCTCGTAGTGGCGCAACTGAAAGGAGACTTCGTGGCGTTCCTATT\n+CGTCTTATGGAAGGCGCTAAACCTACCGGTGCCCACTAAGTGTCAGATTGACATGGCTAA\n+GGTGCTGGCGAATGGAGACAACAAGAAGTTCATCTTACAGGCTTTCCGTGGTATCGGTAA\n+GTCGTTCATCACATGTGCGTTCGTTGTGTGGTCCTTATGGAGAGACCCTCAGTTGAAGAT\n+ACTTATCGTATCAGCCTCTAAGGAGCGTGCAGACGCTAACTCCATCTTTATTAAGAACAT\n+CATTGACCTGCTGCCATTCCTATCTGAGTTAAAGCCAAGACCCGGACAGCGTGACTCGGT\n+AATCAGCTTTGATGTAGGCCCAGCCAATCCTGACCACTCTCCTAGTGTGAAATCAGTAGG\n+TATCACTGGTCAGTTAACTGGTAGCCGTGCTGACATTATCATTGCGGATGACGTTGAGAT\n+TCCGTCTAACAGCGCAACTATGGGTGCCCGTGAGAAGCTATGGAC
TCTGGTTCAGGAGTT\n+CGCTGCGTTACTTAAACCGCTGCCTTCCTCTCGCGTTATCTACCTTGGTACACCTCAGAC\n+AGAGATGACTCTCTATAAGGAACTTGAGGATAACCGTGGGTACACAACCATTATCTGGCC\n+TGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTATTACTCACAGCGTCTTGCTCCTAT\n+GTTACGCGCTGAGTACGATGAGAACCCTGAGGCACTTGCTGGGACTCCAACAGACCCAGT\n+GCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTTGGAATACGGTAAGGCTGGCTTTAC\n+GCTACAGTTCATGCTTAACCCTAACCTTAGTGATGCCGAGAAGTACCCGCTGAGGCTTCG\n+TGACGCTATCGTAGCGGCCTTAGACTTAGAGAAGGCCCCAATGCATTACCAGTGGCTTCC\n+GAACCGTCAGAACATCATTGAGGACCTTCCTAACGTTGGCCTTAAGGGTGATGACCTGCA\n+TACGTACCACGATTGTTCCAACAACTCAGGTCAGTACCAACAGAAGATTCTGGTCATTGA\n+CCCTAGTGGTCGCGGTAAGGACGAAACAGGTTACGCTGTGCTGTACACACTGAACGGTTA\n+CATCTACCTTATGGAAGCTGGAGGTTTCCGTGATGGCTACTCCGATAAGACCCTTGAGTT\n+ACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCAGACGGTTGTCTACGAGAGTAACTTCGG\n+TGACGGTATGTTCGGTAAGGTATTCAGTCCTATCCTTCTTAAACACCACAACTGTGCGAT\n+GGAAGAGATTCGTGCCCGTGGTATGAAAGAGATGCGTATTTGCGATACCCTTGAGCCAGT\n+CATGCAGACTCACCGCCTTGTAATTCGTGATGAGGTCATTAGGGCCGACTACCAGTCCGC\n+TCGTGACGTAGACGGTAAGCATGACGTTAAGTACTCGTTGTTCTACCAGATGACCCGTAT\n+CACTCGTGAGAAAGGCGCTCTGGCTCATGATGACCGATTGGATGCCCTTGCGTTAGGCAT\n+TGAGTATCTCCGTGAGTCCATGCAGTTGGATTCCGTTAAGGTCGAGGGTGAAGTACTTGC\n+TGACTTCCTTGAGGAACACATGATGCGTCCTACGGTTGCTGCTACGCATATCATTGAGAT\n+GTCTGTGGGAGGAGTTGATGTGTACTCTGAGGACGATGAGGGTTACGGTACGTCTTTCAT\n+TGAGTGGTGATTTATGCATTAGGACTGCATAGGGATGCACTATAGACCACGGATGGTCAG\n+TTCTTTAAGTTACTGAAAAGACACGATAAATTAATACGACTCACTATAGGGAGAGGAGGG\n+ACGAAAGGTTACTATATAGATACTGAATGAATACTTATAGAGTGCATAAAGTATGCATAA\n+TGGTGTACCTAGAGTGACCTCTAAGAATGGTGATTATATTGTATTAGTATCACCTTAACT\n+TAAGGACCAACATAAAGGGAGGAGACTCATGTTCCGCTTATTGTTGAACCTACTGCGGCA\n+TAGAGTCACCTACCGATTTCTTGTGGTACTTTGTGCTGCCCTTGGGTACGCATCTCTTAC\n+TGGAGACCTCAGTTCACTGGAGTCTGTCGTTTGCTCTATACTCACTTGTAGCGATTAGGG\n+TCTTCCTGACCGACTGATGGCTCACCGAGGGATTCAGCGGTATGATTGCATCACACCACT\n+TCATCCCTATAGAGTCAAGTCCTAAGGTATACCCATAAAGAGCCTCTAATGGTCTATCCT\n+AAGGTCTATACCTAAAGATAGGCCATCCTATCAGTGTCACCTAAAGAGGGTCTTAGAGAG\n+GGCCTATGGAGTTCCTATAGGGTCCTTTAAAATATACCATAAAAATCTGAGTGACTATCT\n+CACAGTGTACGGACCTAAAGTTCCCCCAT
AGGGGGTACCTAAAGCCCAGCCAATCACCTA\n+AAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGTTTG\n+TCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCT\n' |
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/test-data/T7_LiporyIn.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/test-data/T7_LiporyIn.gff3 Fri May 13 05:21:38 2022 +0000 |
b |
b'@@ -0,0 +1,3145 @@\n+##gff-version 3\n+NC_001604\tcpt.fixModel\tgene\t542\t666\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tmRNA\t542\t649\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331.mRNA;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tgetOrfsOrCds\tCDS\t542\t649\t.\t-\t0\tID=ORF.0.2504_0.5545204186518331.CDS;Parent=ORF.0.2504_0.5545204186518331.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t664\t666\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2504_0.5545204186518331.rbs-0;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tgene\t542\t651\t.\t-\t.\tID=ORF.0.2506_0.7313234548298825;\n+NC_001604\tcpt.fixModel\tmRNA\t542\t637\t.\t-\t.\tID=ORF.0.2506_0.7313234548298825.mRNA;Parent=ORF.0.2506_0.7313234548298825;\n+NC_001604\tgetOrfsOrCds\tCDS\t542\t637\t.\t-\t0\tID=ORF.0.2506_0.7313234548298825.CDS;Parent=ORF.0.2506_0.7313234548298825.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t649\t651\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2506_0.7313234548298825.rbs-0;Parent=ORF.0.2506_0.7313234548298825;\n+NC_001604\tcpt.fixModel\tgene\t627\t839\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tmRNA\t627\t824\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932.mRNA;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tgetOrfsOrCds\tCDS\t627\t824\t.\t-\t0\tID=ORF.0.3363_0.9803284230217932.CDS;Parent=ORF.0.3363_0.9803284230217932.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t837\t839\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.3363_0.9803284230217932.rbs-0;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tgene\t627\t747\t.\t-\t.\tID=ORF.0.3367_0.1254781548971481;\n+NC_001604\tcpt.fixModel\tmRNA\t627\t734\t.\t-\t.\tID=ORF.0.3367_0.1254781548971481.mRNA;Parent=ORF.0.3367_0.1254781548971481;\n+NC_001604\tgetOrfsOrCds\tCDS\t627\t734\t.\t-\t0\tID=ORF.0.3367_0.1254781548971481.CDS;Parent=ORF.0.3367_0.1254781548971481.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t745\t747\t.\t-\t.\tsource=CPT_ShineFind;ID
=ORF.0.3367_0.1254781548971481.rbs-0;Parent=ORF.0.3367_0.1254781548971481;\n+NC_001604\tcpt.fixModel\tgene\t766\t1206\t.\t-\t.\tID=ORF.0.2960_0.21096600108012498;\n+NC_001604\tcpt.fixModel\tmRNA\t766\t1191\t.\t-\t.\tID=ORF.0.2960_0.21096600108012498.mRNA;Parent=ORF.0.2960_0.21096600108012498;\n+NC_001604\tgetOrfsOrCds\tCDS\t766\t1191\t.\t-\t0\tID=ORF.0.2960_0.21096600108012498.CDS;Parent=ORF.0.2960_0.21096600108012498.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t1202\t1206\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2960_0.21096600108012498.rbs-0;Parent=ORF.0.2960_0.21096600108012498;\n+NC_001604\tcpt.fixModel\tgene\t766\t885\t.\t-\t.\tID=ORF.0.2976_0.8667531510652563;\n+NC_001604\tcpt.fixModel\tmRNA\t766\t867\t.\t-\t.\tID=ORF.0.2976_0.8667531510652563.mRNA;Parent=ORF.0.2976_0.8667531510652563;\n+NC_001604\tgetOrfsOrCds\tCDS\t766\t867\t.\t-\t0\tID=ORF.0.2976_0.8667531510652563.CDS;Parent=ORF.0.2976_0.8667531510652563.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t883\t885\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2976_0.8667531510652563.rbs-0;Parent=ORF.0.2976_0.8667531510652563;\n+NC_001604\tcpt.fixModel\tgene\t912\t1278\t.\t+\t.\tID=ORF.0.2_0.39432314427019266;\n+NC_001604\tcpt.fixModel\tmRNA\t925\t1278\t.\t+\t.\tID=ORF.0.2_0.39432314427019266.mRNA;Parent=ORF.0.2_0.39432314427019266;\n+NC_001604\tgetOrfsOrCds\tCDS\t925\t1278\t.\t+\t0\tID=ORF.0.2_0.39432314427019266.CDS;Parent=ORF.0.2_0.39432314427019266.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t912\t916\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2_0.39432314427019266.rbs-0;Parent=ORF.0.2_0.39432314427019266;\n+NC_001604\tcpt.fixModel\tgene\t912\t1278\t.\t+\t.\tID=ORF.0.3_0.8154113297998601;\n+NC_001604\tcpt.fixModel\tmRNA\t931\t1278\t.\t+\t.\tID=ORF.0.3_0.8154113297998601.mRNA;Parent=ORF.0.3_0.8154113297998601;\n+NC_001604\tgetOrfsOrCds\tCDS\t931\t1278\t.\t+\t0\tID=ORF.0.3_0.8154113297998601.CDS;Parent=ORF.0.3_0.8154113297998601.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t912\t916\t.\t+\t.
\tsource=CPT_ShineFind;ID=ORF.0.3_0.8154113297998601.rbs-0;Parent=ORF.0.3_0.8154113297998601;\n+NC_001604\tcpt.fixModel\tgene\t1182\t1310\t.\t-\t.\tID=ORF.0.3359_0.6449951188722964;\n+NC_001604\tcpt.fixModel\tmRNA\t1182\t1298\t.\t-\t.\tID=ORF.0.3359_0.6449951188722964.mRNA;Parent=ORF.0.3359_0.64499511887229'..b'93689042043441.rbs-0;Parent=ORF.0.1411_0.9793689042043441;\n+NC_001604\tcpt.fixModel\tgene\t39011\t39130\t.\t+\t.\tID=ORF.0.1412_0.30622712641637695;\n+NC_001604\tcpt.fixModel\tmRNA\t39020\t39130\t.\t+\t.\tID=ORF.0.1412_0.30622712641637695.mRNA;Parent=ORF.0.1412_0.30622712641637695;\n+NC_001604\tgetOrfsOrCds\tCDS\t39020\t39130\t.\t+\t0\tID=ORF.0.1412_0.30622712641637695.CDS;Parent=ORF.0.1412_0.30622712641637695.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39011\t39014\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1412_0.30622712641637695.rbs-0;Parent=ORF.0.1412_0.30622712641637695;\n+NC_001604\tcpt.fixModel\tgene\t39012\t39130\t.\t+\t.\tID=ORF.0.1413_0.6484168178188776;\n+NC_001604\tcpt.fixModel\tmRNA\t39023\t39130\t.\t+\t.\tID=ORF.0.1413_0.6484168178188776.mRNA;Parent=ORF.0.1413_0.6484168178188776;\n+NC_001604\tgetOrfsOrCds\tCDS\t39023\t39130\t.\t+\t0\tID=ORF.0.1413_0.6484168178188776.CDS;Parent=ORF.0.1413_0.6484168178188776.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39012\t39015\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1413_0.6484168178188776.rbs-0;Parent=ORF.0.1413_0.6484168178188776;\n+NC_001604\tcpt.fixModel\tgene\t39378\t39538\t.\t+\t.\tID=ORF.0.1414_0.38280168913440005;\n+NC_001604\tcpt.fixModel\tmRNA\t39389\t39538\t.\t+\t.\tID=ORF.0.1414_0.38280168913440005.mRNA;Parent=ORF.0.1414_0.38280168913440005;\n+NC_001604\tgetOrfsOrCds\tCDS\t39389\t39538\t.\t+\t0\tID=ORF.0.1414_0.38280168913440005.CDS;Parent=ORF.0.1414_0.38280168913440005.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39378\t39382\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1414_0.38280168913440005.rbs-0;Parent=ORF.0.1414_0.38280168913440005;\n+NC_001604\tcpt.fixModel\tgene\t39423\
t39557\t.\t+\t.\tID=ORF.0.2020_0.5190345053482985;\n+NC_001604\tcpt.fixModel\tmRNA\t39441\t39557\t.\t+\t.\tID=ORF.0.2020_0.5190345053482985.mRNA;Parent=ORF.0.2020_0.5190345053482985;\n+NC_001604\tgetOrfsOrCds\tCDS\t39441\t39557\t.\t+\t0\tID=ORF.0.2020_0.5190345053482985.CDS;Parent=ORF.0.2020_0.5190345053482985.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39423\t39425\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2020_0.5190345053482985.rbs-0;Parent=ORF.0.2020_0.5190345053482985;\n+NC_001604\tcpt.fixModel\tgene\t39445\t39557\t.\t+\t.\tID=ORF.0.2021_0.3406547997303282;\n+NC_001604\tcpt.fixModel\tmRNA\t39453\t39557\t.\t+\t.\tID=ORF.0.2021_0.3406547997303282.mRNA;Parent=ORF.0.2021_0.3406547997303282;\n+NC_001604\tgetOrfsOrCds\tCDS\t39453\t39557\t.\t+\t0\tID=ORF.0.2021_0.3406547997303282.CDS;Parent=ORF.0.2021_0.3406547997303282.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39445\t39447\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2021_0.3406547997303282.rbs-0;Parent=ORF.0.2021_0.3406547997303282;\n+NC_001604\tcpt.fixModel\tgene\t39445\t39557\t.\t+\t.\tID=ORF.0.2023_0.2547887662353707;\n+NC_001604\tcpt.fixModel\tmRNA\t39462\t39557\t.\t+\t.\tID=ORF.0.2023_0.2547887662353707.mRNA;Parent=ORF.0.2023_0.2547887662353707;\n+NC_001604\tgetOrfsOrCds\tCDS\t39462\t39557\t.\t+\t0\tID=ORF.0.2023_0.2547887662353707.CDS;Parent=ORF.0.2023_0.2547887662353707.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39445\t39447\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2023_0.2547887662353707.rbs-0;Parent=ORF.0.2023_0.2547887662353707;\n+NC_001604\tcpt.fixModel\tgene\t39494\t39623\t.\t-\t.\tID=ORF.0.2029_0.06575596254471638;\n+NC_001604\tcpt.fixModel\tmRNA\t39494\t39604\t.\t-\t.\tID=ORF.0.2029_0.06575596254471638.mRNA;Parent=ORF.0.2029_0.06575596254471638;\n+NC_001604\tgetOrfsOrCds\tCDS\t39494\t39604\t.\t-\t0\tID=ORF.0.2029_0.06575596254471638.CDS;Parent=ORF.0.2029_0.06575596254471638.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39620\t39623\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.20
29_0.06575596254471638.rbs-0;Parent=ORF.0.2029_0.06575596254471638;\n+NC_001604\tcpt.fixModel\tgene\t39713\t39861\t.\t-\t.\tID=ORF.0.2026_0.08836418353296771;\n+NC_001604\tcpt.fixModel\tmRNA\t39713\t39847\t.\t-\t.\tID=ORF.0.2026_0.08836418353296771.mRNA;Parent=ORF.0.2026_0.08836418353296771;\n+NC_001604\tgetOrfsOrCds\tCDS\t39713\t39847\t.\t-\t0\tID=ORF.0.2026_0.08836418353296771.CDS;Parent=ORF.0.2026_0.08836418353296771.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39858\t39861\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2026_0.08836418353296771.rbs-0;Parent=ORF.0.2026_0.08836418353296771;\n' |
b |
diff -r 000000000000 -r 7b4923695cfd cpt_lipory/test-data/T7_LiporyOut.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_lipory/test-data/T7_LiporyOut.gff3 Fri May 13 05:21:38 2022 +0000 |
b |
b'@@ -0,0 +1,296 @@\n+##gff-version 3\n+NC_001604\tcpt.fixModel\tgene\t542\t666\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tmRNA\t542\t649\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331.mRNA;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tgetOrfsOrCds\tCDS\t542\t649\t.\t-\t0\tID=ORF.0.2504_0.5545204186518331.CDS;Parent=ORF.0.2504_0.5545204186518331.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t664\t666\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2504_0.5545204186518331.rbs-0;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tfeature\tLipobox\t605\t616\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2504_0.5545204186518331.lipobox;sequence=LTAC;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tgene\t627\t839\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tmRNA\t627\t824\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932.mRNA;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tgetOrfsOrCds\tCDS\t627\t824\t.\t-\t0\tID=ORF.0.3363_0.9803284230217932.CDS;Parent=ORF.0.3363_0.9803284230217932.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t837\t839\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.3363_0.9803284230217932.rbs-0;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tfeature\tLipobox\t720\t731\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.3363_0.9803284230217932.lipobox;sequence=FTSC;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tgene\t1487\t1639\t.\t+\t.\tID=ORF.0.566_0.7631590264556776;\n+NC_001604\tcpt.fixModel\tmRNA\t1496\t1639\t.\t+\t.\tID=ORF.0.566_0.7631590264556776.mRNA;Parent=ORF.0.566_0.7631590264556776;\n+NC_001604\tgetOrfsOrCds\tCDS\t1496\t1639\t.\t+\t0\tID=ORF.0.566_0.7631590264556776.CDS;Parent=ORF.0.566_0.7631590264556776.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t1487\t1490\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.566_0.7631590264556776.rbs-0;Parent=ORF.0.566_0.7631590264556776;\n+NC_001604\tfeature\tLipobox\t1589\t1600\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.566_0.7631590264556776.lipobox;se
quence=FTAC;Parent=ORF.0.566_0.7631590264556776;\n+NC_001604\tcpt.fixModel\tgene\t1490\t1639\t.\t+\t.\tID=ORF.0.567_0.10768222865442767;\n+NC_001604\tcpt.fixModel\tmRNA\t1502\t1639\t.\t+\t.\tID=ORF.0.567_0.10768222865442767.mRNA;Parent=ORF.0.567_0.10768222865442767;\n+NC_001604\tgetOrfsOrCds\tCDS\t1502\t1639\t.\t+\t0\tID=ORF.0.567_0.10768222865442767.CDS;Parent=ORF.0.567_0.10768222865442767.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t1490\t1493\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.567_0.10768222865442767.rbs-0;Parent=ORF.0.567_0.10768222865442767;\n+NC_001604\tfeature\tLipobox\t1589\t1600\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.567_0.10768222865442767.lipobox;sequence=FTAC;Parent=ORF.0.567_0.10768222865442767;\n+NC_001604\tcpt.fixModel\tgene\t3341\t3547\t.\t-\t.\tID=ORF.0.2469_0.7331780084741397;\n+NC_001604\tcpt.fixModel\tmRNA\t3341\t3535\t.\t-\t.\tID=ORF.0.2469_0.7331780084741397.mRNA;Parent=ORF.0.2469_0.7331780084741397;\n+NC_001604\tgetOrfsOrCds\tCDS\t3341\t3535\t.\t-\t0\tID=ORF.0.2469_0.7331780084741397.CDS;Parent=ORF.0.2469_0.7331780084741397.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t3545\t3547\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2469_0.7331780084741397.rbs-0;Parent=ORF.0.2469_0.7331780084741397;\n+NC_001604\tfeature\tLipobox\t3488\t3499\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2469_0.7331780084741397.lipobox;sequence=LISC;Parent=ORF.0.2469_0.7331780084741397;\n+NC_001604\tcpt.fixModel\tgene\t3433\t5822\t.\t+\t.\tID=ORF.0.1457_0.7756036756597531;\n+NC_001604\tcpt.fixModel\tmRNA\t3444\t5822\t.\t+\t.\tID=ORF.0.1457_0.7756036756597531.mRNA;Parent=ORF.0.1457_0.7756036756597531;\n+NC_001604\tgetOrfsOrCds\tCDS\t3444\t5822\t.\t+\t0\tID=ORF.0.1457_0.7756036756597531.CDS;Parent=ORF.0.1457_0.7756036756597531.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t3433\t3435\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1457_0.7756036756597531.rbs-0;Parent=ORF.0.1457_0.7756036756597531;\n+NC_001604\tfeature\tLipobox\t3534\t3545\t.\t+\t.\tsource=CPT_LipoRy;ID=O
RF.0.1457_0.7756036756597531.lipobox;sequence=TLAC;Parent=ORF.0.1457_0.7756036756597531;\n+NC_001604\tcpt.fixModel\tgene\t4440\t5822\t.\t+\t.\tID=ORF.0.1500_0.6062249049400771;\n+NC_001604\tcpt.fixModel\tmRNA\t4455\t5822\t.\t+\t.\tID=ORF.0.1500_0.6062249049400771.mRNA;Parent=ORF.0.1500_0.6'..b'ID=ORF.0.1322_0.7094403889052515.lipobox;sequence=LYGC;Parent=ORF.0.1322_0.7094403889052515;\n+NC_001604\tcpt.fixModel\tgene\t36787\t36898\t.\t+\t.\tID=ORF.0.1324_0.11087411288527005;\n+NC_001604\tcpt.fixModel\tmRNA\t36797\t36898\t.\t+\t.\tID=ORF.0.1324_0.11087411288527005.mRNA;Parent=ORF.0.1324_0.11087411288527005;\n+NC_001604\tgetOrfsOrCds\tCDS\t36797\t36898\t.\t+\t0\tID=ORF.0.1324_0.11087411288527005.CDS;Parent=ORF.0.1324_0.11087411288527005.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t36787\t36789\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1324_0.11087411288527005.rbs-0;Parent=ORF.0.1324_0.11087411288527005;\n+NC_001604\tfeature\tLipobox\t36851\t36862\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1324_0.11087411288527005.lipobox;sequence=LYGC;Parent=ORF.0.1324_0.11087411288527005;\n+NC_001604\tcpt.fixModel\tgene\t36787\t36898\t.\t+\t.\tID=ORF.0.1325_0.22902888411750433;\n+NC_001604\tcpt.fixModel\tmRNA\t36800\t36898\t.\t+\t.\tID=ORF.0.1325_0.22902888411750433.mRNA;Parent=ORF.0.1325_0.22902888411750433;\n+NC_001604\tgetOrfsOrCds\tCDS\t36800\t36898\t.\t+\t0\tID=ORF.0.1325_0.22902888411750433.CDS;Parent=ORF.0.1325_0.22902888411750433.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t36787\t36789\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1325_0.22902888411750433.rbs-0;Parent=ORF.0.1325_0.22902888411750433;\n+NC_001604\tfeature\tLipobox\t36851\t36862\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1325_0.22902888411750433.lipobox;sequence=LYGC;Parent=ORF.0.1325_0.22902888411750433;\n+NC_001604\tcpt.fixModel\tgene\t37020\t37283\t.\t+\t.\tID=ORF.0.1961_0.03848108116896165;\n+NC_001604\tcpt.fixModel\tmRNA\t37032\t37283\t.\t+\t.\tID=ORF.0.1961_0.03848108116896165.mRNA;Parent=ORF.0.1961_0.03848108
116896165;\n+NC_001604\tgetOrfsOrCds\tCDS\t37032\t37283\t.\t+\t0\tID=ORF.0.1961_0.03848108116896165.CDS;Parent=ORF.0.1961_0.03848108116896165.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37020\t37026\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1961_0.03848108116896165.rbs-0;Parent=ORF.0.1961_0.03848108116896165;\n+NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1961_0.03848108116896165.lipobox;sequence=ISGC;Parent=ORF.0.1961_0.03848108116896165;\n+NC_001604\tcpt.fixModel\tgene\t37034\t37283\t.\t+\t.\tID=ORF.0.1962_0.6590821562203436;\n+NC_001604\tcpt.fixModel\tmRNA\t37050\t37283\t.\t+\t.\tID=ORF.0.1962_0.6590821562203436.mRNA;Parent=ORF.0.1962_0.6590821562203436;\n+NC_001604\tgetOrfsOrCds\tCDS\t37050\t37283\t.\t+\t0\tID=ORF.0.1962_0.6590821562203436.CDS;Parent=ORF.0.1962_0.6590821562203436.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37034\t37036\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1962_0.6590821562203436.rbs-0;Parent=ORF.0.1962_0.6590821562203436;\n+NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1962_0.6590821562203436.lipobox;sequence=ISGC;Parent=ORF.0.1962_0.6590821562203436;\n+NC_001604\tcpt.fixModel\tgene\t37074\t37283\t.\t+\t.\tID=ORF.0.1964_0.6899335526754861;\n+NC_001604\tcpt.fixModel\tmRNA\t37083\t37283\t.\t+\t.\tID=ORF.0.1964_0.6899335526754861.mRNA;Parent=ORF.0.1964_0.6899335526754861;\n+NC_001604\tgetOrfsOrCds\tCDS\t37083\t37283\t.\t+\t0\tID=ORF.0.1964_0.6899335526754861.CDS;Parent=ORF.0.1964_0.6899335526754861.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37074\t37076\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1964_0.6899335526754861.rbs-0;Parent=ORF.0.1964_0.6899335526754861;\n+NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1964_0.6899335526754861.lipobox;sequence=ISGC;Parent=ORF.0.1964_0.6899335526754861;\n+NC_001604\tcpt.fixModel\tgene\t37213\t37379\t.\t-\t.\tID=ORF.0.2528_0.10906489943882225;\n+NC_001604\tcpt.fixModel\tmRNA\t37213\t3736
8\t.\t-\t.\tID=ORF.0.2528_0.10906489943882225.mRNA;Parent=ORF.0.2528_0.10906489943882225;\n+NC_001604\tgetOrfsOrCds\tCDS\t37213\t37368\t.\t-\t0\tID=ORF.0.2528_0.10906489943882225.CDS;Parent=ORF.0.2528_0.10906489943882225.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37377\t37379\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2528_0.10906489943882225.rbs-0;Parent=ORF.0.2528_0.10906489943882225;\n+NC_001604\tfeature\tLipobox\t37315\t37326\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2528_0.10906489943882225.lipobox;sequence=IVSC;Parent=ORF.0.2528_0.10906489943882225;\n' |