Repository 'cpt_remove_annotations'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_remove_annotations

Changeset 0:03d27abd1dfd (2022-05-13)
Next changeset 1:b46f0c9a4679 (2022-05-20)
Commit message:
Uploaded
added:
cpt_rem_annotes/cpt-macros.xml
cpt_rem_annotes/macros.xml
cpt_rem_annotes/remove_annotations.py
cpt_rem_annotes/remove_annotations.xml
cpt_rem_annotes/test-data/RemoveAnnote_In.gff3
cpt_rem_annotes/test-data/RemoveAnnote_Out.gff3
b
diff -r 000000000000 -r 03d27abd1dfd cpt_rem_annotes/cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_rem_annotes/cpt-macros.xml Fri May 13 05:26:10 2022 +0000
[
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<!-- Shared CPT macro library: one requirements set and several reusable citation groups. -->
+<macros>
+  <!-- Requirement set pinning python 2.7, biopython 1.65 and requests 2.12.1; the
+       expanding tool may append further requirements through yield. The version
+       command prints the git HEAD of the tool directory. -->
+  <xml name="gff_requirements">
+    <requirements>
+      <requirement type="package" version="2.7">python</requirement>
+      <requirement type="package" version="1.65">biopython</requirement>
+      <requirement type="package" version="2.12.1">requests</requirement>
+      <yield/>
+    </requirements>
+    <version_command>
+      <![CDATA[
+        cd $__tool_directory__ && git rev-parse HEAD
+      ]]>
+    </version_command>
+  </xml>
+
+  <!-- Bare citation pair (DOI + Mijalis/Rasche BibTeX) without a surrounding citations element. -->
+  <xml name="citation/mijalisrasche">
+    <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+    <citation type="bibtex">@unpublished{galaxyTools,
+      author = {E. Mijalis, H. Rasche},
+      title = {CPT Galaxy Tools},
+      year = {2013-2017},
+      note = {https://github.com/tamu-cpt/galaxy-tools/}
+      }
+    </citation>
+  </xml>
+
+  <!-- Complete citations element: DOI + Mijalis/Rasche BibTeX; extra citations go through yield. -->
+  <xml name="citations">
+    <citations>
+      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+      <citation type="bibtex">
+        @unpublished{galaxyTools,
+        author = {E. Mijalis, H. Rasche},
+        title = {CPT Galaxy Tools},
+        year = {2013-2017},
+        note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+      </citation>
+      <yield/>
+    </citations>
+  </xml>
+
+  <!-- Complete citations element: DOI + C. Ross BibTeX. -->
+  <xml name="citations-crr">
+    <citations>
+      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+      <citation type="bibtex">
+        @unpublished{galaxyTools,
+        author = {C. Ross},
+        title = {CPT Galaxy Tools},
+        year = {2020-},
+        note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+      </citation>
+      <yield/>
+    </citations>
+  </xml>
+
+  <!-- Complete citations element: DOI + Mijalis/Rasche BibTeX + A. Criscione BibTeX. -->
+  <xml name="citations-2020">
+    <citations>
+      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+      <citation type="bibtex">
+        @unpublished{galaxyTools,
+        author = {E. Mijalis, H. Rasche},
+        title = {CPT Galaxy Tools},
+        year = {2013-2017},
+        note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+      </citation>
+      <citation type="bibtex">
+        @unpublished{galaxyTools,
+        author = {A. Criscione},
+        title = {CPT Galaxy Tools},
+        year = {2019-2021},
+        note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+      </citation>
+      <yield/>
+    </citations>
+  </xml>
+
+  <!-- Complete citations element: DOI + A. Criscione BibTeX only. -->
+  <xml name="citations-2020-AJC-solo">
+    <citations>
+      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+      <citation type="bibtex">
+        @unpublished{galaxyTools,
+        author = {A. Criscione},
+        title = {CPT Galaxy Tools},
+        year = {2019-2021},
+        note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+      </citation>
+      <yield/>
+    </citations>
+  </xml>
+
+  <!-- Complete citations element: DOI + C. Maughmer BibTeX. -->
+  <xml name="citations-clm">
+    <citations>
+      <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+      <citation type="bibtex">
+        @unpublished{galaxyTools,
+        author = {C. Maughmer},
+        title = {CPT Galaxy Tools},
+        year = {2017-2020},
+        note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+      </citation>
+      <yield/>
+    </citations>
+  </xml>
+
+  <!-- Bare C. Maughmer BibTeX citation followed by yield, without a citations wrapper. -->
+  <xml name="sl-citations-clm">
+    <citation type="bibtex">
+      @unpublished{galaxyTools,
+      author = {C. Maughmer},
+      title = {CPT Galaxy Tools},
+      year = {2017-2020},
+      note = {https://github.com/tamu-cpt/galaxy-tools/}
+      }
+    </citation>
+    <yield/>
+  </xml>
+</macros>
b
diff -r 000000000000 -r 03d27abd1dfd cpt_rem_annotes/macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_rem_annotes/macros.xml Fri May 13 05:26:10 2022 +0000
b
@@ -0,0 +1,85 @@
+<?xml version="1.0"?>
+<!-- Shared input macros and Cheetah tokens: requirements, BLAST inputs, GFF3/FASTA inputs
+     and the reference-genome selector. -->
+<macros>
+  <!-- Requirement set pinning python 3.6, biopython 1.77 and cpt_gffparser 1.1.3; the
+       expanding tool may append further requirements through yield. -->
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="3.6">python</requirement>
+      <requirement type="package" version="1.77">biopython</requirement>
+      <requirement type="package" version="1.1.3">cpt_gffparser</requirement>
+      <yield/>
+    </requirements>
+  </xml>
+
+  <!-- Tabular BLAST input: the token is the quoted command-line reference to the
+       parameter declared by the blast_tsv macro. -->
+  <token name="@BLAST_TSV@">
+ "$blast_tsv"
+  </token>
+  <xml name="blast_tsv">
+    <param label="Blast Results" help="TSV/tabular (25 Column)"
+           name="blast_tsv" type="data" format="tabular" />
+  </xml>
+
+  <!-- BLAST XML input: token plus matching parameter macro. -->
+  <token name="@BLAST_XML@">
+ "$blast_xml"
+  </token>
+  <xml name="blast_xml">
+    <param label="Blast Results" help="XML format"
+           name="blast_xml" type="data" format="blastxml" />
+  </xml>
+
+  <!-- Paired FASTA + GFF3 dataset inputs. -->
+  <xml name="gff3_with_fasta">
+    <param label="Genome Sequences" name="fasta" type="data" format="fasta" />
+    <param label="Genome Annotations" name="gff3" type="data" format="gff3" />
+  </xml>
+
+  <!-- Reference genome chooser: a FASTA dataset from the history (default) or an entry
+       of the all_fasta data table. -->
+  <xml name="genome_selector">
+    <conditional name="reference_genome">
+      <param name="reference_genome_source" type="select" label="Reference Genome">
+        <option value="history" selected="True">From History</option>
+        <option value="cached">Locally Cached</option>
+      </param>
+      <when value="cached">
+        <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+          <options from_data_table="all_fasta"/>
+        </param>
+      </when>
+      <when value="history">
+        <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+      </when>
+    </conditional>
+  </xml>
+
+  <!-- Single GFF3 dataset input, referenced on the command line via @INPUT_GFF@. -->
+  <xml name="gff3_input">
+    <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+  </xml>
+
+  <!-- Convenience macro: GFF3 input followed by the reference genome selector. -->
+  <xml name="input/gff3+fasta">
+    <expand macro="gff3_input" />
+    <expand macro="genome_selector" />
+  </xml>
+  <token name="@INPUT_GFF@">
+ "$gff3_data"
+  </token>
+
+  <!-- Path of the selected reference FASTA: the data-table path when cached, otherwise
+       genomeref.fa, the symlink created by @GENOME_SELECTOR_PRE@. -->
+  <token name="@INPUT_FASTA@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+ "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+#end if
+  </token>
+
+  <!-- Command prelude for history genomes: symlinks the dataset to genomeref.fa.
+       The select value is wrapped in str() like in the sibling tokens so the comparison
+       is a plain string comparison, and the dataset path is quoted for the shell. -->
+  <token name="@GENOME_SELECTOR_PRE@">
+#if str($reference_genome.reference_genome_source) == 'history':
+ ln -s '$reference_genome.genome_fasta' genomeref.fa;
+#end if
+  </token>
+
+  <!-- Same expansion as @INPUT_FASTA@. -->
+  <token name="@GENOME_SELECTOR@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+ "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+#end if
+  </token>
+
+  <!-- Generic FASTA input, referenced on the command line via @SEQUENCE@. -->
+  <xml name="input/fasta">
+    <param label="Fasta file" name="sequences" type="data" format="fasta"/>
+  </xml>
+
+  <token name="@SEQUENCE@">
+ "$sequences"
+  </token>
+
+  <!-- Protein FASTA input; uses the same parameter name (sequences) as input/fasta. -->
+  <xml name="input/fasta/protein">
+    <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
+  </xml>
+</macros>
b
diff -r 000000000000 -r 03d27abd1dfd cpt_rem_annotes/remove_annotations.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_rem_annotes/remove_annotations.py Fri May 13 05:26:10 2022 +0000
[
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+import sys
+import argparse
+from CPT_GFFParser import gffParse, gffWrite
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
+    parser.add_argument("--remark", action="store_true", help="Remove remark features")
+    parser.add_argument("--region", action="store_true", help="Remove region features")
+    args = parser.parse_args()
+
+    for rec in gffParse(args.gff3):
+        rec.annotations = {}
+        if args.remark:
+            rec.features = [x for x in rec.features if x.type != "remark"]
+        if args.region:
+            rec.features = [x for x in rec.features if x.type != "region"]
+        gffWrite([rec], sys.stdout)
b
diff -r 000000000000 -r 03d27abd1dfd cpt_rem_annotes/remove_annotations.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_rem_annotes/remove_annotations.xml Fri May 13 05:26:10 2022 +0000
[
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<!-- Galaxy wrapper for remove_annotations.py: strips 'remark' and/or 'region' features
+     from a GFF3 dataset and writes the result to a new GFF3 dataset. -->
+<tool id="edu.tamu.cpt.gff3.remove_annots" name="Remove Annotation Feature" version="19.1.0.1" profile="16.04">
+  <description>that's unused in our GFF tools</description>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <!-- Paths are single-quoted so that install or dataset paths containing spaces or
+       shell metacharacters are passed as single arguments. $remark and $region expand
+       to their flag (truevalue) or to an empty string (falsevalue). -->
+  <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/remove_annotations.py'
+'$gff3_data'
+$remark
+$region
+> '$default']]></command>
+  <inputs>
+    <expand macro="gff3_input" />
+    <param checked="true" label="Remove 'remark' features" name="remark" type="boolean" truevalue="--remark" falsevalue="" />
+    <param checked="true" label="Remove 'region' features" name="region" type="boolean" truevalue="--region" falsevalue="" />
+  </inputs>
+  <outputs>
+    <data format="gff3" name="default"/>
+  </outputs>
+  <tests>
+    <!-- Both removal options enabled; boolean test inputs are given as true/false
+         rather than as the command-line flag text. -->
+    <test>
+      <param name="gff3_data" value="RemoveAnnote_In.gff3" />
+      <param name="remark" value="true" />
+      <param name="region" value="true" />
+      <output name="default" file="RemoveAnnote_Out.gff3" />
+    </test>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+For an input GFF3, this tool removes the feature entries of type remark and/or region (column 3), as needed for
+compatibility with certain tools. These features typically encompass the entire length of the sequence upon which the GFF3 is based.
+
+Example input:
+    Miro annotation remark 1 167935 . . . gff-version=3;sequence-region=%28%27Miro%27%2C 0%2C 172788%29
+
+    Miro cpt gene 1231 5436 . . . ID=CDS1;
+
+Example output:
+    Miro cpt gene 1231 5436 . . . ID=CDS1;
+
+
+      ]]></help>
+  <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 03d27abd1dfd cpt_rem_annotes/test-data/RemoveAnnote_In.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_rem_annotes/test-data/RemoveAnnote_In.gff3 Fri May 13 05:26:10 2022 +0000
b
b'@@ -0,0 +1,292 @@\n+##gff-version 3\n+##sequence-region Pipo 1 41839\n+Pipo\tannotation\tremark\t1\t41839\t.\t.\t.\tgff-version=3;sequence-region=%28%27NODE_1_length_42045_cov_96%27%2C 0%2C 41956%29\n+Pipo\tGenBank\tgene\t258\t566\t.\t+\t1\tID=CPT_Pipo_001;Name=CPT_Pipo_001\n+Pipo\tGenBank\tmRNA\t273\t566\t.\t+\t1\tID=CPT_Pipo_001.t01;Parent=CPT_Pipo_001\n+Pipo\tGenBank\tCDS\t273\t566\t.\t+\t1\tID=CPT_Pipo_001.p01;Name=CPT_Pipo_001;Note=detected two transmembrane domains,8782901 HHPR;Parent=CPT_Pipo_001.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t273\t566\t.\t+\t1\tParent=CPT_Pipo_001.t01\n+Pipo\tGenBank\tRBS\t258\t261\t.\t+\t1\tName=CPT_Pipo_001;Parent=CPT_Pipo_001\n+Pipo\tGenBank\tgene\t841\t1031\t.\t+\t1\tID=CPT_Pipo_002;Name=CPT_Pipo_002\n+Pipo\tGenBank\tmRNA\t852\t1031\t.\t+\t1\tID=CPT_Pipo_002.t01;Parent=CPT_Pipo_002\n+Pipo\tGenBank\tCDS\t852\t1031\t.\t+\t1\tID=CPT_Pipo_002.p01;Name=CPT_Pipo_002;Note=4.9466e-34 %5BAcinetobacter phage vB_ApiP_P1%5D;Parent=CPT_Pipo_002.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t852\t1031\t.\t+\t1\tParent=CPT_Pipo_002.t01\n+Pipo\tGenBank\tRBS\t841\t844\t.\t+\t1\tName=CPT_Pipo_002;Parent=CPT_Pipo_002\n+Pipo\tGenBank\tgene\t1090\t1303\t.\t+\t1\tID=CPT_Pipo_003;Name=CPT_Pipo_003\n+Pipo\tGenBank\tmRNA\t1100\t1303\t.\t+\t1\tID=CPT_Pipo_003.t01;Parent=CPT_Pipo_003\n+Pipo\tGenBank\tCDS\t1100\t1303\t.\t+\t1\tID=CPT_Pipo_003.p01;Name=CPT_Pipo_003;Note=1.1538e-41%5BAcinetobacter phage vB_AbaP_B09_Aci08%5D;Parent=CPT_Pipo_003.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t1100\t1303\t.\t+\t1\tParent=CPT_Pipo_003.t01\n+Pipo\tGenBank\tRBS\t1090\t1092\t.\t+\t1\tName=CPT_Pipo_003;Parent=CPT_Pipo_003\n+Pipo\tGenBank\tgene\t1732\t1992\t.\t+\t1\tID=CPT_Pipo_004;Name=CPT_Pipo_004\n+Pipo\tGenBank\tmRNA\t1744\t1992\t.\t+\t1\tID=CPT_Pipo_004.t01;Parent=CPT_Pipo_004\n+Pipo\tGenBank\tCDS\t1744\t1992\t.\t+\t1\tID=CPT_Pipo_004.p01;Name=CPT_Pipo_004;Note=8.14463e-44 %5BAcinetobacter phage 
vB_AbaP_46-62_Aci07%5D;Parent=CPT_Pipo_004.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t1744\t1992\t.\t+\t1\tParent=CPT_Pipo_004.t01\n+Pipo\tGenBank\tRBS\t1732\t1736\t.\t+\t1\tName=CPT_Pipo_004;Parent=CPT_Pipo_004\n+Pipo\tGenBank\tgene\t2008\t2572\t.\t+\t1\tID=CPT_Pipo_005;Name=CPT_Pipo_005\n+Pipo\tGenBank\tmRNA\t2021\t2572\t.\t+\t1\tID=CPT_Pipo_005.t01;Parent=CPT_Pipo_005\n+Pipo\tGenBank\tCDS\t2021\t2572\t.\t+\t1\tID=CPT_Pipo_005.p01;Name=CPT_Pipo_005;Note=3.57359e-23 phiKMV NP_877442.1;Parent=CPT_Pipo_005.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t2021\t2572\t.\t+\t1\tParent=CPT_Pipo_005.t01\n+Pipo\tGenBank\tRBS\t2008\t2010\t.\t+\t1\tName=CPT_Pipo_005;Parent=CPT_Pipo_005\n+Pipo\tGenBank\tgene\t2564\t2948\t.\t+\t1\tID=CPT_Pipo_006;Name=CPT_Pipo_006\n+Pipo\tGenBank\tmRNA\t2574\t2948\t.\t+\t1\tID=CPT_Pipo_006.t01;Parent=CPT_Pipo_006\n+Pipo\tGenBank\tCDS\t2574\t2948\t.\t+\t1\tID=CPT_Pipo_006.p01;Name=CPT_Pipo_006;Note=1.39987e-77 %5BAcinetobacter phage IME200%5D,detected coil fragment;Parent=CPT_Pipo_006.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t2574\t2948\t.\t+\t1\tParent=CPT_Pipo_006.t01\n+Pipo\tGenBank\tRBS\t2564\t2567\t.\t+\t1\tName=CPT_Pipo_006;Parent=CPT_Pipo_006\n+Pipo\tGenBank\tgene\t2930\t3052\t.\t+\t1\tID=CPT_Pipo_007;Name=CPT_Pipo_007\n+Pipo\tGenBank\tmRNA\t2939\t3052\t.\t+\t1\tID=CPT_Pipo_007.t01;Parent=CPT_Pipo_007\n+Pipo\tGenBank\tCDS\t2939\t3052\t.\t+\t1\tID=CPT_Pipo_007.p01;Name=CPT_Pipo_007;Parent=CPT_Pipo_007.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t2939\t3052\t.\t+\t1\tParent=CPT_Pipo_007.t01\n+Pipo\tGenBank\tRBS\t2930\t2933\t.\t+\t1\tName=CPT_Pipo_007;Parent=CPT_Pipo_007\n+Pipo\tGenBank\tgene\t3027\t3263\t.\t+\t1\tID=CPT_Pipo_008;Name=CPT_Pipo_008\n+Pipo\tGenBank\tmRNA\t3039\t3263\t.\t+\t1\tID=CPT_Pipo_008.t01;Parent=CPT_Pipo_008\n+Pipo\tGenBank\tCDS\t3039\t3263\t.\t+\t1\tID=CPT_Pipo_008.p01;Name=CPT_Pipo_008;Note=2.88488e-44 %5BAcinetobacter phage 
Fri1%5D;Parent=CPT_Pipo_008.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t3039\t3263\t.\t+\t1\tParent=CPT_Pipo_008.t01\n+Pipo\tGenBank\tRBS\t3027\t3030\t.\t+\t1\tName=CPT_Pipo_008;Parent=CPT_Pipo_008\n+Pipo\tGenBank\tgene\t3336\t3944\t.\t+\t1\tID=CPT_Pipo_009;Name=CPT_Pipo_009\n+Pipo\tGenBank\tmRNA\t3348\t3944\t.\t+\t1\tID=CPT_Pipo_009.t01;Parent=CPT_Pipo_009\n+Pipo\tGenBank\tCDS\t3348\t3944\t.\t+\t1\tID=CPT_Pipo_009.p01;Name=CPT_Pipo_009;Note=2.06752e-145 %5BAcine'..b'o_050.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t30356\t33241\t.\t+\t1\tParent=CPT_Pipo_050.t01\n+Pipo\tGenBank\tRBS\t30347\t30351\t.\t+\t1\tName=CPT_Pipo_050;Parent=CPT_Pipo_050\n+Pipo\tGenBank\tgene\t33240\t36349\t.\t+\t1\tID=CPT_Pipo_051;Name=CPT_Pipo_051\n+Pipo\tGenBank\tmRNA\t33251\t36349\t.\t+\t1\tID=CPT_Pipo_051.t01;Parent=CPT_Pipo_051\n+Pipo\tGenBank\tCDS\t33251\t36349\t.\t+\t1\tID=CPT_Pipo_051.p01;Name=CPT_Pipo_051;Note=putative membrane protein,blast to phiKMV with score %3D7.0935e-9;Parent=CPT_Pipo_051.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t33251\t36349\t.\t+\t1\tParent=CPT_Pipo_051.t01\n+Pipo\tGenBank\tRBS\t33240\t33245\t.\t+\t1\tName=CPT_Pipo_051;Parent=CPT_Pipo_051\n+Pipo\tGenBank\tgene\t36341\t38392\t.\t+\t1\tID=CPT_Pipo_052;Name=CPT_Pipo_052\n+Pipo\tGenBank\tmRNA\t36356\t38392\t.\t+\t1\tID=CPT_Pipo_052.t01;Parent=CPT_Pipo_052\n+Pipo\tGenBank\tCDS\t36356\t38392\t.\t+\t1\tID=CPT_Pipo_052.p01;Name=CPT_Pipo_052;Note=potential tail fiber/tail spike,5924656 HHPR hit 5JS4_C%3B 6EU4_C%3B 5W5P %28To be published by Leiman%29,PMID: 28209973;Parent=CPT_Pipo_052.t01;product=tailspike 
protein\n+Pipo\tGenBank\texon\t36356\t38392\t.\t+\t1\tParent=CPT_Pipo_052.t01\n+Pipo\tGenBank\tRBS\t36341\t36344\t.\t+\t1\tName=CPT_Pipo_052;Parent=CPT_Pipo_052\n+Pipo\tGenBank\tgene\t38392\t38737\t.\t+\t1\tID=CPT_Pipo_053;Name=CPT_Pipo_053\n+Pipo\tGenBank\tmRNA\t38402\t38737\t.\t+\t1\tID=CPT_Pipo_053.t01;Parent=CPT_Pipo_053\n+Pipo\tGenBank\tCDS\t38402\t38737\t.\t+\t1\tID=CPT_Pipo_053.p01;Name=CPT_Pipo_053;Note=7.76278e-74,IPR006481,detected three transmembrane domains;Parent=CPT_Pipo_053.t01;product=holin\n+Pipo\tGenBank\texon\t38402\t38737\t.\t+\t1\tParent=CPT_Pipo_053.t01\n+Pipo\tGenBank\tRBS\t38392\t38396\t.\t+\t1\tName=CPT_Pipo_053;Parent=CPT_Pipo_053\n+Pipo\tGenBank\tgene\t38715\t39281\t.\t+\t1\tID=CPT_Pipo_054;Name=CPT_Pipo_054\n+Pipo\tGenBank\tmRNA\t38724\t39281\t.\t+\t1\tID=CPT_Pipo_054.t01;Parent=CPT_Pipo_054\n+Pipo\tGenBank\tCDS\t38724\t39281\t.\t+\t1\tID=CPT_Pipo_054.p01;Name=CPT_Pipo_054;Note=IPR023346 1.63e-19,Random endo-hydrolysis of N-acetyl-beta-D-glucosaminide %281-%3E4%29-beta-linkages in chitin and chitodextrins.;Parent=CPT_Pipo_054.t01;product=endolysin\n+Pipo\tGenBank\texon\t38724\t39281\t.\t+\t1\tParent=CPT_Pipo_054.t01\n+Pipo\tGenBank\tRBS\t38715\t38718\t.\t+\t1\tName=CPT_Pipo_054;Parent=CPT_Pipo_054\n+Pipo\tGenBank\tgene\t39280\t39598\t.\t+\t1\tID=CPT_Pipo_055;Name=CPT_Pipo_055\n+Pipo\tGenBank\tmRNA\t39290\t39598\t.\t+\t1\tID=CPT_Pipo_055.t01;Parent=CPT_Pipo_055\n+Pipo\tGenBank\tCDS\t39290\t39598\t.\t+\t1\tID=CPT_Pipo_055.p01;Name=CPT_Pipo_055;Note=1.10536e-8 %5BphiKMV%5D;Parent=CPT_Pipo_055.t01;product=hypothetical 
protein\n+Pipo\tGenBank\texon\t39290\t39598\t.\t+\t1\tParent=CPT_Pipo_055.t01\n+Pipo\tGenBank\tRBS\t39280\t39284\t.\t+\t1\tName=CPT_Pipo_055;Parent=CPT_Pipo_055\n+Pipo\tGenBank\tgene\t39598\t41545\t.\t+\t1\tID=CPT_Pipo_056;Name=CPT_Pipo_056\n+Pipo\tGenBank\tmRNA\t39608\t41545\t.\t+\t1\tID=CPT_Pipo_056.t01;Parent=CPT_Pipo_056\n+Pipo\tGenBank\tCDS\t39608\t41545\t.\t+\t1\tID=CPT_Pipo_056.p01;Name=CPT_Pipo_056;Note=4.73143e-112,probably DNA maturase B phiKMV;Parent=CPT_Pipo_056.t01;product=large terminase subunit\n+Pipo\tGenBank\texon\t39608\t41545\t.\t+\t1\tParent=CPT_Pipo_056.t01\n+Pipo\tGenBank\tRBS\t39598\t39602\t.\t+\t1\tName=CPT_Pipo_056;Parent=CPT_Pipo_056\n+Pipo\tGenBank\tgene\t41528\t41676\t.\t+\t1\tID=CPT_Pipo_057;Name=CPT_Pipo_057\n+Pipo\tGenBank\tmRNA\t41542\t41676\t.\t+\t1\tID=CPT_Pipo_057.t01;Parent=CPT_Pipo_057\n+Pipo\tGenBank\tCDS\t41542\t41676\t.\t+\t1\tID=CPT_Pipo_057.p01;Name=CPT_Pipo_057;Note=1.21677e-21 Acinetobacter phage vB_AbaP_B3;Parent=CPT_Pipo_057.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t41542\t41676\t.\t+\t1\tParent=CPT_Pipo_057.t01\n+Pipo\tGenBank\tRBS\t41528\t41533\t.\t+\t1\tName=CPT_Pipo_057;Parent=CPT_Pipo_057\n+Pipo\tGenBank\tgene\t41624\t41839\t.\t+\t1\tID=CPT_Pipo_058;Name=CPT_Pipo_058\n+Pipo\tGenBank\tmRNA\t41636\t41839\t.\t+\t1\tID=CPT_Pipo_058.t01;Parent=CPT_Pipo_058\n+Pipo\tGenBank\tCDS\t41636\t41839\t.\t+\t1\tID=CPT_Pipo_058.p01;Name=CPT_Pipo_058;Note=1.12427e-36,Acinetobacter phage vB_AbaP_B5;Parent=CPT_Pipo_058.t01;product=hypothetical protein\n+Pipo\tGenBank\texon\t41636\t41839\t.\t+\t1\tParent=CPT_Pipo_058.t01\n+Pipo\tGenBank\tRBS\t41624\t41628\t.\t+\t1\tName=CPT_Pipo_058;Parent=CPT_Pipo_058\n'
b
diff -r 000000000000 -r 03d27abd1dfd cpt_rem_annotes/test-data/RemoveAnnote_Out.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_rem_annotes/test-data/RemoveAnnote_Out.gff3 Fri May 13 05:26:10 2022 +0000
[
b'@@ -0,0 +1,290 @@\n+##gff-version 3\n+Pipo\tGenBank\tgene\t258\t566\t.\t+\t1\tID=CPT_Pipo_001;Name=CPT_Pipo_001;\n+Pipo\tGenBank\tmRNA\t273\t566\t.\t+\t1\tID=CPT_Pipo_001.t01;Parent=CPT_Pipo_001;\n+Pipo\tGenBank\tCDS\t273\t566\t.\t+\t1\tID=CPT_Pipo_001.p01;Name=CPT_Pipo_001;Note=detected two transmembrane domains,8782901 HHPR;Parent=CPT_Pipo_001.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t273\t566\t.\t+\t1\tParent=CPT_Pipo_001.t01;\n+Pipo\tGenBank\tRBS\t258\t261\t.\t+\t1\tName=CPT_Pipo_001;Parent=CPT_Pipo_001;\n+Pipo\tGenBank\tgene\t841\t1031\t.\t+\t1\tID=CPT_Pipo_002;Name=CPT_Pipo_002;\n+Pipo\tGenBank\tmRNA\t852\t1031\t.\t+\t1\tID=CPT_Pipo_002.t01;Parent=CPT_Pipo_002;\n+Pipo\tGenBank\tCDS\t852\t1031\t.\t+\t1\tID=CPT_Pipo_002.p01;Name=CPT_Pipo_002;Note=4.9466e-34 [Acinetobacter phage vB_ApiP_P1];Parent=CPT_Pipo_002.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t852\t1031\t.\t+\t1\tParent=CPT_Pipo_002.t01;\n+Pipo\tGenBank\tRBS\t841\t844\t.\t+\t1\tName=CPT_Pipo_002;Parent=CPT_Pipo_002;\n+Pipo\tGenBank\tgene\t1090\t1303\t.\t+\t1\tID=CPT_Pipo_003;Name=CPT_Pipo_003;\n+Pipo\tGenBank\tmRNA\t1100\t1303\t.\t+\t1\tID=CPT_Pipo_003.t01;Parent=CPT_Pipo_003;\n+Pipo\tGenBank\tCDS\t1100\t1303\t.\t+\t1\tID=CPT_Pipo_003.p01;Name=CPT_Pipo_003;Note=1.1538e-41[Acinetobacter phage vB_AbaP_B09_Aci08];Parent=CPT_Pipo_003.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t1100\t1303\t.\t+\t1\tParent=CPT_Pipo_003.t01;\n+Pipo\tGenBank\tRBS\t1090\t1092\t.\t+\t1\tName=CPT_Pipo_003;Parent=CPT_Pipo_003;\n+Pipo\tGenBank\tgene\t1732\t1992\t.\t+\t1\tID=CPT_Pipo_004;Name=CPT_Pipo_004;\n+Pipo\tGenBank\tmRNA\t1744\t1992\t.\t+\t1\tID=CPT_Pipo_004.t01;Parent=CPT_Pipo_004;\n+Pipo\tGenBank\tCDS\t1744\t1992\t.\t+\t1\tID=CPT_Pipo_004.p01;Name=CPT_Pipo_004;Note=8.14463e-44 [Acinetobacter phage vB_AbaP_46-62_Aci07];Parent=CPT_Pipo_004.t01;product=hypothetical 
protein;\n+Pipo\tGenBank\texon\t1744\t1992\t.\t+\t1\tParent=CPT_Pipo_004.t01;\n+Pipo\tGenBank\tRBS\t1732\t1736\t.\t+\t1\tName=CPT_Pipo_004;Parent=CPT_Pipo_004;\n+Pipo\tGenBank\tgene\t2008\t2572\t.\t+\t1\tID=CPT_Pipo_005;Name=CPT_Pipo_005;\n+Pipo\tGenBank\tmRNA\t2021\t2572\t.\t+\t1\tID=CPT_Pipo_005.t01;Parent=CPT_Pipo_005;\n+Pipo\tGenBank\tCDS\t2021\t2572\t.\t+\t1\tID=CPT_Pipo_005.p01;Name=CPT_Pipo_005;Note=3.57359e-23 phiKMV NP_877442.1;Parent=CPT_Pipo_005.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t2021\t2572\t.\t+\t1\tParent=CPT_Pipo_005.t01;\n+Pipo\tGenBank\tRBS\t2008\t2010\t.\t+\t1\tName=CPT_Pipo_005;Parent=CPT_Pipo_005;\n+Pipo\tGenBank\tgene\t2564\t2948\t.\t+\t1\tID=CPT_Pipo_006;Name=CPT_Pipo_006;\n+Pipo\tGenBank\tmRNA\t2574\t2948\t.\t+\t1\tID=CPT_Pipo_006.t01;Parent=CPT_Pipo_006;\n+Pipo\tGenBank\tCDS\t2574\t2948\t.\t+\t1\tID=CPT_Pipo_006.p01;Name=CPT_Pipo_006;Note=1.39987e-77 [Acinetobacter phage IME200],detected coil fragment;Parent=CPT_Pipo_006.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t2574\t2948\t.\t+\t1\tParent=CPT_Pipo_006.t01;\n+Pipo\tGenBank\tRBS\t2564\t2567\t.\t+\t1\tName=CPT_Pipo_006;Parent=CPT_Pipo_006;\n+Pipo\tGenBank\tgene\t2930\t3052\t.\t+\t1\tID=CPT_Pipo_007;Name=CPT_Pipo_007;\n+Pipo\tGenBank\tmRNA\t2939\t3052\t.\t+\t1\tID=CPT_Pipo_007.t01;Parent=CPT_Pipo_007;\n+Pipo\tGenBank\tCDS\t2939\t3052\t.\t+\t1\tID=CPT_Pipo_007.p01;Name=CPT_Pipo_007;Parent=CPT_Pipo_007.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t2939\t3052\t.\t+\t1\tParent=CPT_Pipo_007.t01;\n+Pipo\tGenBank\tRBS\t2930\t2933\t.\t+\t1\tName=CPT_Pipo_007;Parent=CPT_Pipo_007;\n+Pipo\tGenBank\tgene\t3027\t3263\t.\t+\t1\tID=CPT_Pipo_008;Name=CPT_Pipo_008;\n+Pipo\tGenBank\tmRNA\t3039\t3263\t.\t+\t1\tID=CPT_Pipo_008.t01;Parent=CPT_Pipo_008;\n+Pipo\tGenBank\tCDS\t3039\t3263\t.\t+\t1\tID=CPT_Pipo_008.p01;Name=CPT_Pipo_008;Note=2.88488e-44 [Acinetobacter phage Fri1];Parent=CPT_Pipo_008.t01;product=hypothetical 
protein;\n+Pipo\tGenBank\texon\t3039\t3263\t.\t+\t1\tParent=CPT_Pipo_008.t01;\n+Pipo\tGenBank\tRBS\t3027\t3030\t.\t+\t1\tName=CPT_Pipo_008;Parent=CPT_Pipo_008;\n+Pipo\tGenBank\tgene\t3336\t3944\t.\t+\t1\tID=CPT_Pipo_009;Name=CPT_Pipo_009;\n+Pipo\tGenBank\tmRNA\t3348\t3944\t.\t+\t1\tID=CPT_Pipo_009.t01;Parent=CPT_Pipo_009;\n+Pipo\tGenBank\tCDS\t3348\t3944\t.\t+\t1\tID=CPT_Pipo_009.p01;Name=CPT_Pipo_009;Note=2.06752e-145 [Acinetobacter phage vB_AbaP_B1];Parent=CPT_Pipo_009.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t3348\t3944\t.\t+\t1\tParent=CPT_Pipo_0'..b'l protein;\n+Pipo\tGenBank\texon\t30356\t33241\t.\t+\t1\tParent=CPT_Pipo_050.t01;\n+Pipo\tGenBank\tRBS\t30347\t30351\t.\t+\t1\tName=CPT_Pipo_050;Parent=CPT_Pipo_050;\n+Pipo\tGenBank\tgene\t33240\t36349\t.\t+\t1\tID=CPT_Pipo_051;Name=CPT_Pipo_051;\n+Pipo\tGenBank\tmRNA\t33251\t36349\t.\t+\t1\tID=CPT_Pipo_051.t01;Parent=CPT_Pipo_051;\n+Pipo\tGenBank\tCDS\t33251\t36349\t.\t+\t1\tID=CPT_Pipo_051.p01;Name=CPT_Pipo_051;Note=putative membrane protein,blast to phiKMV with score %3D7.0935e-9;Parent=CPT_Pipo_051.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t33251\t36349\t.\t+\t1\tParent=CPT_Pipo_051.t01;\n+Pipo\tGenBank\tRBS\t33240\t33245\t.\t+\t1\tName=CPT_Pipo_051;Parent=CPT_Pipo_051;\n+Pipo\tGenBank\tgene\t36341\t38392\t.\t+\t1\tID=CPT_Pipo_052;Name=CPT_Pipo_052;\n+Pipo\tGenBank\tmRNA\t36356\t38392\t.\t+\t1\tID=CPT_Pipo_052.t01;Parent=CPT_Pipo_052;\n+Pipo\tGenBank\tCDS\t36356\t38392\t.\t+\t1\tID=CPT_Pipo_052.p01;Name=CPT_Pipo_052;Note=potential tail fiber/tail spike,5924656 HHPR hit 5JS4_C%3B 6EU4_C%3B 5W5P (To be published by Leiman),PMID: 28209973;Parent=CPT_Pipo_052.t01;product=tailspike 
protein;\n+Pipo\tGenBank\texon\t36356\t38392\t.\t+\t1\tParent=CPT_Pipo_052.t01;\n+Pipo\tGenBank\tRBS\t36341\t36344\t.\t+\t1\tName=CPT_Pipo_052;Parent=CPT_Pipo_052;\n+Pipo\tGenBank\tgene\t38392\t38737\t.\t+\t1\tID=CPT_Pipo_053;Name=CPT_Pipo_053;\n+Pipo\tGenBank\tmRNA\t38402\t38737\t.\t+\t1\tID=CPT_Pipo_053.t01;Parent=CPT_Pipo_053;\n+Pipo\tGenBank\tCDS\t38402\t38737\t.\t+\t1\tID=CPT_Pipo_053.p01;Name=CPT_Pipo_053;Note=7.76278e-74,IPR006481,detected three transmembrane domains;Parent=CPT_Pipo_053.t01;product=holin;\n+Pipo\tGenBank\texon\t38402\t38737\t.\t+\t1\tParent=CPT_Pipo_053.t01;\n+Pipo\tGenBank\tRBS\t38392\t38396\t.\t+\t1\tName=CPT_Pipo_053;Parent=CPT_Pipo_053;\n+Pipo\tGenBank\tgene\t38715\t39281\t.\t+\t1\tID=CPT_Pipo_054;Name=CPT_Pipo_054;\n+Pipo\tGenBank\tmRNA\t38724\t39281\t.\t+\t1\tID=CPT_Pipo_054.t01;Parent=CPT_Pipo_054;\n+Pipo\tGenBank\tCDS\t38724\t39281\t.\t+\t1\tID=CPT_Pipo_054.p01;Name=CPT_Pipo_054;Note=IPR023346 1.63e-19,Random endo-hydrolysis of N-acetyl-beta-D-glucosaminide (1->4)-beta-linkages in chitin and chitodextrins.;Parent=CPT_Pipo_054.t01;product=endolysin;\n+Pipo\tGenBank\texon\t38724\t39281\t.\t+\t1\tParent=CPT_Pipo_054.t01;\n+Pipo\tGenBank\tRBS\t38715\t38718\t.\t+\t1\tName=CPT_Pipo_054;Parent=CPT_Pipo_054;\n+Pipo\tGenBank\tgene\t39280\t39598\t.\t+\t1\tID=CPT_Pipo_055;Name=CPT_Pipo_055;\n+Pipo\tGenBank\tmRNA\t39290\t39598\t.\t+\t1\tID=CPT_Pipo_055.t01;Parent=CPT_Pipo_055;\n+Pipo\tGenBank\tCDS\t39290\t39598\t.\t+\t1\tID=CPT_Pipo_055.p01;Name=CPT_Pipo_055;Note=1.10536e-8 [phiKMV];Parent=CPT_Pipo_055.t01;product=hypothetical 
protein;\n+Pipo\tGenBank\texon\t39290\t39598\t.\t+\t1\tParent=CPT_Pipo_055.t01;\n+Pipo\tGenBank\tRBS\t39280\t39284\t.\t+\t1\tName=CPT_Pipo_055;Parent=CPT_Pipo_055;\n+Pipo\tGenBank\tgene\t39598\t41545\t.\t+\t1\tID=CPT_Pipo_056;Name=CPT_Pipo_056;\n+Pipo\tGenBank\tmRNA\t39608\t41545\t.\t+\t1\tID=CPT_Pipo_056.t01;Parent=CPT_Pipo_056;\n+Pipo\tGenBank\tCDS\t39608\t41545\t.\t+\t1\tID=CPT_Pipo_056.p01;Name=CPT_Pipo_056;Note=4.73143e-112,probably DNA maturase B phiKMV;Parent=CPT_Pipo_056.t01;product=large terminase subunit;\n+Pipo\tGenBank\texon\t39608\t41545\t.\t+\t1\tParent=CPT_Pipo_056.t01;\n+Pipo\tGenBank\tRBS\t39598\t39602\t.\t+\t1\tName=CPT_Pipo_056;Parent=CPT_Pipo_056;\n+Pipo\tGenBank\tgene\t41528\t41676\t.\t+\t1\tID=CPT_Pipo_057;Name=CPT_Pipo_057;\n+Pipo\tGenBank\tmRNA\t41542\t41676\t.\t+\t1\tID=CPT_Pipo_057.t01;Parent=CPT_Pipo_057;\n+Pipo\tGenBank\tCDS\t41542\t41676\t.\t+\t1\tID=CPT_Pipo_057.p01;Name=CPT_Pipo_057;Note=1.21677e-21 Acinetobacter phage vB_AbaP_B3;Parent=CPT_Pipo_057.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t41542\t41676\t.\t+\t1\tParent=CPT_Pipo_057.t01;\n+Pipo\tGenBank\tRBS\t41528\t41533\t.\t+\t1\tName=CPT_Pipo_057;Parent=CPT_Pipo_057;\n+Pipo\tGenBank\tgene\t41624\t41839\t.\t+\t1\tID=CPT_Pipo_058;Name=CPT_Pipo_058;\n+Pipo\tGenBank\tmRNA\t41636\t41839\t.\t+\t1\tID=CPT_Pipo_058.t01;Parent=CPT_Pipo_058;\n+Pipo\tGenBank\tCDS\t41636\t41839\t.\t+\t1\tID=CPT_Pipo_058.p01;Name=CPT_Pipo_058;Note=1.12427e-36,Acinetobacter phage vB_AbaP_B5;Parent=CPT_Pipo_058.t01;product=hypothetical protein;\n+Pipo\tGenBank\texon\t41636\t41839\t.\t+\t1\tParent=CPT_Pipo_058.t01;\n+Pipo\tGenBank\tRBS\t41624\t41628\t.\t+\t1\tName=CPT_Pipo_058;Parent=CPT_Pipo_058;\n'