Repository 'pharokka'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/pharokka

Changeset 0:1d4fcd38d899 (2023-03-02)
Next changeset 1:f05c3732764a (2023-03-22)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/pharokka commit 4f57302e4d14a2b32bcd9d41d25d987a29317bf3
added:
macros.xml
pharokka.xml
test-data/SAOMS1.fasta
test-data/pharokka.gbk
test-data/pharokka.gff
test-data/pharokka_db.loc
test-data/subset_pharokka_db/5Jan2023_data.tsv
test-data/subset_pharokka_db/5Jan2023_genomes.fa.msh
test-data/subset_pharokka_db/CARD
test-data/subset_pharokka_db/CARD.dbtype
test-data/subset_pharokka_db/CARD.index
test-data/subset_pharokka_db/CARD.lookup
test-data/subset_pharokka_db/CARD.source
test-data/subset_pharokka_db/CARD_h
test-data/subset_pharokka_db/CARD_h.dbtype
test-data/subset_pharokka_db/CARD_h.index
test-data/subset_pharokka_db/VFDB_setB_pro.fas
test-data/subset_pharokka_db/aro_index.tsv
test-data/subset_pharokka_db/phrog_annot_v4.tsv
test-data/subset_pharokka_db/phrog_hhm_db
test-data/subset_pharokka_db/phrog_hhm_db.index
test-data/subset_pharokka_db/phrogs_db
test-data/subset_pharokka_db/phrogs_db.dbtype
test-data/subset_pharokka_db/phrogs_db.index
test-data/subset_pharokka_db/phrogs_profile_db
test-data/subset_pharokka_db/phrogs_profile_db.dbtype
test-data/subset_pharokka_db/phrogs_profile_db.index
test-data/subset_pharokka_db/phrogs_profile_db_consensus
test-data/subset_pharokka_db/phrogs_profile_db_consensus.dbtype
test-data/subset_pharokka_db/phrogs_profile_db_consensus.index
test-data/subset_pharokka_db/phrogs_profile_db_h
test-data/subset_pharokka_db/phrogs_profile_db_h.dbtype
test-data/subset_pharokka_db/phrogs_profile_db_h.index
test-data/subset_pharokka_db/phrogs_profile_db_seq
test-data/subset_pharokka_db/phrogs_profile_db_seq.dbtype
test-data/subset_pharokka_db/phrogs_profile_db_seq.index
test-data/subset_pharokka_db/phrogs_profile_db_seq_h
test-data/subset_pharokka_db/phrogs_profile_db_seq_h.index
test-data/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta
test-data/subset_pharokka_db/vfdb
test-data/subset_pharokka_db/vfdb.dbtype
test-data/subset_pharokka_db/vfdb.index
test-data/subset_pharokka_db/vfdb.lookup
test-data/subset_pharokka_db/vfdb.source
test-data/subset_pharokka_db/vfdb_h
test-data/subset_pharokka_db/vfdb_h.dbtype
test-data/subset_pharokka_db/vfdb_h.index
tool-data/pharokka_db.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 1d4fcd38d899 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Shared macros and tokens imported by pharokka.xml.
+         Leaf element text is kept on a single line so that no indentation
+         whitespace becomes part of the value. -->
+    <!-- Version and profile tokens used in the attributes of the tool element. -->
+    <token name="@TOOL_VERSION@">1.2.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <!-- bio.tools cross-reference. -->
+    <xml name="biotools">
+        <xrefs>
+            <xref type="bio.tools">pharokka</xref>
+        </xrefs>
+    </xml>
+    <!-- Runtime packages: pharokka (pinned to the tool version token) and zip,
+         which the command section uses to build the optional output archive. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">pharokka</requirement>
+            <requirement type="package" version="3.0">zip</requirement>
+        </requirements>
+    </xml>
+    <!-- Command that prints the installed pharokka version. -->
+    <xml name="version">
+        <version_command>pharokka.py --version</version_command>
+    </xml>
+    <!-- Publication to cite, referenced by DOI. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btac776</citation>
+        </citations>
+    </xml>
+    <!-- Creator metadata for this wrapper. -->
+    <xml name="creator">
+        <creator>
+            <person givenName="Paul" familyName="Zierep" email="zierep@informatik.uni-freiburg.de" />
+        </creator>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 1d4fcd38d899 pharokka.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pharokka.xml Thu Mar 02 16:21:40 2023 +0000
[
@@ -0,0 +1,143 @@
+<tool id="pharokka" name="bacteriophage annotation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>rapid standardised annotation tool for bacteriophage genomes and metagenomes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools" />
+    <expand macro="requirements" />
+    <expand macro="version" />
+    <command detect_errors="exit_code">
+        <![CDATA[
+
+        ## Annotate the input genome; every result file is written to pharokka_output/.
+        pharokka.py
+        -i '$fasta'
+        -o pharokka_output
+        -d '$db_cached.fields.path'
+        -t \${GALAXY_SLOTS:-8}
+        $gene_predictor
+        $meta
+        -e '$evalue'
+        ## The re-orientation flags are the only part that depends on the terminase selector.
+        #if str( $terminase.terminase_selector ) == 'run_terminase':
+            --terminase
+            --terminase_strand '$terminase.terminase_strand'
+            --terminase_start '$terminase.terminase_start'
+        #end if
+
+        ## Optionally bundle the complete output directory into a zip archive.
+        #if $zip_output == 'true':
+            && zip -r out.zip pharokka_output
+        #end if
+        ]]>
+    </command>
+    <inputs>
+        <!-- the genome -->
+        <param type="data" name="fasta" format="fasta" help="Please upload a genome file of a bacteriophage in fasta format." label="Bacteriophage genome" />
+        <!-- database choices come from the pharokka_db data table; the path field of the selected entry is the value given to the -d option -->
+        <param name="db_cached" type="select" label="Using built-in pharokka DB" help="Using built-in pharokka DB">
+            <options from_data_table="pharokka_db" />
+            <validator type="no_options" message="A built-in pharokka DB is not available. Please ask the galaxy admins to install one on the server." />
+        </param>
+        <!-- the option values already contain the -g flag and are inserted into the command as they are -->
+        <param name="gene_predictor" type="select" label="User specified gene predictor">
+            <option value="-g phanotate">Phanotate</option>
+            <option value="-g prodigal">Prodigal</option>
+        </param>
+        <param name="meta" type="boolean" checked="false" truevalue="--meta" falsevalue="" label="meta mode for metavirome input samples" />
+        <param name="evalue" type="float" value="1E-5" min="1E-20" max="10" label="E-value threshold for mmseqs2 PHROGs database search. Defaults to 1E-05." />
+        <!-- optional arguments -->
+        <conditional name="terminase">
+            <param name="terminase_selector" type="select" label="Runs - terminase large subunit - re-orientation mode. Single genome input only and requires --terminase_strand and --terminase_start to be specified.">
+                <option value="no_terminase">Do not run 'terminase large subunit' re-orientation mode.</option>
+                <option value="run_terminase">Runs 'terminase large subunit' re-orientation mode.</option>
+            </param>
+            <when value="no_terminase" />
+            <when value="run_terminase">
+                <param name="terminase_strand" type="select" label="Strand of terminase large subunit.">
+                    <option value="pos">Positive</option>
+                    <option value="neg">Negative</option>
+                </param>
+                <param name="terminase_start" type="integer" value="1" label="Start coordinate of the terminase large subunit." />
+            </when>
+        </conditional>
+        <!-- optional outputs -->
+        <param name="zip_output" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Create a Zip archive of the complete output for further investigation." />
+    </inputs>
+    <outputs>
+        <data name="pharokka_gbk" format="genbank" from_work_dir="pharokka_output/pharokka.gbk" label="${tool.name} on ${on_string}: Genbank" />
+        <data name="pharokka_gff" format="gff" from_work_dir="pharokka_output/pharokka.gff" label="${tool.name} on ${on_string}: GFF" />
+        <!-- out.zip is only created when zip_output is checked, hence the filter -->
+        <data name="archive_output" format="zip" from_work_dir="out.zip" label="${tool.name} on ${on_string}: zip of the complete output">
+            <filter>zip_output</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- test input from DB - no zip -->
+        <test expect_num_outputs="2">
+            <param name="db_cached" value="pharokka_db" />
+            <param name="fasta" value="SAOMS1.fasta" />
+            <param name="zip_output" value="false" />
+            <!-- check file size and text since output is non-deterministic -->
+            <output name="pharokka_gbk">
+                <assert_contents>
+                    <has_size value="353875" delta="10" />
+                    <has_text text="VERSION     MW460250_1" />
+                </assert_contents>
+            </output>
+            <output name="pharokka_gff">
+                <assert_contents>
+                    <has_size value="191497" delta="10" />
+                    <has_text text="##sequence-region MW460250_1 1 140135" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- test input from DB - with zip -->
+        <test expect_num_outputs="3">
+            <param name="db_cached" value="pharokka_db" />
+            <param name="fasta" value="SAOMS1.fasta" />
+            <param name="zip_output" value="true" />
+            <!-- check file size and text since output is non-deterministic -->
+            <output name="pharokka_gbk">
+                <assert_contents>
+                    <has_size value="353875" delta="10" />
+                    <has_text text="VERSION     MW460250_1" />
+                </assert_contents>
+            </output>
+            <output name="pharokka_gff">
+                <assert_contents>
+                    <has_size value="191497" delta="10" />
+                    <has_text text="##sequence-region MW460250_1 1 140135" />
+                </assert_contents>
+            </output>
+            <!-- check created zip -->
+            <output name="archive_output">
+                <assert_contents>
+                    <has_archive_member path=".*\/pharokka\.gff" />
+                    <has_archive_member path=".*\/pharokka\.gbk" />
+                    <has_archive_member path=".*\/pharokka.*\.log" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+        Pharokka is a rapid standardised annotation tool for bacteriophage genomes and metagenomes.
+        Pharokka identifies predicted coding sequences (CDS), transfer RNAs (tRNAs),
+        transfer-messenger RNAs (tmRNAs) and clustered regularly interspaced short
+        palindromic repeats (CRISPRs), providing functional annotation for CDS using the PHROGs database.
+
+        Pharokka requires assembled DNA sequences in FASTA format.
+        For phage isolates, this usually consists of one complete contig,
+        but Pharokka can also annotate incomplete assemblies or metavirome samples with multiple
+        contigs in the multiFASTA format.
+        Furthermore, metagenomically assembled phage genomes and genomic contigs,
+        derived using programs such as VirSorter2, Hecatomb and Cenote-taker 2,
+        are also suitable to be annotated using Pharokka using meta mode.
+
+        If you are looking for rapid standardised annotation of bacterial genomes, please use bakta.
+        ]]>
+    </help>
+    <expand macro="citations" />
+    <expand macro="creator" />
+</tool>
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/SAOMS1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SAOMS1.fasta Thu Mar 02 16:21:40 2023 +0000
b
b'@@ -0,0 +1,2003 @@\n+>MW460250_1\n+CCCCACCCCGCCATCCCCGATTCATGAGCTATGTTCTAAGTCGATACCATTTAATAAGATAGGGTCATCT\n+TCTTTACCTACCATATAATCAGATAGTAAGTCTGCTTCAGCTTTTTGCCCTGGTCGTGATAGTTTAGATT\n+TCTTAGTTTCAATACGCATAATGTGACCATTGTATTAAATAATTAGAATACTATTTTAAAAGATTCTATT\n+CTGTTTGGATTAATATATACTTGAGGTGAAGTTATAGCACTTTCAGTATATACTTTTATAGAGGTTTCAT\n+CCATTCCTCTTAACATATAATCTATATCTTGCCTATTGTAACTCTTTTCATCAGTAGATACTAAAAAGTA\n+TTTAGCTCCACTTGACATTGTTATTTCAATATGTTTTGACATCTACAATCTCTCCTATGCAAATTTGTTA\n+AAGACAAAGGATAATATAGCTCCTAGAACAAGTAAAAGAACCTTCTCAGTTGTATCCTTTTTCTCAGTAT\n+CCTTAGTTTTTGTACTTTCAGCAAGTTCTGAAATCTTTTCATCAAGTCTTTCTAATTGGACGTAAATTGC\n+TGATTGTTTTTCACTATTGACAGCTACATCTTTATCTATACTAACTATCATTTTTCTTAGTTCAGCTACC\n+TCAACTTCTAAATCTTTGAAAGTTCCTCTATCTATATAATTACCTTCTTGTATCTTAGACTTAATAGTTT\n+CTACTTGAGAAACAAGGTTGTTTATCTCCTTATCCAACTAGAATCACCTCTAAGGTCTAACCGTTTCAGA\n+TTCAGAATGGATATCATAATTTTCTAAGAAATCATTGATAATCTCCATATAATTATCCGTAACGACTTTT\n+CCGTAAGATGTTTTTGTATCAATTTCAAACCTAAGCTTACCAAAACTTTGGAGGTCTAATTCTTTTATTA\n+CAATATTAGGGTCATCAGAAGGAAGGTAATAATAGTCGAAGTATATAATTGAGCCATTTATTAATACTCT\n+GTCTATTCTATAGACGTGGAAATAGCGTCTGTCTCTTTTAAAATGGGCTAGTGCATCTTTAAACTCTAAC\n+TTAAGGATATCCTTATATTTAATCAAAGTGGTAACCTCCTTACTATTAATTTTTAAATTTACTTATTTTG\n+TGGTATAATAGTTATGATAAAGGCAGTTATTATAATTATATTAAGAATAATGATAATAATTATTTTTTCT\n+GAGAAAATAAGCCAAATACTAAAAACAGATAAAGCATAGATAGCTGATAGATATACTATATTAAGAGTTA\n+CCTTACTTTTATCTTTTCTATAGATAGAATAACCTAAAGACGTTGTAACACCACTAAGTATAAAATAATA\n+GAAACAAAAAAGAGGTATAGACAGAAAAAAAGATACGATAATCATTGTTAAACACCTATTTCTTTTTGAC\n+CTATTATTTCTAGAACTTTTAGATTACACCACTAATATAACATTAAAAGCCAGTCATAAAAGTCAATTGT\n+TAGATTAATAATATAATAAAAAAAGACAATAGGAGGTTAAAGTGGTTGAATAATAACATAGCTATATTCA\n+TATTCAAAACACTGGTTATCATTATATTCTTACTACTAATTTTGTCTGTTATTAATTCCTTGTCCCTTAT\n+TTACTCAATAAGACCGAGTGTAGTTATGACATACTTTATCTTTGGTGGTATTGTTTCTAATGTCGCACTT\n+ACTGTAACAGATAAGTTCTTACTGAAGAAAGAAGACCCCCTACCTGAATATGTTCTTAAAAAAGTAGAGA\n+TAAATGATAAAGAAATAAGAATAATCAAGAAAATAATAGAAAGTAATTATGGTATAACAGCAGAAGAGAT\n+AAAAGTTAGGGCTAAAGCACAAAGAAGAGTAGAGGAAGATAGTAAAAAGGAAGATTACAATGAAA
ACAAA\n+GAAAGAAATTAAAGAACAAAGGAAAGAACTTAAGGATGGTGCTACATCTGTTTCTTTAGTAAAAAAGGGA\n+GATAAGAGAATAGCTAGCCCTAGTAGAATCTGTAGTCTATGTGGTCAGCAGTTATCAGGTATGAATTACA\n+CTAAAGGAAAAGCATTATCAAAAGTTAATCATTTTCATTTACAGTATTCTAAGTATATTTATTTTGATAT\n+TTGCGCAGATATCAACAATTGTTATAAAAATTTAAGAAAACGAGGTGAAATGGATTGAGTGCAGAAAATA\n+TTAGAGATATAATTAACAAGAAAAAGTTAGAAGAAGAGGATACAAGAAAATATATAGCTGATGGGTTTAT\n+GAATGGTATCGGTAAATTAATGTACGAATTCAATAAGAAAGTAGATAACAAAGAAATAGAAGTTAAAGAC\n+CCGAATGATTTATACAAATTATTTGTGATATTCTCTCAAATGCAAAATATGGTCAATGAAACTTCTGAAG\n+GAGGAGCAATACCTCAACTATCTAGACCTCAACAGGAATTATTTGATGAGATTACAACAGAAGATAGTAA\n+TGGAGAATCTACAGTTGATTTACAGAAGATATCAGAAATGTCAGCGGAAGATATTACAGCAATGATTTCT\n+GAAAAGGAAAAAGTAATGAATGAGGAAAATTCAGAAACATTCTAAGGAGAAAGATATAAATGGATGGAAA\n+AGAACTAATTAAGATAGCACAAGAAACATTTCAAACTGAAAAAATAACAAGAGAACAGATAGACCATATA\n+ATCAATATGCTAAATCCTTCTACCTATATGCTTAAGTATCATACACTGAGAGGGCATCCTATAACTTTTA\n+GTATTCCTAATAGAGATAGAAGTAAAGCACAGGCTCATAGACCGTAAATTTTGTTGCGGTCTCTAAACTC\n+TGTTAAACGGGCATAGTAAAATAATGTATAGAAAAATAATATACTGGTAAGAAACCCTAAATCTGACTAT\n+ACACAGACAGTGGGAACCAACCGTGCTAAATCTGTAATTTAATTATTGACAAAATAACAATATATGATAT\n+AATTATATTATAGTAAAAGCCTAACGACTAAATTTCTAGTTATCTATATAAATGGATATAGTGAGAACTA\n+GATAAGAAAATTAACTATACTTAAAAAAACTAAAATATTTTAAAAGAGAGGTGAAAACAGTTGACATTAG\n+AAAAGAGAAAACAAGAATATCTTAAGAAGTTAAAGCAAATTAAAAATGATGAGTTTGAATTGCTAGGAGG\n+ATTTACTAAAACAAGAGAGAAAGCTTTATTTAAACATAAAGTTTGTGGTTATGAATGGTATACTACTCCT\n+TATAATTTATTGAAGTCTAAAGGGACAGGATGTCCTAAATGTCAATACAGAGATAAATCATATACTACTG\n+ATGAATTTAAAAAGAAACTTAAAGATAAATTTGGTTATGAATATGAGTTAATAGAAGGACAAGAGTATAA\n+AAATAGTAGAGAAAAATTATTGTTTATTCATAATAAGTGCGGTACTGAATTTAAAATTACAAGTGATAGT\n+TTATTTCGAAGTAAAGTACCTTGTCATAAATGTTCTAAAGAAAATAGAAAAACTAAAAAGAAAACAACAG\n+AACAGTTTAAAAATGAATTGTATAATAAACATAAAGATGAATATATACTTGTTGAAGGGTCAGAATATAA\n+GACAGCTCTTGAAAAGGTTAGAATAATTCATACGAAATGTGGATATACATGGGATGTTAGAGCCTCACAT\n+ATTTTGCATACTAGTAAATGTCCTAATTGTAATGAGTCTAAAGGTGAGAGTTTAATTAAAGACATTCTTG\n+AAGATAATAATTTTTCTTATATAAGAGAGTATACTTTTGAAGATTTAAAGAATGTTAAAAAATTACCTTT\n+TGATTTTGCACTATTCATTGA
TAATGAATTAGTAGGTTTAATTGAATATGATGGTTCTCAACACTTTATT\n+CCTTTT'..b'TAAATTTCCTACACTATTATCATTTTTACCATTTAAATGGTTA\n+ACTTGTTCTTTATTATCAGGATTAGGTATAAAAGCCATAGCAACTAAACGATGTATTTTAGGTGAATGGT\n+ATCGTAACCTTACAAACAAGTAACCCTTGTTATTTTTTTGAAGTTTTAACTTTTTAGGCTCTTTACCTTT\n+ATAAGATATTACTTCTCCTTTATCAGTAATAGTGTAATTTTCATATATTTCTAATCCAGGTATTTCATTT\n+AATTTCTTTTCCATAATAACATCTCCTTTACTTAAGTATATAGGAAAGTTATTATGTTGTCAAGTAGTTT\n+TTTAAAACAATATTCTTGGATGCTGATTAGCATAGCTTGATAGCCTTAGCCTCCCAGTCAATTAAAAGAA\n+CTTTCATTATACATTACTGTATAACAGGGCAATTTATTTACCCGTGTGCCAAGCAATTTGATTCTTAGCA\n+TCTATTGCTTCCCATACATAACCTTCAGAGCCGTAGTAATGAGCAATACCATTAGCGTATCTAGCATAAC\n+CTGCATTAGCTAATGAATTCTCGTATTGTTGTCCTGAAGAACGACCTGCATCGTTGTGTATTACCATTCC\n+TTCAGGTTTTTTACCACGTTTATCCATTGTATAGTTAATGTGATTCTTAGAAACTTTTAGTGTTGCTTTC\n+TTTTTAGGTGCAGGCGTTTTACTTGCGCTTTTCTTAGCTGTTTCTTTTTTAACAGTAGTTCCTGCTTTTA\n+CAGGTATTTCAATGAAGTGAGTTAATCCGTAATAATTATCTACACGTTTTGTAGGTTTTTTATTAGCATA\n+ACCATTCCAGTTTTGCTCTAAAATAGTAAATGTAGAAGTATTACCTCCATCATATACAATACCTATGTGA\n+CCCCACTGTTCATAACTACCGGATGTAAATACCGCAATCCAACCTTTTTTAGGTACAGTAGAAGGTTTAT\n+TTTCATGTATTTTAAATCCAGTACCATAACTCTGTTTAATTTGGTCTTTAGCATTACCCCAAGTTCTAAC\n+TTTATTATCTGTTAACCATAAAACATAGTCTGTAATAAGGTCTTGACACTGAGCGTGATAGTAACCATCT\n+GCATCAATGGCTCCTGCTTCCATTACACCAAATGATGGGTCATAACTTGTAGCTTTTTTAACTCTGTAAG\n+GGCTATCTACTGTTCCTTTTGCATAAGCATCTAAACGTTTATTTATTTCTGCTTGAGTCTTAGCCATTAC\n+TTAACTTCCTCCTCTGCAAATACTTTACCATGTTCCTCGGTATCTTCTTCATCTTGAGAAGGTGCTGAAC\n+CACCATCAATTTCATCTTCAATAGCAGGTACTTCATCACTATCATCTGTGTCAGGTTCTGCATTGTTTTC\n+GTAGCTGTCTATCTCAAAAGTACTAGTGTTATTTGCATTTGCTTGCCATTGAACGAATTCATTAGGGTCT\n+TTACTATCACGAGGTTTAAGATAGTCTGTTTGAACAATATCACTATCTTTAAGACCTTTAGTATTATTAT\n+CAACAATAATACCTAAACCTGCTAATAGTGTTAGTATAGAACCTACAATATTTACACCTTGCTCAATTTG\n+AGCTGAGTAGTCTAAACCGAAAGCACCTATAATTTGGTTAGCAAATAATGCTACTGCTGATATAATTGCT\n+ACCCAAAATGTTTTGCTCTTAGTTCTTGTGCTAAGGTTTATTCCTCCAACAACTTTAGGTTGTTTAGTTT\n+CATTAGCCATTAAAAAACCGACCTTTCTATTATATTTATTTCTAACAATAATATAACAGTAGGTCGGTCA\n+TGTTTATCTATATTAATTTAACACTTACTCATTAATTTGGTTTAGTTTTTTGATAACTTCAGACAT
TTGT\n+TTGTTATCTAAATCTTCTAATTTAGTTTCAGGAAGTAGCTCTAACTTATCCCAAACTTCTTCTTTATTAG\n+ATACTTTATTATTAATAATTGCCTTACCAACTAAACTTTCCGTATAATATAATTGTTTTGCTGATGCCAT\n+TTGTATCTCTCCTTTTAAATATGTAAAGTATATAGCTAGTATCGTATCCTAGGAACAAACACTTGCGCTA\n+TATACTCAATGAAATCCTACCCTCATTCGAGGACACAGCAAACCGGTTCGTCAACCGCACATATGAATTC\n+TCAGATTTCATTTATGTAAAACACACCCTCTTTGATTTGCACAAAGACTAAGGGTTTTGGAGACCCTTGT\n+ACTACTAATTATACTAAGGGTGTTTATTATGGTTTCTATTGGATTTGAACCAATGACACCTAGAGCTTCA\n+ATCTAGTGCTCTACCATCTGAGCTAAGAAACCTTAAAACGACCCATACGAGACTCGAACTCGTACTCTCT\n+GCCGTGACAGGGCAGTGTGTTAACCAGTTACACCAATGAGCCAAAATTATAATGCTATACCCTAACCTTA\n+CCTTAATGTATAGCAGGTTTTTATATAAGCTCGAAGCAACGATTATTACCACTCATAACAACTATATATT\n+AAGTGAAAGGAGGTGAAATGAACAAAACGTGGTAATTGGTACTTATATAGGAAATATGTATAATCTACAA\n+GGAGTAAGTTATTGGTTCATAAAGGAGTGTGAACAATAAATACATGAAAGAGTGAAAGTTTACTCCCTGT\n+AGATTCTTTTTTTAATTATCAATCAAAGGAGGAAACTGATAATTGTTAATAATAAACTATAAAGAGGAAA\n+ATATTTATAGTCACATTCTGATATAATGCAACTAAATATCCAAGCATAACCCGTCTCACGAGGAACCTAC\n+CTATAAGACCTGTTATTAAGTGAATCACTACGATTGACTCTATTAAGGAGCTACCTTAAGTCCATCTCAC\n+GCAATTTAAAAGGGACTTACAAACCGTAAAACGGTAATAAGTTTATTAAATAATGTGATATTAACATATT\n+AGTTAATAACTTTCACATGGTCGAAGAAAAGTAAATTTATTTGATTACCAAATTATTTTTATCAAATATA\n+GCTCTTTTGAACCTGTAGATTTATGCTACTTATACTGATAACCTCTATTATCTAACACATTTCTGTGCTC\n+CAACTACAGTTAGTCGTTACAGCGTATCTTTCTAGGATTCCGCTAAGACCCTAAAAAGAAATTAAACCCT\n+AGCCGTTATCATACTCTACAGACCTTATAAGTAAGTACCAAGTATACCAATCGTATTTAACAATACTAAT\n+GACGACCCATCCTACCGATATATCTCCGATAGGTTTTGATTCGTTTGATTATCTTGTACCTTATGACTAC\n+CAAATCATTATTCAGTCACTATGCTCAGATATTTAGTTGTATTATTTATATATTAATTATAACATAATTT\n+TTATTACTTGTCAAGTTAATTTCAAAAAAATTATAGAAGTAGGGACGTTTACCTACTTCTATTTAATTTA\n+CACAAGGATGATAACATTGTTATTGTTTTATACTGGAAAACAATGTAATAAAAACAGTGATGTGTAAGGT\n+ATTTGTTTTATTGTTAATTATATTATAGCATATACTGATACCTTTGTCAAGTTAATTTAATACTTTTTTT\n+AAAACATTAGTTATCTTTTGTTAGTTCCTCCTGAATAGCATCCCATCTTCTTTCTGCTTCACTACGATTA\n+TCTTCTATATGTTTTGTAGTTTTACAACATTTGATACAATATATATCTTTGATATGACCTTCTTCTCTTT\n+TATTTGCTCTTTTTCTTGGTACTTTGAATACATTTCCACATTCTTTACATATTAAACTTGAGTAAAACAT\n+TTTTTGTCTTTTCATAATTAAT
CAATTCCTTTTCTCTTTTATTTGATAATTTAACTATATACTATATTGA\n+TAAATAAGTCAACAGTTTTCTAAAAATAATTTAAATTATTTTGAAGAATACTTTAATATCAAGGG\n'
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/pharokka.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pharokka.gbk Thu Mar 02 16:21:40 2023 +0000
b
b'@@ -0,0 +1,5739 @@\n+LOCUS       MW460250_1            140135 bp    DNA     linear   VRL 14-FEB-2023\n+DEFINITION  MW460250_1.\n+ACCESSION   MW460250_1\n+VERSION     MW460250_1\n+KEYWORDS    .\n+SOURCE      .\n+  ORGANISM  .\n+            .\n+FEATURES             Location/Qualifiers\n+     CDS             complement(22..159)\n+                     /ID="SLQLISVF_CDS_0001"\n+                     /phrog="No_PHROG"\n+                     /top_hit="No_PHROG"\n+                     /locus_tag="SLQLISVF_CDS_0001"\n+                     /function="unknown function"\n+                     /product="hypothetical protein"\n+                     /source="PHANOTATE"\n+                     /score="-15.399247982708186"\n+                     /phase="0"\n+                     /translation="MRIETKKSKLSRPGQKAEADLLSDYMVGKEDDPILLNGIDLEHSS\n+                     "\n+     CDS             complement(183..392)\n+                     /ID="SLQLISVF_CDS_0002"\n+                     /phrog="No_PHROG"\n+                     /top_hit="No_PHROG"\n+                     /locus_tag="SLQLISVF_CDS_0002"\n+                     /function="unknown function"\n+                     /product="hypothetical protein"\n+                     /source="PHANOTATE"\n+                     /score="-39.52422521577095"\n+                     /phase="0"\n+                     /translation="MSKHIEITMSSGAKYFLVSTDEKSYNRQDIDYMLRGMDETSIKVY\n+                     TESAITSPQVYINPNRIESFKIVF"\n+     CDS             complement(405..737)\n+                     /ID="SLQLISVF_CDS_0003"\n+                     /phrog="No_PHROG"\n+                     /top_hit="No_PHROG"\n+                     /locus_tag="SLQLISVF_CDS_0003"\n+                     /function="unknown function"\n+                     /product="hypothetical protein"\n+                     /source="PHANOTATE"\n+                     /score="-307.9875183481112"\n+                     /phase="0"\n+                     
/translation="LDKEINNLVSQVETIKSKIQEGNYIDRGTFKDLEVEVAELRKMIV\n+                     SIDKDVAVNSEKQSAIYVQLERLDEKISELAESTKTKDTEKKDTTEKVLLLVLGAILSF\n+                     VFNKFA"\n+     CDS             complement(750..1076)\n+                     /ID="SLQLISVF_CDS_0004"\n+                     /phrog="No_PHROG"\n+                     /top_hit="No_PHROG"\n+                     /locus_tag="SLQLISVF_CDS_0004"\n+                     /function="unknown function"\n+                     /product="hypothetical protein"\n+                     /source="PHANOTATE"\n+                     /score="-8.448608623432387"\n+                     /phase="0"\n+                     /translation="LIKYKDILKLEFKDALAHFKRDRRYFHVYRIDRVLINGSIIYFDY\n+                     YYLPSDDPNIVIKELDLQSFGKLRFEIDTKTSYGKVVTDNYMEIINDFLENYDIHSESE\n+                     TVRP"\n+     CDS             complement(1109..1375)\n+                     /ID="SLQLISVF_CDS_0005"\n+                     /phrog="No_PHROG"\n+                     /top_hit="No_PHROG"\n+                     /locus_tag="SLQLISVF_CDS_0005"\n+                     /function="unknown function"\n+                     /product="hypothetical protein"\n+                     /source="PHANOTATE"\n+                     /score="-7.658463659485386"\n+                     /phase="0"\n+                     /translation="MIIVSFFLSIPLFCFYYFILSGVTTSLGYSIYRKDKSKVTLNIVY\n+                     LSAIYALSVFSIWLIFSEKIIIIIILNIIIITAFIITIIPQNK"\n+     CDS             complement(1470..1562)\n+                     /ID="SLQLISVF_CDS_0006"\n+                     /phrog="No_PHROG"\n+                     /top_hit="No_PHROG"\n+                     /locus_tag="SLQLISVF_CDS_0006"\n+                     /function="unknown function"\n+                     /product="hypothetical protein"\n+                     /source="PHANOTATE"\n+                     /score="-0.9164331843686035"\n+                     /phase="0"\n+                     /translation="MITSVLNMNIAMLLFNHFNLLLSFFIILLI"\n+     CDS   
          1636..1902\n+                     /ID="SLQLISVF_CDS_0007"\n+                     /phrog="No_PHROG"\n+                     /top_hit="No_P'..b'attatc tacacgtttt\n+   137041 gtaggttttt tattagcata accattccag ttttgctcta aaatagtaaa tgtagaagta\n+   137101 ttacctccat catatacaat acctatgtga ccccactgtt cataactacc ggatgtaaat\n+   137161 accgcaatcc aacctttttt aggtacagta gaaggtttat tttcatgtat tttaaatcca\n+   137221 gtaccataac tctgtttaat ttggtcttta gcattacccc aagttctaac tttattatct\n+   137281 gttaaccata aaacatagtc tgtaataagg tcttgacact gagcgtgata gtaaccatct\n+   137341 gcatcaatgg ctcctgcttc cattacacca aatgatgggt cataacttgt agctttttta\n+   137401 actctgtaag ggctatctac tgttcctttt gcataagcat ctaaacgttt atttatttct\n+   137461 gcttgagtct tagccattac ttaacttcct cctctgcaaa tactttacca tgttcctcgg\n+   137521 tatcttcttc atcttgagaa ggtgctgaac caccatcaat ttcatcttca atagcaggta\n+   137581 cttcatcact atcatctgtg tcaggttctg cattgttttc gtagctgtct atctcaaaag\n+   137641 tactagtgtt atttgcattt gcttgccatt gaacgaattc attagggtct ttactatcac\n+   137701 gaggtttaag atagtctgtt tgaacaatat cactatcttt aagaccttta gtattattat\n+   137761 caacaataat acctaaacct gctaatagtg ttagtataga acctacaata tttacacctt\n+   137821 gctcaatttg agctgagtag tctaaaccga aagcacctat aatttggtta gcaaataatg\n+   137881 ctactgctga tataattgct acccaaaatg ttttgctctt agttcttgtg ctaaggttta\n+   137941 ttcctccaac aactttaggt tgtttagttt cattagccat taaaaaaccg acctttctat\n+   138001 tatatttatt tctaacaata atataacagt aggtcggtca tgtttatcta tattaattta\n+   138061 acacttactc attaatttgg tttagttttt tgataacttc agacatttgt ttgttatcta\n+   138121 aatcttctaa tttagtttca ggaagtagct ctaacttatc ccaaacttct tctttattag\n+   138181 atactttatt attaataatt gccttaccaa ctaaactttc cgtataatat aattgttttg\n+   138241 ctgatgccat ttgtatctct ccttttaaat atgtaaagta tatagctagt atcgtatcct\n+   138301 aggaacaaac acttgcgcta tatactcaat gaaatcctac cctcattcga ggacacagca\n+   138361 aaccggttcg tcaaccgcac atatgaattc tcagatttca tttatgtaaa acacaccctc\n+   138421 tttgatttgc acaaagacta 
agggttttgg agacccttgt actactaatt atactaaggg\n+   138481 tgtttattat ggtttctatt ggatttgaac caatgacacc tagagcttca atctagtgct\n+   138541 ctaccatctg agctaagaaa ccttaaaacg acccatacga gactcgaact cgtactctct\n+   138601 gccgtgacag ggcagtgtgt taaccagtta caccaatgag ccaaaattat aatgctatac\n+   138661 cctaacctta ccttaatgta tagcaggttt ttatataagc tcgaagcaac gattattacc\n+   138721 actcataaca actatatatt aagtgaaagg aggtgaaatg aacaaaacgt ggtaattggt\n+   138781 acttatatag gaaatatgta taatctacaa ggagtaagtt attggttcat aaaggagtgt\n+   138841 gaacaataaa tacatgaaag agtgaaagtt tactccctgt agattctttt tttaattatc\n+   138901 aatcaaagga ggaaactgat aattgttaat aataaactat aaagaggaaa atatttatag\n+   138961 tcacattctg atataatgca actaaatatc caagcataac ccgtctcacg aggaacctac\n+   139021 ctataagacc tgttattaag tgaatcacta cgattgactc tattaaggag ctaccttaag\n+   139081 tccatctcac gcaatttaaa agggacttac aaaccgtaaa acggtaataa gtttattaaa\n+   139141 taatgtgata ttaacatatt agttaataac tttcacatgg tcgaagaaaa gtaaatttat\n+   139201 ttgattacca aattattttt atcaaatata gctcttttga acctgtagat ttatgctact\n+   139261 tatactgata acctctatta tctaacacat ttctgtgctc caactacagt tagtcgttac\n+   139321 agcgtatctt tctaggattc cgctaagacc ctaaaaagaa attaaaccct agccgttatc\n+   139381 atactctaca gaccttataa gtaagtacca agtataccaa tcgtatttaa caatactaat\n+   139441 gacgacccat cctaccgata tatctccgat aggttttgat tcgtttgatt atcttgtacc\n+   139501 ttatgactac caaatcatta ttcagtcact atgctcagat atttagttgt attatttata\n+   139561 tattaattat aacataattt ttattacttg tcaagttaat ttcaaaaaaa ttatagaagt\n+   139621 agggacgttt acctacttct atttaattta cacaaggatg ataacattgt tattgtttta\n+   139681 tactggaaaa caatgtaata aaaacagtga tgtgtaaggt atttgtttta ttgttaatta\n+   139741 tattatagca tatactgata cctttgtcaa gttaatttaa tacttttttt aaaacattag\n+   139801 ttatcttttg ttagttcctc ctgaatagca tcccatcttc tttctgcttc actacgatta\n+   139861 tcttctatat gttttgtagt tttacaacat ttgatacaat atatatcttt gatatgacct\n+   139921 tcttctcttt tatttgctct ttttcttggt actttgaata catttccaca ttctttacat\n+   
139981 attaaacttg agtaaaacat tttttgtctt ttcataatta atcaattcct tttctctttt\n+   140041 atttgataat ttaactatat actatattga taaataagtc aacagttttc taaaaataat\n+   140101 ttaaattatt ttgaagaata ctttaatatc aaggg\n+//\n'
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/pharokka.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pharokka.gff Thu Mar 02 16:21:40 2023 +0000
b
b'@@ -0,0 +1,2589 @@\n+##gff-version 3\n+##sequence-region MW460250_1 1 140135\n+MW460250_1\tPHANOTATE\tCDS\t22\t159\t-15.399247982708186\t-\t0\tID=SLQLISVF_CDS_0001;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0001;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t183\t392\t-39.52422521577095\t-\t0\tID=SLQLISVF_CDS_0002;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0002;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t405\t737\t-307.9875183481112\t-\t0\tID=SLQLISVF_CDS_0003;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0003;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t750\t1076\t-8.448608623432387\t-\t0\tID=SLQLISVF_CDS_0004;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0004;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t1109\t1375\t-7.658463659485386\t-\t0\tID=SLQLISVF_CDS_0005;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0005;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t1470\t1562\t-0.9164331843686035\t-\t0\tID=SLQLISVF_CDS_0006;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0006;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t1636\t1902\t-288.8081246860113\t+\t0\tID=SLQLISVF_CDS_0007;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0007;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t1880\t2158\t-76.27345803195993\t+\t0\tID=SLQLISVF_CDS_0008;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0008;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t2239\t2565\t-866.1511100970354\t+\t0\tID=SLQLISVF_CDS_0009;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0009;function=unknown function;product=hypothetical 
protein\n+MW460250_1\tPHANOTATE\tCDS\t2580\t2777\t-82.44911427554912\t+\t0\tID=SLQLISVF_CDS_0010;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0010;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t3071\t4042\t-2179259576.416292\t+\t0\tID=SLQLISVF_CDS_0011;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0011;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t4183\t5730\t-664781254181330.5\t+\t0\tID=SLQLISVF_CDS_0012;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0012;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t5723\t6544\t-111100907389.75069\t+\t0\tID=SLQLISVF_CDS_0013;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0013;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t6615\t6704\t-6.909502262400356\t+\t0\tID=SLQLISVF_CDS_0014;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0014;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t6701\t7180\t-3232611.9712114143\t+\t0\tID=SLQLISVF_CDS_0015;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0015;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t7273\t8415\t-175147462430.0088\t+\t0\tID=SLQLISVF_CDS_0016;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0016;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t8558\t8842\t-706.13273617966\t+\t0\tID=SLQLISVF_CDS_0017;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0017;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t8920\t9231\t-751.5376752780932\t+\t0\tID=SLQLISVF_CDS_0018;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0018;function=unknown function;product=hypothetical 
protein\n+MW460250_1\tPHANOTATE\tCDS\t9235\t10926\t-1.1007237496073629e+17\t+\t0\tID=SLQLISVF_CDS_0019;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0019;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCDS\t11153\t11893\t-27852504.233262\t+\t0\tID=SLQLISVF_CDS_0020;phrog=No_PHROG;top_hit=No_PHROG;locus_tag=SLQLISVF_CDS_0020;function=unknown function;product=hypothetical protein\n+MW460250_1\tPHANOTATE\tCD'..b'ATCATTTTTACCATTTAAATGGTTAACTTGTTCTTTATTATCAGGATTAGGTATA\n+AAAGCCATAGCAACTAAACGATGTATTTTAGGTGAATGGTATCGTAACCTTACAAACAAG\n+TAACCCTTGTTATTTTTTTGAAGTTTTAACTTTTTAGGCTCTTTACCTTTATAAGATATT\n+ACTTCTCCTTTATCAGTAATAGTGTAATTTTCATATATTTCTAATCCAGGTATTTCATTT\n+AATTTCTTTTCCATAATAACATCTCCTTTACTTAAGTATATAGGAAAGTTATTATGTTGT\n+CAAGTAGTTTTTTAAAACAATATTCTTGGATGCTGATTAGCATAGCTTGATAGCCTTAGC\n+CTCCCAGTCAATTAAAAGAACTTTCATTATACATTACTGTATAACAGGGCAATTTATTTA\n+CCCGTGTGCCAAGCAATTTGATTCTTAGCATCTATTGCTTCCCATACATAACCTTCAGAG\n+CCGTAGTAATGAGCAATACCATTAGCGTATCTAGCATAACCTGCATTAGCTAATGAATTC\n+TCGTATTGTTGTCCTGAAGAACGACCTGCATCGTTGTGTATTACCATTCCTTCAGGTTTT\n+TTACCACGTTTATCCATTGTATAGTTAATGTGATTCTTAGAAACTTTTAGTGTTGCTTTC\n+TTTTTAGGTGCAGGCGTTTTACTTGCGCTTTTCTTAGCTGTTTCTTTTTTAACAGTAGTT\n+CCTGCTTTTACAGGTATTTCAATGAAGTGAGTTAATCCGTAATAATTATCTACACGTTTT\n+GTAGGTTTTTTATTAGCATAACCATTCCAGTTTTGCTCTAAAATAGTAAATGTAGAAGTA\n+TTACCTCCATCATATACAATACCTATGTGACCCCACTGTTCATAACTACCGGATGTAAAT\n+ACCGCAATCCAACCTTTTTTAGGTACAGTAGAAGGTTTATTTTCATGTATTTTAAATCCA\n+GTACCATAACTCTGTTTAATTTGGTCTTTAGCATTACCCCAAGTTCTAACTTTATTATCT\n+GTTAACCATAAAACATAGTCTGTAATAAGGTCTTGACACTGAGCGTGATAGTAACCATCT\n+GCATCAATGGCTCCTGCTTCCATTACACCAAATGATGGGTCATAACTTGTAGCTTTTTTA\n+ACTCTGTAAGGGCTATCTACTGTTCCTTTTGCATAAGCATCTAAACGTTTATTTATTTCT\n+GCTTGAGTCTTAGCCATTACTTAACTTCCTCCTCTGCAAATACTTTACCATGTTCCTCGG\n+TATCTTCTTCATCTTGAGAAGGTGCTGAACCACCATCAATTTCATCTTCAATAGCAGGTA\n+CTTCATCACTATCATCTGTGTCAGGTTCTGCATTGTTTTCGTAGCTGTCTATCTCAAAAG\n+TACTAGTGTTATTTGCATTTGCTTGCCATTGAACGAATTCATTAGGGTCTTTACTATCAC\n+GAGGTTTAAGATAGTCTGTTTGAACAATATCACTATCTT
TAAGACCTTTAGTATTATTAT\n+CAACAATAATACCTAAACCTGCTAATAGTGTTAGTATAGAACCTACAATATTTACACCTT\n+GCTCAATTTGAGCTGAGTAGTCTAAACCGAAAGCACCTATAATTTGGTTAGCAAATAATG\n+CTACTGCTGATATAATTGCTACCCAAAATGTTTTGCTCTTAGTTCTTGTGCTAAGGTTTA\n+TTCCTCCAACAACTTTAGGTTGTTTAGTTTCATTAGCCATTAAAAAACCGACCTTTCTAT\n+TATATTTATTTCTAACAATAATATAACAGTAGGTCGGTCATGTTTATCTATATTAATTTA\n+ACACTTACTCATTAATTTGGTTTAGTTTTTTGATAACTTCAGACATTTGTTTGTTATCTA\n+AATCTTCTAATTTAGTTTCAGGAAGTAGCTCTAACTTATCCCAAACTTCTTCTTTATTAG\n+ATACTTTATTATTAATAATTGCCTTACCAACTAAACTTTCCGTATAATATAATTGTTTTG\n+CTGATGCCATTTGTATCTCTCCTTTTAAATATGTAAAGTATATAGCTAGTATCGTATCCT\n+AGGAACAAACACTTGCGCTATATACTCAATGAAATCCTACCCTCATTCGAGGACACAGCA\n+AACCGGTTCGTCAACCGCACATATGAATTCTCAGATTTCATTTATGTAAAACACACCCTC\n+TTTGATTTGCACAAAGACTAAGGGTTTTGGAGACCCTTGTACTACTAATTATACTAAGGG\n+TGTTTATTATGGTTTCTATTGGATTTGAACCAATGACACCTAGAGCTTCAATCTAGTGCT\n+CTACCATCTGAGCTAAGAAACCTTAAAACGACCCATACGAGACTCGAACTCGTACTCTCT\n+GCCGTGACAGGGCAGTGTGTTAACCAGTTACACCAATGAGCCAAAATTATAATGCTATAC\n+CCTAACCTTACCTTAATGTATAGCAGGTTTTTATATAAGCTCGAAGCAACGATTATTACC\n+ACTCATAACAACTATATATTAAGTGAAAGGAGGTGAAATGAACAAAACGTGGTAATTGGT\n+ACTTATATAGGAAATATGTATAATCTACAAGGAGTAAGTTATTGGTTCATAAAGGAGTGT\n+GAACAATAAATACATGAAAGAGTGAAAGTTTACTCCCTGTAGATTCTTTTTTTAATTATC\n+AATCAAAGGAGGAAACTGATAATTGTTAATAATAAACTATAAAGAGGAAAATATTTATAG\n+TCACATTCTGATATAATGCAACTAAATATCCAAGCATAACCCGTCTCACGAGGAACCTAC\n+CTATAAGACCTGTTATTAAGTGAATCACTACGATTGACTCTATTAAGGAGCTACCTTAAG\n+TCCATCTCACGCAATTTAAAAGGGACTTACAAACCGTAAAACGGTAATAAGTTTATTAAA\n+TAATGTGATATTAACATATTAGTTAATAACTTTCACATGGTCGAAGAAAAGTAAATTTAT\n+TTGATTACCAAATTATTTTTATCAAATATAGCTCTTTTGAACCTGTAGATTTATGCTACT\n+TATACTGATAACCTCTATTATCTAACACATTTCTGTGCTCCAACTACAGTTAGTCGTTAC\n+AGCGTATCTTTCTAGGATTCCGCTAAGACCCTAAAAAGAAATTAAACCCTAGCCGTTATC\n+ATACTCTACAGACCTTATAAGTAAGTACCAAGTATACCAATCGTATTTAACAATACTAAT\n+GACGACCCATCCTACCGATATATCTCCGATAGGTTTTGATTCGTTTGATTATCTTGTACC\n+TTATGACTACCAAATCATTATTCAGTCACTATGCTCAGATATTTAGTTGTATTATTTATA\n+TATTAATTATAACATAATTTTTATTACTTGTCAAGTTAATTTCAAAAAAATTATAGAAGT\n+AGGGACGTTTACCTACTTCTATT
TAATTTACACAAGGATGATAACATTGTTATTGTTTTA\n+TACTGGAAAACAATGTAATAAAAACAGTGATGTGTAAGGTATTTGTTTTATTGTTAATTA\n+TATTATAGCATATACTGATACCTTTGTCAAGTTAATTTAATACTTTTTTTAAAACATTAG\n+TTATCTTTTGTTAGTTCCTCCTGAATAGCATCCCATCTTCTTTCTGCTTCACTACGATTA\n+TCTTCTATATGTTTTGTAGTTTTACAACATTTGATACAATATATATCTTTGATATGACCT\n+TCTTCTCTTTTATTTGCTCTTTTTCTTGGTACTTTGAATACATTTCCACATTCTTTACAT\n+ATTAAACTTGAGTAAAACATTTTTTGTCTTTTCATAATTAATCAATTCCTTTTCTCTTTT\n+ATTTGATAATTTAACTATATACTATATTGATAAATAAGTCAACAGTTTTCTAAAAATAAT\n+TTAAATTATTTTGAAGAATACTTTAATATCAAGGG\n'
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/pharokka_db.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pharokka_db.loc Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+pharokka_db pharokka_db Minimal DB for testing ${__HERE__}/subset_pharokka_db
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/5Jan2023_data.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/5Jan2023_data.tsv Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,2 @@
+Accession Description Classification Genome Length (bp) Jumbophage molGC (%) Molecule Modification Date Number CDS Positive Strand (%) Negative Strand (%) Coding Capacity (%) Low Coding Capacity Warning tRNAs Host Lowest Taxa Genus Sub-family Family Order Class Phylum Kingdom Realm Baltimore Group Genbank Division Isolation Host (beware inconsistent and nonsense values)
+MF417929 Uncultured Caudovirales phage clone 2F_1 Uncultured Caudovirales phage clone 2F_1 Bracchivirus U2F1 Bracchivirus Peduoviridae Caudoviricetes Uroviricota Heunggongvirae Duplodnaviria Viruses 32618 FALSE 39.218 DNA 1-Nov-22 42 16.66666667 83.33333333 89.41688638 NA 0 Unspecified Bracchivirus Bracchivirus Unclassified Peduoviridae Caudovirales Caudoviricetes Uroviricota Heunggongvirae Duplodnaviria Group I ENV Unspecified
\ No newline at end of file
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/5Jan2023_genomes.fa.msh
b
Binary file test-data/subset_pharokka_db/5Jan2023_genomes.fa.msh has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD
b
Binary file test-data/subset_pharokka_db/CARD has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD.dbtype
b
Binary file test-data/subset_pharokka_db/CARD.dbtype has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/CARD.index Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,2 @@
+0 0 298
+1 298 288
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD.lookup
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/CARD.lookup Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,2 @@
+0 ACT97415.1 0
+1 AEJ08681.1 0
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD.source
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/CARD.source Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+0 protein_fasta_protein_homolog_model.fasta
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD_h
b
Binary file test-data/subset_pharokka_db/CARD_h has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD_h.dbtype
b
Binary file test-data/subset_pharokka_db/CARD_h.dbtype has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/CARD_h.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/CARD_h.index Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,2 @@
+0 0 75
+1 75 59
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/VFDB_setB_pro.fas
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/VFDB_setB_pro.fas Thu Mar 02 16:21:40 2023 +0000
[
@@ -0,0 +1,14 @@
+>VFG037170(gb|WP_001081754) (plc1) phospholipase C [Phospholipase C (VF0470) - Exotoxin (VFC0235)] [Acinetobacter baumannii 1656-2]
+MNRREFLLNSTKTMFGTAALASFPLSIQKALAIDAKVESGTIQDVKHIVILTQENRSFDN
+YFGTLKGVRGFGDRFTIPMTEGRKVWEQYDANKKKVLPYHLDSRLGNAQRVTGTNHSWSD
+GQGAWDNGRMSDWVAHKQPQSMGYYKKQEVEYQFALANAFTICDAYHCAMHAGTNPNRKF
+IWTGTNGPTGAGVASVVNEFDGIGPSTEGYEWTTYPERLQQAGVTWKVYQNMPDNFTDNP
+LAGFKQYRRANEQSGQPVSNDTLICLAYDEKIDATQPLYKGIANTMPDGGFLGAFKADIA
+QGKLPQVSWLVAPATYSEHPGPSSPVQGAWYIQEVLNALTENTQVWSQTVLLVNFDENDG
+FFDHVPSPSAPSKDINGVVYGKTTLTDQQVSYEYFNHPAVATSKSQPETDGRVYGPGVRV
+PMYVISPWSRGGWVNSQVFDHTSILQFLEKRFGVQEPNISPYRRAVCGDLTTAFNFKTPN
+LLPVAELDGKKTKAEADAIRVAQELLPQVSVPSQQQFPQQEIGIRPSRALPYILHTSAKV
+DVTQKTVKLMFSNTGKQAAVFHVYNRLDLTAIPRRYMVEAGKQLDDAWNTINGQYDLWVL
+GPNGFHRAFKGNLSQANQTQALPEIRVCVEECDANLYLKVRHDGNKSVKLNVKANAYLPN
+KTWMIETNSSEKELVWDMSEFGGWYDFTVTLADDATFSRRFAGRIETQEDSISDPYMGYL
+ES
\ No newline at end of file
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/aro_index.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/aro_index.tsv Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,3 @@
+ARO Accession CVTERM ID Model Sequence ID Model ID Model Name ARO Name Protein Accession DNA Accession AMR Gene Family Drug Class Resistance Mechanism CARD Short Name
+ARO:3001109 37489 1393 4 SHV-52 SHV-52 AEJ08681.1 HQ845196.1 SHV beta-lactamase carbapenem;cephalosporin;penam antibiotic inactivation SHV-52
+ARO:3002999 39433 1188 2 CblA-1 CblA-1 ACT97415.1 GQ343019.1 CblA beta-lactamase cephalosporin antibiotic inactivation CblA-1
\ No newline at end of file
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrog_annot_v4.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/phrog_annot_v4.tsv Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,2 @@
+phrog color annot category
+1 #fea328 integrase integration and excision
\ No newline at end of file
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrog_hhm_db
b
Binary file test-data/subset_pharokka_db/phrog_hhm_db has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrog_hhm_db.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/phrog_hhm_db.index Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+phrog_1.hhm 0 95355
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrogs_profile_db
b
Binary file test-data/subset_pharokka_db/phrogs_profile_db has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrogs_profile_db.dbtype
b
Binary file test-data/subset_pharokka_db/phrogs_profile_db.dbtype has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrogs_profile_db.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/phrogs_profile_db.index Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+0 0 15802
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrogs_profile_db_h
b
Binary file test-data/subset_pharokka_db/phrogs_profile_db_h has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrogs_profile_db_h.dbtype
b
Binary file test-data/subset_pharokka_db/phrogs_profile_db_h.dbtype has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/phrogs_profile_db_h.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/phrogs_profile_db_h.index Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+0 0 18
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta Thu Mar 02 16:21:40 2023 +0000
[
@@ -0,0 +1,4 @@
+>gb|ACT97415.1|ARO:3002999|CblA-1 [mixed culture bacterium AX_gF3SD01_15] 
+MKAYFIAILTLFTCIATVVRAQQMSELENRIDSLLNGKKATVGIAVWTDKGDMLRYNDHVHFPLLSVFKFHVALAVLDKMDKQSISLDSIVSIKASQMPPNTYSPLRKKFPDQDFTITLRELMQYSISQSDNNACDILIEYAGGIKHINDYIHRLSIDSFNLSETEDGMHSSFEAVYRNWSTPSAMVRLLRTADEKELFSNKELKDFLWQTMIDTETGANKLKGMLPAKTVVGHKTGSSDRNADGMKTADNDAGLVILPDGRKYYIAAFVMDSYETDEDNANIIARISRMVYDAMR
+>gb|AEJ08681.1|ARO:3001109|SHV-52 [Klebsiella pneumoniae] 
+MRYIRLCIISLLAALPLAVHASPQPLEQIKQSESQLSGRVGMIEMDLASGRTLTAWRADERFPMISTFKVVLCGAVLARVDAGDEQLERKIHYRQQDLVDYSPVSEKHLADGMTVGELCAAAITMSDNSAANLLLAIVGGPAGLTAFLRQIGDNVTRLDRWETELNEALPGDARDTTTPASMAATLRKLLTSQRLSARSQRQLLQWMVDDRVAGPLIRSVLPAGWFIADKTGAGERGARGIVALLGPNNKAERIVVIYLRDTPASMAERNQQIAGIGAALIEHWQR
\ No newline at end of file
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb
b
Binary file test-data/subset_pharokka_db/vfdb has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb.dbtype
b
Binary file test-data/subset_pharokka_db/vfdb.dbtype has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/vfdb.index Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+0 0 724
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb.lookup
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/vfdb.lookup Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+0 VFG037170(gb|WP_001081754) 0
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb.source
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/vfdb.source Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+0 VFDB_setB_pro.fas
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb_h
b
Binary file test-data/subset_pharokka_db/vfdb_h has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb_h.dbtype
b
Binary file test-data/subset_pharokka_db/vfdb_h.dbtype has changed
b
diff -r 000000000000 -r 1d4fcd38d899 test-data/subset_pharokka_db/vfdb_h.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/subset_pharokka_db/vfdb_h.index Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,1 @@
+0 0 132
b
diff -r 000000000000 -r 1d4fcd38d899 tool-data/pharokka_db.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pharokka_db.loc.sample Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,12 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a pharokka DB folder. The pharokka_db.loc 
+#file needs this format (longer white space is the TAB character):
+
+#<unique_build_id> <dbkey> <display_name> <DB_folder_path>
+
+# for example:
+
+# pharokka_db pharokka_db Pharokka Database v1.2.0 /data/pharokka_db
+
+# To retrieve the complete DB look at https://github.com/gbouras13/pharokka or
+# use `wget "https://zenodo.org/record/7563578/files/pharokka_v1.2.0_database.tar.gz"` (14.02.2023)
\ No newline at end of file
b
diff -r 000000000000 -r 1d4fcd38d899 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of pharokka DB file -->
+    <table name="pharokka_db" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/pharokka_db.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 1d4fcd38d899 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Mar 02 16:21:40 2023 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of pharokka DB file -->
+    <table name="pharokka_db" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/pharokka_db.loc" />
+    </table>
+</tables>
\ No newline at end of file