changeset 0:97f310b8e308 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_pharokka_database_fetcher/ commit 31e99045208605780b3fe4b89a999137adcabe13
author iuc
date Thu, 20 Apr 2023 07:51:43 +0000
parents
children 50caca83e7c9
files data_manager/data_manager_fetch_pharokka_db.py data_manager/macros.xml data_manager/pharokka_db_fetcher.xml data_manager/subset_pharokka_db/5Jan2023_data.tsv data_manager/subset_pharokka_db/5Jan2023_genomes.fa.msh data_manager/subset_pharokka_db/CARD data_manager/subset_pharokka_db/CARD.dbtype data_manager/subset_pharokka_db/CARD.index data_manager/subset_pharokka_db/CARD.lookup data_manager/subset_pharokka_db/CARD.source data_manager/subset_pharokka_db/CARD_h data_manager/subset_pharokka_db/CARD_h.dbtype data_manager/subset_pharokka_db/CARD_h.index data_manager/subset_pharokka_db/VFDB_setB_pro.fas data_manager/subset_pharokka_db/aro_index.tsv data_manager/subset_pharokka_db/phrog_annot_v4.tsv data_manager/subset_pharokka_db/phrog_hhm_db data_manager/subset_pharokka_db/phrog_hhm_db.index data_manager/subset_pharokka_db/phrogs_db data_manager/subset_pharokka_db/phrogs_db.dbtype data_manager/subset_pharokka_db/phrogs_db.index data_manager/subset_pharokka_db/phrogs_profile_db data_manager/subset_pharokka_db/phrogs_profile_db.dbtype data_manager/subset_pharokka_db/phrogs_profile_db.index data_manager/subset_pharokka_db/phrogs_profile_db_consensus data_manager/subset_pharokka_db/phrogs_profile_db_consensus.dbtype data_manager/subset_pharokka_db/phrogs_profile_db_consensus.index data_manager/subset_pharokka_db/phrogs_profile_db_h data_manager/subset_pharokka_db/phrogs_profile_db_h.dbtype data_manager/subset_pharokka_db/phrogs_profile_db_h.index data_manager/subset_pharokka_db/phrogs_profile_db_seq data_manager/subset_pharokka_db/phrogs_profile_db_seq.dbtype data_manager/subset_pharokka_db/phrogs_profile_db_seq.index data_manager/subset_pharokka_db/phrogs_profile_db_seq_h data_manager/subset_pharokka_db/phrogs_profile_db_seq_h.index data_manager/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta data_manager/subset_pharokka_db/vfdb data_manager/subset_pharokka_db/vfdb.dbtype data_manager/subset_pharokka_db/vfdb.index 
data_manager/subset_pharokka_db/vfdb.lookup data_manager/subset_pharokka_db/vfdb.source data_manager/subset_pharokka_db/vfdb_h data_manager/subset_pharokka_db/vfdb_h.dbtype data_manager/subset_pharokka_db/vfdb_h.index data_manager_conf.xml test-data/pharokka_db.loc tool-data/pharokka_db.loc tool_data_table_conf.xml.sample
diffstat 37 files changed, 247 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_fetch_pharokka_db.py	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+from datetime import datetime
+
+
+def main():
+    # Parse Command Line
+    parser = argparse.ArgumentParser(description="Create data manager JSON.")
+    parser.add_argument("--out", dest="output", action="store", help="JSON filename")
+    parser.add_argument("--version", dest="version", action="store", help="Version of the DB")
+    parser.add_argument(
+        "--test",
+        action="store_true",
+        help="option to test the script with an lighted database",
+    )
+
+    args = parser.parse_args()
+
+    # the output file of a DM is a json containing args that can be used by the DM
+    # most tools mainly use these args to find the extra_files_path for the DM, which can be used
+    # to store the DB data
+    with open(args.output) as fh:
+        params = json.load(fh)
+
+    workdir = params["output_data"][0]["extra_files_path"]
+    os.mkdir(workdir)
+
+    time = datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
+    db_value = "db_from_{0}".format(time)
+    db_path = os.path.join(workdir, db_value)
+
+    # create DB
+    if args.test:  # the test only checks that the pharokka download script is available and copies the test DB
+
+        # check if install_databases.py is there
+        command_args = ["install_databases.py", "-h"]
+        proc = subprocess.Popen(args=command_args, shell=False)
+        return_code = proc.wait()
+        if return_code:
+            print("Error downloading Pharokka database.", file=sys.stderr)
+            sys.exit(return_code)
+
+        # copy the test DB
+        test_db_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "subset_pharokka_db")
+        command_args = ["cp", "-r", test_db_path, db_path]
+    else:
+        command_args = ["install_databases.py", "-o", db_path]
+
+    proc = subprocess.Popen(args=command_args, shell=False)
+    return_code = proc.wait()
+    if return_code:
+        print("Error downloading Pharokka database.", file=sys.stderr)
+        sys.exit(return_code)
+
+    # Update Data Manager JSON and write to file
+    data_manager_entry = {
+        "data_tables": {
+            "pharokka_db": {
+                "value": db_value,
+                "dbkey": db_value,
+                "version": args.version,
+                "name": f"Pharokka DB version {args.version} downloaded at {datetime.now()}",
+                "path": db_path,
+            }
+        }
+    }
+
+    with open(os.path.join(args.output), "w+") as fh:
+        json.dump(data_manager_entry, fh, sort_keys=True)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/macros.xml	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">1.3.0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@PROFILE@">22.05</token>
+    <xml name="biotools">
+        <xrefs>
+            <xref type="bio.tools">
+                pharokka
+            </xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">pharokka</requirement>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command>
+            pharokka.py --version
+        </version_command>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">
+                10.1093/bioinformatics/btac776
+            </citation>
+        </citations>
+    </xml>
+    <xml name="creator">
+        <creator>
+            <person givenName="Paul" familyName="Zierep" email="zierep@informatik.uni-freiburg.de" />
+        </creator>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/pharokka_db_fetcher.xml	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<tool id="pharokka_db_fetcher" name="pharokka DB fetcher" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools" />
+    <expand macro="requirements" />
+    <expand macro="version" />
+    <description>Fetches the DB required for pharokka</description>
+    <command detect_errors="exit_code">
+    <![CDATA[
+        python '$__tool_directory__/data_manager_fetch_pharokka_db.py' --out '${out_file}' --version '${version}' 
+        $test_data_manager
+    ]]>
+    </command>
+    <inputs>
+    <param name="test_data_manager" type="hidden" value=""/>
+    <param argument="--version" type="text" value="v1.2.0" help="Check https://github.com/gbouras13/pharokka/blob/c822b4c3b8cf642512a5c236fc4036ad53f48587/bin/databases.py#L102 for current version."/>
+    </inputs>
+    <outputs>
+        <data format="data_manager_json" name="out_file" />
+    </outputs>
+    <tests>
+    <test expect_num_outputs="1">
+        <param name="test_data_manager" value="--test"/>
+        <param name="version" value="v1.2.0"/>
+        <output name="out_file">
+                <assert_contents>
+                    <has_text text="Pharokka DB version"/>
+                    <has_text text="db_from_"/>
+                    <has_text text="pharokka_db"/>
+                    <has_text text="v1.2.0"/>
+                </assert_contents>
+        </output>
+    </test>
+    </tests>
+    <help>
+    </help>
+    <expand macro="citations" />
+    <expand macro="creator" />
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/5Jan2023_data.tsv	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,2 @@
+Accession	Description	Classification	Genome Length (bp)	Jumbophage	molGC (%)	Molecule	Modification Date	Number CDS	Positive Strand (%)	Negative Strand (%)	Coding Capacity (%)	Low Coding Capacity Warning	tRNAs	Host	Lowest Taxa	Genus	Sub-family	Family	Order	Class	Phylum	Kingdom	Realm	Baltimore Group	Genbank Division	Isolation Host (beware inconsistent and nonsense values)
+MF417929	Uncultured Caudovirales phage clone 2F_1	Uncultured Caudovirales phage clone 2F_1 Bracchivirus U2F1 Bracchivirus Peduoviridae Caudoviricetes Uroviricota Heunggongvirae Duplodnaviria Viruses	32618	FALSE	39.218	DNA	1-Nov-22	42	16.66666667	83.33333333	89.41688638	NA	0	Unspecified	Bracchivirus	Bracchivirus	Unclassified	Peduoviridae	Caudovirales	Caudoviricetes	Uroviricota	Heunggongvirae	Duplodnaviria	Group I	ENV	Unspecified
\ No newline at end of file
Binary file data_manager/subset_pharokka_db/5Jan2023_genomes.fa.msh has changed
Binary file data_manager/subset_pharokka_db/CARD has changed
Binary file data_manager/subset_pharokka_db/CARD.dbtype has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/CARD.index	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,2 @@
+0	0	298
+1	298	288
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/CARD.lookup	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,2 @@
+0	ACT97415.1	0
+1	AEJ08681.1	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/CARD.source	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+0	protein_fasta_protein_homolog_model.fasta
Binary file data_manager/subset_pharokka_db/CARD_h has changed
Binary file data_manager/subset_pharokka_db/CARD_h.dbtype has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/CARD_h.index	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,2 @@
+0	0	75
+1	75	59
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/VFDB_setB_pro.fas	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,14 @@
+>VFG037170(gb|WP_001081754) (plc1) phospholipase C [Phospholipase C (VF0470) - Exotoxin (VFC0235)] [Acinetobacter baumannii 1656-2]
+MNRREFLLNSTKTMFGTAALASFPLSIQKALAIDAKVESGTIQDVKHIVILTQENRSFDN
+YFGTLKGVRGFGDRFTIPMTEGRKVWEQYDANKKKVLPYHLDSRLGNAQRVTGTNHSWSD
+GQGAWDNGRMSDWVAHKQPQSMGYYKKQEVEYQFALANAFTICDAYHCAMHAGTNPNRKF
+IWTGTNGPTGAGVASVVNEFDGIGPSTEGYEWTTYPERLQQAGVTWKVYQNMPDNFTDNP
+LAGFKQYRRANEQSGQPVSNDTLICLAYDEKIDATQPLYKGIANTMPDGGFLGAFKADIA
+QGKLPQVSWLVAPATYSEHPGPSSPVQGAWYIQEVLNALTENTQVWSQTVLLVNFDENDG
+FFDHVPSPSAPSKDINGVVYGKTTLTDQQVSYEYFNHPAVATSKSQPETDGRVYGPGVRV
+PMYVISPWSRGGWVNSQVFDHTSILQFLEKRFGVQEPNISPYRRAVCGDLTTAFNFKTPN
+LLPVAELDGKKTKAEADAIRVAQELLPQVSVPSQQQFPQQEIGIRPSRALPYILHTSAKV
+DVTQKTVKLMFSNTGKQAAVFHVYNRLDLTAIPRRYMVEAGKQLDDAWNTINGQYDLWVL
+GPNGFHRAFKGNLSQANQTQALPEIRVCVEECDANLYLKVRHDGNKSVKLNVKANAYLPN
+KTWMIETNSSEKELVWDMSEFGGWYDFTVTLADDATFSRRFAGRIETQEDSISDPYMGYL
+ES
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/aro_index.tsv	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,3 @@
+ARO Accession	CVTERM ID	Model Sequence ID	Model ID	Model Name	ARO Name	Protein Accession	DNA Accession	AMR Gene Family	Drug Class	Resistance Mechanism	CARD Short Name
+ARO:3001109	37489	1393	4	SHV-52	SHV-52	AEJ08681.1	HQ845196.1	SHV beta-lactamase	carbapenem;cephalosporin;penam	antibiotic inactivation	SHV-52
+ARO:3002999	39433	1188	2	CblA-1	CblA-1	ACT97415.1	GQ343019.1	CblA beta-lactamase	cephalosporin	antibiotic inactivation	CblA-1
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/phrog_annot_v4.tsv	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,2 @@
+phrog	color	annot	category
+1	#fea328	integrase	integration and excision
\ No newline at end of file
Binary file data_manager/subset_pharokka_db/phrog_hhm_db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/phrog_hhm_db.index	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+phrog_1.hhm	0	95355
Binary file data_manager/subset_pharokka_db/phrogs_profile_db has changed
Binary file data_manager/subset_pharokka_db/phrogs_profile_db.dbtype has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/phrogs_profile_db.index	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+0	0	15802
Binary file data_manager/subset_pharokka_db/phrogs_profile_db_h has changed
Binary file data_manager/subset_pharokka_db/phrogs_profile_db_h.dbtype has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/phrogs_profile_db_h.index	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+0	0	18
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,4 @@
+>gb|ACT97415.1|ARO:3002999|CblA-1 [mixed culture bacterium AX_gF3SD01_15] 
+MKAYFIAILTLFTCIATVVRAQQMSELENRIDSLLNGKKATVGIAVWTDKGDMLRYNDHVHFPLLSVFKFHVALAVLDKMDKQSISLDSIVSIKASQMPPNTYSPLRKKFPDQDFTITLRELMQYSISQSDNNACDILIEYAGGIKHINDYIHRLSIDSFNLSETEDGMHSSFEAVYRNWSTPSAMVRLLRTADEKELFSNKELKDFLWQTMIDTETGANKLKGMLPAKTVVGHKTGSSDRNADGMKTADNDAGLVILPDGRKYYIAAFVMDSYETDEDNANIIARISRMVYDAMR
+>gb|AEJ08681.1|ARO:3001109|SHV-52 [Klebsiella pneumoniae] 
+MRYIRLCIISLLAALPLAVHASPQPLEQIKQSESQLSGRVGMIEMDLASGRTLTAWRADERFPMISTFKVVLCGAVLARVDAGDEQLERKIHYRQQDLVDYSPVSEKHLADGMTVGELCAAAITMSDNSAANLLLAIVGGPAGLTAFLRQIGDNVTRLDRWETELNEALPGDARDTTTPASMAATLRKLLTSQRLSARSQRQLLQWMVDDRVAGPLIRSVLPAGWFIADKTGAGERGARGIVALLGPNNKAERIVVIYLRDTPASMAERNQQIAGIGAALIEHWQR
\ No newline at end of file
Binary file data_manager/subset_pharokka_db/vfdb has changed
Binary file data_manager/subset_pharokka_db/vfdb.dbtype has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/vfdb.index	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+0	0	724
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/vfdb.lookup	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+0	VFG037170(gb|WP_001081754)	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/vfdb.source	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+0	VFDB_setB_pro.fas
Binary file data_manager/subset_pharokka_db/vfdb_h has changed
Binary file data_manager/subset_pharokka_db/vfdb_h.dbtype has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/subset_pharokka_db/vfdb_h.index	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,1 @@
+0	0	132
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<data_managers>
+
+    <data_manager tool_file="data_manager/pharokka_db_fetcher.xml" id="pharokka_db_fetcher" version="1.2">
+        <data_table name="pharokka_db">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="version" />
+                <column name="name" />
+                <column name="path" output_ref="out_file" >
+                    <move type="directory" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">pharokka_database/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/pharokka_database/${value}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pharokka_db.loc	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,12 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a pharokka DB folder. The pharokka_db.loc 
+#file needs this format (longer white space is the TAB character):
+
+#<unique_build_id>	<dbkey>	<version>	<display_name>	<DB_folder_path>
+
+# for example:
+
+# pharokka_db	pharokka_db	v1.2.0	Pharokka Database v1.2.0	/data/pharokka_db
+
+# To retrieve the complete DB look at https://github.com/gbouras13/pharokka or
+# use `wget "https://zenodo.org/record/7563578/files/pharokka_v1.2.0_database.tar.gz"` (14.02.2023)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pharokka_db.loc	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,12 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a pharokka DB folder. The pharokka_db.loc 
+#file needs this format (longer white space is the TAB character):
+
+#<unique_build_id>	<dbkey>	<version>	<display_name>	<DB_folder_path>
+
+# for example:
+
+# pharokka_db	pharokka_db	v1.2.0	Pharokka Database v1.2.0	/data/pharokka_db
+
+# To retrieve the complete DB look at https://github.com/gbouras13/pharokka or
+# use `wget "https://zenodo.org/record/7563578/files/pharokka_v1.2.0_database.tar.gz"` (14.02.2023)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Apr 20 07:51:43 2023 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of pharokka DB file -->
+    <table name="pharokka_db" comment_char="#">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="tool-data/pharokka_db.loc" />
+    </table>
+</tables>
\ No newline at end of file