Mercurial > repos > brinkmanlab > microbedb

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.sql	Fri Oct 30 21:04:47 2020 +0000
@@ -0,0 +1,23 @@
+-- Build the Galaxy data manager JSON document for the all_fasta data table: one row per
+-- chromosome replicon of the MicrobeDB version flagged is_current. The rows are nested under
+-- data_tables.all_fasta because Galaxy looks up new table rows under the top-level "data_tables" key.
+-- :basepath must be bound by the caller to the directory that holds the genome directories.
+SELECT json_object(
+           'data_tables',
+           json_object(
+               'all_fasta',
+               json_group_array(json_object('value', unique_build_id, 'dbkey', dbkey, 'name', label, 'path', file_path))
+           )
+       )
+FROM (
+ SELECT (r.rep_accnum || '.' || r.rep_version)                                           AS unique_build_id,
+        (r.rep_accnum || '_' || r.rep_version)                                           AS dbkey,
+        (REPLACE(r.definition, ', complete genome.', '') || ' [' || r.rep_accnum || '.' || r.rep_version ||
+         ']')                                                                            AS label,
+        (:basepath || '/' || genomeproject.gpv_directory || '/' || genomeproject.filename || '_genomic.fna') AS file_path
+ FROM genomeproject
+          JOIN replicon r ON genomeproject.gpv_id = r.gpv_id AND r.rep_type = 'chromosome'
+ WHERE genomeproject.version_id = (SELECT version_id FROM version WHERE is_current = 1)
+   AND genomeproject.file_types IS NOT NULL
+   AND genomeproject.file_types LIKE '%.fna%'
+)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.xml	Fri Oct 30 21:04:47 2020 +0000
@@ -0,0 +1,53 @@
+<!-- Galaxy data manager tool: exports the reference genomes of a MicrobeDB SQLite database as
+     all_fasta data table rows and, when requested, merges them into the Galaxy builds file. -->
+<tool id="microbedb_all_fasta" name="MicrobeDB All FASTA" version="1.0" tool_type="manage_data">
+    <description>Load all fasta files from MicrobeDB into the all_fasta data table</description>
+    <edam_topics>
+        <edam_topic>topic_3301</edam_topic>
+        <edam_topic>topic_0080</edam_topic>
+        <edam_topic>topic_0091</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_2422</edam_operation>
+    </edam_operations>
+    <requirements>
+        <requirement type="package" version="3">sqlite</requirement>
+    </requirements>
+    <version_command>sqlite3 --version</version_command>
+    <command><![CDATA[
+        #import os.path
+        ## The database path is user supplied, so it is quoted; the builds update only runs if the export succeeded.
+        sqlite3 -readonly '$db' '.param set :basepath ${os.path.dirname(str($db))}' '.read $__tool_directory__/all_fasta.sql' > '$output'
+        #if $builds
+        && sqlite3 -readonly '$db' < '$builds_sql'
+        #endif
+    ]]></command>
+    <configfiles>
+        <configfile name="builds_sql"><![CDATA[
+            #include $__tool_directory__/builds.sql
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <param name="db" type="text" value="/cvmfs/microbedb.brinkmanlab.ca/microbedb.sqlite" label="Path" help="Absolute path to microbedb.sqlite on Galaxy host server"/>
+        <param name="builds" type="boolean" label="Append references to builds" help="Append genomes to Galaxy's builds.txt, listing them at /api/genomes"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="data_manager_json" />
+    </outputs>
+    <tests>
+    </tests>
+    <help><![CDATA[
+    Analysis of microbial genomes often requires the general organization and comparison of tens to thousands of genomes both from public repositories
+    and unpublished sources. MicrobeDB provides a foundation for such projects by the automation of downloading published, completed bacterial and
+    archaeal genomes from key sources, parsing annotations of all genomes (both public and private) into a local database, and allowing interaction
+    with the database through an easy to use programming interface. MicrobeDB creates a simple to use, easy to maintain, centralized local resource
+    for various large-scale comparative genomic analyses and a back-end for future microbial application design.
+
+    This tool populates the all_fasta table with the paths of the reference genomes included in MicrobeDB. MicrobeDB must first be manually loaded
+    onto the Galaxy filesystem. The easiest way of doing that is mounting it using the `CVMFS client <https://cvmfs.readthedocs.io/en/stable/cpt-quickstart.html>`_.
+    The MicrobeDB CVMFS repository is hosted at microbedb.brinkmanlab.ca.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/bts273</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/builds.sql	Fri Oct 30 21:04:47 2020 +0000
@@ -0,0 +1,24 @@
+-- Merge the current MicrobeDB chromosome builds into the Galaxy builds file.
+-- Existing entries are loaded into a scratch table, MicrobeDB entries are upserted by dbkey,
+-- and the merged list is written back to the same file sorted by dbkey.
+DROP TABLE IF EXISTS builds_temp;
+CREATE TEMP TABLE builds_temp(dbkey unique, label);
+-- The builds file is tab separated; the separator below is a literal tab character.
+.mode csv
+.separator '	'
+.import '$__app__.config.builds_file_path' builds_temp
+-- dbkey is unique, so OR REPLACE overwrites the label of a build that is already listed.
+INSERT OR REPLACE INTO builds_temp (dbkey, label)
+SELECT (r.rep_accnum || '_' || r.rep_version)                                           AS dbkey,
+    (REPLACE(r.definition, ', complete genome.', '') || ' [' || r.rep_accnum || '.' || r.rep_version ||
+     ']')                                                                            AS label
+FROM genomeproject
+      JOIN replicon r ON genomeproject.gpv_id = r.gpv_id AND r.rep_type = 'chromosome'
+-- Select the version flagged is_current, matching the filter used by all_fasta.sql, so both outputs list the same dbkeys.
+WHERE genomeproject.version_id = (SELECT version_id FROM version WHERE is_current = 1)
+    AND genomeproject.file_types IS NOT NULL
+    AND genomeproject.file_types LIKE '%.fna%';
+.mode list
+.separator '	'
+.once '$__app__.config.builds_file_path'
+SELECT dbkey, label FROM builds_temp ORDER BY dbkey;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Fri Oct 30 21:04:47 2020 +0000
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager version="1.0.0" tool_file="all_fasta.xml">
+        <!-- Data table that receives the rows produced by the tool -->
+        <data_table name="all_fasta">
+            <!-- Mapping of the tool output onto the table -->
+            <output>
+                <!-- Columns supplied by each row of the tool output -->
+                <column name="value"/>
+                <column name="dbkey"/>
+                <column name="name"/>
+                <column name="path"/>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Oct 30 21:04:47 2020 +0000
@@ -0,0 +1,7 @@
+<!-- Sample registration of the all_fasta data table and the loc file that backs it. -->
+<tables>
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="all_fasta.loc"/>
+    </table>
+</tables>
\ No newline at end of file