Mercurial > repos > diodupima > data_manager_coast_taxonomic_filters

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Thu Jul 15 16:55:05 2021 +0000
@@ -0,0 +1,9 @@
+COAST's Taxonomic Filters - Data Manager
+________________________________________
+
+This data manager helps an admin provide the Galaxy instance's users with pre-cached taxonomic filters. This helps increase
+the speed of the queries against larger databases.
+This tool is aimed at BLAST, because DIAMOND is already capable of taking higher-order taxids as filters.
+
+To use it you only need to provide the taxid for your desired root node, and the tool will take it from there.
+Optionally, you can provide a name that will also be visible to your users alongside the taxid, to help them identify the filter.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Thu Jul 15 16:55:05 2021 +0000
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="txids_dm.xml" id="taxonomic_filters">
+        <data_table name="coast_taxonomic_filters">
+            <output>
+                <column name="value" />
+                <column name="name" />
+                <column name="node_name" />
+                <column name="path" output_ref="out_file" >
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">COAST_taxonomic_filters/${path}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/COAST_taxonomic_filters/${path}/${value}.txids</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ho2s.py	Thu Jul 15 16:55:05 2021 +0000
@@ -0,0 +1,62 @@
+import argparse
+import os
+from urllib import request
+from shutil import unpack_archive
+import datetime
+import subprocess
+import json
+
+def script_cli():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--file",
+                        help="JSON options file",
+                        type=str)
+    parser.add_argument("--tool_data_table_name",
+                        help="Tool data table name",
+                        type=str)
+    args = parser.parse_args()
+
+    with open(args.file) as params_file:
+        params = json.load(params_file)
+
+    target_directory = params["output_data"][0]["extra_files_path"]
+    os.mkdir(target_directory)
+
+    taxid = params["param_dict"]["taxid"]
+    node_name = params["param_dict"]["node_name"]
+    name = " ".join([node_name, f"(taxid - {taxid})", f"(date - {datetime.datetime.now().strftime('%Y_%m_%d_%H')})"])
+    tool_data_table_name = args.tool_data_table_name
+    data_id = "_".join([taxid, datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")])
+
+    url = 'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
+    request.urlretrieve(url, os.path.join(target_directory, "taxdump.tar.gz"))
+    unpack_archive(os.path.join(target_directory, "taxdump.tar.gz"), extract_dir=target_directory)
+
+    subprocess.call(
+        f"taxonkit list --ids {taxid} --data-dir {target_directory}"
+        f"| taxonkit filter --equal-to Species --lower-than Species --save-predictable-norank --data-dir {target_directory}"
+        f"> {os.path.join(target_directory, data_id)}.txids",
+        shell=True
+    )
+
+    # cleanup
+    for filename in os.listdir(target_directory):
+        if filename != str(data_id+".txids"):
+            os.remove(os.path.join(target_directory,filename))
+
+    data_table_entry = {
+        "value": data_id,
+        "name": name,
+        "node_name": node_name,
+        "path": os.path.join(taxid, data_id)+".txids"
+    }
+    data_manager_dict = {
+        "data_tables": {tool_data_table_name: [data_table_entry]}
+    }
+    output_json = open(args.file, "w")
+    output_json.write(json.dumps(data_manager_dict))
+    output_json.close()
+
+
+if __name__ == "__main__":
+    script_cli()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/coast_taxonomic_filters.loc.sample	Thu Jul 15 16:55:05 2021 +0000
@@ -0,0 +1,10 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of coast_taxonomic_filters.  You will need
+#to create these data files and then create a coast_taxonomic_filters.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The coast_taxonomic_filters.loc
+#file has this format (white space characters are TAB characters):
+#
+#value	<name>	<node_name>	<path>
+#
+#2_2021_07_08_00_33_22	bac (taxid - 2)	(date - 2021_07_08_00)	bac 2/2_2021_07_07_23_57_17.txids
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Jul 15 16:55:05 2021 +0000
@@ -0,0 +1,7 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <table name="coast_taxonomic_filters" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, node_name, path</columns>
+        <file path="tool-data/coast_taxonomic_filters.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/txids_dm.xml	Thu Jul 15 16:55:05 2021 +0000
@@ -0,0 +1,55 @@
+<tool id="taxonomic_filters" name="COAST taxonomic filter generator" version="0.1" tool_type="manage_data">
+    <description>
+    </description>
+    <requirements>
+        <requirement type="package" version="0.8">taxonkit</requirement>
+    </requirements>
+    <command>python3 '$__tool_directory__/ho2s.py' --file "${out_file}" --tool_data_table_name "coast_taxonomic_filters"</command>
+    <inputs>
+        <param name="taxid" optional="False" value="" type="integer" label="TAXID for the desired top node" help="Root Taxonomy node."/>
+        <param name="node_name" optional="False" value="" type="text" label="The name you want for the node" help="Label for the filter."/>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="taxid" value="10239"/>
+            <param name="node_name" value="Virus"/>
+            <output name="out_file"/>
+        </test>
+    </tests>
+    <help>
+        Generates taxonomic filters from higher-order taxid nodes.
+        Used by PhageCOAST for superkingdom-scale filtering.
+        It generates a species-level taxid list in a file, ready to be provided to BLAST.
+        Can also be used for lower-ranked taxids, if you want to provide, for example, a genus- or phylum-level filter.
+    </help>
+    <citations>
+        <citation type="bibtex">@misc{noauthor_coast_nodate,
+                title = {{COAST} - {Compartive} {Ominc} {Alignment} {Search} {Tool}},
+                url = {https://gitlab.com/coast_tool/COAST},
+                abstract = {Alignment search tool that identifies close proteomes},
+                language = {en},
+                urldate = {2021-06-22},
+            }
+        </citation>
+        <citation type="bibtex">@article{shen_taxonkit_2021,
+                abstract = {The National Center for Biotechnology Information (NCBI) Taxonomy is widely applied in biomedical and ecological studies. Typical demands include querying taxonomy identifier (TaxIds) by taxonomy names, querying complete taxonomic lineages by TaxIds, listing descendants of given TaxIds, and others. However, existed tools are either limited in functionalities or inefficient in terms of runtime. In this work, we present TaxonKit, a command-line toolkit for comprehensive and efficient manipulation of NCBI Taxonomy data. TaxonKit comprises seven core subcommands providing functions, including TaxIds querying, listing, filtering, lineage retrieving and reformatting, lowest common ancestor computation, and TaxIds change tracking. The practical functions, competitive processing performance, scalability with different scales of datasets and good accessibility could facilitate taxonomy data manipulations. TaxonKit provides free access under the permissive MIT license on GitHub, Brewsci, and Bioconda. The documents are also available at https://bioinf.shenwei.me/taxonkit/.},
+                author = {Shen, Wei and Ren, Hong},
+                doi = {10.1016/j.jgg.2021.03.006},
+                file = {ScienceDirect Snapshot:/home/dm/Zotero/storage/Q3KYT6QS/S1673852721000837.html:text/html},
+                issn = {1673-8527},
+                journal = {Journal of Genetics and Genomics},
+                keywords = {Lineage; NCBI Taxonomy; TaxId; TaxId changelog; TaxonKit},
+                language = {en},
+                month = apr,
+                shorttitle = {{TaxonKit}},
+                title = {{TaxonKit}: {A} practical and efficient {NCBI} taxonomy toolkit},
+                url = {https://www.sciencedirect.com/science/article/pii/S1673852721000837},
+                urldate = {2021-06-21},
+                year = {2021}
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file