changeset 0:f59e7e242bde draft

planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
author jjohnson
date Sun, 24 Nov 2019 21:54:57 -0500
parents
children 2bec6d7877fc
files data_manager/data_manager_cat.py data_manager/data_manager_cat.xml data_manager/macros.xml data_manager_conf.xml tool-data/cat_database.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 455 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_cat.py	Sun Nov 24 21:54:57 2019 -0500
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import argparse
+import json
+import os.path
+import subprocess
+import tarfile
+import tempfile
+import zipfile
+try:
+    # For Python 3.0 and later
+    from urllib.request import urlopen
+except ImportError:
+    # Fall back to Python 2 imports
+    from urllib2 import urlopen
+
+
+def url_download(url, workdir):
+    """Download ``url`` into ``workdir`` and unpack it if it is an archive.
+
+    The payload is saved as ``download.dat`` inside ``workdir``.  A tar
+    (optionally compressed) or zip archive is extracted into ``workdir``
+    and the downloaded file is then removed; any other payload is left in
+    place as ``download.dat``.
+
+    :param url: location of the file to fetch
+    :param workdir: existing directory that receives the download
+    """
+    file_path = os.path.join(workdir, 'download.dat')
+    src = urlopen(url)
+    try:
+        with open(file_path, 'wb') as dst:
+            while True:
+                # 64 KiB reads: fewer read/write round trips than 1 KiB
+                chunk = src.read(2**16)
+                if not chunk:
+                    break
+                dst.write(chunk)
+    finally:
+        src.close()
+    if tarfile.is_tarfile(file_path):
+        fh = tarfile.open(file_path, 'r:*')
+    elif zipfile.is_zipfile(file_path):
+        fh = zipfile.ZipFile(file_path, 'r')
+    else:
+        return
+    try:
+        # NOTE(review): extractall() trusts the member paths stored in the
+        # archive; only point this at sources you trust.
+        fh.extractall(workdir)
+    finally:
+        # release the archive handle before deleting the file it reads
+        fh.close()
+    os.remove(file_path)
+
+
+def cat_prepare(install_dir):
+    """Build a fresh CAT database with ``CAT prepare --fresh -q``.
+
+    The command runs with ``install_dir`` as its working directory.
+
+    :param install_dir: directory in which the command is executed
+    :raises Exception: if the command exits with a non-zero status; the
+        message carries the return code and the captured stdout/stderr
+    """
+    cmd = ['CAT', 'prepare', '--fresh', '-q']
+    with tempfile.NamedTemporaryFile() as cmd_stdout, \
+            tempfile.NamedTemporaryFile() as cmd_stderr:
+        # An argument list needs shell=False (the default); with
+        # shell=True only the first element would be run as the command.
+        return_code = subprocess.call(cmd, cwd=install_dir,
+                                      stdout=cmd_stdout, stderr=cmd_stderr)
+        if return_code:
+            # rewind this process's file position before reading the capture
+            cmd_stdout.seek(0)
+            cmd_stderr.seek(0)
+            msg = "stdout:\n%s\nstderr:\n%s" % (
+                cmd_stdout.read().decode('utf-8', 'replace'),
+                cmd_stderr.read().decode('utf-8', 'replace'))
+            raise Exception('Error: (%s), returncode=%s %s'
+                            % (' '.join(cmd), return_code, msg))
+
+
+def main():
+    """Install a CAT database and write the data manager JSON.
+
+    With ``--db_url`` a prebuilt database is downloaded and unpacked into
+    ``--install_path``; otherwise ``CAT prepare`` is run there.  The first
+    directory below ``--install_path`` that contains both a
+    ``*CAT_database`` and a ``*taxonomy`` folder is recorded as one
+    ``cat_database`` data table entry, written as JSON to
+    ``--config_file``.
+
+    :raises Exception: if no directory contains both folders
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config_file')
+    parser.add_argument('--install_path')
+    parser.add_argument('--db_url', default=None)
+    args = parser.parse_args()
+
+    if not os.path.exists(args.install_path):
+        os.makedirs(args.install_path)
+    if args.db_url:
+        url_download(args.db_url, args.install_path)
+    else:
+        cat_prepare(args.install_path)
+
+    cat_path = None
+    cat_db = None
+    tax_db = None
+    for root, dirs, _files in os.walk(args.install_path):
+        # Start afresh in every directory so a database folder and a
+        # taxonomy folder from different directories are never paired.
+        cat_db = None
+        tax_db = None
+        for dname in dirs:
+            if dname.endswith('CAT_database'):
+                cat_db = dname
+            elif dname.endswith('taxonomy'):
+                tax_db = dname
+        if cat_db and tax_db:
+            cat_path = root
+            break
+    if cat_path is None:
+        raise Exception('Error: no directory under %s contains both a '
+                        'CAT_database and a taxonomy folder'
+                        % args.install_path)
+
+    cat_dir = os.path.basename(cat_path)
+    # NOTE(review): both folders are recorded relative to the parent of
+    # cat_path; confirm this is what data_manager_conf.xml expects when the
+    # folders sit directly inside install_path.
+    data_table_entry = dict(value=cat_dir, name=cat_dir,
+                            database_folder=os.path.join(cat_dir, cat_db),
+                            taxonomy_folder=os.path.join(cat_dir, tax_db))
+    dm_dict = {'data_tables': {'cat_database': [data_table_entry]}}
+    # save info to json file; text mode because json emits str, which a
+    # binary handle rejects on Python 3
+    with open(args.config_file, 'w') as fh:
+        json.dump(dm_dict, fh)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_cat.xml	Sun Nov 24 21:54:57 2019 -0500
@@ -0,0 +1,44 @@
+<tool id="data_manager_cat" name="CAT DB" version="@VERSION@.0" tool_type="manage_data">
+    <!-- Data manager that installs a CAT database, either from a prebuilt archive or by running CAT prepare. -->
+    <description>Install a new CAT database</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <command detect_errors="exit_code"><![CDATA[
+#import json, os
+## The install directory is taken from the JSON that \$out_file holds when the command is built.
+#set params = json.loads(open(str($out_file)).read())
+## Kept as text: an encoded bytes value would render as b'...' on Python 3.
+#set install_path = $params['output_data'][0]['extra_files_path']
+mkdir -p '$install_path' &&
+python '${__tool_directory__}/data_manager_cat.py' --config_file '$out_file' --install_path '$install_path'
+#if $db.src == 'download'
+    --db_url '$db.db_url'
+#end if
+    ]]></command>
+    <inputs>
+        <conditional name="db">
+            <param name="src" type="select" label="Download or Build DBs">
+                <option value="download">download</option>
+                <option value="build">build</option>
+            </param>
+            <when value="download">
+                <param name="db_url" type="text" label="DB URL at https://tbb.bio.uu.nl/bastiaan/CAT_prepare/"
+                     help="example: https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz">
+                </param>
+            </when>
+            <when value="build">
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" label="${tool.name}"/>
+    </outputs>
+    <tests>
+    </tests>
+    <help><![CDATA[
+This tool prepares reference data for CAT, the Contig Annotation Tool.
+It can either download prebuilt reference data from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/
+or build new reference data using the CAT prepare application.
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/macros.xml	Sun Nov 24 21:54:57 2019 -0500
@@ -0,0 +1,267 @@
+<macros>
+    <!-- CAT release wrapped by these tools; referenced by the tool version and the package requirement. -->
+    <token name="@VERSION@">5.0.3</token>
+    <!-- Package requirement on cat at the wrapped version; the yield lets an expanding tool append further requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">cat</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Command used to report the installed CAT version. -->
+    <xml name="version_command">
+        <version_command>CAT --version</version_command>
+    </xml>
+    <!-- Sub-directory names that the CAT_DB and CAT_TAXONOMY tokens join onto a history dataset's extra_files_path. -->
+    <token name="@DATABASE_FOLDER@">CAT_database</token>
+    <token name="@TAXONOMY_FOLDER@">taxonomy</token>
+    <!-- Database selector: a cached entry of the cat_database data table (the name declared in
+         tool_data_table_conf.xml.sample and data_manager_conf.xml) or a history dataset from CAT prepare. -->
+    <xml name="cat_db">
+        <conditional name="db">
+            <param name="db_src" type="select" label="CAT database from">
+                <option value="cached">local cached database</option>
+                <option value="history">history</option>
+            </param>
+            <when value="cached">
+                <param name="cat_builtin" type="select" label="Use a built-in CAT database" help="If the CAT database of interest is not listed, contact your Galaxy administrator">
+                    <options from_data_table="cat_database">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No CAT database is available." />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="cat_db" type="data" format="txt" label="A history dataset from CAT prepare tool"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Command-line fragment passing the database and taxonomy folders of the selected CAT database;
+         both branches quote the paths so that directories containing spaces survive the shell. -->
+    <token name="@CAT_DB@"><![CDATA[
+        #if $db.db_src == 'cached':
+        --database_folder '$db.cat_builtin.fields.database_folder'
+        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+        #else
+        #import os.path
+        #set $catdb = $db.cat_db.extra_files_path
+        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+        #end if
+]]></token>
+    <!-- Command-line fragment passing only the taxonomy folder of the selected CAT database;
+         both branches quote the path so that directories containing spaces survive the shell. -->
+    <token name="@CAT_TAXONOMY@"><![CDATA[
+        #if $db.db_src == 'cached':
+        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+        #else
+        #import os.path
+        #set $catdb = $db.cat_db.extra_files_path
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+        #end if
+]]></token>
+    <!-- Test parameters that select the cached database entry named CAT_prepare_test. -->
+    <xml name="test_catdb">
+        <conditional name="db">
+            <param name="db_src" value="cached"/>
+            <param name="cat_builtin" value="CAT_prepare_test"/>
+        </conditional>
+    </xml>
+
+    <!-- Optional reuse of an earlier run's predicted proteins and diamond alignment. The alignment input is
+         tabular so that it accepts the alignment_diamond dataset declared in the outputs macro. -->
+    <xml name="use_intermediates">
+        <conditional name="previous">
+            <param name="use_previous" type="select" label="Use previous gene prediction and diamond alignment">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param argument="--proteins_fasta" type="data" format="fasta" label="predicted proteins fasta"/>
+                <param argument="--diamond_alignment" type="data" format="tabular" label="alignments file"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Command-line fragment: passes the previous proteins and alignment when selected, and always sets the cat_output prefix. -->
+    <token name="@USE_INTERMEDIATES@"><![CDATA[
+      #if $previous.use_previous == 'yes'
+      --proteins_fasta '$previous.proteins_fasta'
+      --diamond_alignment '$previous.diamond_alignment'
+      #end if
+      --out_prefix 'cat_output'
+]]></token>
+
+    <!-- Numeric inputs for the range (integer, 0 to 49) and fraction (float, 0 to 0.99) options. -->
+    <xml name="custom_settings">
+        <param argument="--range" type="integer" value="10" min="0" max="49" label="range"/>
+        <param argument="--fraction" type="float" value="0.5" min="0" max="0.99" label="fraction"/>
+    </xml>
+    <!-- Command-line fragment passing the range and fraction inputs defined by the custom_settings macro. -->
+    <token name="@CUSTOM_SETTINGS@"><![CDATA[
+      --range $range
+      --fraction $fraction
+]]></token>
+    <!-- Boolean switches for add_names; each renders as its command-line flag when checked and as nothing otherwise. -->
+    <xml name="add_names_options">
+        <param argument="--only_official" type="boolean" truevalue="--only_official" falsevalue="" checked="true" 
+            label="Only output official level names."/>
+        <param argument="--exclude_scores" type="boolean" truevalue="--exclude_scores" falsevalue="" checked="false" 
+            label="Exclude bit-score support scores in the lineage."/>
+    </xml>
+    <!-- Command-line fragment emitting the two add_names switches. -->
+    <token name="@ADD_NAMES_OPTIONS@"><![CDATA[
+    $only_official $exclude_scores 
+]]></token>
+    <!-- Selects which result tables get taxonomic names added; every choice other than "no" exposes the add_names switches. -->
+    <xml name="add_names">
+        <conditional name="names">
+            <param name="add_names" type="select" label="add_names">
+                <option value="no">No</option>
+                <option value="orf2lca">ORF2LCA.txt</option>
+                <option value="classification">classification.txt</option>
+                <option value="both">ORF2LCA.txt and classification.txt</option>
+            </param>
+            <when value="no"/>
+            <when value="orf2lca">
+                <expand macro="add_names_options"/>
+            </when>
+            <when value="classification">
+                <expand macro="add_names_options"/>
+            </when>
+            <when value="both">
+                <expand macro="add_names_options"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Shell fragment that runs CAT add_names on the classification and/or ORF2LCA table and passes each
+         result through the TXT2TSV script. Input names follow the from_work_dir files of the outputs macro,
+         and each TXT2TSV input is the file written by the preceding add_names step. -->
+    <token name="@ADD_NAMES@"><![CDATA[
+    #if $names.add_names in ['classification','both']:
+        && CAT add_names $names.only_official $names.exclude_scores
+            @CAT_TAXONOMY@
+            #if $bcat == 'CAT'
+                -i cat_output.contig2classification.tsv
+            #else
+                -i cat_output.bin2classification.tsv
+            #end if
+            -o classification_names.txt
+        && @TXT2TSV@ -i classification_names.txt -o '$classification_names'
+    #end if
+    #if $names.add_names in ['orf2lca','both']:
+        && CAT add_names $names.only_official $names.exclude_scores
+            @CAT_TAXONOMY@
+            -i cat_output.ORF2LCA.tsv
+            -o orf2lca_names.txt
+        && @TXT2TSV@ -i orf2lca_names.txt -o '$orf2lca_names'
+    #end if
+]]></token>
+    <!-- Selects whether a summary of the classification table is produced. -->
+    <xml name="summarise">
+        <param name="summarise" type="select" label="summarise">
+            <option value="no">No</option>
+            <option value="classification">classification.txt</option>
+        </param>
+    </xml>
+    <!-- Shell fragment that runs CAT summarise. It reuses the named classification table when that was
+         produced with official names only; otherwise it first writes one to a quoted work file name. -->
+    <token name="@SUMMARISE@"><![CDATA[
+    #if $summarise in ['classification']:
+        #if $names.add_names in ['classification','both'] and $names.only_official:
+            #set $summary_input = $classification_names
+        #else
+            #set $summary_input = 'classification_official_names.txt'
+            && CAT add_names --only_official
+                @CAT_TAXONOMY@
+                #if $bcat == 'CAT'
+                    -i cat_output.contig2classification.tsv
+                #else
+                    -i cat_output.bin2classification.tsv
+                #end if
+                -o '$summary_input'
+        #end if
+        && CAT summarise
+            #if $bcat == 'CAT'
+                -c '$contigs_fasta'
+            #end if
+            -i '$summary_input'
+            -o classification_summary.txt
+        && @TXT2TSV@ -i classification_summary.txt -o '$classification_summary'
+    #end if
+]]></token>
+    
+    <!-- Multi-select of result files to keep; the yield lets the CAT and BAT variants add their classification option. -->
+    <xml name="select_outputs">
+        <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs">
+            <option value="log" selected="true">CAT.log</option>
+            <option value="predicted_proteins_faa" selected="true">predicted_proteins.faa</option>
+            <option value="predicted_proteins_gff">predicted_proteins.gff</option>
+            <option value="alignment_diamond">alignment.diamond</option>
+            <option value="orf2lca" selected="true">ORF2LCA.txt</option>
+            <yield/>
+        </param>
+    </xml>
+    <!-- CAT variant: sets the hidden bcat and seqtype values to CAT and contig and adds the contig2classification option. -->
+    <xml name="select_cat_outputs">
+        <param name="bcat" type="hidden" value="CAT"/>
+        <param name="seqtype" type="hidden" value="contig"/>
+        <expand macro="select_outputs">
+            <option value="contig2classification" selected="true">contig2classification.txt</option>
+        </expand>
+    </xml>
+    <!-- BAT variant: sets the hidden bcat and seqtype values to BAT and bin and adds the bin2classification option. -->
+    <xml name="select_bat_outputs">
+        <param name="bcat" type="hidden" value="BAT"/>
+        <param name="seqtype" type="hidden" value="bin"/>
+        <expand macro="select_outputs">
+            <option value="bin2classification" selected="true">bin2classification.txt</option>
+        </expand>
+    </xml>
+    
+    <!-- Invocation of the tabpad.py helper shipped in the tool directory. It is run through python with a
+         quoted path, so it needs no executable bit and tolerates spaces in the directory name. -->
+    <token name="@TXT2TSV@"><![CDATA[
+    python '$__tool_directory__/tabpad.py'
+]]></token>
+    <!-- Output datasets shared by the CAT and BAT tools. Each dataset is kept only when its filter holds.
+         The add_names filters index the names conditional as a dictionary and list the accepted values,
+         so that choosing "both" yields both named tables. -->
+    <xml name="outputs">
+        <data name="log" format="txt" label="${bcat}.log" from_work_dir="cat_output.log">
+            <filter>'log' in select_outputs or not select_outputs</filter>
+        </data>
+        <data name="predicted_proteins_faa" format="fasta" label="${bcat}.predicted_proteins.faa" from_work_dir="cat_output.predicted_proteins.faa">
+            <filter>'predicted_proteins_faa' in select_outputs</filter>
+        </data>
+        <data name="predicted_proteins_gff" format="gff" label="${bcat}.predicted_proteins.gff" from_work_dir="cat_output.predicted_proteins.gff">
+            <filter>'predicted_proteins_gff' in select_outputs</filter>
+        </data>
+        <data name="alignment_diamond" format="tabular" label="${bcat}.alignment.diamond" from_work_dir="cat_output.alignment.diamond">
+            <filter>'alignment_diamond' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" />
+            </actions>
+        </data>
+        <data name="orf2lca" format="tabular" label="${bcat}.ORF2LCA.txt" from_work_dir="cat_output.ORF2LCA.tsv">
+            <filter>'orf2lca' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="ORF,lineage,bit-score" />
+            </actions>
+        </data>
+        <data name="contig2classification" format="tabular" label="${bcat}.contig2classification.txt" from_work_dir="cat_output.contig2classification.tsv">
+            <filter>'contig2classification' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="contig,classification,reason,lineage,lineage scores" />
+            </actions>
+        </data>
+        <data name="bin2classification" format="tabular" label="${bcat}.bin2classification.txt" from_work_dir="cat_output.bin2classification.tsv">
+            <filter>'bin2classification' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="bin,classification,reason,lineage,lineage scores" />
+            </actions>
+        </data>
+        <data name="orf2lca_names" format="tabular" label="${bcat}.ORF2LCA.names.txt">
+            <filter>names['add_names'] in ('orf2lca', 'both')</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="ORF,lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" />
+            </actions>
+        </data>
+        <data name="classification_names" format="tabular" label="${bcat}.${seqtype}2classification.names.txt">
+            <filter>names['add_names'] in ('classification', 'both')</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species" />
+            </actions>
+        </data>
+        <data name="classification_summary" format="tabular" label="${bcat}.${seqtype}2classification.summary.txt">
+            <filter>'classification' in summarise</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="rank,clade,number of contigs,number of ORFs,number of positions" />
+            </actions>
+        </data>
+    </xml>
+    <!-- Help text shared by the tools: a pointer to the CAT project page. -->
+    <token name="@COMMON_HELP@"><![CDATA[
+The CAT/BAT workflow is described at: https://github.com/dutilh/CAT    
+]]></token>
+    <!-- Citations for CAT and BAT. A doi citation holds the bare DOI, not a resolver URL;
+         the yield lets an expanding tool append further citations. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/072868</citation>
+            <citation type="doi">10.1186/s13059-019-1817-x</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Sun Nov 24 21:54:57 2019 -0500
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<data_managers>
+  <!-- Registers the CAT data manager tool and describes how its JSON output fills the cat_database table:
+       each folder is moved below GALAXY_DATA_MANAGER_DATA_PATH/CAT and recorded by absolute path. -->
+  <data_manager tool_file="data_manager/data_manager_cat.xml" id="data_manager_cat" >
+    <data_table name="cat_database">  <!-- Defines a Data Table to be modified. -->
+      <output> <!-- Handle the output of the Data Manager Tool -->
+        <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="database_folder" output_ref="out_file" >
+          <move type="directory" relativize_symlinks="True">
+            <source>${database_folder}</source>
+            <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">CAT/${database_folder}</target>
+          </move>
+          <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/CAT/${database_folder}</value_translation>
+          <value_translation type="function">abspath</value_translation>
+        </column>
+        <column name="taxonomy_folder" output_ref="out_file" >
+          <move type="directory" relativize_symlinks="True">
+            <source>${taxonomy_folder}</source>
+            <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">CAT/${taxonomy_folder}</target>
+          </move>
+          <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/CAT/${taxonomy_folder}</value_translation>
+          <value_translation type="function">abspath</value_translation>
+        </column>
+      </output>
+    </data_table>
+  </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/cat_database.loc.sample	Sun Nov 24 21:54:57 2019 -0500
@@ -0,0 +1,7 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+#  2019-07-19.CAT_prepare.fresh.log
+#  2019-07-19_CAT_database
+#  2019-07-19_taxonomy
+#value  name    database_folder	taxonomy_folder
+#2019-07-19_CAT_database	2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sun Nov 24 21:54:57 2019 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of CAT databases: each row of the loc file gives a value, a display name,
+         and the database and taxonomy folders; lines starting with # are comments. -->
+    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, database_folder, taxonomy_folder</columns>
+        <file path="tool-data/cat_database.loc" />
+    </table>
+</tables>