diff kofamscan.xml @ 0:24adf43898ec draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kofamscan commit dc0dc99c95658757036bbe7decfd241856e2f412"
author iuc
date Fri, 15 Jan 2021 10:48:52 +0000
parents
children 23494763e39e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kofamscan.xml	Fri Jan 15 10:48:52 2021 +0000
@@ -0,0 +1,246 @@
+<tool id="kofamscan" name="KofamScan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>gene function annotation based on KEGG orthology and HMM</description>
+    <macros>
+        <!-- Version tokens: combined into the tool version attribute; the tool version also pins the kofamscan requirement. -->
+        <token name="@TOOL_VERSION@">1.3.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <!-- Reusable checkbox for the report-unannotated switch. The "selected" token given at
+             expansion time sets the initial checked state; it defaults to an empty string. -->
+        <xml name="reportannotation" token_selected="">
+            <param name="reportannotation"
+                   type="boolean"
+                   checked="@SELECTED@"
+                   truevalue="--report-unannotated"
+                   falsevalue="--no-report-unannotated"
+                   label="Include sequence name to outputs even if no KOs are assigned?"/>
+        </xml>
+    </macros>
+    <requirements>
+        <!-- The annotation tool itself, pinned to the wrapper's tool version token. -->
+        <requirement version="@TOOL_VERSION@" type="package">kofamscan</requirement>
+        <!-- zip is used after the run to pack the alignment directory into one archive. -->
+        <requirement version="3.0" type="package">zip</requirement>
+    </requirements>
+    <!-- Report the version of the wrapped executable (exec_annotation from the kofamscan package).
+         NOTE(review): confirm that exec_annotation accepts the long version flag. -->
+    <version_command><![CDATA[exec_annotation --version]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## preprocessing
+## All profiles are gathered in one working directory that is handed to exec_annotation via -p.
+mkdir 'profile' &&
+#if $p_cond.p_sel == 'compressed'
+    ## unpack the selected archive into the profile directory
+    tar -xf '${p_cond.p}' -C 'profile' &&
+#elif $p_cond.p_sel == 'hmm'
+    ## input files require prefix 'K' and file extension '.hmm'
+    ## each selected dataset is linked as K<index>.hmm, numbered from 0 in selection order
+    #for $i, $current in enumerate($p_cond.p)
+        ln -s '$current' 'profile/K${i}.hmm' &&
+    #end for
+#end if
+
+## run
+exec_annotation
+-p 'profile'
+-o 'result.txt'
+-k '$k'
+--cpu \${GALAXY_SLOTS:-4}
+-E $E
+-T $ap.T
+-f '$ap.f_cond.f_sel'
+$ap.f_cond.reportannotation
+#if 'alignments' in $ap.out
+    --create-alignment
+#end if
+'$query'
+
+## postprocessing
+#if 'alignments' in $ap.out
+    ## The archive step is grouped in a subshell so that the 'No alignment files.'
+    ## fallback applies to it alone. Ungrouped, the trailing '||' would also catch
+    ## a failed exec_annotation and turn its non-zero exit status into success,
+    ## hiding the error from the exit-code based error detection.
+    && ( test -d 'tmp/alignment' && zip -q -r 'tmp/alignments.zip' tmp/alignment/* || echo 'No alignment files.' )
+#end if
+    ]]></command>
+    <inputs>
+        <!-- Query sequences; passed to exec_annotation as its positional argument. -->
+        <param name="query" type="data" format="fasta" label="Select query sequence file" help="Nucleotide sequences are not accepted."/>
+        <!-- Profile database: either one tar archive that the command unpacks into the profile
+             directory, or one or more HMM files that the command links into it. -->
+        <conditional name="p_cond">
+            <param name="p_sel" type="select" label="Select profile database format">
+                <option value="compressed" selected="true">Compressed set of HMM and HAL file(s)</option>
+                <option value="hmm">HMM file(s)</option>
+            </param>
+            <when value="compressed">
+                <param argument="-p" type="data" format="tar" label="Select a compressed file with HMM and HAL file(s)" help="Compressed archives are available from KofamKOALA web service (https://www.genome.jp/tools/kofamkoala/)."/>
+            </when>
+            <when value="hmm">
+                <param argument="-p" type="data" format="hmm3" multiple="true" label="Select profile HMM file(s)"/>
+            </when>
+        </conditional>
+        <param argument="-k" type="data" format="tabular" label="Select KO list file"/>
+        <param argument="-E" type="float" min="0.0" max="1.0" value="0.01" label="Set E-value threshold"/>
+        <section name="ap" title="Advanced parameters" expanded="true">
+            <param argument="-T" type="integer" value="1" label="Set threshold scale" help="The score thresholds will be multiplied by this value."/>
+            <!-- The option values are passed verbatim to -f. The branches differ only in the initial
+                 state of the report-unannotated checkbox: unchecked for the detail formats,
+                 checked for the mapper formats. -->
+            <conditional name="f_cond">
+                <param name="f_sel" type="select" label="Select output format">
+                    <option value="detail" selected="true">Details for each hit (including hits below threshold) (detail)</option>
+                    <option value="detail-tsv">Tab separated values for detail format (detail-tsv)</option>
+                    <option value="mapper">KEGG Mapper compatible format (mapper)</option>
+                    <option value="mapper-one-line">KEGG Mapper compatible format, but all hit KOs are listed in one line (mapper-one-line)</option>
+                </param>
+                <when value="detail">
+                    <expand macro="reportannotation" selected="false"/>
+                </when>
+                <when value="detail-tsv">
+                    <expand macro="reportannotation" selected="false"/>
+                </when>
+                <when value="mapper">
+                    <expand macro="reportannotation" selected="true"/>
+                </when>
+                <when value="mapper-one-line">
+                    <expand macro="reportannotation" selected="true"/>
+                </when>
+            </conditional>
+            <!-- Each selected value enables the output dataset filtered on it; selecting
+                 "alignments" additionally adds the create-alignment switch to the command line. -->
+            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)" help="Only shown in history if selected here and generated by the specific run.">
+                <option value="result" selected="true">KofamScan Result</option>
+                <option value="alignments">HMMER alignments</option>
+                <option value="output">HMMER run output</option>
+                <option value="tabular">HMMER tabular summary</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <!-- Each dataset is picked up from the job working directory and is only offered when its
+             value was chosen in the "Select output file(s)" parameter of the advanced section. -->
+        <data name="out_alignments"
+              format="zip"
+              label="${tool.name} on ${on_string}: HMMER alignments"
+              from_work_dir="tmp/alignments.zip">
+            <filter>'alignments' in ap['out']</filter>
+        </data>
+        <data name="out_output"
+              format="txt"
+              label="${tool.name} on ${on_string}: HMMER output"
+              from_work_dir="tmp/output/output.txt">
+            <filter>'output' in ap['out']</filter>
+        </data>
+        <data name="out_result"
+              format="txt"
+              label="${tool.name} on ${on_string}: Results"
+              from_work_dir="result.txt">
+            <filter>'result' in ap['out']</filter>
+        </data>
+        <data name="out_tabular"
+              format="txt"
+              label="${tool.name} on ${on_string}: HMMER tabular"
+              from_work_dir="tmp/tabular/tabular.txt">
+            <filter>'tabular' in ap['out']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- #1 default: compressed profile archive, all other parameters left at their defaults, result output only -->
+        <test expect_num_outputs="1">
+            <param name="query" value="query.fasta"/>
+            <conditional name="p_cond">
+                <param name="p_sel" value="compressed"/>
+                <param name="p" value="profiles.tar.gz"/>
+            </conditional>
+            <param name="k" value="ko"/>
+            <output name="out_result">
+                <assert_contents>
+                    <has_n_lines n="5"/>
+                    <has_text_matching expression=".+sp\|P00329\|ADH1_MOUSE.+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 three individual HMM profiles, custom E-value and threshold scale, detail-tsv format
+             with unannotated sequences reported, all four outputs requested -->
+        <test expect_num_outputs="4">
+            <param name="query" value="query.fasta"/>
+            <conditional name="p_cond">
+                <param name="p_sel" value="hmm"/>
+                <param name="p" value="K00001.hmm,K00002.hmm,K00003.hmm"/>
+            </conditional>
+            <param name="k" value="ko"/>
+            <param name="E" value="0.02"/>
+            <section name="ap">
+                <param name="T" value="2"/>
+                <conditional name="f_cond">
+                    <param name="f_sel" value="detail-tsv"/>
+                    <param name="reportannotation" value="true"/>
+                </conditional>
+                <param name="out" value="alignments,output,result,tabular"/>
+            </section>
+            <output name="out_alignments">
+                <assert_contents>
+                    <has_size value="4099"/>
+                </assert_contents>
+            </output>
+            <output name="out_output">
+                <assert_contents>
+                    <has_n_lines n="224"/>
+                    <has_line line="Internal pipeline statistics summary:"/>
+                </assert_contents>
+            </output>
+            <output name="out_result">
+                <assert_contents>
+                    <has_n_lines n="9"/>
+                    <has_text_matching expression=".+sp\|P19858\|LDHA_BOVIN"/>
+                </assert_contents>
+            </output>
+            <output name="out_tabular">
+                <assert_contents>
+                    <has_n_lines n="48"/>
+                    <has_line line="K1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #3 three individual HMM profiles, mapper format, result output only -->
+        <test expect_num_outputs="1">
+            <param name="query" value="query.fasta"/>
+            <conditional name="p_cond">
+                <param name="p_sel" value="hmm"/>
+                <param name="p" value="K00001.hmm,K00002.hmm,K00003.hmm"/>
+            </conditional>
+            <param name="k" value="ko"/>
+            <section name="ap">
+                <conditional name="f_cond">
+                    <param name="f_sel" value="mapper"/>
+                </conditional>
+            </section>
+            <output name="out_result">
+                <assert_contents>
+                    <has_n_lines n="7"/>
+                    <has_line line="sp|P19858|LDHA_BOVIN"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #4 three individual HMM profiles, mapper-one-line format, result output only -->
+        <test expect_num_outputs="1">
+            <param name="query" value="query.fasta"/>
+            <conditional name="p_cond">
+                <param name="p_sel" value="hmm"/>
+                <param name="p" value="K00001.hmm,K00002.hmm,K00003.hmm"/>
+            </conditional>
+            <param name="k" value="ko"/>
+            <section name="ap">
+                <conditional name="f_cond">
+                    <param name="f_sel" value="mapper-one-line"/>
+                </conditional>
+            </section>
+            <output name="out_result">
+                <assert_contents>
+                    <has_n_lines n="7"/>
+                    <has_line line="sp|P19858|LDHA_BOVIN"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+KofamScan is a gene function annotation tool based on KEGG Orthology and hidden Markov models.
+
+KofamScan assigns K numbers to the user's sequence data by HMMER/HMMSEARCH against KOfam (a customized HMM database of KEGG Orthologs (KOs)). K number assignments with scores above the predefined thresholds for individual KOs are more reliable than other proposed assignments. Such high score assignments are highlighted with asterisks '*' in the output. The K number assignments facilitate the interpretation of the annotation results by linking the user's sequence data to the KEGG pathways and EC numbers.
+
+**Input**
+
+- a query file in FASTA format with one or more amino acid sequences. Each sequence must have a unique name. A name of a sequence is a string between the header symbol (">") and the first blank character (whitespace, tab, line break, etc.). Do not put a whitespace right after ">".
+
+    ::
+
+        >sp|P00325|ADH1B_HUMAN Alcohol dehydrogenase 1B OS=Homo sapiens GN=ADH1B PE=1 SV=2
+        MSTAGKVIKCKAAVLWEVKKPFSIEDVEVAPPKAYEVRIKMVAVGICRTDDHVVSGNLVT
+
+- a KO list file of KOfam
+
+    ::
+
+        knum    threshold   score_type  profile_type    F-measure   nseq    nseq_used   alen    mlen    eff_nseq    re/pos  definition
+        K00001  361.33  domain  trim    0.326825    1601    1149    1538    393 13.33   0.590   alcohol dehydrogenase [EC:1.1.1.1]
+
+- KOfam profile files in HMM3 format or a compressed dataset containing HMM3 profiles and HAL filtering files available `here <ftp://ftp.genome.jp/pub/db/kofam/>`_.
+
+**Output**
+
+- KofamScan output
+- HMMER results as alignments, run output and tabular summary
+
+.. class:: infomark
+
+**References**
+
+More information is available on `GitHub <https://github.com/takaram/kofam_scan>`_ and the `KofamKOALA webserver <https://www.genome.jp/tools/kofamkoala/>`_.
+    ]]></help>
+    <citations>
+        <!-- Publication to cite for this tool, referenced by its DOI. -->
+        <citation type="doi">10.1093/bioinformatics/btz859</citation>
+    </citations>
+</tool>
\ No newline at end of file