changeset 0:e7d2c4ed18a5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
author iuc
date Tue, 22 Apr 2025 14:03:27 +0000
parents
children eedcb4cc9e5a
files kaiju2table.xml macros.xml test-data/kaiju-taxnames.out test-data/kaiju-test-db/database.fmi test-data/kaiju-test-db/names.dmp test-data/kaiju-test-db/nodes.dmp test-data/kaiju.loc test-data/kaiju.out test-data/kaiju2krona.out test-data/kaiju2table.out test-data/kaijux.out test-data/query.fa tool-data/kaiju.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 15 files changed, 310 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kaiju2table.xml	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,118 @@
+<tool id="kaiju_kaiju2table" name="kaiju2table" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+
+        ## Link every input under a shell-safe name derived from its element
+        ## identifier; kaiju2table reports this name in the "file" column.
+        #for $kaiju_table in $kaiju_tables
+            ln -s '$kaiju_table' #echo re.sub('[^\w\-_]', '_', str($kaiju_table.element_identifier))
+            &&
+        #end for
+
+        kaiju2table
+            -t '$reference.fields.path'/nodes.dmp
+            -n '$reference.fields.path'/names.dmp
+            -r $rank
+            -o '$kaiju_summary'
+            #for $kaiju_table in $kaiju_tables
+                #echo re.sub('[^\w\-_]', '_', str($kaiju_table.element_identifier))
+            #end for
+            #if str($optional.m)
+                -m $optional.m
+            #end if
+            ## the absolute read-count threshold belongs to -c, not -m
+            #if str($optional.c)
+                -c $optional.c
+            #end if
+            $optional.e
+            $optional.u
+            ## kaiju2table expects the ranks for -l as one comma-separated list
+            #if $optional.l
+                -l #echo ",".join($optional.l)
+            #end if
+    ]]></command>
+    <inputs>
+        <param name="kaiju_tables" type="data" format="tabular" multiple="true" optional="false" label="kaiju output tables"/>
+        <expand macro="reference"/>
+        <param argument="-r" name="rank" type="select" label="rank">
+            <option value="phylum">phylum</option>
+            <option value="class">class</option>
+            <option value="order">order</option>
+            <option value="family">family</option>
+            <option value="genus">genus</option>
+            <option value="species">species</option>
+        </param>
+        <section name="optional" title="Optional arguments" expanded="false">
+            <param argument="-m" type="float" min="0" max="100" optional="true" value="" label="Minimum reporting percentage" help="Can not be combined with -c" />
+            <param argument="-c" type="integer" min="1" optional="true" value="" label="Minimum required number of reads" help="Can not be combined with -m" />
+            <param argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="false" label="Expand viruses" help="which are always shown as full taxon path and read counts are not summarized in higher taxonomic levels" />
+            <param argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Do not count unclassified reads" help="Disables counting unclassified reads towards the total number of reads when calculating percentages."/>
+            <expand macro="l"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="kaiju_summary" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="kaiju_tables" value="kaiju.out"/>
+            <param name="reference" value="test"/>
+            <param name="rank" value="phylum"/>
+            <output name="kaiju_summary" value="kaiju2table.out"/>
+        </test>
+        <test>
+            <param name="kaiju_tables" value="kaiju.out"/>
+            <param name="reference" value="test"/>
+            <param name="rank" value="order"/>
+            <section name="optional">
+                <param name="e" value="true"/>
+            </section>
+            <output name="kaiju_summary" value="kaiju2table.out" lines_diff="4">
+                <assert_contents>
+                    <has_text text="Severe acute respiratory syndrome coronavirus 2"/>
+                    <has_text text="cannot be assigned to a (non-viral) order"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <!-- regression test: the read-count threshold is emitted as -c and the -l ranks are comma-separated -->
+            <param name="kaiju_tables" value="kaiju.out"/>
+            <param name="reference" value="test"/>
+            <param name="rank" value="phylum"/>
+            <section name="optional">
+                <param name="c" value="1"/>
+                <param name="l" value="realm,kingdom,phylum"/>
+            </section>
+            <assert_command>
+                <has_text text="-c 1"/>
+                <has_text text="-l realm,kingdom,phylum"/>
+            </assert_command>
+            <output name="kaiju_summary">
+                <assert_contents>
+                    <has_text text="taxon_name"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+The program kaiju2table converts Kaiju's output file(s) into a summary table for a given taxonomic rank, e.g., genus.
+It uses the taxonomic information of the reference data for mapping the taxon identifiers from the third column in the
+Kaiju output to the corresponding taxon names.
+
+The program can also filter out taxa with low abundances, e.g. for only showing genera that comprise at least 1 percent of the total reads.
+Similarly, a threshold on the absolute read count can be given.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,103 @@
+<macros>
+    <!-- version/profile tokens substituted into the tool wrappers that import this file -->
+    <token name="@TOOL_VERSION@">1.10.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">23.2</token>
+
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">kaiju</xref>
+        </xrefs>
+    </xml>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/ncomms11257</citation>
+        </citations>
+    </xml>
+
+    <!-- reference database selector filled from the "kaiju" data table; validation fails when the table is empty -->
+    <xml name="reference">
+        <param name="reference" type="select" label="kaiju reference data">
+            <options from_data_table="kaiju"/>
+            <validator type="no_options" message="No reference data installed. Ask your Galaxy admin."/>
+        </param>
+    </xml>
+
+    <!-- rank selector for -r; the expanding tool supplies optional="true|false" through the "optional" token -->
+    <xml name="r" tokens="optional">
+        <param argument="-r" name="rank" type="select" optional="@OPTIONAL@" label="rank">
+            <option value="phylum">phylum</option>
+            <option value="class">class</option>
+            <option value="order">order</option>
+            <option value="family">family</option>
+            <option value="genus">genus</option>
+            <option value="species">species</option>
+        </param>
+    </xml>
+
+    <!-- multi-select of the ranks passed to -l.
+         NOTE(review): multi-word values use underscores (e.g. species_group) while nodes.dmp spells
+         multi-word ranks with a space; confirm these values match what kaiju compares against. -->
+    <xml name="l">
+        <param argument="-l" type="select" multiple="true" optional="true" label="Print taxon path containing the selected ranks">
+            <option value="domain">Domain</option>
+            <option value="realm">Realm</option>
+            <option value="kingdom">Kingdom</option>
+            <option value="subkingdom">Subkingdom</option>
+            <option value="superphylum">Superphylum</option>
+            <option value="phylum">Phylum</option>
+            <option value="subphylum">Subphylum</option>
+            <option value="infraphylum">Infraphylum</option>
+            <option value="superclass">Superclass</option>
+            <option value="class">Class</option>
+            <option value="subclass">Subclass</option>
+            <option value="infraclass">Infraclass</option>
+            <option value="cohort">Cohort</option>
+            <option value="subcohort">Subcohort</option>
+            <option value="superorder">Superorder</option>
+            <option value="order">Order</option>
+            <option value="suborder">Suborder</option>
+            <option value="infraorder">Infraorder</option>
+            <option value="parvorder">Parvorder</option>
+            <option value="superfamily">Superfamily</option>
+            <option value="family">Family</option>
+            <option value="subfamily">Subfamily</option>
+            <option value="tribe">Tribe</option>
+            <option value="subtribe">Subtribe</option>
+            <option value="genus">Genus</option>
+            <option value="subgenus">Subgenus</option>
+            <option value="species_group">Species Group</option>
+            <option value="species_subgroup">Species Subgroup</option>
+            <option value="species">Species</option>
+            <option value="subspecies">Subspecies</option>
+            <option value="forma_specialis">Forma Specialis</option>
+            <option value="varietas">Variety</option>
+            <option value="subvariety">Subvariety</option>
+            <option value="forma">Form</option>
+            <option value="section">Section</option>
+            <option value="subsection">Subsection</option>
+            <option value="series">Series</option>
+            <option value="subseries">Subseries</option>
+            <option value="strain">Strain</option>
+            <option value="isolate">Isolate</option>
+            <option value="serogroup">Serogroup</option>
+            <option value="serotype">Serotype</option>
+            <option value="biotype">Biotype</option>
+            <option value="genotype">Genotype</option>
+            <option value="morph">Morph</option>
+            <option value="pathogroup">Pathogroup</option>
+        </param>
+    </xml>
+
+    <!-- boolean flag that renders as "-u" when checked and as nothing otherwise -->
+    <xml name="u">
+        <param argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Do not count unclassified reads" help="Disables counting unclassified reads towards the total number of reads when calculating percentages."/>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-taxnames.out	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,1 @@
+C	testseq	2697049	Severe acute respiratory syndrome coronavirus 2
Binary file test-data/kaiju-test-db/database.fmi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-test-db/names.dmp	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,53 @@
+10239	|	Vira	|		|	synonym	|
+10239	|	Viridae	|		|	synonym	|
+10239	|	viruses	|		|	blast name	|
+10239	|	Viruses	|		|	scientific name	|
+11118	|	Coronaviridae	|		|	scientific name	|
+58346	|	CCUG 11118	|	CCUG 11118 <type strain>	|	type material	|
+76804	|	Nidovirales	|		|	scientific name	|
+79912	|	NCIMB 11118	|	NCIMB 11118 <type strain>	|	type material	|
+153721	|	DSM 11118	|	DSM 11118 <type strain>	|	type material	|
+457096	|	MICH 11118	|	MICH 11118 <holotype>	|	type material	|
+457096	|	MICH-11118	|	MICH-11118 <holotype>	|	type material	|
+457096	|	MICH:11118	|	MICH:11118 <holotype>	|	type material	|
+557004	|	Matayba cf. opaca Acevedo 11118	|		|	scientific name	|
+569542	|	Nocardia sp. 11118	|		|	scientific name	|
+652290	|	Sordariomycetes sp. 11118	|		|	scientific name	|
+682956	|	CGMCC 1.10239	|	CGMCC 1.10239 <type strain>	|	type material	|
+694002	|	Betacoronavirus	|		|	scientific name	|
+694002	|	Coronavirus	|	Coronavirus <Betacoronavirus>	|	in-part	|
+694002	|	Coronavirus group 2	|		|	equivalent name	|
+694002	|	Group 2 species	|		|	equivalent name	|
+694009	|	HCoV-SARS	|		|	acronym	|
+694009	|	Human coronavirus (strain SARS)	|		|	equivalent name	|
+694009	|	SARS	|		|	acronym	|
+694009	|	SARS-like coronavirus	|		|	equivalent name	|
+694009	|	SARSr-CoV	|		|	acronym	|
+694009	|	SARSrCoV	|		|	acronym	|
+694009	|	SARS-related coronavirus	|		|	equivalent name	|
+694009	|	Severe acute respiratory syndrome-related coronavirus	|		|	scientific name	|
+926565	|	Sporocytophaga myxococcoides DSM 11118	|		|	scientific name	|
+2268389	|	Arora 11118	|	Arora 11118 <type material>	|	type material	|
+2499399	|	Cornidovirineae	|		|	scientific name	|
+2501931	|	Orthocoronavirinae	|		|	scientific name	|
+2509511	|	Sarbecovirus	|		|	scientific name	|
+2559587	|	Riboviria	|		|	scientific name	|
+2559587	|	RNA viruses and retroviruses	|		|	genbank common name	|
+2559587	|	RNA viruses and viroids	|		|	common name	|
+2559587	|	RNA viruses	|		|	common name	|
+2697049	|	2019-nCoV	|		|	equivalent name	|
+2697049	|	COVID-19 virus	|		|	equivalent name	|
+2697049	|	HCoV-19	|		|	equivalent name	|
+2697049	|	Human coronavirus 2019	|		|	equivalent name	|
+2697049	|	SARS-2	|		|	equivalent name	|
+2697049	|	SARS2	|		|	equivalent name	|
+2697049	|	SARS-CoV-2	|		|	acronym	|
+2697049	|	SARS-CoV2	|		|	equivalent name	|
+2697049	|	Severe acute respiratory syndrome coronavirus 2	|		|	scientific name	|
+2705539	|	CLZhao 10239	|	CLZhao 10239 <holotype>	|	type material	|
+2732396	|	Orthornavirae	|		|	scientific name	|
+2732408	|	Pisuviricota	|		|	scientific name	|
+2732506	|	Pisoniviricetes	|		|	scientific name	|
+3093623	|	FAKU:10239	|	FAKU:10239 <paratype>	|	type material	|
+1	|	all	|		|	synonym	|
+1	|	root	|		|	scientific name	|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-test-db/nodes.dmp	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,14 @@
+1	|	1	|	no rank	|		|	8	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
+2697049	|	694009	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+694009	|	2509511	|	species	|	SA	|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant; specified	|
+2509511	|	694002	|	subgenus	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+694002	|	2501931	|	genus	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+2501931	|	11118	|	subfamily	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+11118	|	2499399	|	family	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+2499399	|	76804	|	suborder	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+76804	|	2732506	|	order	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+2732506	|	2732408	|	class	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+2732408	|	2732396	|	phylum	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+2732396	|	2559587	|	kingdom	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+2559587	|	10239	|	realm	|	RX	|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+10239	|	1	|	acellular root	|		|	9	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju.loc	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,1 @@
+test	test name	${__HERE__}/kaiju-test-db/	1.0
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju.out	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,1 @@
+C	testseq	2697049
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju2krona.out	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,1 @@
+1	root	Viruses	Riboviria	Orthornavirae	Pisuviricota	Pisoniviricetes	Nidovirales	Cornidovirineae	Coronaviridae	Orthocoronavirinae	Betacoronavirus	Sarbecovirus	Severe acute respiratory syndrome-related coronavirus	Severe acute respiratory syndrome coronavirus 2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju2table.out	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,4 @@
+file	percent	reads	taxon_id	taxon_name
+kaiju_out	100.000000	1	10239	Viruses
+kaiju_out	0.000000	0	NA	cannot be assigned to a (non-viral) phylum
+kaiju_out	0.000000	0	NA	unclassified
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaijux.out	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,1 @@
+C	testseq	1918	YP_009725295.1_2697049,YP_009724389.1_2697049,	RSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDG,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.fa	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,26 @@
+>testseq
+AATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA
+TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT
+ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG
+CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC
+AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA
+TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG
+CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA
+TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC
+TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG
+GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT
+TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA
+TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT
+ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC
+TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC
+TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT
+GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG
+GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT
+GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA
+GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA
+TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC
+AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT
+GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT
+AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT
+AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG
+AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/kaiju.loc.sample	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,5 @@
+# id: db name + date
+# name: what is shown to the user in the select
+# path: directory of the reference data; it must contain database.fmi, names.dmp and nodes.dmp
+# version: version used for constructing the DB (or just the current version at the time when pre-computed indices were downloaded)
+#id	name	path	version
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- kaiju reference data; rows are read from tool-data/kaiju.loc, lines starting with # are ignored -->
+    <table name="kaiju" allow_duplicate_entries="False" comment_char="#">
+        <columns>value, name, path, version</columns>
+        <file path="tool-data/kaiju.loc"/>
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- test variant of the kaiju table: points at the bundled test-data/kaiju.loc via the ${__HERE__} placeholder -->
+    <table name="kaiju" comment_char="#">
+        <columns>value, name, path, version</columns>
+        <file path="${__HERE__}/test-data/kaiju.loc"/>
+    </table>
+</tables>