changeset 0:cdc8a98ed4fc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
author iuc
date Tue, 22 Apr 2025 14:03:00 +0000
parents
children 01585d091036
files kaiju-mergeOutputs.xml macros.xml test-data/kaiju-taxnames.out test-data/kaiju-test-db/database.fmi test-data/kaiju-test-db/names.dmp test-data/kaiju-test-db/nodes.dmp test-data/kaiju.loc test-data/kaiju.out test-data/kaiju2krona.out test-data/kaiju2table.out test-data/kaijux.out test-data/query.fa tool-data/kaiju.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 15 files changed, 281 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kaiju-mergeOutputs.xml	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,66 @@
+<tool id="kaiju_mergeoutputs" name="kaiju-mergeOutputs" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description></description> <!-- wrapper around kaiju-mergeOutputs: combines two classification tables into one -->
+    <macros>
+        <import>macros.xml</import>
+    </macros> <!-- the tokens and the xrefs/requirements/reference/citations macros used below come from macros.xml -->
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        sort -k2,2 '$kaiju_table'  > kaiju.out.sort &&
+        sort -k2,2 '$kraken_table' > kraken.out.sort &&
+        kaiju-mergeOutputs
+            -i kaiju.out.sort
+            -j kraken.out.sort
+            -o '$combined_out'
+            -c $optional.conflict.mode
+            #if $optional.conflict.mode in ["lca", "lowest"]
+                -t '$optional.conflict.reference.fields.path'/nodes.dmp
+            #end if
+            $optional.s
+            -v
+    ]]></command> <!-- both tables are sorted on column 2 before merging; -t (nodes.dmp of the selected reference) is only passed for the "lca" and "lowest" modes -->
+    <inputs>
+        <param name="kaiju_table" type="data" format="tabular" label="kaiju output table"/>
+        <param name="kraken_table" type="data" format="tabular" label="Another output table" help="e.g. from kraken"/>
+        <section name="optional" title="Optional arguments" expanded="false">
+            <conditional name="conflict"> <!-- only the "lca" and "lowest" branches expose the reference data select; the command reads it as optional.conflict.reference -->
+                <param argument="-c" name="mode" type="select" label="Conflict resolution mode">
+                    <option value="1">use taxid from 1st input</option>
+                    <option value="2">use taxid from 2nd input</option>
+                    <option value="lca" selected="true">least common ancestor (LCA) of the two taxon IDs</option>
+                    <option value="lowest">lower rank of the two taxa is used if they are within the same lineage and LCA otherwise</option>
+                </param>
+                <when value="1"/>
+                <when value="2"/>
+                <when value="lca">
+                    <expand macro="reference"/>
+                </when>
+                <when value="lowest">
+                    <expand macro="reference"/>
+                </when>
+            </conditional>
+            <param argument="-s" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Use score" help="Use 4th column with classification score to give precedence to taxon with better score" />
+        </section>
+    </inputs>
+    <outputs>
+        <data name="combined_out" format="tabular"/>
+    </outputs>
+    <tests>
+        <test> <!-- merges kaiju.out with itself using the default mode (lca); the result is expected to equal the input -->
+            <param name="kaiju_table" value="kaiju.out"/>
+            <param name="kraken_table" value="kaiju.out"/>
+            <output name="combined_out" value="kaiju.out"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+Merge two tab-separated output files in the column format used by Kaiju and Kraken (classification status, read name, NCBI taxon ID). Only the first three columns are used, unless *Use score* is enabled, in which case the classification score in the 4th column is read as well.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,96 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.10.1</token> <!-- kaiju version: pinned by the "requirements" macro and used as the base of the tool version -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended to the tool version as +galaxy<suffix> -->
+    <token name="@PROFILE@">23.2</token> <!-- value substituted into the tool's profile attribute -->
+
+    <xml name="xrefs"> <!-- cross-reference to the bio.tools entry "kaiju" -->
+        <xrefs>
+            <xref type="bio.tools">kaiju</xref>
+        </xrefs>
+    </xml>
+
+    <xml name="requirements"> <!-- declares the kaiju package dependency at version @TOOL_VERSION@ -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="citations"> <!-- DOI citation attached to every tool that expands this macro -->
+        <citations>
+            <citation type="doi">10.1038/ncomms11257</citation>
+        </citations>
+    </xml>
+
+    <xml name="reference"> <!-- select over the "kaiju" data table; validation fails with the message below when the table has no entries -->
+        <param name="reference" type="select" label="kaiju reference data">
+            <options from_data_table="kaiju"/>
+            <validator type="no_options" message="No reference data installed. Ask your Galaxy admin."/>
+        </param>
+    </xml>
+
+    <xml name="r" tokens="optional"> <!-- rank select; the expanding tool passes the "optional" token, which replaces @OPTIONAL@ below -->
+        <param argument="-r" name="rank" type="select" optional="@OPTIONAL@" label="rank">
+            <option value="phylum">phylum</option>
+            <option value="class">class</option>
+            <option value="order">order</option>
+            <option value="family">family</option>
+            <option value="genus">genus</option>
+            <option value="species">species</option>
+        </param>
+    </xml>
+
+    <xml name="l"> <!-- optional multi-select of taxonomic ranks for the -l argument -->
+        <param argument="-l" type="select" multiple="true" optional="true" label="Print taxon path containing the selected ranks">
+            <option value="domain">Domain</option>
+            <option value="realm">Realm</option>
+            <option value="kingdom">Kingdom</option>
+            <option value="subkingdom">Subkingdom</option>
+            <option value="superphylum">Superphylum</option>
+            <option value="phylum">Phylum</option>
+            <option value="subphylum">Subphylum</option>
+            <option value="infraphylum">Infraphylum</option>
+            <option value="superclass">Superclass</option>
+            <option value="class">Class</option>
+            <option value="subclass">Subclass</option>
+            <option value="infraclass">Infraclass</option>
+            <option value="cohort">Cohort</option>
+            <option value="subcohort">Subcohort</option>
+            <option value="superorder">Superorder</option>
+            <option value="order">Order</option>
+            <option value="suborder">Suborder</option>
+            <option value="infraorder">Infraorder</option>
+            <option value="parvorder">Parvorder</option>
+            <option value="superfamily">Superfamily</option>
+            <option value="family">Family</option>
+            <option value="subfamily">Subfamily</option>
+            <option value="tribe">Tribe</option>
+            <option value="subtribe">Subtribe</option>
+            <option value="genus">Genus</option>
+            <option value="subgenus">Subgenus</option>
+            <option value="species_group">Species Group</option>
+            <option value="species_subgroup">Species Subgroup</option>
+            <option value="species">Species</option>
+            <option value="subspecies">Subspecies</option>
+            <option value="forma_specialis">Forma Specialis</option>
+            <option value="varietas">Variety</option>
+            <option value="subvariety">Subvariety</option>
+            <option value="forma">Form</option>
+            <option value="section">Section</option>
+            <option value="subsection">Subsection</option>
+            <option value="series">Series</option>
+            <option value="subseries">Subseries</option>
+            <option value="strain">Strain</option>
+            <option value="isolate">Isolate</option>
+            <option value="serogroup">Serogroup</option>
+            <option value="serotype">Serotype</option>
+            <option value="biotype">Biotype</option>
+            <option value="genotype">Genotype</option>
+            <option value="morph">Morph</option>
+            <option value="pathogroup">Pathogroup</option>
+        </param>
+    </xml>
+
+    <xml name="u"> <!-- boolean flag whose value is "-u" when checked and empty otherwise -->
+        <param argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Do not count unclassified reads" help="Disables counting unclassified reads towards the total number of reads when calculating percentages."/>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-taxnames.out	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,1 @@
+C	testseq	2697049	Severe acute respiratory syndrome coronavirus 2
Binary file test-data/kaiju-test-db/database.fmi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-test-db/names.dmp	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,53 @@
+10239	|	Vira	|		|	synonym	|
+10239	|	Viridae	|		|	synonym	|
+10239	|	viruses	|		|	blast name	|
+10239	|	Viruses	|		|	scientific name	|
+11118	|	Coronaviridae	|		|	scientific name	|
+58346	|	CCUG 11118	|	CCUG 11118 <type strain>	|	type material	|
+76804	|	Nidovirales	|		|	scientific name	|
+79912	|	NCIMB 11118	|	NCIMB 11118 <type strain>	|	type material	|
+153721	|	DSM 11118	|	DSM 11118 <type strain>	|	type material	|
+457096	|	MICH 11118	|	MICH 11118 <holotype>	|	type material	|
+457096	|	MICH-11118	|	MICH-11118 <holotype>	|	type material	|
+457096	|	MICH:11118	|	MICH:11118 <holotype>	|	type material	|
+557004	|	Matayba cf. opaca Acevedo 11118	|		|	scientific name	|
+569542	|	Nocardia sp. 11118	|		|	scientific name	|
+652290	|	Sordariomycetes sp. 11118	|		|	scientific name	|
+682956	|	CGMCC 1.10239	|	CGMCC 1.10239 <type strain>	|	type material	|
+694002	|	Betacoronavirus	|		|	scientific name	|
+694002	|	Coronavirus	|	Coronavirus <Betacoronavirus>	|	in-part	|
+694002	|	Coronavirus group 2	|		|	equivalent name	|
+694002	|	Group 2 species	|		|	equivalent name	|
+694009	|	HCoV-SARS	|		|	acronym	|
+694009	|	Human coronavirus (strain SARS)	|		|	equivalent name	|
+694009	|	SARS	|		|	acronym	|
+694009	|	SARS-like coronavirus	|		|	equivalent name	|
+694009	|	SARSr-CoV	|		|	acronym	|
+694009	|	SARSrCoV	|		|	acronym	|
+694009	|	SARS-related coronavirus	|		|	equivalent name	|
+694009	|	Severe acute respiratory syndrome-related coronavirus	|		|	scientific name	|
+926565	|	Sporocytophaga myxococcoides DSM 11118	|		|	scientific name	|
+2268389	|	Arora 11118	|	Arora 11118 <type material>	|	type material	|
+2499399	|	Cornidovirineae	|		|	scientific name	|
+2501931	|	Orthocoronavirinae	|		|	scientific name	|
+2509511	|	Sarbecovirus	|		|	scientific name	|
+2559587	|	Riboviria	|		|	scientific name	|
+2559587	|	RNA viruses and retroviruses	|		|	genbank common name	|
+2559587	|	RNA viruses and viroids	|		|	common name	|
+2559587	|	RNA viruses	|		|	common name	|
+2697049	|	2019-nCoV	|		|	equivalent name	|
+2697049	|	COVID-19 virus	|		|	equivalent name	|
+2697049	|	HCoV-19	|		|	equivalent name	|
+2697049	|	Human coronavirus 2019	|		|	equivalent name	|
+2697049	|	SARS-2	|		|	equivalent name	|
+2697049	|	SARS2	|		|	equivalent name	|
+2697049	|	SARS-CoV-2	|		|	acronym	|
+2697049	|	SARS-CoV2	|		|	equivalent name	|
+2697049	|	Severe acute respiratory syndrome coronavirus 2	|		|	scientific name	|
+2705539	|	CLZhao 10239	|	CLZhao 10239 <holotype>	|	type material	|
+2732396	|	Orthornavirae	|		|	scientific name	|
+2732408	|	Pisuviricota	|		|	scientific name	|
+2732506	|	Pisoniviricetes	|		|	scientific name	|
+3093623	|	FAKU:10239	|	FAKU:10239 <paratype>	|	type material	|
+1	|	all	|		|	synonym	|
+1	|	root	|		|	scientific name	|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-test-db/nodes.dmp	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,14 @@
+1	|	1	|	no rank	|		|	8	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
+2697049	|	694009	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+694009	|	2509511	|	species	|	SA	|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant; specified	|
+2509511	|	694002	|	subgenus	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+694002	|	2501931	|	genus	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+2501931	|	11118	|	subfamily	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+11118	|	2499399	|	family	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+2499399	|	76804	|	suborder	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+76804	|	2732506	|	order	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|	code compliant	|
+2732506	|	2732408	|	class	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+2732408	|	2732396	|	phylum	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+2732396	|	2559587	|	kingdom	|		|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+2559587	|	10239	|	realm	|	RX	|	9	|	1	|	1	|	1	|	0	|	1	|	0	|	0	|		|
+10239	|	1	|	acellular root	|		|	9	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju.loc	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,1 @@
+test	test name	${__HERE__}/kaiju-test-db/	1.0
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju.out	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,1 @@
+C	testseq	2697049
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju2krona.out	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,1 @@
+1	root	Viruses	Riboviria	Orthornavirae	Pisuviricota	Pisoniviricetes	Nidovirales	Cornidovirineae	Coronaviridae	Orthocoronavirinae	Betacoronavirus	Sarbecovirus	Severe acute respiratory syndrome-related coronavirus	Severe acute respiratory syndrome coronavirus 2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju2table.out	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,4 @@
+file	percent	reads	taxon_id	taxon_name
+kaiju_out	100.000000	1	10239	Viruses
+kaiju_out	0.000000	0	NA	cannot be assigned to a (non-viral) phylum
+kaiju_out	0.000000	0	NA	unclassified
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaijux.out	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,1 @@
+C	testseq	1918	YP_009725295.1_2697049,YP_009724389.1_2697049,	RSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDG,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.fa	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,26 @@
+>testseq
+AATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA
+TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT
+ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG
+CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC
+AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA
+TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG
+CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA
+TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC
+TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG
+GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT
+TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA
+TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT
+ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC
+TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC
+TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT
+GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG
+GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT
+GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA
+GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA
+TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC
+AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT
+GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT
+AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT
+AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG
+AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/kaiju.loc.sample	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,5 @@
+# id: db name + date
+# name: what is shown to the user in the select
+# path: directory of the reference data; it must contain database.fmi, names.dmp and nodes.dmp
+# version: version used for constructing the DB (or just the current version at the time when pre-computed indices were downloaded)
+#id	name	path	version
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,6 @@
+<tables> <!-- registers the "kaiju" data table; rows are read from tool-data/kaiju.loc with the four columns listed below -->
+    <table name="kaiju" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path, version</columns>
+        <file path="tool-data/kaiju.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Apr 22 14:03:00 2025 +0000
@@ -0,0 +1,6 @@
+<tables> <!-- test-time variant: the "kaiju" data table is read from the loc file under test-data -->
+    <table name="kaiju" comment_char="#">
+        <columns>value, name, path, version</columns>
+        <file path="${__HERE__}/test-data/kaiju.loc" />
+    </table>
+</tables>