Repository 'kaiju_mergeoutputs'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/kaiju_mergeoutputs

Changeset 0:cdc8a98ed4fc (2025-04-22)
Next changeset 1:01585d091036 (2025-05-07)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
added:
kaiju-mergeOutputs.xml
macros.xml
test-data/kaiju-taxnames.out
test-data/kaiju-test-db/database.fmi
test-data/kaiju-test-db/names.dmp
test-data/kaiju-test-db/nodes.dmp
test-data/kaiju.loc
test-data/kaiju.out
test-data/kaiju2krona.out
test-data/kaiju2table.out
test-data/kaijux.out
test-data/query.fa
tool-data/kaiju.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r cdc8a98ed4fc kaiju-mergeOutputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kaiju-mergeOutputs.xml Tue Apr 22 14:03:00 2025 +0000
[
@@ -0,0 +1,71 @@
+<tool id="kaiju_mergeoutputs" name="kaiju-mergeOutputs" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description>merge two classification outputs (e.g. from Kaiju and Kraken)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## kaiju-mergeOutputs expects both inputs sorted by read name (column 2)
+        sort -k2,2 '$kaiju_table'  > kaiju.out.sort &&
+        sort -k2,2 '$kraken_table' > kraken.out.sort &&
+        kaiju-mergeOutputs
+            -i kaiju.out.sort
+            -j kraken.out.sort
+            -o '$combined_out'
+            -c $optional.conflict.mode
+            ## nodes.dmp is only passed for the taxonomy-aware modes. The reference
+            ## select is nested in the "optional" section and the "conflict"
+            ## conditional, so it has to be addressed by its full path.
+            #if $optional.conflict.mode in ["lca", "lowest"]
+                -t '$optional.conflict.reference.fields.path'/nodes.dmp
+            #end if
+            $optional.s
+            -v
+    ]]></command>
+    <inputs>
+        <param name="kaiju_table" type="data" format="tabular" label="kaiju output table"/>
+        <param name="kraken_table" type="data" format="tabular" label="Another output table" help="e.g. from kraken"/>
+        <section name="optional" title="Optional arguments" expanded="false">
+            <conditional name="conflict">
+                <param argument="-c" name="mode" type="select" label="Conflict resolution mode">
+                    <option value="1">use taxid from 1st input</option>
+                    <option value="2">use taxid from 2nd input</option>
+                    <option value="lca" selected="true">least common ancestor (LCA) of the two taxon IDs</option>
+                    <option value="lowest">lower rank of the two taxa is used if they are within the same lineage and LCA otherwise</option>
+                </param>
+                <when value="1"/>
+                <when value="2"/>
+                <when value="lca">
+                    <expand macro="reference"/>
+                </when>
+                <when value="lowest">
+                    <expand macro="reference"/>
+                </when>
+            </conditional>
+            <param argument="-s" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Use score" help="Use 4th column with classification score to give precedence to taxon with better score" />
+        </section>
+    </inputs>
+    <outputs>
+        <data name="combined_out" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- No optional parameters are set, so this runs with the default conflict mode (lca) and therefore exercises the nodes.dmp branch of the command -->
+        <test>
+            <param name="kaiju_table" value="kaiju.out"/>
+            <param name="kraken_table" value="kaiju.out"/>
+            <output name="combined_out" value="kaiju.out"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+Merge two tab-separated output files in the column format used by Kaiju and Kraken (classification status, read name and taxon ID in the first three columns). Only the first three columns are used, unless "Use score" is enabled, in which case the classification score in the fourth column is read as well.
+
+Both inputs are sorted by read name (second column) before merging, so they do not need to be pre-sorted.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r cdc8a98ed4fc macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,104 @@
+<macros>
+    <!-- Version tokens: @TOOL_VERSION@ pins the kaiju package, @VERSION_SUFFIX@ is the wrapper revision appended as +galaxyN, @PROFILE@ is the Galaxy profile of the tools -->
+    <token name="@TOOL_VERSION@">1.10.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">23.2</token>
+
+    <!-- Cross reference to the bio.tools entry for kaiju -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">kaiju</xref>
+        </xrefs>
+    </xml>
+
+    <!-- Package requirement: kaiju at the version given by @TOOL_VERSION@ -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
+        </requirements>
+    </xml>
+
+    <!-- Citation of the kaiju publication by DOI -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/ncomms11257</citation>
+        </citations>
+    </xml>
+
+    <!-- Select populated from the "kaiju" data table; validation fails when the table has no entries -->
+    <xml name="reference">
+        <param name="reference" type="select" label="kaiju reference data">
+            <options from_data_table="kaiju"/>
+            <validator type="no_options" message="No reference data installed. Ask your Galaxy admin."/>
+        </param>
+    </xml>
+
+    <!-- Taxonomic rank select for the r argument; the "optional" macro token sets the param's optional attribute -->
+    <xml name="r" tokens="optional">
+        <param argument="-r" name="rank" type="select" optional="@OPTIONAL@" label="rank">
+            <option value="phylum">phylum</option>
+            <option value="class">class</option>
+            <option value="order">order</option>
+            <option value="family">family</option>
+            <option value="genus">genus</option>
+            <option value="species">species</option>
+        </param>
+    </xml>
+
+    <!-- Optional multi-select of taxonomic ranks for the l argument -->
+    <xml name="l">
+        <param argument="-l" type="select" multiple="true" optional="true" label="Print taxon path containing only the selected ranks">
+            <option value="domain">Domain</option>
+            <option value="realm">Realm</option>
+            <option value="kingdom">Kingdom</option>
+            <option value="subkingdom">Subkingdom</option>
+            <option value="superphylum">Superphylum</option>
+            <option value="phylum">Phylum</option>
+            <option value="subphylum">Subphylum</option>
+            <option value="infraphylum">Infraphylum</option>
+            <option value="superclass">Superclass</option>
+            <option value="class">Class</option>
+            <option value="subclass">Subclass</option>
+            <option value="infraclass">Infraclass</option>
+            <option value="cohort">Cohort</option>
+            <option value="subcohort">Subcohort</option>
+            <option value="superorder">Superorder</option>
+            <option value="order">Order</option>
+            <option value="suborder">Suborder</option>
+            <option value="infraorder">Infraorder</option>
+            <option value="parvorder">Parvorder</option>
+            <option value="superfamily">Superfamily</option>
+            <option value="family">Family</option>
+            <option value="subfamily">Subfamily</option>
+            <option value="tribe">Tribe</option>
+            <option value="subtribe">Subtribe</option>
+            <option value="genus">Genus</option>
+            <option value="subgenus">Subgenus</option>
+            <option value="species_group">Species Group</option>
+            <option value="species_subgroup">Species Subgroup</option>
+            <option value="species">Species</option>
+            <option value="subspecies">Subspecies</option>
+            <option value="forma_specialis">Forma Specialis</option>
+            <option value="varietas">Variety</option>
+            <option value="subvariety">Subvariety</option>
+            <option value="forma">Form</option>
+            <option value="section">Section</option>
+            <option value="subsection">Subsection</option>
+            <option value="series">Series</option>
+            <option value="subseries">Subseries</option>
+            <option value="strain">Strain</option>
+            <option value="isolate">Isolate</option>
+            <option value="serogroup">Serogroup</option>
+            <option value="serotype">Serotype</option>
+            <option value="biotype">Biotype</option>
+            <option value="genotype">Genotype</option>
+            <option value="morph">Morph</option>
+            <option value="pathogroup">Pathogroup</option>
+        </param>
+    </xml>
+
+    <!-- Boolean flag for the u argument; emits nothing on the command line when unchecked -->
+    <xml name="u">
+        <param argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Do not count unclassified reads" help="Disables counting unclassified reads towards the total number of reads when calculating percentages."/>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju-taxnames.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-taxnames.out Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,1 @@
+C testseq 2697049 Severe acute respiratory syndrome coronavirus 2
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju-test-db/database.fmi
b
Binary file test-data/kaiju-test-db/database.fmi has changed
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju-test-db/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-test-db/names.dmp Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,53 @@
+10239 | Vira | | synonym |
+10239 | Viridae | | synonym |
+10239 | viruses | | blast name |
+10239 | Viruses | | scientific name |
+11118 | Coronaviridae | | scientific name |
+58346 | CCUG 11118 | CCUG 11118 <type strain> | type material |
+76804 | Nidovirales | | scientific name |
+79912 | NCIMB 11118 | NCIMB 11118 <type strain> | type material |
+153721 | DSM 11118 | DSM 11118 <type strain> | type material |
+457096 | MICH 11118 | MICH 11118 <holotype> | type material |
+457096 | MICH-11118 | MICH-11118 <holotype> | type material |
+457096 | MICH:11118 | MICH:11118 <holotype> | type material |
+557004 | Matayba cf. opaca Acevedo 11118 | | scientific name |
+569542 | Nocardia sp. 11118 | | scientific name |
+652290 | Sordariomycetes sp. 11118 | | scientific name |
+682956 | CGMCC 1.10239 | CGMCC 1.10239 <type strain> | type material |
+694002 | Betacoronavirus | | scientific name |
+694002 | Coronavirus | Coronavirus <Betacoronavirus> | in-part |
+694002 | Coronavirus group 2 | | equivalent name |
+694002 | Group 2 species | | equivalent name |
+694009 | HCoV-SARS | | acronym |
+694009 | Human coronavirus (strain SARS) | | equivalent name |
+694009 | SARS | | acronym |
+694009 | SARS-like coronavirus | | equivalent name |
+694009 | SARSr-CoV | | acronym |
+694009 | SARSrCoV | | acronym |
+694009 | SARS-related coronavirus | | equivalent name |
+694009 | Severe acute respiratory syndrome-related coronavirus | | scientific name |
+926565 | Sporocytophaga myxococcoides DSM 11118 | | scientific name |
+2268389 | Arora 11118 | Arora 11118 <type material> | type material |
+2499399 | Cornidovirineae | | scientific name |
+2501931 | Orthocoronavirinae | | scientific name |
+2509511 | Sarbecovirus | | scientific name |
+2559587 | Riboviria | | scientific name |
+2559587 | RNA viruses and retroviruses | | genbank common name |
+2559587 | RNA viruses and viroids | | common name |
+2559587 | RNA viruses | | common name |
+2697049 | 2019-nCoV | | equivalent name |
+2697049 | COVID-19 virus | | equivalent name |
+2697049 | HCoV-19 | | equivalent name |
+2697049 | Human coronavirus 2019 | | equivalent name |
+2697049 | SARS-2 | | equivalent name |
+2697049 | SARS2 | | equivalent name |
+2697049 | SARS-CoV-2 | | acronym |
+2697049 | SARS-CoV2 | | equivalent name |
+2697049 | Severe acute respiratory syndrome coronavirus 2 | | scientific name |
+2705539 | CLZhao 10239 | CLZhao 10239 <holotype> | type material |
+2732396 | Orthornavirae | | scientific name |
+2732408 | Pisuviricota | | scientific name |
+2732506 | Pisoniviricetes | | scientific name |
+3093623 | FAKU:10239 | FAKU:10239 <paratype> | type material |
+1 | all | | synonym |
+1 | root | | scientific name |
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju-test-db/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju-test-db/nodes.dmp Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,14 @@
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
+2697049 | 694009 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
+694009 | 2509511 | species | SA | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant; specified |
+2509511 | 694002 | subgenus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
+694002 | 2501931 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
+2501931 | 11118 | subfamily | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
+11118 | 2499399 | family | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
+2499399 | 76804 | suborder | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
+76804 | 2732506 | order | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant |
+2732506 | 2732408 | class | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
+2732408 | 2732396 | phylum | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
+2732396 | 2559587 | kingdom | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
+2559587 | 10239 | realm | RX | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | |
+10239 | 1 | acellular root | | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju.loc Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,1 @@
+test test name ${__HERE__}/kaiju-test-db/ 1.0
\ No newline at end of file
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju.out Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,1 @@
+C testseq 2697049
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju2krona.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju2krona.out Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,1 @@
+1 root Viruses Riboviria Orthornavirae Pisuviricota Pisoniviricetes Nidovirales Cornidovirineae Coronaviridae Orthocoronavirinae Betacoronavirus Sarbecovirus Severe acute respiratory syndrome-related coronavirus Severe acute respiratory syndrome coronavirus 2
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaiju2table.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju2table.out Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,4 @@
+file percent reads taxon_id taxon_name
+kaiju_out 100.000000 1 10239 Viruses
+kaiju_out 0.000000 0 NA cannot be assigned to a (non-viral) phylum
+kaiju_out 0.000000 0 NA unclassified
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/kaijux.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaijux.out Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,1 @@
+C testseq 1918 YP_009725295.1_2697049,YP_009724389.1_2697049, RSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDG,
b
diff -r 000000000000 -r cdc8a98ed4fc test-data/query.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.fa Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,26 @@
+>testseq
+AATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA
+TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT
+ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG
+CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC
+AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA
+TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG
+CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA
+TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC
+TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG
+GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT
+TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA
+TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT
+ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC
+TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC
+TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT
+GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG
+GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT
+GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA
+GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA
+TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC
+AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT
+GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT
+AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT
+AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG
+AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTA
b
diff -r 000000000000 -r cdc8a98ed4fc tool-data/kaiju.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/kaiju.loc.sample Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,5 @@
+# id: db name + date
+# name: what is shown to the user in the select
+# path: directory of the reference data; it must contain database.fmi, names.dmp and nodes.dmp
+# version: version used for constructing the DB (or just the current version at the time when pre-computed indices were downloaded)
+#id name path version
\ No newline at end of file
b
diff -r 000000000000 -r cdc8a98ed4fc tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="kaiju" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path, version</columns>
+        <file path="tool-data/kaiju.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r cdc8a98ed4fc tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Apr 22 14:03:00 2025 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="kaiju" comment_char="#">
+        <columns>value, name, path, version</columns>
+        <file path="${__HERE__}/test-data/kaiju.loc" />
+    </table>
+</tables>