Mercurial > repos > iuc > kaiju_kaiju2table
changeset 0:e7d2c4ed18a5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
author | iuc |
---|---|
date | Tue, 22 Apr 2025 14:03:27 +0000 |
parents | |
children | eedcb4cc9e5a |
files | kaiju2table.xml macros.xml test-data/kaiju-taxnames.out test-data/kaiju-test-db/database.fmi test-data/kaiju-test-db/names.dmp test-data/kaiju-test-db/nodes.dmp test-data/kaiju.loc test-data/kaiju.out test-data/kaiju2krona.out test-data/kaiju2table.out test-data/kaijux.out test-data/query.fa tool-data/kaiju.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 15 files changed, 310 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kaiju2table.xml Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,95 @@
+<tool id="kaiju_kaiju2table" name="kaiju2table" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description>convert Kaiju output to a summary table for a taxonomic rank</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        ## Link every input under its sanitized element identifier: the link name is what shows up in the "file" column of the report.
+        #for $kaiju_table in $kaiju_tables
+            ln -s '$kaiju_table' #echo re.sub('[^\w\-_]', '_', str($kaiju_table.element_identifier))
+            &&
+        #end for
+        ## Thresholds are only passed when set (-m percentage, -c read count); -l receives the selected ranks as a comma-separated list.
+        kaiju2table
+            -t '$reference.fields.path'/nodes.dmp
+            -n '$reference.fields.path'/names.dmp
+            -r $rank
+            -o '$kaiju_summary'
+            #for $kaiju_table in $kaiju_tables
+                #echo re.sub('[^\w\-_]', '_', str($kaiju_table.element_identifier))
+            #end for
+            #if str($optional.m)
+                -m $optional.m
+            #end if
+            #if str($optional.c)
+                -c $optional.c
+            #end if
+            $optional.e
+            $optional.u
+            #if $optional.l
+                -l '$optional.l'
+            #end if
+    ]]></command>
+    <inputs>
+        <param name="kaiju_tables" type="data" format="tabular" multiple="true" optional="false" label="kaiju output tables"/>
+        <expand macro="reference"/>
+        <param argument="-r" name="rank" type="select" label="rank">
+            <option value="phylum">phylum</option>
+            <option value="class">class</option>
+            <option value="order">order</option>
+            <option value="family">family</option>
+            <option value="genus">genus</option>
+            <option value="species">species</option>
+        </param>
+        <section name="optional" title="Optional arguments" expanded="false">
+            <param argument="-m" type="float" min="0" max="100" optional="true" value="" label="Minimum reporting percentage" help="Can not be combined with -c" />
+            <param argument="-c" type="integer" min="1" optional="true" value="" label="Minimum required number of reads" help="Can not be combined with -m" />
+            <param argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="false" label="Expand viruses" help="which are always shown as full taxon path and read counts are not summarized in higher taxonomic levels" />
+            <param argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Do not count unclassified reads" help="Disables counting unclassified reads towards the total number of reads when calculating percentages."/>
+            <expand macro="l"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="kaiju_summary" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="kaiju_tables" value="kaiju.out"/>
+            <param name="reference" value="test"/>
+            <param name="rank" value="phylum"/>
+            <output name="kaiju_summary" value="kaiju2table.out"/>
+        </test>
+        <test>
+            <param name="kaiju_tables" value="kaiju.out"/>
+            <param name="reference" value="test"/>
+            <param name="rank" value="order"/>
+            <section name="optional">
+                <param name="e" value="true"/>
+            </section>
+            <output name="kaiju_summary" value="kaiju2table.out" lines_diff="4">
+                <assert_contents>
+                    <has_text text="Severe acute respiratory syndrome coronavirus 2"/>
+                    <has_text text="cannot be assigned to a (non-viral) order"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+The program kaiju2table converts Kaiju's output file(s) into a summary table for a given taxonomic rank, e.g., genus.
+It uses the taxonomic information of the reference data for mapping the taxon identifiers from the third column in the
+Kaiju output to the corresponding taxon names.
+
+The program can also filter out taxa with low abundances, e.g. for only showing genera that comprise at least 1 percent of the total reads.
+Similarly, a threshold on the absolute read count can be given.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 22 14:03:27 2025 +0000
@@ -0,0 +1,96 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.10.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">23.2</token>
+    <!-- xrefs: cross-reference to the bio.tools entry "kaiju" -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">kaiju</xref>
+        </xrefs>
+    </xml>
+    <!-- requirements: the kaiju package, pinned to the tool version token -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
+        </requirements>
+    </xml>
+    <!-- citations: DOI of the publication to cite -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/ncomms11257</citation>
+        </citations>
+    </xml>
+    <!-- reference: select filled from the "kaiju" data table; validation fails when the table has no entries -->
+    <xml name="reference">
+        <param name="reference" type="select" label="Kaiju reference data">
+            <options from_data_table="kaiju"/>
+            <validator type="no_options" message="No reference data installed. Ask your Galaxy admin."/>
+        </param>
+    </xml>
+    <!-- r: rank select for -r; the expanding tool supplies the "optional" token (true/false) -->
+    <xml name="r" tokens="optional">
+        <param argument="-r" name="rank" type="select" optional="@OPTIONAL@" label="rank">
+            <option value="phylum">phylum</option>
+            <option value="class">class</option>
+            <option value="order">order</option>
+            <option value="family">family</option>
+            <option value="genus">genus</option>
+            <option value="species">species</option>
+        </param>
+    </xml>
+    <!-- l: optional multi-select of the ranks to include in the printed taxon path (-l) -->
+    <xml name="l">
+        <param argument="-l" type="select" multiple="true" optional="true" label="Print taxon path containing only the selected ranks">
+            <option value="domain">Domain</option>
+            <option value="realm">Realm</option>
+            <option value="kingdom">Kingdom</option>
+            <option value="subkingdom">Subkingdom</option>
+            <option value="superphylum">Superphylum</option>
+            <option value="phylum">Phylum</option>
+            <option value="subphylum">Subphylum</option>
+            <option value="infraphylum">Infraphylum</option>
+            <option value="superclass">Superclass</option>
+            <option value="class">Class</option>
+            <option value="subclass">Subclass</option>
+            <option value="infraclass">Infraclass</option>
+            <option value="cohort">Cohort</option>
+            <option value="subcohort">Subcohort</option>
+            <option value="superorder">Superorder</option>
+            <option value="order">Order</option>
+            <option value="suborder">Suborder</option>
+            <option value="infraorder">Infraorder</option>
+            <option value="parvorder">Parvorder</option>
+            <option value="superfamily">Superfamily</option>
+            <option value="family">Family</option>
+            <option value="subfamily">Subfamily</option>
+            <option value="tribe">Tribe</option>
+            <option value="subtribe">Subtribe</option>
+            <option value="genus">Genus</option>
+            <option value="subgenus">Subgenus</option>
+            <option value="species_group">Species Group</option>
+            <option value="species_subgroup">Species Subgroup</option>
+            <option value="species">Species</option>
+            <option value="subspecies">Subspecies</option>
+            <option value="forma_specialis">Forma Specialis</option>
+            <option value="varietas">Variety</option>
+            <option value="subvariety">Subvariety</option>
+            <option value="forma">Form</option>
+            <option value="section">Section</option>
+            <option value="subsection">Subsection</option>
+            <option value="series">Series</option>
+            <option value="subseries">Subseries</option>
+            <option value="strain">Strain</option>
+            <option value="isolate">Isolate</option>
+            <option value="serogroup">Serogroup</option>
+            <option value="serotype">Serotype</option>
+            <option value="biotype">Biotype</option>
+            <option value="genotype">Genotype</option>
+            <option value="morph">Morph</option>
+            <option value="pathogroup">Pathogroup</option>
+        </param>
+    </xml>
+    <!-- u: boolean flag that emits -u when checked and nothing otherwise -->
+    <xml name="u">
+        <param argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Do not count unclassified reads" help="Disables counting unclassified reads towards the total number of reads when calculating percentages."/>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju-taxnames.out Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,1 @@ +C testseq 2697049 Severe acute respiratory syndrome coronavirus 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju-test-db/names.dmp Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,53 @@ +10239 | Vira | | synonym | +10239 | Viridae | | synonym | +10239 | viruses | | blast name | +10239 | Viruses | | scientific name | +11118 | Coronaviridae | | scientific name | +58346 | CCUG 11118 | CCUG 11118 <type strain> | type material | +76804 | Nidovirales | | scientific name | +79912 | NCIMB 11118 | NCIMB 11118 <type strain> | type material | +153721 | DSM 11118 | DSM 11118 <type strain> | type material | +457096 | MICH 11118 | MICH 11118 <holotype> | type material | +457096 | MICH-11118 | MICH-11118 <holotype> | type material | +457096 | MICH:11118 | MICH:11118 <holotype> | type material | +557004 | Matayba cf. opaca Acevedo 11118 | | scientific name | +569542 | Nocardia sp. 11118 | | scientific name | +652290 | Sordariomycetes sp. 11118 | | scientific name | +682956 | CGMCC 1.10239 | CGMCC 1.10239 <type strain> | type material | +694002 | Betacoronavirus | | scientific name | +694002 | Coronavirus | Coronavirus <Betacoronavirus> | in-part | +694002 | Coronavirus group 2 | | equivalent name | +694002 | Group 2 species | | equivalent name | +694009 | HCoV-SARS | | acronym | +694009 | Human coronavirus (strain SARS) | | equivalent name | +694009 | SARS | | acronym | +694009 | SARS-like coronavirus | | equivalent name | +694009 | SARSr-CoV | | acronym | +694009 | SARSrCoV | | acronym | +694009 | SARS-related coronavirus | | equivalent name | +694009 | Severe acute respiratory syndrome-related coronavirus | | scientific name | +926565 | Sporocytophaga myxococcoides DSM 11118 | | scientific name | +2268389 | Arora 11118 | Arora 11118 <type material> | type material | +2499399 | Cornidovirineae | | scientific name | +2501931 | Orthocoronavirinae | | scientific name | +2509511 | Sarbecovirus | | scientific name | +2559587 | Riboviria | | scientific name | +2559587 | RNA viruses and retroviruses | | genbank common name | +2559587 | 
RNA viruses and viroids | | common name | +2559587 | RNA viruses | | common name | +2697049 | 2019-nCoV | | equivalent name | +2697049 | COVID-19 virus | | equivalent name | +2697049 | HCoV-19 | | equivalent name | +2697049 | Human coronavirus 2019 | | equivalent name | +2697049 | SARS-2 | | equivalent name | +2697049 | SARS2 | | equivalent name | +2697049 | SARS-CoV-2 | | acronym | +2697049 | SARS-CoV2 | | equivalent name | +2697049 | Severe acute respiratory syndrome coronavirus 2 | | scientific name | +2705539 | CLZhao 10239 | CLZhao 10239 <holotype> | type material | +2732396 | Orthornavirae | | scientific name | +2732408 | Pisuviricota | | scientific name | +2732506 | Pisoniviricetes | | scientific name | +3093623 | FAKU:10239 | FAKU:10239 <paratype> | type material | +1 | all | | synonym | +1 | root | | scientific name |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju-test-db/nodes.dmp Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,14 @@ +1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | +2697049 | 694009 | no rank | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | +694009 | 2509511 | species | SA | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant; specified | +2509511 | 694002 | subgenus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | +694002 | 2501931 | genus | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | +2501931 | 11118 | subfamily | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | +11118 | 2499399 | family | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | +2499399 | 76804 | suborder | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | +76804 | 2732506 | order | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | code compliant | +2732506 | 2732408 | class | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | +2732408 | 2732396 | phylum | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | +2732396 | 2559587 | kingdom | | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | +2559587 | 10239 | realm | RX | 9 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | | +10239 | 1 | acellular root | | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju.loc Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,1 @@ +test test name ${__HERE__}/kaiju-test-db/ 1.0 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju.out Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,1 @@ +C testseq 2697049
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju2krona.out Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,1 @@ +1 root Viruses Riboviria Orthornavirae Pisuviricota Pisoniviricetes Nidovirales Cornidovirineae Coronaviridae Orthocoronavirinae Betacoronavirus Sarbecovirus Severe acute respiratory syndrome-related coronavirus Severe acute respiratory syndrome coronavirus 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju2table.out Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,4 @@ +file percent reads taxon_id taxon_name +kaiju_out 100.000000 1 10239 Viruses +kaiju_out 0.000000 0 NA cannot be assigned to a (non-viral) phylum +kaiju_out 0.000000 0 NA unclassified
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaijux.out Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,1 @@ +C testseq 1918 YP_009725295.1_2697049,YP_009724389.1_2697049, RSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDG,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/query.fa Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,26 @@ +>testseq +AATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA +TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT +ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC +AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA +TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG +CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA +TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG +GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT +TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA +TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT +ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC +TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT +GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG +GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT +GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA +TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC +AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT +GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT +AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG +AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/kaiju.loc.sample Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,5 @@ +# id: db name + date +# name: what is shown to the user in the select +# path: directory of the reference data; it must contain database.fmi, names.dmp and nodes.dmp +# version: version used for constructing the DB (or just the current version at the time when pre-computed indices were downloaded) +#id name path version \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Apr 22 14:03:27 2025 +0000 @@ -0,0 +1,6 @@ +<tables><!-- Declares the "kaiju" data table: entries come from tool-data/kaiju.loc, "#" is the comment character, and each entry has the columns value, name, path, version --> + <table name="kaiju" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path, version</columns> + <file path="tool-data/kaiju.loc" /> + </table> +</tables>