<!--
view kodoja_search.xml @ 2:ee917702dbd8 draft

v0.0.7, support GALAXY_SLOTS etc
author peterjc
date Mon, 10 Sep 2018 09:16:13 -0400
parents 4554fcd4ef6d
children d4111d1de76f
line wrap: on
line source
-->

<tool id="kodoja_search" name="Kodoja database search" version="0.0.7">
    <!-- One-line summary of what the tool does. -->
    <description>identify viruses from plant RNA sequencing data</description>
    <!-- The wrapper depends on the kodoja package, pinned to the same version as this tool. -->
    <requirements>
        <requirement version="0.0.7" type="package">kodoja</requirement>
    </requirements>
    <!-- Command whose output reports the version of the underlying program. -->
    <version_command>kodoja_search.py --version</version_command>
    <!--
    Cheetah command template. Runs kodoja_search.py against the selected
    Kraken and Kaiju databases for either single or paired reads, writes the
    results into the job working directory, then moves virus_table.txt onto
    the combined_table output. A non-zero exit code marks the job as failed.
    -->
    <command detect_errors="exit_code"><![CDATA[
## The thread count is taken from the GALAXY_SLOTS shell variable (escaped so
## that Cheetah leaves it for the shell), falling back to 4 when it is unset.
kodoja_search.py
-t="\${GALAXY_SLOTS:-4}"
--kraken_db '${kraken_db.fields.path}'
--kaiju_db '${kaiju_db.fields.path}'

## Every input nested in the single_paired conditional is referenced by its
## fully-qualified name. The data format is derived from the datatype of the
## first (forward or single) input.
#if $single_paired.single_paired_selector == 'yes'
    #if $single_paired.forward_input.is_of_type('fastq')
        --data_format fastq
    #else
        --data_format fasta
    #end if
    --read1 '${single_paired.forward_input}'
    --read2 '${single_paired.reverse_input}'
#else
    #if $single_paired.input_sequences.is_of_type('fastq')
        --data_format fastq
    #else
        --data_format fasta
    #end if
    --read1 '${single_paired.input_sequences}'
#end if

## TODO:
## -m min_trim
## -a trim_adapt
## -q kraken_quick
## -p kraken_preload
## -c kaiju_score
## -l kaiju_minlen
## -i kaiju_mismatch

## We'll capture predictably named output files from here:
-o .
&&
mv ./virus_table.txt '$combined_table'
]]></command>
    <inputs>
        <!-- Reference databases, offered from the kraken_databases and
             kaiju_databases data tables. Each select carries a no_options
             validator with the message to report when nothing is available. -->
        <param name="kraken_db" type="select" label="Select a Kraken database">
            <options from_data_table="kraken_databases">
                <validator type="no_options" message="No Kraken database is available" />
            </options>
        </param>
        <param name="kaiju_db" type="select" label="Select a Kaiju database">
            <options from_data_table="kaiju_databases">
                <validator type="no_options" message="No Kaiju database is available" />
            </options>
        </param>
        <!-- Read layout: "no" (the default) takes one dataset, "yes" takes a
             forward and a reverse dataset. Both FASTA and FASTQ are accepted. -->
        <conditional name="single_paired">
            <param name="single_paired_selector" type="select" label="Single or paired reads">
                <!-- TODO? Possible future third mode:
                <option value="collection">Collection</option>
                -->
                <option value="yes">Paired</option>
                <option value="no" selected="True">Single</option>
            </param>
            <when value="yes">
                <param name="forward_input" type="data" format="fasta,fastq" label="Forward strand" help="FASTA or FASTQ dataset"/>
                <param name="reverse_input" type="data" format="fasta,fastq" label="Reverse strand" help="FASTA or FASTQ dataset"/>
            </when>
            <when value="no">
                <param name="input_sequences" type="data" format="fasta,fastq" label="Input sequences" help="FASTA or FASTQ datasets"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single tabular result; the command moves virus_table.txt onto this dataset. -->
        <data name="combined_table"
              format="tabular"
              label="Kodoja species report for ${on_string}" />
    </outputs>
    <!-- Functional tests. All three use the kraken3viruses and kaiju3viruses
         database entries and compare the combined_table output against a
         stored expected file. -->
    <tests>
        <!-- Single-end reads, FASTQ input. -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="no" />
            <param name="input_sequences" value="testData_1.fastq" ftype="fastq" />
            <output name="combined_table" file="virus_table_SE_fastq.tabular" ftype="tabular" />
        </test>
        <!-- Paired-end reads, FASTQ input. -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="yes" />
            <param name="forward_input" value="testData_1.fastq" ftype="fastq" />
            <param name="reverse_input" value="testData_2.fastq" ftype="fastq" />
            <output name="combined_table" file="virus_table_PE_fastq.tabular" ftype="tabular" />
        </test>
        <!-- Paired-end reads, FASTA input. -->
        <test>
            <param name="kraken_db" value="kraken3viruses" />
            <param name="kaiju_db" value="kaiju3viruses" />
            <param name="single_paired_selector" value="yes" />
            <param name="forward_input" value="testData_1.fasta" ftype="fasta" />
            <param name="reverse_input" value="testData_2.fasta" ftype="fasta" />
            <output name="combined_table" file="virus_table_PE_fasta.tabular" ftype="tabular" />
        </test>
    </tests>
    <help><![CDATA[
Kodoja is a tool intended to identify viral sequences in a
FASTQ/FASTA sequencing run by matching them against both
Kraken and Kaiju databases.

The main output is a tab-separated table (tabular format in Galaxy)
with the following columns:

1. Species name
2. Species NCBI taxonomy identifier (TaxID)
3. Number of reads assigned by *either* Kraken or Kaiju to this species
4. Number of reads assigned by *both* Kraken and Kaiju to this species
5. Genus name
6. Number of reads assigned by *either* Kraken or Kaiju to this genus
7. Number of reads assigned by *both* Kraken and Kaiju to this genus

The counts in columns 6 and 7 are for reads assigned to that genus, but not
to any species within it.

For example,

================================== ============= ================= ============================= ========== =============== ===========================
Species                            Species TaxID Species sequences Species sequences (stringent) Genus      Genus sequences Genus sequences (stringent)
---------------------------------- ------------- ----------------- ----------------------------- ---------- --------------- ---------------------------
Cassava brown streak virus                137758                45                            45 Ipomovirus               0                           0
Ugandan cassava brown streak virus        946046                28                            28 Ipomovirus               0                           0
Tobacco etch virus                         12227                21                            19 Potyvirus                0                           0
================================== ============= ================= ============================= ========== =============== ===========================

The command line tool offers additional options not currently exposed
in Galaxy, including::

      -t THREADS, --threads THREADS
                            Number of threads
      -s, --host_subset     Subset host sequences before Kaiju
      -m TRIM_MINLEN, --trim_minlen TRIM_MINLEN
                            Trimmomatic minimum length
      -a TRIM_ADAPT, --trim_adapt TRIM_ADAPT
                            Illumina adapter sequence file
      -q KRAKEN_QUICK, --kraken_quick KRAKEN_QUICK
                            Number of minimum hits by Kraken
      -p, --kraken_preload  Kraken preload database
      -c KAIJU_SCORE, --kaiju_score KAIJU_SCORE
                            Kaiju alignment score
      -l KAIJU_MINLEN, --kaiju_minlen KAIJU_MINLEN
                            Kaiju minimum length
      -i KAIJU_MISMATCH, --kaiju_mismatch KAIJU_MISMATCH
                            Kaiju allowed mismatches


For more information, please see the Kodoja manual
https://github.com/abaizan/kodoja/wiki/Kodoja-Manual
    ]]></help>
    <!-- Citation for the Kodoja GitHub repository, given as a BibTeX @misc entry. -->
    <citations>
        <citation type="bibtex">
@misc{githubkodoja,
  author = {Baizan Edge, Amanda},
  year = {2018},
  title = {Kodoja},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/abaizan/kodoja},
}</citation>
    </citations>
</tool>