Mercurial > repos > iuc > vapor

<tool id="vapor" name="VAPOR" version="@TOOL_VERSION@+galaxy2" profile="21.05">
    <description>
        Classify Influenza samples from raw short read sequence data
    </description>
    <macros>
        <token name="@TOOL_VERSION@">1.0.2</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">vapor</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">vapor</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        #if str($fastq_input.fastq_input_selector) == "paired"
            #set r1_ext = $fastq_input.fastq1.extension
            #set r2_ext = $fastq_input.fastq2.extension
            ln -s '$fastq_input.fastq1' fastq1.$r1_ext &&
            ln -s '$fastq_input.fastq2' fastq2.$r2_ext &&
        #elif str($fastq_input.fastq_input_selector) == "paired_collection"
            #set r1_ext = $fastq_input.fastq_pairs.forward.extension
            #set r2_ext = $fastq_input.fastq_pairs.reverse.extension
            ln -s '$fastq_input.fastq_pairs.forward' fastq1.$r1_ext &&
            ln -s '$fastq_input.fastq_pairs.reverse' fastq2.$r2_ext &&
        #else
            #set r1_ext = $fastq_input.fastq_single.extension
            ln -s '$fastq_input.fastq_single' fastq1.$r1_ext &&
        #end if
        vapor.py
            --return_best_n $opt.return_best_n
            #if $output_type == "fasta"
                --return_seqs
            #end if
            -k '$opt.kmer_length'
            -t '$opt.score_threshold'
            -c '$opt.min_kmer_cov'
            -m '$opt.min_kmer_prop'
            -fa '$fasta_file'
            -fq
            fastq1.$r1_ext
            #if str($fastq_input.fastq_input_selector) in ["paired", "paired_collection"]
                fastq2.$r2_ext
            #end if
            -f '$opt.top_seed_frac'
            -q
        > out_file
    ]]>    </command>
    <inputs>
        <param name="fasta_file" format="fasta" type="data" label="FASTA file" help="Raw short read sequences (full length reference segment sequences)" />
        <conditional name="fastq_input">
            <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                <option value="single">Single</option>
                <option value="paired">Paired</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="single">
                <param name="fastq_single" format="fastqsanger,fastqsanger.gz" type="data" label="FASTQ file" help="Raw short read sequences (full length reference segment sequences)" />
            </when>
            <when value="paired">
                <param name="fastq1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify reads dataset with forward reads"/>
                <param name="fastq2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify reads dataset with reverse reads"/>
            </when>
            <when value="paired_collection">
                <param name="fastq_pairs" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="Dataset collection with forward and reverse reads"/>
            </when>
        </conditional>
        <param name="output_type" type="select" label="Output type">
            <option value="scores" selected="true">Return scores only</option>
            <option value="fasta">Return FASTA only</option>
        </param>
        <section name="opt" title="Optional arguments" expanded="true">
            <param name="return_best_n" type="integer" min="1" value="1" label="Returns the highest scoring n queries" help="A list of the best n queries instead of only the highest scoring query" />
            <param name="kmer_length" type="integer" min="5" max="30" value="21" label="Kmer Length" help="" />
            <param name="score_threshold" type="float" min="0.0" max="1.0" value="0.2" label="Read kmer filtering threshold" help="" />
            <param name="min_kmer_cov" type="integer" value="5" label="Min coverage kmer culling" help="Minimum coverage kmer culling" />
            <param name="min_kmer_prop" type="float" value="0.1" label="Min kmer proportion" help="Minimum proportion of matched kmers allowed for queries" />
            <param name="top_seed_frac" type="float" min="0.0" max="1.0" value="0.2" label="Fraction of best seeds to extend" help="" />
        </section>
    </inputs>
    <outputs>
        <data name="output_scores" from_work_dir="out_file" format="tabular" label="${tool.name} on ${on_string}: closest reference scores">
            <filter>output_type == "scores"</filter>
            <actions>
                <action name="column_names" type="metadata" default="% of query bases in reads,Total score,Query length,Mean score,Reads after culling,Query description" />
            </actions>
        </data>
        <data name="output_fasta" from_work_dir="out_file" format="fasta" label="${tool.name} on ${on_string}: closest reference fasta">
            <filter>output_type == "fasta"</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="single" />
                <param name="fastq_single" ftype="fastq" value="test_reads.fq" />
            </conditional>
            <param name="fasta_file" value="HA_sample.fa" />
            <output name="output_scores" file="output1.tab" />
        </test>
        <test expect_num_outputs="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="paired" />
                <param name="fastq1" ftype="fastq" value="test_reads.fq" />
                <param name="fastq2" ftype="fastq" value="test_reads2.fq" />
            </conditional>
            <param name="fasta_file" value="HA_sample.fa" />
            <output name="output_scores" file="output2.tab" />
        </test>
        <test expect_num_outputs="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="single" />
                <param name="fastq_single" ftype="fastqsanger.gz" value="test_reads.fastqsanger.gz" />
            </conditional>
            <param name="fasta_file" value="HA_sample.fa" />
            <output name="output_scores" file="output1.tab" />
        </test>
        <test expect_num_outputs="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="single" />
                <param name="fastq_single" value="test_reads.fq" />
            </conditional>
            <param name="fasta_file" value="HA_sample.fa" />
            <section name="opt">
                <param name="kmer_length" value="29" />
                <param name="score_threshold" value="0.5" />
                <param name="min_kmer_cov" value="7" />
                <param name="min_kmer_prop" value="0.5" />
                <param name="top_seed_frac" value="0.5" />
            </section>
            <output name="output_scores" file="output4.tab" />
        </test>
        <test expect_num_outputs="1">
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="single" />
                <param name="fastq_single" value="test_reads.fq" />
            </conditional>
            <param name="fasta_file" value="HA_sample.fa" />
            <param name="output_type" value="fasta" />
            <section name="opt">
                <param name="return_best_n" value="3" />
            </section>
            <output name="output_fasta" file="output5.fa" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

VAPOR is a tool for classification of Influenza samples from raw short read sequence data for downstream bioinformatics analysis.
VAPOR is provided with a fasta file of full-length sequences (> 20,000) for a given segment, a set of reads, and attempts to retrieve a reference that is closest to the sample strain.

`sub_sample` is not an option here (compared to the tool on GitHub), since you can always build a workflow that preprocesses your reads to a (random) subsample. You can use this output as your reads file for VAPOR.
    ]]>    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btz814</citation>
    </citations>
</tool>
author	iuc
date	Mon, 17 Oct 2022 07:28:50 +0000
parents	b1ca81ce88f9
children	244812f5bd1f