Mercurial > repos > iuc > vapor
view vapor.xml @ 2:b1ca81ce88f9 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vapor commit 4eb29c16fda3267d50f57448a28807b03c33a96f
author | iuc |
---|---|
date | Tue, 04 Oct 2022 21:13:05 +0000 |
parents | 7bf891a13ace |
children | f11d2dd29b2b |
line wrap: on
line source
<tool id="vapor" name="VAPOR" version="@TOOL_VERSION@+galaxy1" profile="21.05">
    <!--
        Galaxy wrapper around vapor.py.
        Inputs: one FASTA dataset and one or more FASTQ datasets (plain or gzipped).
        Output: a single dataset taken from the job's "out_file", exposed either as a
        tabular score table or as FASTA depending on the "output_type" select.
    -->
    <description>
        Classify Influenza samples from raw short read sequence data
    </description>
    <macros>
        <token name="@TOOL_VERSION@">1.0.2</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">vapor</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">vapor</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Collect the symlink paths that are handed to vapor.py via -fq.
        #set $fastq_files = []
        mkdir fastq_files &&
        #for $i, $fastq in enumerate($fastq_file)
            ## Link every input under a name ending in .fastq or .fastq.gz,
            ## chosen from the Galaxy datatype extension of the dataset.
            ## NOTE(review): presumably vapor.py decides on gzip handling from the
            ## file name - confirm against the VAPOR sources.
            #if $fastq.ext.endswith(".gz")
                #set $ext = '.fastq.gz'
            #else
                #set $ext = '.fastq'
            #end if
            #set $out = './fastq_files/input_%s%s' % ($i, $ext)
            ln -s '${fastq}' '${out}' &&
            ## #silent: run append() for its side effect only, emit nothing.
            #silent $fastq_files.append($out)
        #end for

        vapor.py
            --return_best_n '$opt.return_best_n'
            #if $output_type == "fasta"
                --return_seqs
            #end if
            -k '$opt.kmer_length'
            -t '$opt.score_threshold'
            -c '$opt.min_kmer_cov'
            -m '$opt.min_kmer_prop'
            -fa '$fasta_file'
            -fq
            #for $fq in $fastq_files
                '${fq}'
            #end for
            -f '$opt.top_seed_frac'
            -q
            ## Both outputs are collected from this file (from_work_dir="out_file").
            > out_file
    ]]></command>
    <inputs>
        <param name="fasta_file" format="fasta" type="data" label="FASTA file" help="Full-length reference segment sequences" />
        <param name="fastq_file" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data" multiple="true" label="FASTQ file(s)" help="WGS reads" />
        <param name="output_type" type="select" label="Output type">
            <option value="scores" selected="true">Return scores only</option>
            <option value="fasta">Return FASTA only</option>
        </param>
        <section name="opt" title="Optional arguments" expanded="true">
            <param name="return_best_n" type="integer" min="1" value="1" label="Returns the highest scoring n queries" help="A list of the best n queries instead of only the highest scoring query" />
            <param name="kmer_length" type="integer" min="5" max="30" value="21" label="Kmer Length" help="" />
            <param name="score_threshold" type="float" min="0.0" max="1.0" value="0.2" label="Read kmer filtering threshold" help="" />
            <param name="min_kmer_cov" type="integer" value="5" label="Min coverage kmer culling" help="Minimum coverage kmer culling" />
            <param name="min_kmer_prop" type="float" value="0.1" label="Min kmer proportion" help="Minimum proportion of matched kmers allowed for queries" />
            <param name="top_seed_frac" type="float" min="0.0" max="1.0" value="0.2" label="Fraction of best seeds to extend" help="" />
        </section>
    </inputs>
    <outputs>
        <!-- Exactly one of the two outputs is created per job; the filters select it from output_type. -->
        <data name="output_scores" from_work_dir="out_file" format="tabular" label="${tool.name} on ${on_string}: closest reference scores">
            <filter>output_type == "scores"</filter>
            <actions>
                <action name="column_names" type="metadata" default="% of query bases in reads,Total score,Query length,Mean score,Reads after culling,Query description" />
            </actions>
        </data>
        <data name="output_fasta" from_work_dir="out_file" format="fasta" label="${tool.name} on ${on_string}: closest reference fasta">
            <filter>output_type == "fasta"</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: single fastq -->
        <test expect_num_outputs="1">
            <param name="fasta_file" value="HA_sample.fa" />
            <param name="fastq_file" ftype="fastq" value="test_reads.fq" />
            <output name="output_scores" file="output1.tab" />
        </test>
        <!-- Test 2: multiple fastq -->
        <test expect_num_outputs="1">
            <param name="fasta_file" value="HA_sample.fa" />
            <param name="fastq_file" ftype="fastq" value="test_reads.fq,test_reads2.fq" />
            <output name="output_scores" file="output2.tab" />
        </test>
        <!-- Test 3: fastqsanger.gz with explicit ftype -->
        <test expect_num_outputs="1">
            <param name="fasta_file" value="HA_sample.fa" />
            <param name="fastq_file" ftype="fastqsanger.gz" value="test_reads.fastqsanger.gz" />
            <output name="output_scores" file="output1.tab" />
        </test>
        <!-- Test 4: fastqsanger.gz file without an explicit ftype -->
        <test expect_num_outputs="1">
            <param name="fasta_file" value="HA_sample.fa" />
            <param name="fastq_file" value="test_reads.fastqsanger.gz" />
            <output name="output_scores" file="output1.tab" />
        </test>
        <!-- Test 5: non-default optional arguments -->
        <test expect_num_outputs="1">
            <param name="fasta_file" value="HA_sample.fa" />
            <param name="fastq_file" value="test_reads.fq" />
            <section name="opt">
                <param name="kmer_length" value="29" />
                <param name="score_threshold" value="0.5" />
                <param name="min_kmer_cov" value="7" />
                <param name="min_kmer_prop" value="0.5" />
                <param name="top_seed_frac" value="0.5" />
            </section>
            <output name="output_scores" file="output4.tab" />
        </test>
        <!-- Test 6: fasta output -->
        <test expect_num_outputs="1">
            <param name="fasta_file" value="HA_sample.fa" />
            <param name="fastq_file" value="test_reads.fq" />
            <param name="output_type" value="fasta" />
            <section name="opt">
                <param name="return_best_n" value="3" />
            </section>
            <output name="output_fasta" file="output5.fa" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

VAPOR is a tool for classification of Influenza samples from raw short read sequence data for downstream bioinformatics analysis.
VAPOR is provided with a fasta file of full-length sequences (> 20,000) for a given segment, a set of reads, and attempts to retrieve a reference that is closest to the sample strain.

`sub_sample` is not an option here (compared to the tool on GitHub), since you can always build a workflow that preprocesses your reads to a (random) subsample. You can use this output as your reads file for VAPOR.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btz814</citation>
    </citations>
</tool>