comparison vapor.xml @ 4:244812f5bd1f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vapor commit e0546646548259ccc4cfd65dbb793fc910fea583
author iuc
date Wed, 16 Nov 2022 13:41:01 +0000
parents f11d2dd29b2b
children
comparison
equal deleted inserted replaced
3:f11d2dd29b2b 4:244812f5bd1f
1 <tool id="vapor" name="VAPOR" version="@TOOL_VERSION@+galaxy2" profile="21.05"> 1 <tool id="vapor" name="VAPOR" version="@TOOL_VERSION@+galaxy3" profile="21.05">
2 <description> 2 <description>
3 Classify Influenza samples from raw short read sequence data 3 Classify Influenza samples from short reads sequence data
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <token name="@TOOL_VERSION@">1.0.2</token> 6 <token name="@TOOL_VERSION@">1.0.2</token>
7 </macros> 7 </macros>
8 <xrefs> 8 <xrefs>
9 <xref type="bio.tools">vapor</xref> 9 <xref type="bio.tools">vapor</xref>
10 </xrefs> 10 </xrefs>
11 <requirements> 11 <requirements>
12 <requirement type="package" version="@TOOL_VERSION@">vapor</requirement> 12 <requirement type="package" version="@TOOL_VERSION@">vapor</requirement>
13 <!-- gawk is only required to work around a current bug in the command-line
14 tool => remove once fixed (see command section below) -->
15 <requirement type="package" version="5.1.0">gawk</requirement>
13 </requirements> 16 </requirements>
14 <command detect_errors="exit_code"><![CDATA[ 17 <command detect_errors="exit_code"><![CDATA[
18 #set $total_refs = int($fasta_file.metadata.sequences)
19
20 ## The next two lines (on-the-fly uppercasing of the reference sequences) are a workaround for a bug
21 ## in vapor 1.0.2, which caused sequence comparisons to be case-sensitive.
22 ## Got fixed upstream in:
23 ## https://github.com/connor-lab/vapor/commit/b5ec5857cbf53ed45ca7487dac2b4b85ecfe33ea
24 ## but unfortunately no release has been tagged since.
25 ## Remove with next release!!
26 awk '{ if ($0 !~ />/) {print toupper($0)} else {print $0} }' '$fasta_file' > ref_upper.fa &&
27 #set $fasta_file = 'ref_upper.fa'
28
15 #if str($fastq_input.fastq_input_selector) == "paired" 29 #if str($fastq_input.fastq_input_selector) == "paired"
16 #set r1_ext = $fastq_input.fastq1.extension 30 #set r1_ext = $fastq_input.fastq1.extension
17 #set r2_ext = $fastq_input.fastq2.extension 31 #set r2_ext = $fastq_input.fastq2.extension
18 ln -s '$fastq_input.fastq1' fastq1.$r1_ext && 32 ln -s '$fastq_input.fastq1' fastq1.$r1_ext &&
19 ln -s '$fastq_input.fastq2' fastq2.$r2_ext && 33 ln -s '$fastq_input.fastq2' fastq2.$r2_ext &&
25 #else 39 #else
26 #set r1_ext = $fastq_input.fastq_single.extension 40 #set r1_ext = $fastq_input.fastq_single.extension
27 ln -s '$fastq_input.fastq_single' fastq1.$r1_ext && 41 ln -s '$fastq_input.fastq_single' fastq1.$r1_ext &&
28 #end if 42 #end if
29 vapor.py 43 vapor.py
30 --return_best_n $opt.return_best_n 44 #if int($return_best_n) > 0
45 --return_best_n $return_best_n
46 #else
47 --return_best_n $total_refs
48 #end if
31 #if $output_type == "fasta" 49 #if $output_type == "fasta"
32 --return_seqs 50 --return_seqs
33 #end if 51 #end if
34 -k '$opt.kmer_length' 52 -k $opt.kmer_length
35 -t '$opt.score_threshold' 53 -t $opt.threshold
36 -c '$opt.min_kmer_cov' 54 -c $opt.min_kmer_cov
37 -m '$opt.min_kmer_prop' 55 -m $opt.min_kmer_prop
38 -fa '$fasta_file' 56 -fa '$fasta_file'
39 -fq 57 -fq fastq1.$r1_ext
40 fastq1.$r1_ext
41 #if str($fastq_input.fastq_input_selector) in ["paired", "paired_collection"] 58 #if str($fastq_input.fastq_input_selector) in ["paired", "paired_collection"]
42 fastq2.$r2_ext 59 fastq2.$r2_ext
43 #end if 60 #end if
44 -f '$opt.top_seed_frac' 61 -f $opt.top_seed_frac
45 -q
46 > out_file 62 > out_file
47 ]]> </command> 63 ]]></command>
48 <inputs> 64 <inputs>
49 <param name="fasta_file" format="fasta" type="data" label="FASTA file" help="Raw short read sequences (full length reference segment sequences)" /> 65 <param name="fasta_file" format="fasta" type="data" label="Reference sequences" help="Select a multisample fasta dataset with reference sequences to base classification on." />
50 <conditional name="fastq_input"> 66 <conditional name="fastq_input">
51 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> 67 <param name="fastq_input_selector" type="select" label="Type of sequencing data">
52 <option value="single">Single</option> 68 <option value="single">Single-end</option>
53 <option value="paired">Paired</option> 69 <option value="paired">Paired-end</option>
54 <option value="paired_collection">Paired Collection</option> 70 <option value="paired_collection">Paired-end as collection</option>
55 </param> 71 </param>
56 <when value="single"> 72 <when value="single">
57 <param name="fastq_single" format="fastqsanger,fastqsanger.gz" type="data" label="FASTQ file" help="Raw short read sequences (full length reference segment sequences)" /> 73 <param name="fastq_single" format="fastqsanger,fastqsanger.gz" type="data" label="Sequenced reads" help="Specify the sequenced reads dataset." />
58 </when> 74 </when>
59 <when value="paired"> 75 <when value="paired">
60 <param name="fastq1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify reads dataset with forward reads"/> 76 <param name="fastq1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads" help="Specify the sequenced reads dataset with forward reads."/>
61 <param name="fastq2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify reads dataset with reverse reads"/> 77 <param name="fastq2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads" help="Specify the sequenced reads dataset with reverse reads."/>
62 </when> 78 </when>
63 <when value="paired_collection"> 79 <when value="paired_collection">
64 <param name="fastq_pairs" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="Dataset collection with forward and reverse reads"/> 80 <param name="fastq_pairs" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Paired collection of sequenced reads" help="Select a collection with forward and reverse reads."/>
65 </when> 81 </when>
66 </conditional> 82 </conditional>
67 <param name="output_type" type="select" label="Output type"> 83 <param name="output_type" type="select" label="Desired output">
68 <option value="scores" selected="true">Return scores only</option> 84 <option value="scores" selected="true">Return scores of best matches</option>
69 <option value="fasta">Return FASTA only</option> 85 <option value="fasta">Return FASTA sequences of best matches</option>
70 </param> 86 </param>
87 <param name="return_best_n" type="integer" min="0" value="1" label="Limit number of reported matches to" help="Determines the maximum number of candidate matches sorted by score that will be reported. Set to zero to get all candidate matches reported." />
71 <section name="opt" title="Optional arguments" expanded="true"> 88 <section name="opt" title="Optional arguments" expanded="true">
72 <param name="return_best_n" type="integer" min="1" value="1" label="Returns the highest scoring n queries" help="A list of the best n queries instead of only the highest scoring query" /> 89 <param argument="-k" name="kmer_length" type="integer" min="5" max="30" value="21" label="Kmer Length" help="Generate k-mers of this length from the reference sequences and the sequenced reads. Note: smaller k-mer sizes come at the cost of decreased specificity and k-mer sizes below 21 have an increased risk of contaminating sequences getting analyzed. Only decrease the default (21) if you know your sample is pure (i.e., sequenced reads represent viral reads only), or if you have increased --threshold sufficiently." />
73 <param name="kmer_length" type="integer" min="5" max="30" value="21" label="Kmer Length" help="" /> 90 <param argument="--threshold" type="float" min="0" max="1" value="0.2" label="Read kmer filtering threshold" help="Sequenced reads that don't have at least this proportion of their k-mers matching k-mers generated from the reference sequences will not be considered in the analysis; default: 0.2" />
74 <param name="score_threshold" type="float" min="0.0" max="1.0" value="0.2" label="Read kmer filtering threshold" help="" /> 91 <param argument="--min_kmer_cov" type="integer" min="1" value="5" label="Coverage threshold for k-mer culling" help="Minimum coverage by sequenced reads for a reference k-mer to be kept during culling; default: 5" />
75 <param name="min_kmer_cov" type="integer" value="5" label="Min coverage kmer culling" help="Minimum coverage kmer culling" /> 92 <param argument="--min_kmer_prop" type="float" min="0" max="1" value="0.1" label="Minimum k-mer proportion" help="Minimum proportion of matching kmers required for queries; default: 0.1" />
76 <param name="min_kmer_prop" type="float" value="0.1" label="Min kmer proportion" help="Minimum proportion of matched kmers allowed for queries" /> 93 <param argument="--top_seed_frac" type="float" min="0" max="1" value="1" label="Fraction of best seeds to extend" help="Of the queries still considered after applying the --min_kmer_prop filter above, the tool will calculate and report scores only for this fraction. Lowering this value leads to shorter runtime because fewer scores will have to be calculated, but also to fewer results getting reported. CAVEAT: this version of the tool will round down the result of applying this parameter so it is possible to end up with zero queries to be considered further. Change only if runtime is an issue!" />
77 <param name="top_seed_frac" type="float" min="0.0" max="1.0" value="0.2" label="Fraction of best seeds to extend" help="" />
78 </section> 94 </section>
79 </inputs> 95 </inputs>
80 <outputs> 96 <outputs>
81 <data name="output_scores" from_work_dir="out_file" format="tabular" label="${tool.name} on ${on_string}: closest reference scores"> 97 <data name="output_scores" from_work_dir="out_file" format="tabular" label="${tool.name} on ${on_string}: closest reference scores">
82 <filter>output_type == "scores"</filter> 98 <filter>output_type == "scores"</filter>
94 <param name="fastq_input_selector" value="single" /> 110 <param name="fastq_input_selector" value="single" />
95 <param name="fastq_single" ftype="fastq" value="test_reads.fq" /> 111 <param name="fastq_single" ftype="fastq" value="test_reads.fq" />
96 </conditional> 112 </conditional>
97 <param name="fasta_file" value="HA_sample.fa" /> 113 <param name="fasta_file" value="HA_sample.fa" />
98 <output name="output_scores" file="output1.tab" /> 114 <output name="output_scores" file="output1.tab" />
115 </test>
116 <test expect_num_outputs="1">
117 <conditional name="fastq_input">
118 <param name="fastq_input_selector" value="single" />
119 <param name="fastq_single" ftype="fastq" value="test_reads.fq" />
120 </conditional>
121 <param name="fasta_file" value="HA_sample.fa" />
122 <param name="return_best_n" value="0" />
123 <output name="output_scores" file="output1_full.tab" />
99 </test> 124 </test>
100 <test expect_num_outputs="1"> 125 <test expect_num_outputs="1">
101 <conditional name="fastq_input"> 126 <conditional name="fastq_input">
102 <param name="fastq_input_selector" value="paired" /> 127 <param name="fastq_input_selector" value="paired" />
103 <param name="fastq1" ftype="fastq" value="test_reads.fq" /> 128 <param name="fastq1" ftype="fastq" value="test_reads.fq" />
120 <param name="fastq_single" value="test_reads.fq" /> 145 <param name="fastq_single" value="test_reads.fq" />
121 </conditional> 146 </conditional>
122 <param name="fasta_file" value="HA_sample.fa" /> 147 <param name="fasta_file" value="HA_sample.fa" />
123 <section name="opt"> 148 <section name="opt">
124 <param name="kmer_length" value="29" /> 149 <param name="kmer_length" value="29" />
125 <param name="score_threshold" value="0.5" /> 150 <param name="threshold" value="0.5" />
126 <param name="min_kmer_cov" value="7" /> 151 <param name="min_kmer_cov" value="7" />
127 <param name="min_kmer_prop" value="0.5" /> 152 <param name="min_kmer_prop" value="0.5" />
128 <param name="top_seed_frac" value="0.5" /> 153 <param name="top_seed_frac" value="0.5" />
129 </section> 154 </section>
130 <output name="output_scores" file="output4.tab" /> 155 <output name="output_scores" file="output4.tab" />
134 <param name="fastq_input_selector" value="single" /> 159 <param name="fastq_input_selector" value="single" />
135 <param name="fastq_single" value="test_reads.fq" /> 160 <param name="fastq_single" value="test_reads.fq" />
136 </conditional> 161 </conditional>
137 <param name="fasta_file" value="HA_sample.fa" /> 162 <param name="fasta_file" value="HA_sample.fa" />
138 <param name="output_type" value="fasta" /> 163 <param name="output_type" value="fasta" />
139 <section name="opt"> 164 <param name="return_best_n" value="3" />
140 <param name="return_best_n" value="3" />
141 </section>
142 <output name="output_fasta" file="output5.fa" /> 165 <output name="output_fasta" file="output5.fa" />
143 </test> 166 </test>
144 </tests> 167 </tests>
145 <help><![CDATA[ 168 <help><![CDATA[
146 **What it does** 169 **What it does**
147 170
148 VAPOR is a tool for classification of Influenza samples from raw short read sequence data for downstream bioinformatics analysis. 171 VAPOR is a tool for classification of Influenza samples from raw short read sequence data for downstream bioinformatics analysis.
149 VAPOR is provided with a fasta file of full-length sequences (> 20,000) for a given segment, a set of reads, and attempts to retrieve a reference that is closest to the sample strain. 172 VAPOR works on a fasta file of full-length reference sequences for a given genome segment and a set of sequenced reads, and attempts to retrieve the reference that is closest to the sequenced strain.
150 173
151 `sub_sample` is not an option here (compared to the tool on GitHub), since you can always build a workflow that preprocesses your reads to a (random) subsample. You can use this output as your reads file for VAPOR. 174 `sub_sample` is not an option here (compared to the tool on GitHub), since you can always build a workflow that preprocesses your reads to a (random) subsample. You can use this output as your reads file for VAPOR.
152 ]]> </help> 175 ]]> </help>
153 <citations> 176 <citations>
154 <citation type="doi">10.1093/bioinformatics/btz814</citation> 177 <citation type="doi">10.1093/bioinformatics/btz814</citation>