comparison hydra.xml @ 5:b69e898b8109 draft

planemo upload for repository https://github.com/phac-nml/quasitools commit e30c0687f755a46c5b3bd265a1478a1abf5dc9f1
author nml
date Fri, 24 Aug 2018 16:50:28 -0400
parents 8cdffc02d2e2
children dcd43b402eb3
comparison
equal deleted inserted replaced
4:8cdffc02d2e2 5:b69e898b8109
1 <tool id="hydra" name="Hydra pipeline" version="0.2.0"> 1 <tool id="hydra" name="Hydra pipeline" version="0.4.2">
2 <description>Identifies drug resistance within an NGS dataset</description> 2 <description>Identifies drug resistance within an NGS dataset</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.3.1">quasitools</requirement> 4 <requirement type="package" version="0.4.2">quasitools</requirement>
5 </requirements> 5 </requirements>
6 <command detect_errors="exit_code"><![CDATA[ 6 <command detect_errors="exit_code"><![CDATA[
7 7
8 quasitools hydra 8 quasitools hydra
9 9
10 ## Preparing file input. 10 ## Preparing file input.
11 #if $data_type.type == "paired": 11 #if $data_type.type == "paired":
12 12
13 '$data_type.fastq_input1' 13 '$data_type.fastq_input1'
14 '$data_type.fastq_input2' 14 '$data_type.fastq_input2'
15 15
16 #elif $data_type.type == "collection": 16 #elif $data_type.type == "collection":
17 17
18 '$data_type.fastq_input1.forward' 18 '$data_type.fastq_input1.forward'
19 '$data_type.fastq_input1.reverse' 19 '$data_type.fastq_input1.reverse'
20 20
21 #elif $data_type.type == "single": 21 #elif $data_type.type == "single":
22 22
23 '$data_type.fastq_input1' 23 '$data_type.fastq_input1'
24 24
25 #end if 25 #end if
26 26
27 #if $mutation_db: 27 #if $mutation_db:
28 -m '$mutation_db' 28 -m '$mutation_db'
29 #end if 29 #end if
30 30
31 #if $reporting_threshold: 31 #if $reporting_threshold:
32 -rt '$reporting_threshold' 32 -rt '$reporting_threshold'
33 #end if 33 #end if
34 34
35 #if $consensus_pct: 35 #if $consensus_pct:
36 -cp '$consensus_pct' 36 -cp '$consensus_pct'
37 #end if 37 #end if
38 38
39 #if $length_cutoff: 39 #if $length_cutoff:
40 -lc '$length_cutoff' 40 -lc '$length_cutoff'
41 #end if 41 #end if
42 42
43 #if $score_cutoff: 43 #if $score_cutoff:
44 -sc '$score_cutoff' 44 -sc '$score_cutoff'
45 #end if 45 #end if
46 46
47 #if $error_rate: 47 #if $error_rate:
48 -e '$error_rate' 48 -e '$error_rate'
49 #end if 49 #end if
50 50
51 #if $min_qual: 51 #if $min_read_qual:
52 -mq '$min_qual' 52 -rq '$min_read_qual'
53 #end if
54
55 #if $min_variant_qual:
56 -vq '$min_variant_qual'
53 #end if 57 #end if
54 58
55 #if $min_depth: 59 #if $min_depth:
56 -md '$min_depth' 60 -md '$min_depth'
57 #end if 61 #end if
58 62
59 #if $min_ac: 63 #if $min_ac:
60 -ma '$min_ac' 64 -ma '$min_ac'
61 #end if 65 #end if
62 66
63 #if $min_freq: 67 #if $min_freq:
64 -mf '$min_freq' 68 -mf '$min_freq'
65 #end if 69 #end if
66 70
67 #if $consensus.consensus_bool == "true_consensus": 71 #if $consensus.consensus_bool == "true_consensus":
68 --generate_consensus 72 --generate_consensus
69 73
70 #if $consensus.fasta_id.type == "default": 74 #if $consensus.fasta_id.type == "default":
71 --id 75 --id
72 #if $data_type.type == "paired": 76 #if $data_type.type == "paired":
73 '${fastq_input1.element_identifier}'_'${fastq_input2.element_identifier}' 77 '${fastq_input1.element_identifier}'_'${fastq_input2.element_identifier}'
74 #elif $data_type.type == "single": 78 #elif $data_type.type == "single":
75 '${fastq_input1.element_identifier}' 79 '${fastq_input1.element_identifier}'
76 #end if 80 #end if
77 #elif $consensus.fasta_id.type == "custom": 81 #elif $consensus.fasta_id.type == "custom":
78 --id '$consensus.fasta_id.custom_id' 82 --id '$consensus.fasta_id.custom_id'
79 #end if 83 #end if
80 #end if 84 #end if
81 85
82 #if $filter_ns: 86 #if $low_quality.qual_selector == "filter_ns":
83 --ns 87 --ns
84 #end if 88 #elif $low_quality.qual_selector == "mask_reads":
89 --mask_reads
90 #end if
91
92 #if $score_type.score_selector == "median":
93 --median
94 #elif $score_type.score_selector == "mean":
95 --mean
96 #end if
97
98 $trim_reads
85 99
86 -o output 100 -o output
87 101
88 ]]></command> 102 ]]></command>
89 <inputs> 103 <inputs>
105 </when> 119 </when>
106 </conditional> 120 </conditional>
107 <param name="mutation_db" type="data" format="tsv" optional="true" label="Mutation DB" help="Defaults to HIV mutation database." /> 121 <param name="mutation_db" type="data" format="tsv" optional="true" label="Mutation DB" help="Defaults to HIV mutation database." />
108 <param name="reporting_threshold" type="integer" optional="true" min="1" max="100" value="1" label="Reporting threshold. Defaults to 1." help="Minimum mutation frequency to report." /> 122 <param name="reporting_threshold" type="integer" optional="true" min="1" max="100" value="1" label="Reporting threshold. Defaults to 1." help="Minimum mutation frequency to report." />
109 <param name="consensus_pct" type="integer" optional="true" min="1" max="20" value="20" label="Consensus percentage" help="Minimum mutation frequency to report. Defaults to 20." /> 123 <param name="consensus_pct" type="integer" optional="true" min="1" max="20" value="20" label="Consensus percentage" help="Minimum mutation frequency to report. Defaults to 20." />
110 <param name="length_cutoff" type="integer" optional="true" min="0" max="1000" label="Length cutoff" value="100" help="Reads which fall short of the specified length will be filtered out. Defaults to 100." /> 124 <param name="length_cutoff" type="integer" optional="true" min="1" max="1000" label="Length cutoff" value="100" help="Reads which fall short of the specified length will be filtered out. Defaults to 100." />
111 <param name="score_cutoff" type="integer" optional="true" min="0" max="40" label="Score cutoff" value="30" help="Reads whose average quality score is less than the specified score will be filtered out. Defaults to 30." /> 125 <param name="score_cutoff" type="integer" optional="true" min="0" max="40" label="Score cutoff" value="30" help="Reads whose median or mean quality score (depending on the score type specified) is less than the specified score cutoff value will be filtered out. Defaults to 30." />
112 <param name="error_rate" type="float" optional="true" min="0" max="1" label="Error rate" value="0.0021" help="Estimated sequencing error rate. Defaults to 0.0021."/> 126 <param name="error_rate" type="float" optional="true" min="0" max="1" label="Error rate" value="0.0021" help="Estimated sequencing error rate. Defaults to 0.0021."/>
113 <param name="min_qual" type="integer" optional="true" min="1" max="100" label="Minimum quality" value="30" help="Minimum required quality for variant to be considered later on in the pipeline. Defaults to 30." /> 127 <param name="min_variant_qual" type="integer" optional="true" min="1" max="100" label="Minimum quality" value="30" help="Minimum required quality for variant to be considered later on in the pipeline. Defaults to 30." />
128 <param name="min_read_qual" type="integer" optional="true" min="1" max="100" label="Minimum quality" value="30" help="Minimum required quality for a position in a read not to be masked, if masking is enabled. Defaults to 30." />
114 <param name="min_depth" type="integer" optional="true" min="0" max="5000" label="Minimum depth" value="100" help="Minimum required depth for variant to be considered later on in the pipeline. Defaults to 100." /> 129 <param name="min_depth" type="integer" optional="true" min="0" max="5000" label="Minimum depth" value="100" help="Minimum required depth for variant to be considered later on in the pipeline. Defaults to 100." />
115 <param name="min_ac" type="integer" optional="true" min="0" max="5000" label="Minimum allele count" value="5" help="Minimum required allele count for variant to be considered later on in the pipeline. Defaults to 5." /> 130 <param name="min_ac" type="integer" optional="true" min="0" max="5000" label="Minimum allele count" value="5" help="Minimum required allele count for variant to be considered later on in the pipeline. Defaults to 5." />
116 <param name="min_freq" type="float" optional="true" min="0" max="1" label="Minimum frequency" value="0.01" help="Minimum required frequency for variant to be considered later on in the pipeline. Defaults to 0.01." /> 131 <param name="min_freq" type="float" optional="true" min="0" max="1" label="Minimum frequency" value="0.01" help="Minimum required frequency for variant to be considered later on in the pipeline. Defaults to 0.01." />
132 <param name="trim_reads" type="boolean" optional="true" checked="false" truevalue="-tr" falsevalue="" label="Trim reads" help="Iteratively trim reads based on filter values if enabled." />
117 <conditional name="consensus"> 133 <conditional name="consensus">
118 <param name="consensus_bool" type="select" label="Generate consensus sequence." multiple="false" display="radio"> 134 <param name="consensus_bool" type="select" label="Generate consensus sequence." multiple="false" display="radio">
119 <option value="true_consensus">True</option> 135 <option value="true_consensus">True</option>
120 <option selected="true" value="false_consensus">False</option> 136 <option selected="true" value="false_consensus">False</option>
121 </param> 137 </param>
122 <when value="true_consensus"> 138 <when value="true_consensus">
123 <conditional name="fasta_id"> 139 <conditional name="fasta_id">
124 <param name="type" type="select" label="Specify consensus fasta identifier" multiple="false" display="radio"> 140 <param name="type" type="select" label="Specify consensus fasta identifier" multiple="false" display="radio">
125 <option value="default" >Use fasta dataset name</option> 141 <option value="default" >Use fasta dataset name</option>
126 <option value="custom">Use custom name</option> 142 <option value="custom">Use custom name</option>
127 </param> 143 </param>
128 <when value="default"> 144 <when value="default">
129 </when> 145 </when>
130 <when value="custom"> 146 <when value="custom">
131 <param name="custom_id" type="text" optional="false" value="custom_id" label="Fasta identifier" help="Type in a fasta identifier."/> 147 <param name="custom_id" type="text" optional="false" value="custom_id" label="Fasta identifier" help="Type in a fasta identifier."/>
132 </when> 148 </when>
133 </conditional> 149 </conditional>
134 </when> 150 </when>
135 <when value="false_consensus"> 151 <when value="false_consensus">
136 </when> 152 </when>
137 </conditional> 153 </conditional>
138 <param name="filter_ns" type="boolean" truevalue="--ns" falsevalue="" checked="False" label="Filter out n's" /> 154 <conditional name="low_quality">
155 <param name="qual_selector" type="select" label="Filter out regions masked, or mask low coverage regions with n's." multiple="false" display="radio">
156 <option value="filter_ns">Filter out regions with n's</option>
157 <option value="mask_reads">Mask low coverage regions with n's</option>
158 <option value="neither" selected="true">Do not filter or mask low coverage regions.</option>
159 </param>
160 <when value="filter_ns">
161 </when>
162 <when value="mask_reads">
163 </when>
164 <when value="neither">
165 </when>
166 </conditional>
167 <conditional name="score_type">
168 <param name="score_selector" type="select" label="Use either median score (default) or mean score for the score cutoff value." multiple="false" display="radio">
169 <option value="median" selected="true">Use median score</option>
170 <option value="mean">Use mean score</option>
171 </param>
172 <when value="median">
173 </when>
174 <when value="mean">
175 </when>
176 </conditional>
139 </inputs> 177 </inputs>
140 <outputs> 178 <outputs>
141 <data format="bam" label="HyDRA: alignment bam output" name="output_bam" from_work_dir="output/align.bam" /> 179 <data format="bam" label="HyDRA: alignment bam output" name="output_bam" from_work_dir="output/align.bam" />
142 <data format="bam" label="HyDRA: bam.bai output" name="output_bam_bai" from_work_dir="output/align.bam.bai" />
143 <data format="csv" label="HyDRA: coverage output" name="output_coverage" from_work_dir="output/coverage_file.csv" /> 180 <data format="csv" label="HyDRA: coverage output" name="output_coverage" from_work_dir="output/coverage_file.csv" />
144 <data format="csv" label="HyDRA: drug resistance output" name="output_dr" from_work_dir="output/dr_report.csv" /> 181 <data format="csv" label="HyDRA: drug resistance output" name="output_dr" from_work_dir="output/dr_report.csv" />
145 <data format="fastq" label="HyDRA: filtered reads output" name="output_filtered" from_work_dir="output/filtered.fastq" /> 182 <data format="fastq" label="HyDRA: filtered reads output" name="output_filtered" from_work_dir="output/filtered.fastq" />
146 <data format="vcf" label="HyDRA: variants output" name="output_hydra" from_work_dir="output/hydra.vcf" /> 183 <data format="vcf" label="HyDRA: variants output" name="output_hydra" from_work_dir="output/hydra.vcf" />
147 <data format="vcf" label="HyDRA: aa mutations output" name="output_aa_mt" from_work_dir="output/mutation_report.hmcf" /> 184 <data format="vcf" label="HyDRA: aa mutations output" name="output_aa_mt" from_work_dir="output/mutation_report.hmcf" />
152 </outputs> 189 </outputs>
153 <tests> 190 <tests>
154 <test> 191 <test>
155 <param name="type" value="single"/> 192 <param name="type" value="single"/>
156 <param name="fastq_input1" value="forward.fastq" /> 193 <param name="fastq_input1" value="forward.fastq" />
194 <param name="score_selector" value="mean" />
157 <output name="output_coverage"> 195 <output name="output_coverage">
158 <assert_contents> 196 <assert_contents>
159 <has_text text="frame: 0" /> 197 <has_text text="frame: 0" />
160 <has_text text="1,0" /> 198 <has_text text="1,0" />
161 <has_text text="948,0" /> 199 <has_text text="948,0" />
235 273
236 The detailed output directory tree looks as follows: 274 The detailed output directory tree looks as follows:
237 275
238 /tmp/hydra_out/ 276 /tmp/hydra_out/
239 * align.bam 277 * align.bam
240 * align.bam.bai
241 * coverage_file.csv 278 * coverage_file.csv
242 * dr_report.csv 279 * dr_report.csv
243 * filtered.fastq 280 * filtered.fastq
244 * hydra.vcf 281 * hydra.vcf
245 * mutation_report.hmcf 282 * mutation_report.hmcf
248 The descriptions of these directories/files are as follows: 285 The descriptions of these directories/files are as follows:
249 286
250 * __run.conf__: The configuration used when this output was produced. 287 * __run.conf__: The configuration used when this output was produced.
251 * __reads_w_K103N/__: The results directory for the input file reads_w_K103N.fastq 288 * __reads_w_K103N/__: The results directory for the input file reads_w_K103N.fastq
252 * __align.bam__: The alignment file in bam format. 289 * __align.bam__: The alignment file in bam format.
253 * __align.bam.bai__: The index to the alignment file.
254 * __coverage_file.csv__: A file with one entry per line with the AA position and the coverage at the position. 290 * __coverage_file.csv__: A file with one entry per line with the AA position and the coverage at the position.
255 * __dr_report.csv__: A report detailing the drug resistant mutations found, above the reporting threshold (default: 1%). 291 * __dr_report.csv__: A report detailing the drug resistant mutations found, above the reporting threshold (default: 1%).
256 * __filtered.fastq__: The reads remaining after the filtering stage. 292 * __filtered.fastq__: The reads remaining after the filtering stage.
257 * __hydra.vcf__: The variants found by the pipeline. 293 * __hydra.vcf__: The variants found by the pipeline.
258 * __mutation_report.hmcf__: The AA mutations found by the pipeline. 294 * __mutation_report.hmcf__: The AA mutations found by the pipeline.