comparison hydra.xml @ 0:71976cfc9022 draft

planemo upload for repository https://github.com/phac-nml/quasitools commit 8a264400a75945e2e0fdd5a08c007a8b1b7a2f0f
author nml
date Mon, 04 Dec 2017 10:25:26 -0500
parents
children 1abf6b32ecfd
comparison
equal deleted inserted replaced
-1:000000000000 0:71976cfc9022
1 <tool id="hydra" name="Hydra pipeline" version="0.1.0">
2 <description>Identifies drug resistance within an NGS dataset</description>
3 <requirements> <!-- quasitools is the only declared dependency; the command section invokes "quasitools hydra" -->
4 <requirement type="package" version="0.2.2">quasitools</requirement>
5 </requirements>
6 <command detect_errors="exit_code"><![CDATA[
7 ## Run the HyDRA pipeline; the forward read file is the only mandatory argument.
8 quasitools hydra '$forward'
9 ## Optional reverse read file and mutation database are appended only when a dataset was supplied.
10 #if $reverse:
11 '$reverse'
12 #end if
13
14 #if $mutation_db:
15 -m '$mutation_db'
16 #end if
17 ## Numeric options are tested with str(...) != '' instead of truthiness, so an explicit 0 is still passed on and only an empty field is skipped.
18 #if str($reporting_threshold) != '':
19 -rt '$reporting_threshold'
20 #end if
21
22 #if str($consensus_pct) != '':
23 -cp '$consensus_pct'
24 #end if
25
26 #if str($length_cutoff) != '':
27 -lc '$length_cutoff'
28 #end if
29
30 #if str($score_cutoff) != '':
31 -sc '$score_cutoff'
32 #end if
33
34 #if str($error_rate) != '':
35 -e '$error_rate'
36 #end if
37
38 #if str($min_qual) != '':
39 -mq '$min_qual'
40 #end if
41
42 #if str($min_depth) != '':
43 -md '$min_depth'
44 #end if
45
46 #if str($min_ac) != '':
47 -ma '$min_ac'
48 #end if
49
50 #if str($min_freq) != '':
51 -mf '$min_freq'
52 #end if
53 ## Boolean flags are emitted only when the corresponding checkbox is ticked.
54 #if $generate_consensus:
55 --generate_consensus
56 #end if
57
58 #if $filter_ns:
59 --ns
60 #end if
61 ## All results are written to ./output; the declared outputs are collected from that directory.
62 -o output
63
64 ]]></command>
65 <inputs> <!-- Only the forward read is mandatory; every other parameter is optional and pre-filled with the default stated in its help text. -->
66 <param name="forward" type="data" format="fastq" optional="false" label="Forward read" />
67 <param name="reverse" type="data" format="fastq" optional="true" label="Reverse read" help="Not required."/>
68 <param name="mutation_db" type="data" format="tsv" optional="true" label="Mutation DB" help="Defaults to HIV mutation database." />
69 <param name="reporting_threshold" type="integer" optional="true" min="1" max="100" value="1" label="Reporting threshold" help="Minimum mutation frequency to report. Defaults to 1." />
70 <param name="consensus_pct" type="integer" optional="true" min="1" max="100" value="20" label="Consensus percentage" help="Minimum percentage a base needs to be incorporated into the consensus sequence. Defaults to 20." />
71 <param name="length_cutoff" type="integer" optional="true" min="0" max="1000" label="Length cutoff" value="100" help="Reads which fall short of the specified length will be filtered out. Defaults to 100." />
72 <param name="score_cutoff" type="integer" optional="true" min="0" max="40" label="Score cutoff" value="30" help="Reads whose average quality score is less than the specified score will be filtered out. Defaults to 30." />
73 <param name="error_rate" type="float" optional="true" min="0" max="1" label="Error rate" value="0.0021" help="Estimated sequencing error rate. Defaults to 0.0021."/>
74 <param name="min_qual" type="integer" optional="true" min="1" max="100" label="Minimum quality" value="30" help="Minimum required quality for variant to be considered later on in the pipeline. Defaults to 30." />
75 <param name="min_depth" type="integer" optional="true" min="0" max="5000" label="Minimum depth" value="100" help="Minimum required depth for variant to be considered later on in the pipeline. Defaults to 100." />
76 <param name="min_ac" type="integer" optional="true" min="0" max="5000" label="Minimum allele count" value="5" help="Minimum required allele count for variant to be considered later on in the pipeline. Defaults to 5." />
77 <param name="min_freq" type="float" optional="true" min="0" max="1" label="Minimum frequency" value="0.01" help="Minimum required frequency for variant to be considered later on in the pipeline. Defaults to 0.01." />
78 <param name="generate_consensus" type="boolean" truevalue="--generate_consensus" falsevalue="" checked="False" label="Generate consensus" />
79 <param name="filter_ns" type="boolean" truevalue="--ns" falsevalue="" checked="False" label="Filter out n's" />
80 </inputs>
81 <outputs> <!-- Every output is collected from the "output" working directory that the command passes to quasitools via -o. -->
82 <data format="bam" label="HyDRA: alignment bam output" name="output_bam" from_work_dir="output/align.bam" />
83 <data format="bai" label="HyDRA: bam.bai output" name="output_bam_bai" from_work_dir="output/align.bam.bai" /> <!-- index of align.bam, so typed as bai rather than bam -->
84 <data format="csv" label="HyDRA: coverage output" name="output_coverage" from_work_dir="output/coverage_file.csv" />
85 <data format="csv" label="HyDRA: drug resistance output" name="output_dr" from_work_dir="output/dr_report.csv" />
86 <data format="fastq" label="HyDRA: filtered reads output" name="output_filtered" from_work_dir="output/filtered.fastq" />
87 <data format="vcf" label="HyDRA: variants output" name="output_hydra" from_work_dir="output/hydra.vcf" />
88 <data format="vcf" label="HyDRA: aa mutations output" name="output_aa_mt" from_work_dir="output/mutation_report.hmcf" /> <!-- NOTE(review): the help text calls HMCF a custom VCF-like format; confirm the vcf datatype is appropriate -->
89 <data format="txt" label="HyDRA: stats output" name="output_stats" from_work_dir="output/stats.txt" />
90 </outputs>
91 <tests>
92 <test> <!-- Single test case: only the forward read (forward.fastq) is supplied; no other parameter is set explicitly. -->
93 <param name="forward" value="forward.fastq" />
94 <output name="output_coverage"> <!-- coverage CSV: spot-checks a few expected substrings -->
95 <assert_contents>
96 <has_text text="frame: 0" />
97 <has_text text="1,0" />
98 <has_text text="948,0" />
99 </assert_contents>
100 </output>
101 <output name="output_dr"> <!-- drug resistance report: header row plus the expected mutation rows -->
102 <assert_contents>
103 <has_text text="Chromosome,Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage" />
104 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,101,P,14.23,1574" />
105 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,103,N,5.49,1912" />
106 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,C,24.07,4557" />
107 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,I,18.04,4557" />
108 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,V,20.08,4557" />
109 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,188,C,2.81,3454" />
110 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,A,5.20,3233" />
111 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,S,6.68,3233" />
112 </assert_contents>
113 </output>
114 <output name="output_hydra"> <!-- variants VCF: regular expressions, with \s matching the whitespace between columns -->
115 <assert_contents>
116 <has_text_matching expression="#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"/>
117 <has_text_matching expression="hxb2_pol\s576\s.\sa\sg\s100\sPASS\sDP=805;AC=245;AF=0.3043" />
118 <has_text_matching expression="hxb2_pol\s958\s.\sc\sa\s100\sPASS\sDP=2503;AC=28;AF=0.0112" />
119 </assert_contents>
120 </output>
121 <output name="output_aa_mt"> <!-- amino acid mutation report (HMCF): header line plus two expected records -->
122 <assert_contents>
123 <has_text_matching expression="#CHROM\sGENE\sTYPE\sWILDTYPE\sPOS\sMUTANT\sFILTER\sMUTANT_FREQ\sCOVERAGE\sINFO"/>
124 <has_text_matching expression="hxb2_pol\sRT\smutation\sK\s101\sP\sPASS\s0.1423\s1574\sWC=aaa;MC=CCa;MCF=0.1423;CAT=NNRTI;SRVL=Yes" />
125 <has_text_matching expression="hxb2_pol\sRT\smutation\sH\s221\sN\sPASS\s0.0113\s2475\sWC=cat;MC=Aat;MCF=0.0113;CAT=.;SRVL=." />
126 </assert_contents>
127 </output>
128 <output name="output_stats"> <!-- stats log: input size and the per-stage filtering counts -->
129 <assert_contents>
130 <has_text text="Input Size: 25000"/>
131 <has_text text="Number of reads filtered due to length: 15074"/>
132 <has_text text="Number of reads filtered due to average quality score: 501"/>
133 <has_text text="Number of reads filtered due to presence of Ns: 0"/>
134 <has_text text="Number of reads filtered due to excess coverage: 0"/>
135 <has_text text="Number of reads filtered due to poor mapping: 12"/>
136 <has_text text="Percentage of reads filtered: 62.35"/>
137 </assert_contents>
138 </output>
139 </test>
140 </tests>
141 <help><![CDATA[
142
143 HyDRA - HIV Drug Resistance Analyzer
144 ====================================
145
146 The HyDRA pipeline identifies drug resistance within a Next Generation Sequencing dataset. It takes as input the raw reads produced by a Next Generation Sequencer and produces a report detailing the drug resistance found per sample.
147
148 Authors
149 -------
150
151 The HyDRA pipeline was developed by Eric Enns and David Peddle.
152
153 Stages
154 ------
155
156 The HyDRA pipeline proceeds through the following stages:
157
158 1. Quality Control/Filtering
159 2. Reference mapping using bowtie2.
160 3. Variant Calling and filtering using a Poisson distribution.
161 4. AA Mutation Calling and filtering.
162 5. Drug Resistance report generation.
163
164 Details
165 -------
166
167 The following is an example for running the pipeline, using our included test dataset:
168 * Output directory name: "/tmp/hydra_out"
169 * Forward reads: "reads_w_K103N.fastq"
170
171 ### Output ###
172
173 The detailed output directory tree looks as follows:
174
175 /tmp/hydra_out/
176 * align.bam
177 * align.bam.bai
178 * coverage_file.csv
179 * dr_report.csv
180 * filtered.fastq
181 * hydra.vcf
182 * mutation_report.hmcf
183 * stats.txt
184
185 The descriptions of these directories/files are as follows:
186
187 * __run.conf__: The configuration used when this output was produced.
188 * __reads_w_K103N/__: The results directory for the input file reads_w_K103N.fastq
189 * __align.bam__: The alignment file in bam format.
190 * __align.bam.bai__: The index to the alignment file.
191 * __coverage_file.csv__: A file with one entry per line with the AA position and the coverage at the position.
192 * __dr_report.csv__: A report detailing the drug resistant mutations found, above the reporting threshold (default: 1%).
193 * __filtered.fastq__: The reads remaining after the filtering stage.
194 * __hydra.vcf__: The variants found by the pipeline.
195 * __mutation_report.hmcf__: The AA mutations found by the pipeline.
196 * __stats.txt__: A log file detailing size after filtering and major stages.
197
198 The __dr_report.csv__ file lists all drug resistant mutations found (mutations included in the mutation database) which have a frequency greater than the reporting threshold. An example of this file is given below.
199
200 Example: __dr_report.csv__
201
202 Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage
203 RT,NNRTI,Yes,K,103,N,9.03,155
204
205 The __mutation_report.hmcf__ file is our custom VCF-like file which details all of the AA mutations found by the pipeline. An example of this file is given below.
206
207 Example: __mutation_report.hmcf__
208
209 ##fileformat=HMCFv1
210 ##fileDate=20150008
211 ##source=HyDRA
212 ##reference=/home/ericenns/hydra/var/hxb2_pol.fas
213 ##INFO=<ID=MC,Number=.,Description="String">
214 ##INFO=<ID=MCF,Number=.,Description="String">
215 ##INFO=<ID=WC,Number=.,Description="String">
216 ##FILTER=<ID=mf0.01,Description="Mutant freq below 0.01">
217 #GENE CATEGORY SURVEILLANCE TYPE WILDTYPE POS MUTANT FILTER MUTANT_FREQ COVERAGE INFO
218 RT NNRTI Yes mutation K 103 N PASS 0.0903 155 WC=aaa;MC=aaC;MCF=0.0903
219
220 ]]></help>
221 <citations> <!-- NOTE(review): no citation is declared for this tool yet; add one if a publication or DOI exists -->
222 </citations>
223 </tool>