Mercurial > repos > nml > quasitools
comparison hydra.xml @ 0:71976cfc9022 draft
planemo upload for repository https://github.com/phac-nml/quasitools commit 8a264400a75945e2e0fdd5a08c007a8b1b7a2f0f
author | nml |
---|---|
date | Mon, 04 Dec 2017 10:25:26 -0500 |
parents | |
children | 1abf6b32ecfd |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:71976cfc9022 |
---|---|
1 <tool id="hydra" name="Hydra pipeline" version="0.1.0"> | |
2 <description>Identifies drug resistance within an NGS dataset</description> | |
3 <requirements> <!-- Software Galaxy must make available before running the command below. --> | |
4 <requirement type="package" version="0.2.2">quasitools</requirement> | |
5 </requirements> | |
6 <command detect_errors="exit_code"><![CDATA[ | |
7 ## Invoke the HyDRA pipeline; the forward read is the only mandatory argument. | |
8 quasitools hydra '$forward' | |
9 ## Optional datasets are passed only when one was selected. | |
10 #if $reverse: | |
11 '$reverse' | |
12 #end if | |
13 | |
14 #if $mutation_db: | |
15 -m '$mutation_db' | |
16 #end if | |
17 ## Optional numeric fields are tested as strings so an explicit 0 is still forwarded; a bare truthiness test drops 0 and the tool silently falls back to its own default. | |
18 #if str($reporting_threshold) != '': | |
19 -rt '$reporting_threshold' | |
20 #end if | |
21 | |
22 #if str($consensus_pct) != '': | |
23 -cp '$consensus_pct' | |
24 #end if | |
25 | |
26 #if str($length_cutoff) != '': | |
27 -lc '$length_cutoff' | |
28 #end if | |
29 | |
30 #if str($score_cutoff) != '': | |
31 -sc '$score_cutoff' | |
32 #end if | |
33 | |
34 #if str($error_rate) != '': | |
35 -e '$error_rate' | |
36 #end if | |
37 | |
38 #if str($min_qual) != '': | |
39 -mq '$min_qual' | |
40 #end if | |
41 | |
42 #if str($min_depth) != '': | |
43 -md '$min_depth' | |
44 #end if | |
45 | |
46 #if str($min_ac) != '': | |
47 -ma '$min_ac' | |
48 #end if | |
49 | |
50 #if str($min_freq) != '': | |
51 -mf '$min_freq' | |
52 #end if | |
53 ## Boolean flags are emitted only when the corresponding checkbox is ticked. | |
54 #if $generate_consensus: | |
55 --generate_consensus | |
56 #end if | |
57 | |
58 #if $filter_ns: | |
59 --ns | |
60 #end if | |
61 ## Results are written to the "output" directory, from which the outputs section collects each file. | |
62 -o output | |
63 | |
64 ]]></command> | |
65 <inputs> <!-- Only the forward read is required; every other parameter is optional and pre-filled with the default stated in its help text. --> | |
66 <param name="forward" type="data" format="fastq" optional="false" label="Forward read" /> | |
67 <param name="reverse" type="data" format="fastq" optional="true" label="Reverse read" help="Not required."/> | |
68 <param name="mutation_db" type="data" format="tsv" optional="true" label="Mutation DB" help="Defaults to HIV mutation database." /> | |
69 <param name="reporting_threshold" type="integer" optional="true" min="1" max="100" value="1" label="Reporting threshold" help="Minimum mutation frequency to report. Defaults to 1." /> | |
70 <param name="consensus_pct" type="integer" optional="true" min="1" max="100" value="20" label="Consensus percentage" help="Minimum percentage a base needs to be incorporated into the consensus sequence. Defaults to 20." /> | |
71 <param name="length_cutoff" type="integer" optional="true" min="0" max="1000" label="Length cutoff" value="100" help="Reads which fall short of the specified length will be filtered out. Defaults to 100." /> | |
72 <param name="score_cutoff" type="integer" optional="true" min="0" max="40" label="Score cutoff" value="30" help="Reads whose average quality score is less than the specified score will be filtered out. Defaults to 30." /> | |
73 <param name="error_rate" type="float" optional="true" min="0" max="1" label="Error rate" value="0.0021" help="Estimated sequencing error rate. Defaults to 0.0021."/> | |
74 <param name="min_qual" type="integer" optional="true" min="1" max="100" label="Minimum quality" value="30" help="Minimum required quality for variant to be considered later on in the pipeline. Defaults to 30." /> | |
75 <param name="min_depth" type="integer" optional="true" min="0" max="5000" label="Minimum depth" value="100" help="Minimum required depth for variant to be considered later on in the pipeline. Defaults to 100." /> | |
76 <param name="min_ac" type="integer" optional="true" min="0" max="5000" label="Minimum allele count" value="5" help="Minimum required allele count for variant to be considered later on in the pipeline. Defaults to 5." /> | |
77 <param name="min_freq" type="float" optional="true" min="0" max="1" label="Minimum frequency" value="0.01" help="Minimum required frequency for variant to be considered later on in the pipeline. Defaults to 0.01." /> | |
78 <param name="generate_consensus" type="boolean" truevalue="--generate_consensus" falsevalue="" checked="False" label="Generate consensus" /> | |
79 <param name="filter_ns" type="boolean" truevalue="--ns" falsevalue="" checked="False" label="Filter out n's" /> | |
80 </inputs> | |
81 <outputs> <!-- Each dataset is collected from the "output" directory that the command passes to -o. --> | |
82 <data format="bam" label="HyDRA: alignment bam output" name="output_bam" from_work_dir="output/align.bam" /> | |
83 <data format="bai" label="HyDRA: bam.bai output" name="output_bam_bai" from_work_dir="output/align.bam.bai" /> <!-- A BAM index is not itself a BAM file, so it is typed as bai. --> | |
84 <data format="csv" label="HyDRA: coverage output" name="output_coverage" from_work_dir="output/coverage_file.csv" /> | |
85 <data format="csv" label="HyDRA: drug resistance output" name="output_dr" from_work_dir="output/dr_report.csv" /> | |
86 <data format="fastq" label="HyDRA: filtered reads output" name="output_filtered" from_work_dir="output/filtered.fastq" /> | |
87 <data format="vcf" label="HyDRA: variants output" name="output_hydra" from_work_dir="output/hydra.vcf" /> | |
88 <data format="vcf" label="HyDRA: aa mutations output" name="output_aa_mt" from_work_dir="output/mutation_report.hmcf" /> <!-- NOTE(review): HMCF is a VCF-like custom format; confirm that typing it as vcf is intended. --> | |
89 <data format="txt" label="HyDRA: stats output" name="output_stats" from_work_dir="output/stats.txt" /> | |
90 </outputs> | |
91 <tests> | |
92 <test> <!-- Single-end scenario: only the forward read is supplied; no other parameter is set explicitly. --> | |
93 <param name="forward" value="forward.fastq" /> | |
94 <output name="output_coverage"> <!-- Spot-checks literal substrings of the coverage CSV. --> | |
95 <assert_contents> | |
96 <has_text text="frame: 0" /> | |
97 <has_text text="1,0" /> | |
98 <has_text text="948,0" /> | |
99 </assert_contents> | |
100 </output> | |
101 <output name="output_dr"> <!-- Checks the report header row and a set of expected hxb2_pol RT/NNRTI rows. --> | |
102 <assert_contents> | |
103 <has_text text="Chromosome,Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage" /> | |
104 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,101,P,14.23,1574" /> | |
105 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,103,N,5.49,1912" /> | |
106 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,C,24.07,4557" /> | |
107 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,I,18.04,4557" /> | |
108 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,V,20.08,4557" /> | |
109 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,188,C,2.81,3454" /> | |
110 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,A,5.20,3233" /> | |
111 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,S,6.68,3233" /> | |
112 </assert_contents> | |
113 </output> | |
114 <output name="output_hydra"> <!-- Regular-expression checks; \s and \t stand in for the column separators. --> | |
115 <assert_contents> | |
116 <has_text_matching expression="#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"/> | |
117 <has_text_matching expression="hxb2_pol\s576\s.\sa\sg\s100\sPASS\sDP=805;AC=245;AF=0.3043" /> | |
118 <has_text_matching expression="hxb2_pol\s958\s.\sc\sa\s100\sPASS\sDP=2503;AC=28;AF=0.0112" /> | |
119 </assert_contents> | |
120 </output> | |
121 <output name="output_aa_mt"> <!-- Regular-expression checks on the header line and two mutation records. --> | |
122 <assert_contents> | |
123 <has_text_matching expression="#CHROM\sGENE\sTYPE\sWILDTYPE\sPOS\sMUTANT\sFILTER\sMUTANT_FREQ\sCOVERAGE\sINFO"/> | |
124 <has_text_matching expression="hxb2_pol\sRT\smutation\sK\s101\sP\sPASS\s0.1423\s1574\sWC=aaa;MC=CCa;MCF=0.1423;CAT=NNRTI;SRVL=Yes" /> | |
125 <has_text_matching expression="hxb2_pol\sRT\smutation\sH\s221\sN\sPASS\s0.0113\s2475\sWC=cat;MC=Aat;MCF=0.0113;CAT=.;SRVL=." /> | |
126 </assert_contents> | |
127 </output> | |
128 <output name="output_stats"> <!-- Pins the exact read-filtering counts reported for the test input. --> | |
129 <assert_contents> | |
130 <has_text text="Input Size: 25000"/> | |
131 <has_text text="Number of reads filtered due to length: 15074"/> | |
132 <has_text text="Number of reads filtered due to average quality score: 501"/> | |
133 <has_text text="Number of reads filtered due to presence of Ns: 0"/> | |
134 <has_text text="Number of reads filtered due to excess coverage: 0"/> | |
135 <has_text text="Number of reads filtered due to poor mapping: 12"/> | |
136 <has_text text="Percentage of reads filtered: 62.35"/> | |
137 </assert_contents> | |
138 </output> | |
139 </test> | |
140 </tests> | |
141 <help><![CDATA[ | |
142 | |
143 HyDRA - HIV Drug Resistance Analyzer | |
144 ==================================== | |
145 | |
146 The HyDRA pipeline identifies drug resistance within a Next Generation Sequencing dataset. It takes as input the raw reads produced by a Next Generation Sequencer and produces a report detailing the drug resistance found per sample. | |
147 | |
148 Authors | |
149 ------- | |
150 | |
151 The HyDRA pipeline was developed by Eric Enns and David Peddle. | |
152 | |
153 Stages | |
154 ------ | |
155 | |
156 The HyDRA pipeline proceeds through the following stages: | |
157 | |
158 1. Quality Control/Filtering | |
159 2. Reference mapping using bowtie2. | |
160 3. Variant Calling and filtering using a Poisson distribution. | |
161 4. AA Mutation Calling and filtering. | |
162 5. Drug Resistance report generation. | |
163 | |
164 Details | |
165 ------- | |
166 | |
167 The following is an example for running the pipeline, using our included test dataset: | |
168 * Output directory name: "/tmp/hydra_out" | |
169 * Forward reads: "reads_w_K103N.fastq" | |
170 | |
171 ### Output ### | |
172 | |
173 The detailed output directory tree looks as follows: | |
174 | |
175 /tmp/hydra_out/ | |
176 * align.bam | |
177 * align.bam.bai | |
178 * coverage_file.csv | |
179 * dr_report.csv | |
180 * filtered.fastq | |
181 * hydra.vcf | |
182 * mutation_report.hmcf | |
183 * stats.txt | |
184 | |
185 The description of each of these directories/files is as follows: | |
186 | |
187 * __run.conf__: The configuration used when this output was produced. | |
188 * __reads_w_K103N/__: The results directory for the input file reads_w_K103N.fastq | |
189 * __align.bam__: The alignment file in bam format. | |
190 * __align.bam.bai__: The index to the alignment file. | |
191 * __coverage_file.csv__: A file with one entry per line with the AA position and the coverage at the position. | |
192 * __dr_report.csv__: A report detailing the drug resistant mutations found, above the reporting threshold (default: 1%). | |
193 * __filtered.fastq__: The reads remaining after the filtering stage. | |
194 * __hydra.vcf__: The variants found by the pipeline. | |
195 * __mutation_report.hmcf__: The AA mutations found by the pipeline. | |
196 * __stats.txt__: A log file detailing size after filtering and major stages. | |
197 | |
198 The __dr_report.csv__ file lists all found drug resistant mutations (mutations included in the mutation database) which have frequency greater than the reporting threshold. An example of this file is given below. | |
199 | |
200 Example: __dr_report.csv__ | |
201 | |
202 Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage | |
203 RT,NNRTI,Yes,K,103,N,9.03,155 | |
204 | |
205 The __mutation_report.hmcf__ file is our custom VCF-like file which details all of the AA mutations found by the pipeline. An example of this file is given below. | |
206 | |
207 Example: __mutation_report.hmcf__ | |
208 | |
209 ##fileformat=HMCFv1 | |
210 ##fileDate=20150008 | |
211 ##source=HyDRA | |
212 ##reference=/home/ericenns/hydra/var/hxb2_pol.fas | |
213 ##INFO=<ID=MC,Number=.,Description="String"> | |
214 ##INFO=<ID=MCF,Number=.,Description="String"> | |
215 ##INFO=<ID=WC,Number=.,Description="String"> | |
216 ##FILTER=<ID=mf0.01,Description="Mutant freq below 0.01"> | |
217 #GENE CATEGORY SURVEILLANCE TYPE WILDTYPE POS MUTANT FILTER MUTANT_FREQ COVERAGE INFO | |
218 RT NNRTI Yes mutation K 103 N PASS 0.0903 155 WC=aaa;MC=aaC;MCF=0.0903 | |
219 | |
220 ]]></help> | |
221 <citations> <!-- NOTE(review): no citation is registered for this tool; add a DOI or BibTeX entry if a publication exists. --> | |
222 </citations> | |
223 </tool> |