Mercurial > repos > xuebing > sharplabtool
comparison tools/ngs_simulation/ngs_simulation.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 <tool id="ngs_simulation" name="Simulate" version="1.0.0"> | |
2 <!--<tool id="ngs_simulation" name="Simulate" force_history_refresh="True" version="1.0.0">--> | |
3 <description>Illumina runs</description> | |
4 <!-- Cheetah command line for ngs_simulation.py. For a built-in genome the FASTA path is taken from the last column of the ngs_sim_fasta data-table row whose first column matches the selected genome; list() around filter() keeps the [0] lookup working where filter() returns an iterator (Python 3). --> <command interpreter="python"> | |
5 ngs_simulation.py | |
6 #if $in_type.input_type == "built-in" | |
7 --input="${ list( filter( lambda x: str( x[0] ) == str( $in_type.genome ), $__app__.tool_data_tables[ 'ngs_sim_fasta' ].get_fields() ) )[0][-1] }" | |
8 --genome=$in_type.genome | |
9 #else | |
10 --input=$in_type.input1 | |
11 #end if | |
12 --read_len=$read_len | |
13 --avg_coverage=$avg_coverage | |
14 --error_rate=$error_rate | |
15 --num_sims=$num_sims | |
16 --polymorphism=$polymorphism | |
17 --detection_thresh=$detection_thresh | |
18 --output_png=$output_png | |
19 --summary_out=$summary_out | |
20 --output_summary=$output_summary | |
21 --new_file_path=$__new_file_path__ | |
22 </command> | |
23 <!-- If want to include all simulation results file | |
24 sim_results=$sim_results | |
25 output=$output.id | |
26 --> | |
27 <inputs> | |
28 <conditional name="in_type"> <!-- Selects the genome source; the command template branches on in_type.input_type. --> | |
29 <param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?"> | |
30 <option value="built-in">Built-in</option> | |
31 <option value="history">History file</option> | |
32 </param> | |
33 <when value="built-in"> <!-- Genome choices are read from the ngs_sim_fasta data table. --> | |
34 <param name="genome" type="select" label="Select a built-in genome" help="if your genome of interest is not listed - contact Galaxy team"> | |
35 <options from_data_table="ngs_sim_fasta" /> | |
36 </param> | |
37 </when> | |
38 <when value="history"> <!-- A FASTA dataset from the history is handed to the script as its input option. --> | |
39 <param name="input1" type="data" format="fasta" label="Input genome (FASTA format)" /> | |
40 </when> | |
41 </conditional> | |
42 <param name="read_len" type="integer" value="76" label="Read length" /> <!-- Each of these four scalar settings is forwarded to the ngs_simulation.py option of the same name. --> | |
43 <param name="avg_coverage" type="integer" value="200" label="Average coverage" /> | |
44 <param name="error_rate" type="float" value="0.001" label="Error rate or quality score" help="Quality score if integer 1 or greater; error rate if between 0 and 1" /> | |
45 <param name="num_sims" type="integer" value="100" label="The number of simulations to run" /> | |
46 <param name="polymorphism" type="select" multiple="true" label="Frequency/ies for minor allele"> <!-- Multi-select with no preselected option; the selection is forwarded through the polymorphism option. Offers the same value list as detection_thresh. --> | |
47 <option value="0.001">0.001</option> | |
48 <option value="0.002">0.002</option> | |
49 <option value="0.003">0.003</option> | |
50 <option value="0.004">0.004</option> | |
51 <option value="0.005">0.005</option> | |
52 <option value="0.006">0.006</option> | |
53 <option value="0.007">0.007</option> | |
54 <option value="0.008">0.008</option> | |
55 <option value="0.009">0.009</option> | |
56 <option value="0.01">0.01</option> | |
57 <option value="0.02">0.02</option> | |
58 <option value="0.03">0.03</option> | |
59 <option value="0.04">0.04</option> | |
60 <option value="0.05">0.05</option> | |
61 <option value="0.06">0.06</option> | |
62 <option value="0.07">0.07</option> | |
63 <option value="0.08">0.08</option> | |
64 <option value="0.09">0.09</option> | |
65 <option value="0.1">0.1</option> | |
66 <option value="0.2">0.2</option> | |
67 <option value="0.3">0.3</option> | |
68 <option value="0.4">0.4</option> | |
69 <option value="0.5">0.5</option> | |
70 <option value="0.6">0.6</option> | |
71 <option value="0.7">0.7</option> | |
72 <option value="0.8">0.8</option> | |
73 <option value="0.9">0.9</option> | |
74 <option value="1.0">1.0</option> | |
75 </param> | |
76 <param name="detection_thresh" type="select" multiple="true" label="Detection thresholds"> <!-- Multi-select with no preselected option; the selection is forwarded through the detection_thresh option. Offers the same value list as polymorphism. --> | |
77 <option value="0.001">0.001</option> | |
78 <option value="0.002">0.002</option> | |
79 <option value="0.003">0.003</option> | |
80 <option value="0.004">0.004</option> | |
81 <option value="0.005">0.005</option> | |
82 <option value="0.006">0.006</option> | |
83 <option value="0.007">0.007</option> | |
84 <option value="0.008">0.008</option> | |
85 <option value="0.009">0.009</option> | |
86 <option value="0.01">0.01</option> | |
87 <option value="0.02">0.02</option> | |
88 <option value="0.03">0.03</option> | |
89 <option value="0.04">0.04</option> | |
90 <option value="0.05">0.05</option> | |
91 <option value="0.06">0.06</option> | |
92 <option value="0.07">0.07</option> | |
93 <option value="0.08">0.08</option> | |
94 <option value="0.09">0.09</option> | |
95 <option value="0.1">0.1</option> | |
96 <option value="0.2">0.2</option> | |
97 <option value="0.3">0.3</option> | |
98 <option value="0.4">0.4</option> | |
99 <option value="0.5">0.5</option> | |
100 <option value="0.6">0.6</option> | |
101 <option value="0.7">0.7</option> | |
102 <option value="0.8">0.8</option> | |
103 <option value="0.9">0.9</option> | |
104 <option value="1.0">1.0</option> | |
105 </param> | |
106 <param name="summary_out" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Include a (text) summary file for all the simulations" /> <!-- Checked by default; gates the output_summary dataset through its filter and is also forwarded to the script as the summary_out option. --> | |
107 <!-- <param name="sim_results" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output all tabular simulation results" help="Number of polymorphisms times number of detection thresholds"/> | |
108 --> | |
109 </inputs> | |
110 <outputs> | |
111 <data format="png" name="output_png" /> <!-- No filter, so the plot dataset is always created. --> | |
112 <data format="tabular" name="output_summary"> <!-- Created only when the summary_out checkbox is ticked. --> | |
113 <filter>summary_out == True</filter> | |
114 </data> | |
115 <!-- Disabled output, presumably for the full simulation results. NOTE(review): its filter names sim_files_out, whereas the disabled input parameter is named sim_results; reconcile before re-enabling. | |
116 <data format="tabular" name="output"> | |
117 <filter>sim_files_out</filter> | |
118 </data> | |
119 --> | |
120 </outputs> | |
121 <tests> | |
122 <!-- | |
123 Tests cannot be run because of the non-deterministic element of the simulation. | |
124 But if you run the following "tests" manually in the browser and check against | |
125 the output files, they should be very similar to the listed output files. | |
126 --> | |
127 <!-- | |
128 <test> | |
129 <param name="input_type" value="history" /> | |
130 <param name="input1" value="ngs_simulation_in1.fasta" ftype="fasta" /> | |
131 <param name="read_len" value="76" /> | |
132 <param name="avg_coverage" value="200" /> | |
133 <param name="error_rate" value="0.001" /> | |
134 <param name="num_sims" value="25" /> | |
135 <param name="polymorphism" value="0.02,0.04,0.1" /> | |
136 <param name="detection_thresh" value="0.01,0.02" /> | |
137 <param name="summary_out" value="true" /> | |
138 <output name="output_png" file="ngs_simulation_out1.png" /> | |
139 <output name="output_summary" file="ngs_simulation_out2.tabular" /> | |
140 </test> | |
141 <test> | |
142 <param name="input_type" value="built-in" /> | |
143 <param name="genome" value="pUC18" /> | |
144 <param name="read_len" value="50" /> | |
145 <param name="avg_coverage" value="150" /> | |
146 <param name="error_rate" value="0.005" /> | |
147 <param name="num_sims" value="25" /> | |
148 <param name="polymorphism" value="0.001,0.005" /> | |
149 <param name="detection_thresh" value="0.001,0.002" /> | |
150 <param name="summary_out" value="false" /> | |
151 <output name="output_png" file="ngs_simulation_out3.png" /> | |
152 </test> | |
153 --> | |
154 </tests> | |
155 <help> | |
156 | |
157 **What it does** | |
158 | |
159 This tool simulates an Illumina run and provides plots of false positives and false negatives. It allows for a range of simulation parameters to be set. Note that this simulation sets only one (randomly chosen) position in the genome as polymorphic, according to the value specified. Superimposed on this are "sequencing errors", which are uniformly (and randomly) distributed. Polymorphisms are assigned using the detection threshold, so if the detection threshold is set to the same as the minor allele frequency, the expected false negative rate is 50%. | |
160 | |
161 **Parameter list** | |
162 | |
163 These are the parameters that should be set for the simulation:: | |
164 | |
165 Read length (which is the same for all reads) | |
166 Average Coverage | |
167 Frequency for Minor Allele | |
168 Sequencing Error Rate | |
169 Detection Threshold | |
170 Number of Simulations | |
171 | |
172 You also should choose to use either a built-in genome or supply your own FASTA file. | |
173 | |
174 **Output** | |
175 | |
176 There are one or two outputs. The first is a png that contains two different plots and is always generated. The second is optional and is a text file with some summary information about the simulations that were run. Below are some example outputs for a 10-simulation run on phiX with the default settings:: | |
177 | |
178 Read length 76 | |
179 Average coverage 200 | |
180 Error rate/quality score 0.001 | |
181 Number of simulations 100 | |
182 Frequencies for minor allele 0.002 | |
183 0.004 | |
184 Detection thresholds 0.003 | |
185 0.005 | |
186 0.007 | |
187 Include summary file Yes | |
188 | |
189 Plot output (png): | |
190 | |
191 .. image:: ./static/images/ngs_simulation.png | |
192 | |
193 Summary output (txt):: | |
194 | |
195 FP FN GENOMESIZE.5386 fprate hetcol errcol | |
196 Min. : 71.0 Min. :0.0 Mode:logical Min. :0.01318 Min. :0.004 Min. :0.007 | |
197 1st Qu.:86.0 1st Qu.:1.0 NA's:10 1st Qu.:0.01597 1st Qu.:0.004 1st Qu.:0.007 | |
198 Median :92.5 Median :1.0 NA Median :0.01717 Median :0.004 Median :0.007 | |
199 Mean :93.6 Mean :0.9 NA Mean :0.01738 Mean :0.004 Mean :0.007 | |
200 3rd Qu.:100.8 3rd Qu.:1.0 NA 3rd Qu.:0.01871 3rd Qu.:0.004 3rd Qu.:0.007 | |
201 Max. :123.0 Max. :1.0 NA Max. :0.02284 Max. :0.004 Max. :0.007 | |
202 | |
203 False Positive Rate Summary | |
204 0.003 0.005 0.007 | |
205 0.001 0.17711 0.10854 0.01673 | |
206 0.009 0.18049 0.10791 0.01738 | |
207 | |
208 False Negative Rate Summary | |
209 0.003 0.005 0.007 | |
210 0.001 1.0 0.8 1.0 | |
211 0.009 0.4 0.7 0.9 | |
212 | |
213 | |
214 </help> | |
215 </tool> | |
216 | |
217 |