0
|
1 <tool id="crest" name="CREST" version="1.0">
|
|
<!-- Short summary of the tool. -->
<description>Clipping reveals structural variations</description>
<!-- External dependencies the wrapper expects to be available on the job host. -->
<requirements>
  <!-- Perl library package -->
  <requirement type="package">bioperl</requirement>
  <!-- gfServer is started (and later stopped) by the wrapper script; gfClient is its companion client -->
  <requirement type="binary">gfServer</requirement>
  <requirement type="binary">gfClient</requirement>
  <requirement type="binary">cap</requirement>
</requirements>
|
|
<!--
  Keeps a copy of the generated wrapper script (crest.sh) in the extra files
  directory of the log dataset, then runs the script with bash.
  Both stdout and stderr of the script are captured in the crest_log dataset.
  The portable "> file 2>&1" redirection is used because the bash-only "&>"
  shorthand is read by a plain POSIX sh as "run in background, then truncate file".
  mkdir -p tolerates an already existing directory.
-->
<command>mkdir -p $crest_log.extra_files_path; cat $shscript &gt; $crest_log.extra_files_path/crest.sh; /bin/bash $shscript &gt; $crest_log 2&gt;&amp;1</command>
|
|
10 <inputs>
|
|
<!-- Required tumor alignment; the wrapper script links it as tumor.bam in the working directory. -->
<param name="tumor_bam"
       type="data"
       format="bam"
       label="Tumor Sample"
       help="BAM files must contain soft-clipping signatures at the breakpoints. If they do not, you will not get any results."/>
|
|
<!-- Optional germline alignment; the wrapper script skips every germline step when it is not set. -->
<param name="germline_bam"
       type="data"
       format="bam"
       optional="true"
       label="Germline Sample"
       help=""/>
|
|
<!--
  Reference genome selection: either an entry of crest.loc ("indexed") or a
  FASTA plus 2bit dataset pair from the history ("history").
  Both branches expose the same parameter names (genome_fasta, genome_2bit),
  which is what the wrapper script reads.
-->
<conditional name="refGenomeSource">
  <param name="genomeSource" type="select" label="&lt;HR&gt;Will you select a reference genome from your history or use a built-in index" help="">
    <option value="indexed">Use a built-in index</option>
    <option value="history">Use one from the history</option>
  </param>
  <when value="indexed">
    <!-- crest.loc columns used here: 0 = dbkey, 1 = display name, 2 = value for the FASTA, 3 = value for the 2bit index -->
    <param name="genome_fasta" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
      <options from_file="crest.loc">
        <column name="dbkey" index="0"/>
        <column name="name" index="1"/>
        <column name="value" index="2"/>
        <!-- only offer entries whose dbkey matches the tumor BAM -->
        <filter type="data_meta" ref="tumor_bam" key="dbkey" column="0"/>
        <validator type="no_options" message="No indexes are available"/>
      </options>
    </param>
    <param name="genome_2bit" type="select" optional="true" label="The 2bit index" help="">
      <options from_file="crest.loc">
        <column name="dbkey" index="0"/>
        <column name="name" index="1"/>
        <column name="value" index="3"/>
        <filter type="data_meta" ref="tumor_bam" key="dbkey" column="0"/>
      </options>
    </param>
  </when>
  <when value="history">
    <param name="genome_fasta" type="data" format="fasta" label="Genome Reference Sequence" help="Should match your input Tumor Sample BAM file database">
      <validator type="unspecified_build" message="Must assign a build"/>
    </param>
    <param name="genome_2bit" type="data" format="twobit" label="Genome Reference 2bit index (Choose same as Genome Reference Sequence)" help="">
      <validator type="unspecified_build" message="Must assign a build"/>
    </param>
  </when> <!-- history -->
</conditional> <!-- refGenomeSource -->
|
|
47 <!-- Input Datasets -->
|
|
<!--
  RNAseq mode switch. When enabled, a gene model file is required and an
  optional cluster size may be given; a blank cluster size is not passed to CREST.
-->
<conditional name="rnaseq">
  <param name="mode" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="&lt;HR&gt;RNAseq mode"
         help="Requires a gene model file"/>
  <when value="no"/>
  <when value="yes">
    <param name="gene_model" type="data" format="bed" label="Gene model file" help="currently only refFlat format (BED) is supported"/>
    <param name="cluster_size" type="integer" value="" optional="true" label="Cluster Size"
           help="The soft-clipped reads within cluster_size will be considered together, default is 3">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
  </when>
</conditional>
|
|
61 <!-- options -->
|
|
<!-- Checked (default) renders an empty string; unchecked renders the CREST "nopaired" long option. -->
<param name="paired" type="boolean" checked="true" truevalue="" falsevalue="--nopaired" label="&lt;HR&gt;Paired Reads?"/>
|
|
<!-- Optional read length; a blank value is not passed to CREST. -->
<param name="read_len" type="integer" value="" optional="true" label="Read length of the sequencing data"
       help="The read length of the sequencing data, default 100">
  <validator type="in_range" message="Must be greater than 0" min="1"/>
</param>
|
|
<!-- Unchecked (default) renders an empty string; checked renders the CREST "sensitive" long option. -->
<param name="sensitive" type="boolean" checked="false" truevalue="--sensitive" falsevalue="" label="&lt;HR&gt;Sensitive"
       help="The program will generate more SVs with higher false positive rate."/>
|
|
<!-- Optional genome range restriction; a blank value is not passed to CREST. -->
<param name="range" type="text" value="" optional="true" label="Limit Genome range where SV will be detected"
       help="The range where SV will be detected, using chr1:100-200 format">
  <!--
    The whole pattern is wrapped in an optional group so that the blank default
    of this optional field always validates, whether or not the framework skips
    validators for empty optional values. Every value accepted before is still accepted.
  -->
  <validator type="regex" message="format: chr1:100-200">^(\w+(:\d+-\d+)?)?$</validator>
</param>
|
|
<!--
  Optional overrides for hit detection. The wrapper script only reads these
  fields in "Adjust Settings" mode and only passes on the ones that are not blank.
-->
<conditional name="hit">
  <param name="mode" type="select" label="&lt;HR&gt;Adjust Hit Detection">
    <option value="no">Use defaults</option>
    <option value="yes">Adjust Settings</option>
  </param>
  <when value="yes">
    <param name="max_score_diff" type="integer" value="" optional="true" label="maximum score difference"
           help="The maximum score difference when stopping select hit, default 10.">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="min_sclip_reads" type="integer" value="" optional="true" label="Minimum number of soft clipping reads"
           help="Minimum number of soft clipping read to trigger the procedure, default 3 (10 for RNASeq)">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="max_rep_cover" type="integer" value="" optional="true" label="Repetitive coverage threshold"
           help="The min number of coverage to be called as repetitive and don't trigger the procedure, default 500 (5000 for RNASeq)">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="min_sclip_len" type="integer" value="" optional="true" label="Soft clipping detection"
           help="The min length of soft clipping part at a position to trigger the detection, default 20.">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="min_hit_len" type="integer" value="" optional="true" label="Minimum length of a hit for genome mapping"
           help="">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="min_hit_reads" type="integer" value="" optional="true" label="Minimum read hits"
           help="Minimum number of reads in a hit. default 3 (10 for RNASeq)">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="min_dist_diff" type="integer" value="" optional="true" label="Min distance between the mapped position and the soft clipping position"
           help="Min distance between the mapped position and the soft clipping position, default 20.">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
  </when>
  <when value="no"/>
</conditional>
|
|
110
|
|
<!--
  Optional overrides for soft-clip handling. The wrapper script only reads these
  fields in "Adjust Settings" mode and only passes on the ones that are not blank.
-->
<conditional name="softclip">
  <param name="mode" type="select" label="&lt;HR&gt;Adjust Soft Clipping">
    <option value="no">Use defaults</option>
    <option value="yes">Adjust Settings</option>
  </param>
  <when value="yes">
    <param name="min_percent_id" type="integer" value="" optional="true" label="Identity threshold for soft clipping read mapping"
           help="Min percentage of identity of soft clipping read mapping, default 90">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="min_percent_hq" type="integer" value="" optional="true" label="High quality bases threshold for soft clipping"
           help="Min percentage of high quality base in soft clipping reads, default 80">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="lowqual_cutoff" type="integer" value="" optional="true" label="Low quality cutoff"
           help="Low quality cutoff value, default 20.">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
  </when>
  <when value="no"/>
</conditional>
|
|
132
|
|
<!--
  Optional overrides for structural variant filtering. The wrapper script only reads
  these fields in "Adjust Settings" mode and only passes on the ones that are not blank.
-->
<conditional name="sv_filter">
  <param name="mode" type="select" label="&lt;HR&gt;Adjust Structural Variant Filtering">
    <option value="no">Use defaults</option>
    <option value="yes">Adjust Settings</option>
  </param>
  <when value="yes">
    <param name="min_percent_cons_of_read" type="float" value="" optional="true" label="Relative consensus length threshold"
           help="Minimum percent of consensus length of read length, default 0.75">
      <!-- the message now states the range that the validator actually enforces -->
      <validator type="in_range" message="Must be between 0 and 1" min="0" max="1"/>
    </param>
    <param name="max_bp_dist" type="integer" value="" optional="true" label="Maximum distance between break points"
           help="Maximum distance in base pairs between two identified break points, default 15">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="germline_seq_width" type="integer" value="" optional="true" label="Germline SV filtering window"
           help="Half window width of genomic sequence around break point for germline SV filtering, default 100">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="germline_search_width" type="integer" value="" optional="true" label="Soft Clip Germline SV filtering window"
           help="Half window width for searching soft-clipped reads around breakpoint for germline SV filtering, default 50">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
  </when>
  <when value="no"/>
</conditional>
|
|
158
|
|
<!--
  Rescue mode: "no" makes the wrapper script add the norescue flag, "default"
  adds nothing, and "yes" passes min_one_side_reads when it is not blank.
-->
<conditional name="rescue">
  <param name="mode" type="select" label="&lt;HR&gt;Rescue mode"
         help="a SV with only 1 side with enough soft-clipped reads is considered as a valid one, default is ON.">
    <option value="no">Rescue mode Off</option>
    <option value="default" selected="true">Rescue On with default Setting</option>
    <option value="yes">Adjust Rescue Settings</option>
  </param>
  <when value="yes">
    <param name="min_one_side_reads" type="integer" value="" optional="true" label="Minimum number of soft-clipped reads on one side"
           help="the minimum number of soft-clipped reads on one side, default 5">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
  </when>
  <when value="no"/>
  <when value="default"/>
</conditional>
|
|
175
|
|
<!--
  Tandem repeat handling: "no" makes the wrapper script add the norm_tandem_repeat
  flag, "default" adds nothing, and "yes" passes each field below that is not blank.
  Parameter names are kept exactly as the wrapper script reads them, including
  the "triger_p_value" spelling, which is also the spelling of the option it passes to CREST.
-->
<conditional name="tandem_repeat">
  <param name="mode" type="select" label="&lt;HR&gt;Tandem Repeats"
         help="Remove tandem repeat caused SV events, default is ON.">
    <option value="default" selected="true">Remove Tandem Repeats using default Setting</option>
    <option value="yes">Remove Tandem Repeats with Adjusted Settings</option>
    <option value="no">Keep Tandem Repeats</option>
  </param>
  <when value="yes">
    <param name="tr_max_indel_size" type="integer" value="" optional="true" label="Maximum INDEL events"
           help="Maximum tandem repeat mediated INDEL events, default 100">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="tr_min_size" type="integer" value="" optional="true" label="Minimum tandem repeat size"
           help="Minimum tandem repeat size, default 2">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="tr_max_size" type="integer" value="" optional="true" label="Maximum tandem repeat size"
           help="Maximum tandem repeat size, default 8">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="tr_min_num" type="integer" value="" optional="true" label="Minimum tandem repeat number"
           help="Minimum tandem repeat number, default 4">
      <validator type="in_range" message="Must be greater than 0" min="1"/>
    </param>
    <param name="hetero_factor" type="float" value="" optional="true" label="heterogeneity and heterozygosity factor"
           help="The factor about the SV's heterogeneity and heterozygosity, default 0.4">
      <!-- the message now states the range that the validator actually enforces -->
      <validator type="in_range" message="Must be between 0 and 1" min="0" max="1"/>
    </param>
    <!-- this field previously reused the label of hetero_factor, leaving two indistinguishable fields in the form -->
    <param name="triger_p_value" type="float" value="" optional="true" label="P-value threshold for triggering SV detection"
           help="The p-value that will trigger the SV detection when number of soft-clipped reads is small, default 0.05">
      <validator type="in_range" message="Must be between 0 and 1" min="0" max="1"/>
    </param>
  </when>
  <when value="default"/>
  <when value="no"/>
</conditional>
|
|
212 </inputs>
|
|
<!-- Result datasets. Entries with from_work_dir are collected from files of that name in the job working directory. -->
<outputs>
  <!-- receives the redirected output of the wrapper script (see the command element) -->
  <data name="crest_log"
        format="txt"
        label="${tool.name} on ${on_string}: crest.log"/>
  <data name="tumor_cover"
        format="tabular"
        from_work_dir="tumor.bam.cover"
        label="${tool.name} on ${on_string}: tumor.cover"/>
  <data name="tumor_sclip"
        format="tabular"
        from_work_dir="tumor.bam.sclip.txt"
        label="${tool.name} on ${on_string}: tumor.sclip.txt"/>
  <!-- the two germline datasets are only created when a germline BAM was selected -->
  <data name="germline_cover"
        format="tabular"
        from_work_dir="germline.bam.cover"
        label="${tool.name} on ${on_string}: germline.cover">
    <filter>germline_bam != None</filter>
  </data>
  <data name="germline_sclip"
        format="tabular"
        from_work_dir="germline.bam.sclip.txt"
        label="${tool.name} on ${on_string}: germline.sclip.txt">
    <filter>germline_bam != None</filter>
  </data>
  <data name="predSV"
        format="tabular"
        from_work_dir="tumor.bam.predSV.txt"
        label="${tool.name} on ${on_string}: tumor.predSV.txt"/>
  <!-- written directly by the wrapper script, which hands this dataset path to bam2html as its output file -->
  <data name="predSV_html"
        format="html"
        label="${tool.name} on ${on_string}: tumor.bam.predSV.html"/>
</outputs>
|
|
226 <configfiles>
|
|
227 <configfile name="shscript"> #slurp
|
|
#!/bin/bash
## Helper variables holding characters that are awkward to write literally in a
## Cheetah template embedded in XML: dollar sign, ampersand, less-than, greater-than
#set $ds = chr(36)
#set $amp = chr(38)
#set $lt = chr(60)
#set $gt = chr(62)
## Command used to print each perl invocation to the log before it is run
#set $echo_cmd = 'echo'
## Locate the CREST perl scripts below the tools/crest directory of the Galaxy root;
## the directories named in skip_dirs are not searched, and the first match is used
#import Cheetah.FileUtils
#set $toolpath = '/'.join([$__root_dir__,'tools','crest'])
#set $skip_dirs = ['example','Tree']
#set $crest = $Cheetah.FileUtils.findFiles($toolpath,['CREST.pl'],[],$skip_dirs)[0]
#set $extractSClip = $Cheetah.FileUtils.findFiles($toolpath,['extractSClip.pl'],[],$skip_dirs)[0]
#set $countDiff = $Cheetah.FileUtils.findFiles($toolpath,['countDiff.pl'],[],$skip_dirs)[0]
#set $bam2html = $Cheetah.FileUtils.findFiles($toolpath,['bam2html.pl'],[],$skip_dirs)[0]
##
## ptrfinder has to be reachable through PATH
export PATH=${ds}PATH:$toolpath
#raw
## Use a tmp directory inside the job working directory
export TMPDIR=`pwd`/tmp
mkdir -p $TMPDIR
#end raw
|
|
## check for the genome reference 2bit; say why the job stops instead of exiting silently
## (the message lands in the log dataset, which captures the output of this script)
if [ ! -f $refGenomeSource.genome_2bit ]; then echo "ERROR: genome reference 2bit index not found: $refGenomeSource.genome_2bit"; exit 1; fi
## get the dbkey and use that in link name
#set $dbkey = $tumor_bam.metadata.dbkey
#set $ref_fa = '.'.join([$dbkey,'fa'])
#set $ref_2bit = '.'.join([$dbkey,'2bit'])
ref_fa=$ref_fa
ref_2bit=$ref_2bit
## link the reference files into the working directory under dbkey based names
ln -s $refGenomeSource.genome_fasta $ref_fa
ln -s $refGenomeSource.genome_2bit $ref_2bit
## absolute path of the 2bit link; handed to gfServer and to CREST as the target genome
target_genome=`pwd`/$ref_2bit
|
|
## Problem - gfServer doesn't reserve the port until it's done reading genome, so another might try to open the same port
#raw
## start a local gfServer with the selected genome reference
## find an open port on which to start a blat server via gfServer
## candidates start at 50000 + (PID mod 1000) and advance in steps of 7 while below 60000
blatport=
for (( bp = 50000 + $$ % 1000; bp &lt; 60000; bp += 7 ))
do
if ! netstat -an | grep $bp > /dev/null; then blatport=$bp; break; fi
done
## exit if can't open a port
if [ -z "$blatport" ]; then echo "ERROR: no free port found for gfServer"; exit 1; fi
echo "Starting gfServer on port " $blatport
#end raw
## run the server in the background; the script waits for it to come up before CREST is run
( gfServer -canStop -log=gfServer.log start localhost ${ds}blatport ${ds}target_genome 2${gt} /dev/null ) ${amp}
|
|
#raw
(
## symbolic link the tumor input bam and bai files in our working directory
#end raw
ln -s $tumor_bam tumor.bam
ln -s $tumor_bam.metadata.bam_index tumor.bam.bai
## String value of an Optional DataToolParameter input is 'None' when not set
#if $germline_bam.__str__ != 'None':
#raw
## symbolic link the germline input bam and bai files in our working directory
#end raw
ln -s $germline_bam germline.bam
ln -s $germline_bam.metadata.bam_index germline.bam.bai
#end if
#raw
## Get soft-clipping positions.
#end raw
## each perl command is first echoed into the log, then executed
$echo_cmd perl -I $toolpath $extractSClip -i tumor.bam --ref_genome $ref_fa
perl -I $toolpath $extractSClip -i tumor.bam --ref_genome $ref_fa
##
## If there is a germline input
#if $germline_bam.__str__ != 'None':
## The germline BAM needs its own extractSClip run: countDiff below reads germline.bam.cover,
## and the germline_cover and germline_sclip outputs are collected from germline.bam.cover
## and germline.bam.sclip.txt. This step previously ran on tumor.bam a second time.
$echo_cmd perl -I $toolpath $extractSClip -i germline.bam --ref_genome $ref_fa
perl -I $toolpath $extractSClip -i germline.bam --ref_genome $ref_fa
#raw
## Remove germline events (optional)
#end raw
## the echoed form says "to" instead of using a redirect so that echo does not write the file itself
$echo_cmd perl -I $toolpath $countDiff -d tumor.bam.cover -g germline.bam.cover to soft_clip.dist.txt
perl -I $toolpath $countDiff -d tumor.bam.cover -g germline.bam.cover $gt soft_clip.dist.txt
#end if
)
|
|
## Running the SV detection script.
## Determine the CREST options
## crest_args collects the command line words; they are joined with spaces where CREST is invoked
#set $crest_args = ["-f tumor.bam.cover -d tumor.bam"]
##
#if $germline_bam.__str__ != 'None':
#set $crest_args = $crest_args + ["-g germline.bam"]
#end if
#set $crest_args = $crest_args + ["--ref_genome",$ref_fa]
##
## Test the rendered value ('yes' or 'no') like every other mode test in this template,
## rather than relying on the truthiness of the parameter wrapper object
#if $rnaseq.mode.__str__ == 'yes':
#set $crest_args = $crest_args + ["--RNASeq","--genemodel",$rnaseq.gene_model.__str__]
#if $rnaseq.cluster_size.__str__ != '':
#set $crest_args = $crest_args + ["--cluster_size",$rnaseq.cluster_size.__str__]
#end if
#end if
##
## The two boolean flags render either their option text or an empty string
#if $paired.__str__ != '':
#set $crest_args = $crest_args + [$paired.__str__]
#end if
#if $sensitive.__str__ != '':
#set $crest_args = $crest_args + [$sensitive.__str__]
#end if
## Optional values are only passed on when the field was filled in
#if $range.__str__ != '':
#set $crest_args = $crest_args + ["-r",$range.__str__]
#end if
#if $read_len.__str__ != '':
#set $crest_args = $crest_args + ["-l",$read_len.__str__]
#end if
|
|
332 ##
|
|
## Hit detection overrides: read only in 'yes' mode, and only fields that are not blank are passed on
#if $hit.mode.__str__ == 'yes':
#if $hit.max_score_diff.__str__ != '':
#silent $crest_args.extend(["--max_score_diff", $hit.max_score_diff.__str__])
#end if
#if $hit.min_sclip_reads.__str__ != '':
#silent $crest_args.extend(["--min_sclip_reads", $hit.min_sclip_reads.__str__])
#end if
#if $hit.max_rep_cover.__str__ != '':
#silent $crest_args.extend(["--max_rep_cover", $hit.max_rep_cover.__str__])
#end if
#if $hit.min_sclip_len.__str__ != '':
#silent $crest_args.extend(["--min_sclip_len", $hit.min_sclip_len.__str__])
#end if
#if $hit.min_hit_len.__str__ != '':
#silent $crest_args.extend(["--min_hit_len", $hit.min_hit_len.__str__])
#end if
#if $hit.min_hit_reads.__str__ != '':
#silent $crest_args.extend(["--min_hit_reads", $hit.min_hit_reads.__str__])
#end if
#if $hit.min_dist_diff.__str__ != '':
#silent $crest_args.extend(["--min_dist_diff", $hit.min_dist_diff.__str__])
#end if
#end if
|
|
356 ##
|
|
## Soft clipping overrides: read only in 'yes' mode, and only fields that are not blank are passed on
#if $softclip.mode.__str__ == 'yes':
#if $softclip.min_percent_id.__str__ != '':
#silent $crest_args.extend(["--min_percent_id", $softclip.min_percent_id.__str__])
#end if
#if $softclip.min_percent_hq.__str__ != '':
#silent $crest_args.extend(["--min_percent_hq", $softclip.min_percent_hq.__str__])
#end if
#if $softclip.lowqual_cutoff.__str__ != '':
#silent $crest_args.extend(["--lowqual_cutoff", $softclip.lowqual_cutoff.__str__])
#end if
#end if
|
|
368 ##
|
|
## SV filtering overrides: read only in 'yes' mode, and only fields that are not blank are passed on
#if $sv_filter.mode.__str__ == 'yes':
#if $sv_filter.min_percent_cons_of_read.__str__ != '':
#silent $crest_args.extend(["--min_percent_cons_of_read", $sv_filter.min_percent_cons_of_read.__str__])
#end if
#if $sv_filter.max_bp_dist.__str__ != '':
#silent $crest_args.extend(["--max_bp_dist", $sv_filter.max_bp_dist.__str__])
#end if
#if $sv_filter.germline_seq_width.__str__ != '':
#silent $crest_args.extend(["--germline_seq_width", $sv_filter.germline_seq_width.__str__])
#end if
#if $sv_filter.germline_search_width.__str__ != '':
#silent $crest_args.extend(["--germline_search_width", $sv_filter.germline_search_width.__str__])
#end if
#end if
|
|
383 ##
|
|
## Rescue mode: 'no' switches rescue off, 'yes' passes the threshold when one was entered,
## and any other mode adds nothing
#set $rescue_mode = $rescue.mode.__str__
#if $rescue_mode == 'no':
#silent $crest_args.append("--norescue")
#elif $rescue_mode == 'yes' and $rescue.min_one_side_reads.__str__ != '':
#silent $crest_args.extend(["--min_one_side_reads", $rescue.min_one_side_reads.__str__])
#end if
|
|
391 ##
|
|
## Tandem repeat handling: 'no' adds the norm_tandem_repeat flag, 'yes' passes every field
## that is not blank, and any other mode adds nothing
#set $tandem_mode = $tandem_repeat.mode.__str__
#if $tandem_mode == 'no':
#silent $crest_args.append("--norm_tandem_repeat")
#elif $tandem_mode == 'yes':
#if $tandem_repeat.tr_max_indel_size.__str__ != '':
#silent $crest_args.extend(["--tr_max_indel_size", $tandem_repeat.tr_max_indel_size.__str__])
#end if
#if $tandem_repeat.tr_min_size.__str__ != '':
#silent $crest_args.extend(["--tr_min_size", $tandem_repeat.tr_min_size.__str__])
#end if
#if $tandem_repeat.tr_max_size.__str__ != '':
#silent $crest_args.extend(["--tr_max_size", $tandem_repeat.tr_max_size.__str__])
#end if
#if $tandem_repeat.tr_min_num.__str__ != '':
#silent $crest_args.extend(["--tr_min_num", $tandem_repeat.tr_min_num.__str__])
#end if
#if $tandem_repeat.hetero_factor.__str__ != '':
#silent $crest_args.extend(["--hetero_factor", $tandem_repeat.hetero_factor.__str__])
#end if
#if $tandem_repeat.triger_p_value.__str__ != '':
#silent $crest_args.extend(["--triger_p_value", $tandem_repeat.triger_p_value.__str__])
#end if
#end if
|
|
#raw
## Wait for gfServer: poll up to 30 times, sleeping 60 seconds between polls
echo "Waiting for gfServer"
tries=0
while [ $tries -lt 30 ]
do
## stop waiting once the port shows up in netstat, or once ps no longer lists a gfServer for this port
if netstat -an | grep $blatport > /dev/null || ! ps -f | grep gfServer | grep $blatport > /dev/null; then break; fi
sleep 60
tries=$(( tries + 1 ))
done
#end raw
|
|
#raw
## Run CREST
#end raw
## Render the collected options once, and decide up front whether bam2html gets the germline BAM
#set $crest_opts = ' '.join($crest_args)
#set $germline_opt = ''
#if $germline_bam.__str__ != 'None':
#set $germline_opt = '-g germline.bam '
#end if
(
## example of a resulting command line:
## perl -I ~/src/crest ../CREST.pl -f tumor.bam.cover -d tumor.bam -g germline.bam --ref_genome hg18.fa --2bitdir=/home/msi/jj/src/crest/example --target_genome=hg18.2bit --blatserver localhost --blatport 50000
## the command is first echoed into the log, then executed
$echo_cmd perl -I $toolpath $crest $crest_opts --target_genome ${ds}target_genome --blatserver localhost --blatport ${ds}blatport
perl -I $toolpath $crest $crest_opts --target_genome ${ds}target_genome --blatserver localhost --blatport ${ds}blatport
#raw
## Visualization of the detailed alignment at breakpoint (optional)
## The bam2html.pl script builds an html view of the multiple alignment for the breakpoint, so you can manually check the soft-clipping and other things.
#end raw
## bam2html.pl -r hg18.fa -d tumor.bam -g germline.bam -o predSV.html -f predSV.txt
## only attempted when CREST produced a prediction file
if [ -e tumor.bam.predSV.txt ]
then
perl -I $toolpath $bam2html -d tumor.bam ${germline_opt}-f tumor.bam.predSV.txt --ref_genome $ref_fa -o $predSV_html
fi
)
#raw
## stop the blat server that was started for this job
echo "shutting down gfServer on port " $blatport
gfServer stop localhost $blatport
#end raw
|
|
448 </configfile>
|
|
449 </configfiles>
|
|
<!-- No functional tests are defined for this tool. -->
<tests/>
|
|
452 <help>
|
|
453 **CREST**
|
|
454
|
|
CREST_ is an algorithm for detecting genomic structural variations at base-pair resolution using next-generation sequencing data.
|
|
456
|
|
457 CREST uses pieces of DNA called soft clips to find structural variations. Soft clips are the DNA segments produced during sequencing that fail to properly align to the reference genome as the sample genome is reassembled. CREST uses the soft clips to precisely identify sites of chromosomal rearrangement or where pieces of DNA are inserted or deleted.
|
|
458
|
|
459 Please cite the following article:
|
|
460
|
|
461 Wang J, Mullighan CG, Easton J, Roberts S, Heatley SL, Ma J, Rusch MC, Chen K, Harris CC, Ding L, Holmfeldt L, Payne-Turner D, Fan X, Wei L, Zhao D, Obenauer JC, Naeve C, Mardis ER, Wilson RK, Downing JR and Zhang J. CREST maps somatic structural variation in cancer genomes with base-pair resolution (2011). Nature_Methods_.
|
|
462
|
|
463 .. _Nature_Methods: http://www.nature.com/nmeth/journal/v8/n8/pdf/nmeth.1628.pdf
|
|
464 .. _CREST: http://www.stjuderesearch.org/site/lab/zhang
|
|
465
|
|
466
|
|
467 ----
|
|
468
|
|
469 **Input formats**
|
|
470
|
|
471 BAM files that must contain soft-clipping signatures at the breakpoints. If
|
|
472 they do not, you will not get any results.
|
|
473
|
|
474 CREST uses soft-clipping signatures to identify breakpoints. Soft-clipping is
|
|
475 indicated by "S" elements in the CIGAR for SAM/BAM records. Soft-clipping may
|
|
476 not occur, depending on the mapping algorithm and parameters and sometimes even
|
|
477 the library preparation.
|
|
478
|
|
479 With bwa sampe:
|
|
480
|
|
481 One mapping method that will soft-clip reads is bwa sampe (BWA for paired-end
|
|
482 reads). When BWA successfully maps one read in a pair but is not able to map
|
|
483 the other, it will attempt a more permissive Smith-Waterman alignment of the
|
|
484 unmapped read in the neighborhood of the mapped mate. If it is only able to
|
|
485 align part of the read, then it will soft-clip the portion on the end that it
|
|
486 could not align. Often this occurs at the breakpoints of structural
|
|
487 variations.
|
|
488
|
|
489 In some cases when the insert sizes approach the read length, BWA will not
|
|
490 perform Smith-Waterman alignment. Reads from inserts smaller than the read
|
|
491 length will contain primer and/or adapter and will often not map. When the
|
|
492 insert size is close to the read length, this creates a skewed distribution
|
|
493 of inferred insert sizes which may cause BWA to not attempt Smith-Waterman
|
|
494 realignment. This is indicated by the error message "weird pairing". Often
|
|
495 in these cases there are also unusually low mapping rates.
|
|
496
|
|
One way to fix this problem is to remap unmapped reads with bwasw. To do this,
|
|
498 extract the unmapped reads as FASTQ files (this may be done with a combination
|
|
499 of samtools view -f 4 and Picard's SamToFastq). Realign using bwa bwasw and
|
|
500 build a BAM file. Then, re-run CREST on this new BAM file, and you may pick
|
|
501 up events that would have been missed otherwise.
|
|
502
|
|
503
|
|
504
|
|
505
|
|
506 ------
|
|
507
|
|
508 **Outputs**
|
|
509
|
|
510 The output
|
|
file ``*.predSV.txt`` has the following tab-delimited columns: left_chr, left_pos,
|
|
512 left_strand, # of left soft-clipped reads, right_chr, right_pos, right_strand,
|
|
513 # right soft-clipped reads, SV type, coverage at left_pos, coverage at
|
|
514 right_pos, assembled length at left_pos, assembled length at right_pos,
|
|
515 average percent identity at left_pos, percent of non-unique mapping reads at
|
|
516 left_pos, average percent identity at right_pos, percent of non-unique mapping
|
|
517 reads at right_pos, start position of consensus mapping to genome,
|
|
518 starting chromosome of consensus mapping, position of the genomic mapping of
|
|
519 consensus starting position, end position of consensus mapping to genome,
|
|
ending chromosome of consensus mapping, position of genomic mapping of
|
|
consensus ending position, and consensus sequences. For inversion(INV), the
|
|
522 last 7 fields will be repeated to reflect the fact two different breakpoints
|
|
523 are needed to identify an INV event.
|
|
524
|
|
525 Example of the tumor.predSV.txt file:
|
|
526
|
|
527 4 125893227 + 5 10 66301858 - 4 CTX 29 14 83 71 0.895173453996983 0.230769230769231 0.735384615384615 0.5 1 4 125893135 176 10 66301773 TTATGAATTTTGAAATATATATCATATTTTGAAATATATATCATATTCTAAATTATGAAAAGAGAATATGATTCTCTTTTCAGTAGCTGTCACCTCCTGGGTTCAAGTGATTCTCCTGCCTCTACCTCCCGAGTAGCTGGGATTACAGGTGCCCACCACCATGCCTGGCTAATTTT
|
|
528 5 7052198 - 0 10 66301865 + 8 CTX 0 22 0 81 0.761379310344828 0.482758620689655 0 0 1 5 7052278 164 10 66301947 AGCCATGGACCTTGTGGTGGGTTCTTAACAATGGTGAGTCCGGAGTTCTTAACGATGGTGAGTCCGTAGTTTGTTCCTTCAGGAGTGAGCCAAGATCATGCCACTGCACTCTAGCCTGGGCAACAGAGGAAGACTCCACCTCAAAAAAAAAAAGTGGGAAGAGG
|
|
529 10 66301858 + 4 4 125893225 - 1 CTX 15 28 71 81 0.735384615384615 0.5 0.889507154213037 0.243243243243243 1 10 66301777 153 4 125893154 TTAGCCAGGCATGGTGGTGGGCACCTGTAATCCCAGCTACTCGGGAGGTAGAGGCAGGAGAATCACTTGAACCCAGGAGGTGACAGCTACTGAAAAGAGAATCATATTCTCTTTTCATAATTTAGAATATGATATATATTTCAAAATATGATA
|
|
530
|
|
531 If there are no or very few results, there may be a lack of soft-clipping.
|
|
532
|
|
533
|
|
534
|
|
535 </help>
|
|
536 </tool>
|