Mercurial > repos > konradpaszkiewicz > velvetoptimiser
comparison VelvetOptimiser-2.1.7_modified/VelvetOptimiser.pl @ 0:50ae1360fbbe default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | konradpaszkiewicz |
---|---|
date | Tue, 07 Jun 2011 18:07:56 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:50ae1360fbbe |
---|---|
1 #!/usr/bin/perl -w | |
2 # | |
3 ##Modified by Konrad Paszkiewicz 07/01/2011 | |
4 | |
5 # VelvetOptimiser.pl | |
6 # | |
7 # Copyright 2008, 2009, 2010 Simon Gladman <simon.gladman@csiro.au> | |
8 # | |
9 # This program is free software; you can redistribute it and/or modify | |
10 # it under the terms of the GNU General Public License as published by | |
11 # the Free Software Foundation; either version 2 of the License, or | |
12 # (at your option) any later version. | |
13 # | |
14 # This program is distributed in the hope that it will be useful, | |
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 # GNU General Public License for more details. | |
18 # | |
19 # You should have received a copy of the GNU General Public License | |
20 # along with this program; if not, write to the Free Software | |
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
22 # MA 02110-1301, USA. | |
23 | |
24 # Version 2.1.7 | |
25 | |
26 # | |
27 # pragmas | |
28 # | |
29 use strict; | |
30 | |
31 # | |
32 # includes | |
33 # | |
34 use POSIX qw(strftime); | |
35 use FindBin; | |
36 use lib "$FindBin::Bin"; | |
37 use threads; | |
38 use threads::shared; | |
39 use VelvetOpt::Assembly; | |
40 use VelvetOpt::hwrap; | |
41 use VelvetOpt::gwrap; | |
42 use VelvetOpt::Utils; | |
43 use Data::Dumper; | |
44 use Storable qw (freeze thaw); | |
45 use Getopt::Long; | |
46 use lib '/usr/local/lib/perl5/site_perl/5.8.8'; | |
47 | |
48 # | |
49 # global var decs | |
50 # | |
51 | |
52 #Change the following integer when compiling Velvet with the MAXKMERLENGTH | |
53 #greater than 31 to the value you used. | |
54 my $maxhash; | |
55 my @hashvals; | |
56 my %assemblies : shared; | |
57 my %assembliesObjs; | |
58 my @Options; | |
59 my $readfile; | |
60 my $interested = 0; | |
61 my $verbose : shared; | |
62 my $hashs; | |
63 my $hashe; | |
64 my $amos; | |
65 my $vgoptions; | |
66 my $genomesize; | |
67 my @shortInserts; | |
68 my $logfile = "logfile.txt"; | |
69 my $ass_num = 1; | |
70 my $categories; | |
71 my $prefix; | |
72 my $OUT; | |
73 my $logSem : shared; | |
74 our $num_threads; | |
75 my $current_threads : shared = 0; | |
76 my $opt_func; | |
77 my $opt_func2; | |
78 my $OptVersion = "2.1.7"; | |
79 my $threadfailed : shared = 0; | |
80 | |
81 # | |
82 # | |
83 # main script | |
84 # | |
85 # | |
86 print STDERR " | |
87 **************************************************** | |
88 | |
89 VelvetOptimiser.pl Version $OptVersion | |
90 | |
91 Simon Gladman - CSIRO 2009 | |
92 | |
93 ****************************************************\n"; | |
94 | |
95 my $currfreemem = VelvetOpt::Utils::free_mem; | |
96 | |
97 print STDERR "Number of CPUs available: " . VelvetOpt::Utils::num_cpu . "\n"; | |
98 printf STDERR "Current free RAM: %.3fGB\n", $currfreemem; | |
99 | |
100 #get the velveth and velvetg version numbers... | |
101 my $response = VelvetOpt::hwrap::_runVelveth(" "); | |
102 $response =~ /Version\s+(\d+\.\d+\.\d+)/s; | |
103 my $vhversion = $1; | |
104 unless ($vhversion){ die "Unable to find velveth, please ensure that the velvet executables are in your PATH.\n";} | |
105 $response =~ /CATEGORIES = (\d+)/; | |
106 $categories = $1; | |
107 unless($categories){ $categories = 2; } | |
108 | |
109 $response =~ /MAXKMERLENGTH = (\d+)/; | |
110 $maxhash = $1; | |
111 unless($maxhash){ $maxhash = 31; } | |
112 | |
113 #get the options! | |
114 &setOptions(); | |
115 | |
116 if($prefix eq "auto"){ | |
117 $logfile = strftime("%d-%m-%Y-%H-%M-%S", localtime) . "_Logfile.txt"; | |
118 } else { | |
119 $logfile = $prefix . "_logfile.txt"; | |
120 } | |
121 | |
122 print "Logfile name: $logfile\n"; | |
123 | |
124 #open the logfile | |
125 open $OUT, ">$logfile" or die "Couldn't open $logfile for writing.\n$!\n"; | |
126 | |
127 # | |
128 # | |
129 # Perform common tasks - write details to log file and screen, run velveth and vanilla velvetg | |
130 # | |
131 # | |
132 | |
133 print STDERR "\nMemory use estimation only! Script will terminate after showing results.\n\n" if($genomesize); | |
134 | |
135 print STDERR "Velvet details:\n"; | |
136 print STDERR "\tVelvet version: $vhversion\n"; | |
137 print STDERR "\tCompiled categories: $categories\n" if $categories; | |
138 print STDERR "\tCompiled max kmer length: $maxhash\n" if $maxhash; | |
139 print STDERR "\tMaximum number of threads to run: $num_threads\n"; | |
140 | |
141 #let user know about parameters to run with. | |
142 print STDERR "Will run velvet optimiser with the following paramters:\n"; | |
143 print STDERR "\tVelveth parameter string:\n\t\t$readfile\n"; | |
144 print STDERR "\tVelveth start hash values:\t$hashs\n"; | |
145 print STDERR "\tVelveth end hash value:\t\t$hashe\n"; | |
146 if($vgoptions){ | |
147 print $OUT "\tUser specified velvetg options: $vgoptions\n"; | |
148 } | |
149 if($amos){ | |
150 print STDERR "\tRead tracking for final assembly on.\n"; | |
151 } else { | |
152 print STDERR "\tRead tracking for final assembly off.\n"; | |
153 } | |
154 | |
155 #build the hashval array | |
156 for(my $i = $hashs; $i <= $hashe; $i += 2){ | |
157 push @hashvals, $i; | |
158 } | |
159 | |
160 if($genomesize){ | |
161 my $x = &estMemUse(); | |
162 printf STDERR "\nMemory use estimated to be: %.1fGB for $num_threads threads.\n\n", $x; | |
163 if ($x < $currfreemem){ | |
164 print STDERR "You should have enough memory to complete this job. (Though this estimate is no guarantee..)\n"; | |
165 exit; | |
166 } | |
167 else { | |
168 print STDERR "You probably won't have enough memory to run this job.\nTry decreasing the maximum number of threads used.\n(use the -t option to set max threads.)\n"; | |
169 exit; | |
170 } | |
171 } | |
172 | |
173 | |
174 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n"; | |
175 | |
176 #send run parameters to log file. | |
177 print $OUT "Will run velvet optimiser with the following paramters:\n"; | |
178 print $OUT "\tVelveth parameter string:\n\t\t$readfile\n"; | |
179 print $OUT "\tVelveth start hash values:\t$hashs\n"; | |
180 print $OUT "\tVelveth end hash value:\t\t$hashe\n\n"; | |
181 if($vgoptions){ | |
182 print $OUT "\tUser specified velvetg options: $vgoptions\n"; | |
183 } | |
184 if($amos){ | |
185 print $OUT "\tRead tracking for final assembly on.\n"; | |
186 } else { | |
187 print $OUT "\tRead tracking for final assembly off.\n"; | |
188 } | |
189 | |
190 print STDERR strftime("%b %e %H:%M:%S", localtime), " Beginning velveth runs.\n"; | |
191 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n\n\tBeginning velveth runs.\n"; | |
192 | |
193 #now run velveth for all the hashvalues in a certain number of threads.. | |
194 my @threads; | |
195 foreach my $hashval (@hashvals){ | |
196 while($current_threads >= $num_threads){ | |
197 sleep(2); | |
198 } | |
199 if($threadfailed){ | |
200 for my $thr (threads->list) { | |
201 #print STDERR "Waiting for thread ",$thr->tid," to complete.\n"; | |
202 $thr->join; | |
203 } | |
204 die "Velveth failed to run! Must be a problem with file types, check by running velveth manually or by using -v option and reading the log file.\n"; | |
205 } | |
206 $threads[$ass_num] = threads->create(\&runVelveth, $readfile, $hashval, $vhversion, \$logSem, $ass_num); | |
207 $ass_num ++; | |
208 sleep(2); | |
209 } | |
210 | |
211 for my $thr (threads->list) { | |
212 #print STDERR "Waiting for thread ",$thr->tid," to complete.\n"; | |
213 $thr->join; | |
214 } | |
215 | |
216 #now run velvetg for the all the hashvalues in a certain number of threads.. | |
217 #first get velvetg's version number. | |
218 | |
219 $response = VelvetOpt::gwrap::_runVelvetg(" "); | |
220 $response =~ /Version\s+(\d+\.\d+\.\d+)/s; | |
221 my $vgversion = $1; | |
222 | |
223 print STDERR strftime("%b %e %H:%M:%S", localtime), " Finished velveth runs.\n"; | |
224 | |
225 print STDERR strftime("%b %e %H:%M:%S", localtime), " Beginning vanilla velvetg runs.\n"; | |
226 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n\n\tBeginning vanilla velvetg runs.\n"; | |
227 | |
228 foreach my $key (sort { $a <=> $b } keys %assemblies){ | |
229 while($current_threads >= $num_threads){ | |
230 sleep(2); | |
231 } | |
232 $threads[$ass_num] = threads->create(\&runVelvetg, $vgversion, \$logSem, $key); | |
233 sleep(2); | |
234 } | |
235 | |
236 for my $thr (threads->list) { | |
237 #print STDERR "Waiting for thread ",$thr->tid," to complete.\n"; | |
238 $thr->join; | |
239 } | |
240 | |
241 | |
242 #now to thaw it all out.. | |
243 | |
244 foreach my $key(sort keys %assemblies){ | |
245 my $obj = bless thaw($assemblies{$key}), "VelvetOpt::Assembly"; | |
246 $assembliesObjs{$key} = $obj; | |
247 } | |
248 | |
249 | |
250 #find the best assembly... | |
251 | |
252 # | |
253 # | |
254 # Now perform a velvetg optimisation based upon the file types sent to velveth | |
255 # | |
256 # | |
257 | |
258 # | |
259 # get the best assembly so far... | |
260 # | |
261 | |
262 my $bestId; | |
263 my $maxScore = -100; | |
264 my $asmscorenotneg = 1; | |
265 | |
266 foreach my $key (keys %assembliesObjs){ | |
267 if(($assembliesObjs{$key}->{assmscore} != -1) && $asmscorenotneg){ | |
268 if($assembliesObjs{$key}->{assmscore} > $maxScore){ | |
269 $bestId = $key; | |
270 $maxScore = $assembliesObjs{$key}->{assmscore}; | |
271 } | |
272 } | |
273 elsif($assembliesObjs{$key}->{n50} && $asmscorenotneg){ | |
274 if($assembliesObjs{$key}->{n50} > $maxScore){ | |
275 $bestId = $key; | |
276 $maxScore = $assembliesObjs{$key}->{n50}; | |
277 } | |
278 } | |
279 else { | |
280 $asmscorenotneg = 0; | |
281 if($assembliesObjs{$key}->{totalbp} > $maxScore){ | |
282 $bestId = $key; | |
283 $maxScore = $assembliesObjs{$key}->{totalbp}; | |
284 } | |
285 } | |
286 } | |
287 print "\n\nThe best assembly so far is:\n" if $interested; | |
288 print $assembliesObjs{$bestId}->toStringNoV() if $interested; | |
289 | |
290 # determine the optimisation route for the assembly based on the velveth parameter string. | |
291 my $optRoute = &getOptRoutine($readfile); | |
292 | |
293 print STDERR strftime("%b %e %H:%M:%S", localtime), " Hash value of best assembly by assembly score: ". $assembliesObjs{$bestId}->{hashval} . "\n"; | |
294 | |
295 print $OUT strftime("%b %e %H:%M:%S", localtime), " Best assembly by assembly score - assembly id: $bestId\n"; | |
296 | |
297 print STDERR strftime("%b %e %H:%M:%S", localtime), " Optimisation routine chosen for best assembly: $optRoute\n"; | |
298 print $OUT strftime("%b %e %H:%M:%S", localtime), " Optimisation routine chosen for best assembly: $optRoute\n"; | |
299 | |
300 #now send the best assembly so far to the appropriate optimisation routine... | |
301 | |
302 if($optRoute eq "shortOpt"){ | |
303 | |
304 &expCov($assembliesObjs{$bestId}); | |
305 &covCutoff($assembliesObjs{$bestId}); | |
306 | |
307 } | |
308 elsif($optRoute eq "shortLong"){ | |
309 | |
310 &expCov($assembliesObjs{$bestId}); | |
311 &covCutoff($assembliesObjs{$bestId}); | |
312 | |
313 } | |
314 elsif($optRoute eq "longPaired"){ | |
315 &expCov($assembliesObjs{$bestId}); | |
316 &insLengthLong($assembliesObjs{$bestId}); | |
317 &covCutoff($assembliesObjs{$bestId}); | |
318 } | |
319 elsif($optRoute eq "shortPaired"){ | |
320 &expCov($assembliesObjs{$bestId}); | |
321 &insLengthShort($assembliesObjs{$bestId}); | |
322 &covCutoff($assembliesObjs{$bestId}); | |
323 } | |
324 elsif($optRoute eq "shortLongPaired"){ | |
325 &expCov($assembliesObjs{$bestId}); | |
326 &insLengthShort($assembliesObjs{$bestId}); | |
327 &insLengthLong($assembliesObjs{$bestId}); | |
328 &covCutoff($assembliesObjs{$bestId}); | |
329 } | |
330 else{ | |
331 print STDERR "There was an error choosing an optimisation routine for this assembly. Please change the velveth parameter string and try again.\n"; | |
332 print $OUT "There was an error choosing an optimisation routine for this assembly. Please change the velveth parameter string and try again.\n"; | |
333 } | |
334 | |
335 # once it comes back from the optimisation routines, we need to turn on read tracking and amos output if it was selected in the options. | |
336 # | |
337 # | |
338 # The final assembly run! | |
339 # | |
340 # | |
341 if($amos){ | |
342 $assembliesObjs{$bestId}->{pstringg} .= " -amos_file yes -read_trkg yes"; | |
343 | |
344 my $final = VelvetOpt::gwrap::objectVelvetg($assembliesObjs{$bestId}); | |
345 $assembliesObjs{$bestId}->getAssemblyDetails(); | |
346 } | |
347 | |
348 print STDERR strftime("%b %e %H:%M:%S", localtime), "\n\n\nFinal optimised assembly details:\n"; | |
349 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n\n\nFinal optimised assembly details:\n"; | |
350 print STDERR $assembliesObjs{$bestId}->toStringNoV() if !$verbose; | |
351 print $OUT $assembliesObjs{$bestId}->toStringNoV() if !$verbose; | |
352 print STDERR $assembliesObjs{$bestId}->toString() if $verbose; | |
353 print $OUT $assembliesObjs{$bestId}->toString() if $verbose; | |
354 print STDERR "\n\nAssembly output files are in the following directory:\n" . $assembliesObjs{$bestId}->{ass_dir} . "\n\n"; | |
355 print $OUT "\n\nAssembly output files are in the following directory:\n" . $assembliesObjs{$bestId}->{ass_dir} . "\n"; | |
356 | |
357 #delete superfluous directories.. | |
358 #Modified by Konrad Paszkiewicz 07/01/2011 - move best results to execution directory for ease of use in Galaxy | |
359 foreach my $key(keys %assemblies){ | |
360 unless($key == $bestId){ | |
361 my $dir = $assembliesObjs{$key}->{ass_dir}; | |
362 `rm -r $dir`; | |
363 } | |
364 if($key==$bestId){ | |
365 my $dir = $assembliesObjs{$key}->{ass_dir}; | |
366 | |
367 qx(mv $dir/* $dir/../); | |
368 `rm -rf $dir`; | |
369 | |
370 } | |
371 } | |
372 | |
373 | |
374 | |
375 # subroutines... | |
376 # | |
377 # | |
378 #---------------------------------------------------------------------- | |
379 | |
380 # Option setting routines | |
381 | |
382 sub setOptions { | |
383 use Getopt::Long; | |
384 | |
385 my $thmax = VelvetOpt::Utils::num_cpu; | |
386 | |
387 @Options = ( | |
388 {OPT=>"help", VAR=>\&usage, DESC=>"This help"}, | |
389 {OPT=>"v|verbose+", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose logging, includes all velvet output in the logfile."}, | |
390 {OPT=>"s|hashs=i", VAR=>\$hashs, DEFAULT=>19, DESC=>"The starting (lower) hash value"}, | |
391 {OPT=>"e|hashe=i", VAR=>\$hashe, DEFAULT=>31, DESC=>"The end (higher) hash value"}, | |
392 {OPT=>"f|velvethfiles=s", VAR=>\$readfile, DEFAULT=>0, DESC=>"The file section of the velveth command line."}, | |
393 {OPT=>"a|amosfile!", VAR=>\$amos, DEFAULT=>0, DESC=>"Turn on velvet's read tracking and amos file output."}, | |
394 {OPT=>"o|velvetgoptions=s", VAR=>\$vgoptions, DEFAULT=>'', DESC=>"Extra velvetg options to pass through. eg. -long_mult_cutoff -max_coverage etc"}, | |
395 {OPT=>"t|threads=i", VAR=>\$num_threads, DEFAULT=>$thmax, DESC=>"The maximum number of simulataneous velvet instances to run."}, | |
396 {OPT=>"g|genomesize=f", VAR=>\$genomesize, DEFAULT=>0, DESC=>"The approximate size of the genome to be assembled in megabases.\n\t\t\tOnly used in memory use estimation. If not specified, memory use estimation\n\t\t\twill not occur. If memory use is estimated, the results are shown and then program exits."}, | |
397 {OPT=>"k|optFuncKmer=s", VAR=>\$opt_func, DEFAULT=>'n50', DESC=>"The optimisation function used for k-mer choice."}, | |
398 {OPT=>"c|optFuncCov=s", VAR=>\$opt_func2, DEFAULT=>'Lbp', DESC=>"The optimisation function used for cov_cutoff optimisation."}, | |
399 {OPT=>"p|prefix=s", VAR=>\$prefix, DEFAULT=>'auto', DESC=>"The prefix for the output filenames, the default is the date and time in the format DD-MM-YYYY-HH-MM_."} | |
400 ); | |
401 | |
402 (@ARGV < 1) && (usage()); | |
403 | |
404 &GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage(); | |
405 | |
406 # Now setup default values. | |
407 foreach (@Options) { | |
408 if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) { | |
409 ${$_->{VAR}} = $_->{DEFAULT}; | |
410 } | |
411 } | |
412 | |
413 print STDERR strftime("%b %e %H:%M:%S", localtime), " Starting to check input parameters.\n"; | |
414 | |
415 unless($readfile){ | |
416 print STDERR "\tYou must supply the velveth parameter line in quotes. eg -f '-short .....'\n"; | |
417 &usage(); | |
418 } | |
419 | |
420 if($hashs > $maxhash){ | |
421 print STDERR "\tStart hash value too high. New start hash value is $maxhash.\n"; | |
422 $hashs = $maxhash; | |
423 } | |
424 if(!&isOdd($hashs)){ | |
425 $hashs = $hashs - 1; | |
426 print STDERR "\tStart hash value not odd. Subtracting one. New start hash value = $hashs\n"; | |
427 } | |
428 | |
429 if($hashe > $maxhash || $hashe < 1){ | |
430 print STDERR "\tEnd hash value not in workable range. New end hash value is $maxhash.\n"; | |
431 $hashe = $maxhash; | |
432 } | |
433 if($hashe < $hashs){ | |
434 print STDERR "\tEnd hash value lower than start hash value. New end hash value = $hashs.\n"; | |
435 $hashe = $hashs; | |
436 } | |
437 if(!&isOdd($hashe)){ | |
438 $hashe = $hashe - 1; | |
439 print STDERR "\tEnd hash value not odd. Subtracting one. New end hash value = $hashe\n"; | |
440 } | |
441 | |
442 #check the velveth parameter string.. | |
443 my $vh_ok = VelvetOpt::hwrap::_checkVHString("check 21 $readfile", $categories); | |
444 | |
445 unless($vh_ok){ die "Please re-start with a corrected velveth parameter string." } | |
446 | |
447 print STDERR "\tVelveth parameter string OK.\n"; | |
448 | |
449 print STDERR strftime("%b %e %H:%M:%S", localtime), " Finished checking input parameters.\n"; | |
450 | |
451 } | |
452 | |
453 sub usage { | |
454 print "Usage: $0 [options] -f 'velveth input line'\n"; | |
455 foreach (@Options) { | |
456 printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC}, | |
457 defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : ""; | |
458 } | |
459 print "\nAdvanced!: Changing the optimisation function(s)\n"; | |
460 print VelvetOpt::Assembly::opt_func_toString; | |
461 exit(1); | |
462 } | |
463 | |
464 #---------------------------------------------------------------------- | |
465 | |
466 | |
467 # | |
468 # runVelveth | |
469 # | |
470 | |
471 sub runVelveth{ | |
472 | |
473 { | |
474 lock($current_threads); | |
475 $current_threads ++; | |
476 } | |
477 | |
478 my $rf = shift; | |
479 my $hv = shift; | |
480 my $vv = shift; | |
481 my $semRef = shift; | |
482 my $anum = shift; | |
483 my $assembly; | |
484 | |
485 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tRunning velveth with hash value: $hv.\n"; | |
486 | |
487 #make the velveth command line. | |
488 my $vhline = $prefix . "_data_$hv $hv $rf"; | |
489 | |
490 #make a new VelvetAssembly and store it in the %assemblies hash... | |
491 $assembly = VelvetOpt::Assembly->new(ass_id => $anum, pstringh => $vhline, versionh =>$vv, assmfunc => $opt_func, assmfunc2 => $opt_func2); | |
492 | |
493 #run velveth on this assembly object | |
494 my $vhresponse = VelvetOpt::hwrap::objectVelveth($assembly, $categories); | |
495 | |
496 unless($vhresponse){ die "Velveth didn't run on hash value of $hv.\n$!\n";} | |
497 | |
498 unless(-r ($prefix . "_data_$hv" . "/Roadmaps")){ | |
499 print STDERR "Velveth failed! Response:\n$vhresponse\n"; | |
500 { | |
501 lock ($threadfailed); | |
502 $threadfailed = 1; | |
503 } | |
504 } | |
505 | |
506 #run the hashdetail generation routine. | |
507 $vhresponse = $assembly->getHashingDetails(); | |
508 | |
509 #print the objects to the log file... | |
510 { | |
511 lock($$semRef); | |
512 print $OUT $assembly->toStringNoV() if !$verbose; | |
513 print $OUT $assembly->toString() if $verbose; | |
514 } | |
515 | |
516 { | |
517 lock(%assemblies); | |
518 my $ass_str = freeze($assembly); | |
519 $assemblies{$anum} = $ass_str; | |
520 } | |
521 | |
522 { | |
523 lock($current_threads); | |
524 $current_threads --; | |
525 } | |
526 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tVelveth with hash value $hv finished.\n"; | |
527 } | |
528 | |
529 # | |
530 # runVelvetg | |
531 # | |
532 sub runVelvetg{ | |
533 | |
534 { | |
535 lock($current_threads); | |
536 $current_threads ++; | |
537 } | |
538 | |
539 my $vv = shift; | |
540 my $semRef = shift; | |
541 my $anum = shift; | |
542 my $assembly; | |
543 | |
544 #get back the object! | |
545 $assembly = bless thaw($assemblies{$anum}), "VelvetOpt::Assembly"; | |
546 | |
547 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tRunning vanilla velvetg on hash value: " . $assembly->{hashval} . "\n"; | |
548 | |
549 #make the velvetg commandline. | |
550 my $vgline = $prefix . "_data_" . $assembly->{hashval}; | |
551 | |
552 $vgline .= " $vgoptions"; | |
553 | |
554 #save the velvetg commandline in the assembly. | |
555 $assembly->{pstringg} = $vgline; | |
556 | |
557 #save the velvetg version in the assembly. | |
558 $assembly->{versiong} = $vv; | |
559 | |
560 #run velvetg | |
561 my $vgresponse = VelvetOpt::gwrap::objectVelvetg($assembly); | |
562 | |
563 unless($vgresponse){ die "Velvetg didn't run on the directory $vgline.\n$!\n";} | |
564 | |
565 #run the assembly details routine.. | |
566 $assembly->getAssemblyDetails(); | |
567 | |
568 #print the objects to the log file... | |
569 { | |
570 lock($$semRef); | |
571 print $OUT $assembly->toStringNoV() if !$verbose; | |
572 print $OUT $assembly->toString() if $verbose; | |
573 } | |
574 | |
575 { | |
576 lock(%assemblies); | |
577 my $ass_str = freeze($assembly); | |
578 $assemblies{$anum} = $ass_str; | |
579 } | |
580 | |
581 { | |
582 lock($current_threads); | |
583 $current_threads --; | |
584 } | |
585 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tVelvetg on hash value: " . $assembly->{hashval} . " finished.\n"; | |
586 } | |
587 | |
588 # | |
589 # isOdd | |
590 # | |
591 sub isOdd { | |
592 my $x = shift; | |
593 if($x % 2 == 1){ | |
594 return 1; | |
595 } | |
596 else { | |
597 return 0; | |
598 } | |
599 } | |
600 | |
601 | |
602 # | |
603 # getOptRoutine | |
604 # | |
605 sub getOptRoutine { | |
606 | |
607 my $readfile = shift; | |
608 | |
609 # Choose the optimisation path depending on the types of read files sent to velveth | |
610 # For short only: shortOpt routine | |
611 # For short and long: shortLong routine | |
612 # For short paired: shortPaired routine | |
613 # For short and long paired: longPaired routine | |
614 # For short paired and long: shortPaired routine | |
615 # For short paired & long paired: shortlongPaired routine | |
616 | |
617 #look at velveth string ($readfile) and look for keywords from velvet manual... | |
618 my $long = 0; | |
619 my $longPaired = 0; | |
620 my $shortPaired = 0; | |
621 my $short = 0; | |
622 | |
623 #standard cases.. | |
624 if($readfile =~ /-short.? /) { $short = 1; } | |
625 if($readfile =~ /-long /) { $long = 1; } | |
626 if($readfile =~ /-shortPaired /) { $shortPaired = 1; } | |
627 if($readfile =~ /-longPaired /) { $longPaired = 1; } | |
628 | |
629 #weird cases to cover the non-use of the short keyword (since its the default.) | |
630 if(!($readfile =~ /(-short.? )|(-long )|(-shortPaired )|(-longPaired )/)) { $short = 1; } #if nothing is specified, assume short. | |
631 if(!($readfile =~ /-short.? /) && ($readfile =~ /(-long )|(-longPaired )/)) { $short = 1; } #if long or longPaired is specified, also assum short since very unlikely to only have long... | |
632 | |
633 if($short && !($long || $longPaired || $shortPaired)){ | |
634 return "shortOpt"; | |
635 } | |
636 elsif($short && $long && !($longPaired || $shortPaired)){ | |
637 return "shortLong"; | |
638 } | |
639 elsif($short && $longPaired && !$shortPaired){ | |
640 return "longPaired"; | |
641 } | |
642 elsif($short && $shortPaired && !$longPaired){ | |
643 return "shortPaired"; | |
644 } | |
645 elsif($short && $shortPaired && $longPaired){ | |
646 return "shortLongPaired"; | |
647 } | |
648 elsif($shortPaired && !$short && !$long && !$longPaired){ | |
649 return "shortPaired"; | |
650 } | |
651 else { | |
652 return "Unknown"; | |
653 } | |
654 } | |
655 | |
656 # | |
657 # covCutoff - the coverage cutoff optimisation routine. | |
658 # | |
659 sub covCutoff{ | |
660 | |
661 my $ass = shift; | |
662 #get the assembly score and set the current cutoff score. | |
663 my $ass_score = $ass->{assmscore}; | |
664 print "In covCutOff and assembly score is: $ass_score..\n" if $interested; | |
665 | |
666 | |
667 | |
668 sub func { | |
669 my $ass = shift; | |
670 my $cutoff = shift; | |
671 my $ass_score = $ass->{assmscore}; | |
672 my $ps = $ass->{pstringg}; | |
673 if($ps =~ /cov_cutoff/){ | |
674 $ps =~ s/cov_cutoff\s+\d+(\.\d+)?/cov_cutoff $cutoff/; | |
675 } | |
676 else { | |
677 $ps .= " -cov_cutoff $cutoff"; | |
678 } | |
679 $ass->{pstringg} = $ps; | |
680 | |
681 print STDERR strftime("%b %e %H:%M:%S", localtime); | |
682 printf STDERR "\t\tSetting cov_cutoff to %.3f.\n", $cutoff; | |
683 print $OUT strftime("%b %e %H:%M:%S", localtime); | |
684 printf $OUT "\t\tSetting cov_cutoff to %.3f.\n", $cutoff; | |
685 | |
686 my $worked = VelvetOpt::gwrap::objectVelvetg($ass); | |
687 if($worked){ | |
688 $ass->getAssemblyDetails(); | |
689 } | |
690 else { | |
691 die "Velvet Error in covCutoff!\n"; | |
692 } | |
693 $ass_score = $ass->{assmscore}; | |
694 print $OUT $ass->toStringNoV(); | |
695 | |
696 return $ass_score; | |
697 | |
698 } | |
699 | |
700 print STDERR strftime("%b %e %H:%M:%S", localtime), " Beginning coverage cutoff optimisation\n"; | |
701 print $OUT strftime("%b %e %H:%M:%S", localtime), " Beginning coverage cutoff optimisation\n"; | |
702 | |
703 my $dir = $ass->{ass_dir}; | |
704 $dir .= "/stats.txt"; | |
705 #print "\tLooking for exp_cov in $dir\n"; | |
706 my $expCov = VelvetOpt::Utils::estExpCov($dir, $ass->{hashval}); | |
707 | |
708 my $a = 0; | |
709 my $b = 0.8 * $expCov; | |
710 my $t = 0.618; | |
711 my $c = $a + $t * ($b - $a); | |
712 my $d = $b + $t * ($a - $b); | |
713 my $fc = func($ass, $c); | |
714 my $fd = func($ass, $d); | |
715 | |
716 my $iters = 1; | |
717 | |
718 printf STDERR "\t\tLooking for best cutoff score between %.3f and %.3f\n", $a, $b; | |
719 printf $OUT "\t\tLooking for best cutoff score between %.3f and %.3f\n", $a, $b; | |
720 | |
721 while(abs($a -$b) > 1){ | |
722 if($fc > $fd){ | |
723 printf STDERR "\t\tMax cutoff lies between %.3f & %.3f\n", $d, $b; | |
724 my $absdiff = abs($fc - $fd); | |
725 print STDERR "\t\tfc = $fc\tfd = $fd\tabs diff = $absdiff\n"; | |
726 printf $OUT "\t\tMax cutoff lies between %.3f & %.3f\n", $d, $b; | |
727 $a = $d; | |
728 $d = $c; | |
729 $fd = $fc; | |
730 $c = $a + $t * ($b - $a); | |
731 $fc = func($ass, $c); | |
732 } | |
733 else { | |
734 printf STDERR "\t\tMax cutoff lies between %.3f & %.3f\n", $a, $c; | |
735 my $absdiff = abs($fc - $fd); | |
736 print STDERR "\t\tfc = $fc\tfd = $fd\tabs diff = $absdiff\n"; | |
737 printf $OUT "\t\tMax cutoff lies between %.3f & %.3f\n", $a, $c; | |
738 $b = $c; | |
739 $c = $d; | |
740 $fc = $fd; | |
741 $d = $b + $t * ($a - $b); | |
742 $fd = func($ass, $d); | |
743 } | |
744 $iters ++; | |
745 } | |
746 | |
747 printf STDERR "\t\tOptimum value of cutoff is %.2f\n", $b; | |
748 print STDERR "\t\tTook $iters iterations\n"; | |
749 printf $OUT "\t\tOptimum value of cutoff is %.2f\n", $b; | |
750 print $OUT "\t\tTook $iters iterations\n"; | |
751 | |
752 return 1; | |
753 | |
754 } | |
755 | |
756 # | |
757 # expCov - find the expected coverage for the assembly and run velvetg with that exp_cov. | |
758 # | |
759 sub expCov { | |
760 | |
761 print STDERR strftime("%b %e %H:%M:%S", localtime), " Looking for the expected coverage\n"; | |
762 print $OUT strftime("%b %e %H:%M:%S", localtime), " Looking for the expected coverage\n"; | |
763 | |
764 my $ass = shift; | |
765 | |
766 #need to get the directory of the assembly and add "stats.txt" to it and then send it to | |
767 #the histogram methods in SlugsUtils.pm... | |
768 my $dir = $ass->{ass_dir}; | |
769 $dir .= "/stats.txt"; | |
770 my $expCov = VelvetOpt::Utils::estExpCov($dir, $ass->{hashval}); | |
771 | |
772 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tExpected coverage set to $expCov\n"; | |
773 print $OUT strftime("%b %e %H:%M:%S", localtime), "\t\tExpected coverage set to $expCov\n"; | |
774 | |
775 #re-write the pstringg with the new velvetg command.. | |
776 my $vg = $ass->{pstringg}; | |
777 if($vg =~ /exp_cov/){ | |
778 $vg =~ s/exp_cov\s+\d+/exp_cov $expCov/; | |
779 } | |
780 else { | |
781 $vg .= " -exp_cov $expCov"; | |
782 } | |
783 | |
784 $ass->{pstringg} = $vg; | |
785 | |
786 print $OUT $ass->toStringNoV(); | |
787 | |
788 } | |
789 | |
790 # | |
791 # insLengthLong - get the Long insert length and use it in the assembly.. | |
792 # | |
793 sub insLengthLong { | |
794 print STDERR strftime("%b %e %H:%M:%S", localtime), " Getting the long insert length\n"; | |
795 print $OUT strftime("%b %e %H:%M:%S", localtime), " Getting the long insert length\n"; | |
796 my $ass = shift; | |
797 my $len = "auto"; | |
798 print STDERR strftime("%b %e %H:%M:%S", localtime), " Setting assembly long insert length $len\n"; | |
799 print $OUT strftime("%b %e %H:%M:%S", localtime), " Setting assembly long insert length $len\n"; | |
800 | |
801 #re-write the pstringg with the new velvetg command.. | |
802 #my $vg = $ass->{pstringg}; | |
803 #if($vg =~ /ins_length_long/){ | |
804 # $vg =~ s/ins_length_long\s+\d+/ins_length_long $len/; | |
805 #} | |
806 #else { | |
807 # $vg .= " -ins_length_long $len"; | |
808 #} | |
809 } | |
810 | |
811 # | |
812 # insLengthShort - get the short insert length and use it in the assembly.. | |
813 # | |
814 sub insLengthShort { | |
815 print STDERR strftime("%b %e %H:%M:%S", localtime), " Setting the short insert length\n"; | |
816 print $OUT strftime("%b %e %H:%M:%S", localtime), " Setting the short insert length\n"; | |
817 my $ass = shift; | |
818 my $len = "auto"; | |
819 print STDERR strftime("%b %e %H:%M:%S", localtime), " Setting assembly short insert length(s) to $len\n"; | |
820 print $OUT strftime("%b %e %H:%M:%S", localtime), " Setting assembly short insert length(s) to $len\n"; | |
821 | |
822 #re-write the pstringg with the new velvetg command.. | |
823 #my $vg = $ass->{pstringg}; | |
824 #if($vg =~ /ins_length /){ | |
825 # $vg =~ s/ins_length\s+\d+/ins_length $len/; | |
826 #} | |
827 #else { | |
828 # $vg .= " -ins_length $len"; | |
829 #} | |
830 #$ass->{pstringg} = $vg; | |
831 } | |
832 | |
833 | |
834 # | |
835 # estMemUse - estimates the memory usage from | |
836 # | |
837 sub estMemUse { | |
838 | |
839 my $max_runs = @hashvals; | |
840 my $totmem = 0; | |
841 #get the read lengths and the number of reads... | |
842 #need the short read filenames... | |
843 my ($rs, $nr) = VelvetOpt::Utils::getReadSizeNum($readfile); | |
844 if ($max_runs > $num_threads){ | |
845 for(my $i = 0; $i < $num_threads; $i ++){ | |
846 $totmem += VelvetOpt::Utils::estVelvetMemUse($rs, $genomesize, $nr, $hashvals[$i]); | |
847 } | |
848 } | |
849 else { | |
850 foreach my $h (@hashvals){ | |
851 $totmem += VelvetOpt::Utils::estVelvetMemUse($rs, $genomesize, $nr, $h); | |
852 } | |
853 } | |
854 return $totmem; | |
855 } |