comparison VelvetOptimiser-2.1.7_modified/VelvetOptimiser.pl @ 0:50ae1360fbbe default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author konradpaszkiewicz
date Tue, 07 Jun 2011 18:07:56 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:50ae1360fbbe
1 #!/usr/bin/perl -w
2 #
3 ##Modified by Konrad Paszkiewicz 07/01/2011
4
5 # VelvetOptimiser.pl
6 #
7 # Copyright 2008, 2009, 2010 Simon Gladman <simon.gladman@csiro.au>
8 #
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22 # MA 02110-1301, USA.
23
24 # Version 2.1.7
25
26 #
27 # pragmas
28 #
29 use strict;
30
31 #
32 # includes
33 #
34 use POSIX qw(strftime);
35 use FindBin;
36 use lib "$FindBin::Bin";
37 use threads;
38 use threads::shared;
39 use VelvetOpt::Assembly;
40 use VelvetOpt::hwrap;
41 use VelvetOpt::gwrap;
42 use VelvetOpt::Utils;
43 use Data::Dumper;
44 use Storable qw (freeze thaw);
45 use Getopt::Long;
46 use lib '/usr/local/lib/perl5/site_perl/5.8.8';
47
48 #
49 # global var decs
50 #
51
52 #Change the following integer when compiling Velvet with the MAXKMERLENGTH
53 #greater than 31 to the value you used.
54 my $maxhash;
55 my @hashvals;
56 my %assemblies : shared;
57 my %assembliesObjs;
58 my @Options;
59 my $readfile;
60 my $interested = 0;
61 my $verbose : shared;
62 my $hashs;
63 my $hashe;
64 my $amos;
65 my $vgoptions;
66 my $genomesize;
67 my @shortInserts;
68 my $logfile = "logfile.txt";
69 my $ass_num = 1;
70 my $categories;
71 my $prefix;
72 my $OUT;
73 my $logSem : shared;
74 our $num_threads;
75 my $current_threads : shared = 0;
76 my $opt_func;
77 my $opt_func2;
78 my $OptVersion = "2.1.7";
79 my $threadfailed : shared = 0;
80
81 #
82 #
83 # main script
84 #
85 #
86 print STDERR "
87 ****************************************************
88
89 VelvetOptimiser.pl Version $OptVersion
90
91 Simon Gladman - CSIRO 2009
92
93 ****************************************************\n";
94
95 my $currfreemem = VelvetOpt::Utils::free_mem;
96
97 print STDERR "Number of CPUs available: " . VelvetOpt::Utils::num_cpu . "\n";
98 printf STDERR "Current free RAM: %.3fGB\n", $currfreemem;
99
100 #get the velveth and velvetg version numbers...
101 my $response = VelvetOpt::hwrap::_runVelveth(" ");
102 $response =~ /Version\s+(\d+\.\d+\.\d+)/s;
103 my $vhversion = $1;
104 unless ($vhversion){ die "Unable to find velveth, please ensure that the velvet executables are in your PATH.\n";}
105 $response =~ /CATEGORIES = (\d+)/;
106 $categories = $1;
107 unless($categories){ $categories = 2; }
108
109 $response =~ /MAXKMERLENGTH = (\d+)/;
110 $maxhash = $1;
111 unless($maxhash){ $maxhash = 31; }
112
113 #get the options!
114 &setOptions();
115
116 if($prefix eq "auto"){
117 $logfile = strftime("%d-%m-%Y-%H-%M-%S", localtime) . "_Logfile.txt";
118 } else {
119 $logfile = $prefix . "_logfile.txt";
120 }
121
122 print "Logfile name: $logfile\n";
123
124 #open the logfile
125 open $OUT, ">$logfile" or die "Couldn't open $logfile for writing.\n$!\n";
126
127 #
128 #
129 # Perform common tasks - write details to log file and screen, run velveth and vanilla velvetg
130 #
131 #
132
133 print STDERR "\nMemory use estimation only! Script will terminate after showing results.\n\n" if($genomesize);
134
135 print STDERR "Velvet details:\n";
136 print STDERR "\tVelvet version: $vhversion\n";
137 print STDERR "\tCompiled categories: $categories\n" if $categories;
138 print STDERR "\tCompiled max kmer length: $maxhash\n" if $maxhash;
139 print STDERR "\tMaximum number of threads to run: $num_threads\n";
140
141 #let user know about parameters to run with.
142 print STDERR "Will run velvet optimiser with the following paramters:\n";
143 print STDERR "\tVelveth parameter string:\n\t\t$readfile\n";
144 print STDERR "\tVelveth start hash values:\t$hashs\n";
145 print STDERR "\tVelveth end hash value:\t\t$hashe\n";
146 if($vgoptions){
147 print $OUT "\tUser specified velvetg options: $vgoptions\n";
148 }
149 if($amos){
150 print STDERR "\tRead tracking for final assembly on.\n";
151 } else {
152 print STDERR "\tRead tracking for final assembly off.\n";
153 }
154
155 #build the hashval array
156 for(my $i = $hashs; $i <= $hashe; $i += 2){
157 push @hashvals, $i;
158 }
159
160 if($genomesize){
161 my $x = &estMemUse();
162 printf STDERR "\nMemory use estimated to be: %.1fGB for $num_threads threads.\n\n", $x;
163 if ($x < $currfreemem){
164 print STDERR "You should have enough memory to complete this job. (Though this estimate is no guarantee..)\n";
165 exit;
166 }
167 else {
168 print STDERR "You probably won't have enough memory to run this job.\nTry decreasing the maximum number of threads used.\n(use the -t option to set max threads.)\n";
169 exit;
170 }
171 }
172
173
174 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n";
175
176 #send run parameters to log file.
177 print $OUT "Will run velvet optimiser with the following paramters:\n";
178 print $OUT "\tVelveth parameter string:\n\t\t$readfile\n";
179 print $OUT "\tVelveth start hash values:\t$hashs\n";
180 print $OUT "\tVelveth end hash value:\t\t$hashe\n\n";
181 if($vgoptions){
182 print $OUT "\tUser specified velvetg options: $vgoptions\n";
183 }
184 if($amos){
185 print $OUT "\tRead tracking for final assembly on.\n";
186 } else {
187 print $OUT "\tRead tracking for final assembly off.\n";
188 }
189
190 print STDERR strftime("%b %e %H:%M:%S", localtime), " Beginning velveth runs.\n";
191 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n\n\tBeginning velveth runs.\n";
192
193 #now run velveth for all the hashvalues in a certain number of threads..
194 my @threads;
195 foreach my $hashval (@hashvals){
196 while($current_threads >= $num_threads){
197 sleep(2);
198 }
199 if($threadfailed){
200 for my $thr (threads->list) {
201 #print STDERR "Waiting for thread ",$thr->tid," to complete.\n";
202 $thr->join;
203 }
204 die "Velveth failed to run! Must be a problem with file types, check by running velveth manually or by using -v option and reading the log file.\n";
205 }
206 $threads[$ass_num] = threads->create(\&runVelveth, $readfile, $hashval, $vhversion, \$logSem, $ass_num);
207 $ass_num ++;
208 sleep(2);
209 }
210
211 for my $thr (threads->list) {
212 #print STDERR "Waiting for thread ",$thr->tid," to complete.\n";
213 $thr->join;
214 }
215
216 #now run velvetg for the all the hashvalues in a certain number of threads..
217 #first get velvetg's version number.
218
219 $response = VelvetOpt::gwrap::_runVelvetg(" ");
220 $response =~ /Version\s+(\d+\.\d+\.\d+)/s;
221 my $vgversion = $1;
222
223 print STDERR strftime("%b %e %H:%M:%S", localtime), " Finished velveth runs.\n";
224
225 print STDERR strftime("%b %e %H:%M:%S", localtime), " Beginning vanilla velvetg runs.\n";
226 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n\n\tBeginning vanilla velvetg runs.\n";
227
228 foreach my $key (sort { $a <=> $b } keys %assemblies){
229 while($current_threads >= $num_threads){
230 sleep(2);
231 }
232 $threads[$ass_num] = threads->create(\&runVelvetg, $vgversion, \$logSem, $key);
233 sleep(2);
234 }
235
236 for my $thr (threads->list) {
237 #print STDERR "Waiting for thread ",$thr->tid," to complete.\n";
238 $thr->join;
239 }
240
241
242 #now to thaw it all out..
243
244 foreach my $key(sort keys %assemblies){
245 my $obj = bless thaw($assemblies{$key}), "VelvetOpt::Assembly";
246 $assembliesObjs{$key} = $obj;
247 }
248
249
250 #find the best assembly...
251
252 #
253 #
254 # Now perform a velvetg optimisation based upon the file types sent to velveth
255 #
256 #
257
258 #
259 # get the best assembly so far...
260 #
261
262 my $bestId;
263 my $maxScore = -100;
264 my $asmscorenotneg = 1;
265
266 foreach my $key (keys %assembliesObjs){
267 if(($assembliesObjs{$key}->{assmscore} != -1) && $asmscorenotneg){
268 if($assembliesObjs{$key}->{assmscore} > $maxScore){
269 $bestId = $key;
270 $maxScore = $assembliesObjs{$key}->{assmscore};
271 }
272 }
273 elsif($assembliesObjs{$key}->{n50} && $asmscorenotneg){
274 if($assembliesObjs{$key}->{n50} > $maxScore){
275 $bestId = $key;
276 $maxScore = $assembliesObjs{$key}->{n50};
277 }
278 }
279 else {
280 $asmscorenotneg = 0;
281 if($assembliesObjs{$key}->{totalbp} > $maxScore){
282 $bestId = $key;
283 $maxScore = $assembliesObjs{$key}->{totalbp};
284 }
285 }
286 }
287 print "\n\nThe best assembly so far is:\n" if $interested;
288 print $assembliesObjs{$bestId}->toStringNoV() if $interested;
289
290 # determine the optimisation route for the assembly based on the velveth parameter string.
291 my $optRoute = &getOptRoutine($readfile);
292
293 print STDERR strftime("%b %e %H:%M:%S", localtime), " Hash value of best assembly by assembly score: ". $assembliesObjs{$bestId}->{hashval} . "\n";
294
295 print $OUT strftime("%b %e %H:%M:%S", localtime), " Best assembly by assembly score - assembly id: $bestId\n";
296
297 print STDERR strftime("%b %e %H:%M:%S", localtime), " Optimisation routine chosen for best assembly: $optRoute\n";
298 print $OUT strftime("%b %e %H:%M:%S", localtime), " Optimisation routine chosen for best assembly: $optRoute\n";
299
300 #now send the best assembly so far to the appropriate optimisation routine...
301
302 if($optRoute eq "shortOpt"){
303
304 &expCov($assembliesObjs{$bestId});
305 &covCutoff($assembliesObjs{$bestId});
306
307 }
308 elsif($optRoute eq "shortLong"){
309
310 &expCov($assembliesObjs{$bestId});
311 &covCutoff($assembliesObjs{$bestId});
312
313 }
314 elsif($optRoute eq "longPaired"){
315 &expCov($assembliesObjs{$bestId});
316 &insLengthLong($assembliesObjs{$bestId});
317 &covCutoff($assembliesObjs{$bestId});
318 }
319 elsif($optRoute eq "shortPaired"){
320 &expCov($assembliesObjs{$bestId});
321 &insLengthShort($assembliesObjs{$bestId});
322 &covCutoff($assembliesObjs{$bestId});
323 }
324 elsif($optRoute eq "shortLongPaired"){
325 &expCov($assembliesObjs{$bestId});
326 &insLengthShort($assembliesObjs{$bestId});
327 &insLengthLong($assembliesObjs{$bestId});
328 &covCutoff($assembliesObjs{$bestId});
329 }
330 else{
331 print STDERR "There was an error choosing an optimisation routine for this assembly. Please change the velveth parameter string and try again.\n";
332 print $OUT "There was an error choosing an optimisation routine for this assembly. Please change the velveth parameter string and try again.\n";
333 }
334
335 # once it comes back from the optimisation routines, we need to turn on read tracking and amos output if it was selected in the options.
336 #
337 #
338 # The final assembly run!
339 #
340 #
341 if($amos){
342 $assembliesObjs{$bestId}->{pstringg} .= " -amos_file yes -read_trkg yes";
343
344 my $final = VelvetOpt::gwrap::objectVelvetg($assembliesObjs{$bestId});
345 $assembliesObjs{$bestId}->getAssemblyDetails();
346 }
347
348 print STDERR strftime("%b %e %H:%M:%S", localtime), "\n\n\nFinal optimised assembly details:\n";
349 print $OUT strftime("%b %e %H:%M:%S", localtime), "\n\n\nFinal optimised assembly details:\n";
350 print STDERR $assembliesObjs{$bestId}->toStringNoV() if !$verbose;
351 print $OUT $assembliesObjs{$bestId}->toStringNoV() if !$verbose;
352 print STDERR $assembliesObjs{$bestId}->toString() if $verbose;
353 print $OUT $assembliesObjs{$bestId}->toString() if $verbose;
354 print STDERR "\n\nAssembly output files are in the following directory:\n" . $assembliesObjs{$bestId}->{ass_dir} . "\n\n";
355 print $OUT "\n\nAssembly output files are in the following directory:\n" . $assembliesObjs{$bestId}->{ass_dir} . "\n";
356
357 #delete superfluous directories..
358 #Modified by Konrad Paszkiewicz 07/01/2011 - move best results to execution directory for ease of use in Galaxy
359 foreach my $key(keys %assemblies){
360 unless($key == $bestId){
361 my $dir = $assembliesObjs{$key}->{ass_dir};
362 `rm -r $dir`;
363 }
364 if($key==$bestId){
365 my $dir = $assembliesObjs{$key}->{ass_dir};
366
367 qx(mv $dir/* $dir/../);
368 `rm -rf $dir`;
369
370 }
371 }
372
373
374
375 # subroutines...
376 #
377 #
378 #----------------------------------------------------------------------
379
380 # Option setting routines
381
382 sub setOptions {
383 use Getopt::Long;
384
385 my $thmax = VelvetOpt::Utils::num_cpu;
386
387 @Options = (
388 {OPT=>"help", VAR=>\&usage, DESC=>"This help"},
389 {OPT=>"v|verbose+", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose logging, includes all velvet output in the logfile."},
390 {OPT=>"s|hashs=i", VAR=>\$hashs, DEFAULT=>19, DESC=>"The starting (lower) hash value"},
391 {OPT=>"e|hashe=i", VAR=>\$hashe, DEFAULT=>31, DESC=>"The end (higher) hash value"},
392 {OPT=>"f|velvethfiles=s", VAR=>\$readfile, DEFAULT=>0, DESC=>"The file section of the velveth command line."},
393 {OPT=>"a|amosfile!", VAR=>\$amos, DEFAULT=>0, DESC=>"Turn on velvet's read tracking and amos file output."},
394 {OPT=>"o|velvetgoptions=s", VAR=>\$vgoptions, DEFAULT=>'', DESC=>"Extra velvetg options to pass through. eg. -long_mult_cutoff -max_coverage etc"},
395 {OPT=>"t|threads=i", VAR=>\$num_threads, DEFAULT=>$thmax, DESC=>"The maximum number of simulataneous velvet instances to run."},
396 {OPT=>"g|genomesize=f", VAR=>\$genomesize, DEFAULT=>0, DESC=>"The approximate size of the genome to be assembled in megabases.\n\t\t\tOnly used in memory use estimation. If not specified, memory use estimation\n\t\t\twill not occur. If memory use is estimated, the results are shown and then program exits."},
397 {OPT=>"k|optFuncKmer=s", VAR=>\$opt_func, DEFAULT=>'n50', DESC=>"The optimisation function used for k-mer choice."},
398 {OPT=>"c|optFuncCov=s", VAR=>\$opt_func2, DEFAULT=>'Lbp', DESC=>"The optimisation function used for cov_cutoff optimisation."},
399 {OPT=>"p|prefix=s", VAR=>\$prefix, DEFAULT=>'auto', DESC=>"The prefix for the output filenames, the default is the date and time in the format DD-MM-YYYY-HH-MM_."}
400 );
401
402 (@ARGV < 1) && (usage());
403
404 &GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage();
405
406 # Now setup default values.
407 foreach (@Options) {
408 if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
409 ${$_->{VAR}} = $_->{DEFAULT};
410 }
411 }
412
413 print STDERR strftime("%b %e %H:%M:%S", localtime), " Starting to check input parameters.\n";
414
415 unless($readfile){
416 print STDERR "\tYou must supply the velveth parameter line in quotes. eg -f '-short .....'\n";
417 &usage();
418 }
419
420 if($hashs > $maxhash){
421 print STDERR "\tStart hash value too high. New start hash value is $maxhash.\n";
422 $hashs = $maxhash;
423 }
424 if(!&isOdd($hashs)){
425 $hashs = $hashs - 1;
426 print STDERR "\tStart hash value not odd. Subtracting one. New start hash value = $hashs\n";
427 }
428
429 if($hashe > $maxhash || $hashe < 1){
430 print STDERR "\tEnd hash value not in workable range. New end hash value is $maxhash.\n";
431 $hashe = $maxhash;
432 }
433 if($hashe < $hashs){
434 print STDERR "\tEnd hash value lower than start hash value. New end hash value = $hashs.\n";
435 $hashe = $hashs;
436 }
437 if(!&isOdd($hashe)){
438 $hashe = $hashe - 1;
439 print STDERR "\tEnd hash value not odd. Subtracting one. New end hash value = $hashe\n";
440 }
441
442 #check the velveth parameter string..
443 my $vh_ok = VelvetOpt::hwrap::_checkVHString("check 21 $readfile", $categories);
444
445 unless($vh_ok){ die "Please re-start with a corrected velveth parameter string." }
446
447 print STDERR "\tVelveth parameter string OK.\n";
448
449 print STDERR strftime("%b %e %H:%M:%S", localtime), " Finished checking input parameters.\n";
450
451 }
452
453 sub usage {
454 print "Usage: $0 [options] -f 'velveth input line'\n";
455 foreach (@Options) {
456 printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC},
457 defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
458 }
459 print "\nAdvanced!: Changing the optimisation function(s)\n";
460 print VelvetOpt::Assembly::opt_func_toString;
461 exit(1);
462 }
463
464 #----------------------------------------------------------------------
465
466
467 #
468 # runVelveth
469 #
470
471 sub runVelveth{
472
473 {
474 lock($current_threads);
475 $current_threads ++;
476 }
477
478 my $rf = shift;
479 my $hv = shift;
480 my $vv = shift;
481 my $semRef = shift;
482 my $anum = shift;
483 my $assembly;
484
485 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tRunning velveth with hash value: $hv.\n";
486
487 #make the velveth command line.
488 my $vhline = $prefix . "_data_$hv $hv $rf";
489
490 #make a new VelvetAssembly and store it in the %assemblies hash...
491 $assembly = VelvetOpt::Assembly->new(ass_id => $anum, pstringh => $vhline, versionh =>$vv, assmfunc => $opt_func, assmfunc2 => $opt_func2);
492
493 #run velveth on this assembly object
494 my $vhresponse = VelvetOpt::hwrap::objectVelveth($assembly, $categories);
495
496 unless($vhresponse){ die "Velveth didn't run on hash value of $hv.\n$!\n";}
497
498 unless(-r ($prefix . "_data_$hv" . "/Roadmaps")){
499 print STDERR "Velveth failed! Response:\n$vhresponse\n";
500 {
501 lock ($threadfailed);
502 $threadfailed = 1;
503 }
504 }
505
506 #run the hashdetail generation routine.
507 $vhresponse = $assembly->getHashingDetails();
508
509 #print the objects to the log file...
510 {
511 lock($$semRef);
512 print $OUT $assembly->toStringNoV() if !$verbose;
513 print $OUT $assembly->toString() if $verbose;
514 }
515
516 {
517 lock(%assemblies);
518 my $ass_str = freeze($assembly);
519 $assemblies{$anum} = $ass_str;
520 }
521
522 {
523 lock($current_threads);
524 $current_threads --;
525 }
526 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tVelveth with hash value $hv finished.\n";
527 }
528
529 #
530 # runVelvetg
531 #
532 sub runVelvetg{
533
534 {
535 lock($current_threads);
536 $current_threads ++;
537 }
538
539 my $vv = shift;
540 my $semRef = shift;
541 my $anum = shift;
542 my $assembly;
543
544 #get back the object!
545 $assembly = bless thaw($assemblies{$anum}), "VelvetOpt::Assembly";
546
547 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tRunning vanilla velvetg on hash value: " . $assembly->{hashval} . "\n";
548
549 #make the velvetg commandline.
550 my $vgline = $prefix . "_data_" . $assembly->{hashval};
551
552 $vgline .= " $vgoptions";
553
554 #save the velvetg commandline in the assembly.
555 $assembly->{pstringg} = $vgline;
556
557 #save the velvetg version in the assembly.
558 $assembly->{versiong} = $vv;
559
560 #run velvetg
561 my $vgresponse = VelvetOpt::gwrap::objectVelvetg($assembly);
562
563 unless($vgresponse){ die "Velvetg didn't run on the directory $vgline.\n$!\n";}
564
565 #run the assembly details routine..
566 $assembly->getAssemblyDetails();
567
568 #print the objects to the log file...
569 {
570 lock($$semRef);
571 print $OUT $assembly->toStringNoV() if !$verbose;
572 print $OUT $assembly->toString() if $verbose;
573 }
574
575 {
576 lock(%assemblies);
577 my $ass_str = freeze($assembly);
578 $assemblies{$anum} = $ass_str;
579 }
580
581 {
582 lock($current_threads);
583 $current_threads --;
584 }
585 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tVelvetg on hash value: " . $assembly->{hashval} . " finished.\n";
586 }
587
588 #
589 # isOdd
590 #
591 sub isOdd {
592 my $x = shift;
593 if($x % 2 == 1){
594 return 1;
595 }
596 else {
597 return 0;
598 }
599 }
600
601
602 #
603 # getOptRoutine
604 #
605 sub getOptRoutine {
606
607 my $readfile = shift;
608
609 # Choose the optimisation path depending on the types of read files sent to velveth
610 # For short only: shortOpt routine
611 # For short and long: shortLong routine
612 # For short paired: shortPaired routine
613 # For short and long paired: longPaired routine
614 # For short paired and long: shortPaired routine
615 # For short paired & long paired: shortlongPaired routine
616
617 #look at velveth string ($readfile) and look for keywords from velvet manual...
618 my $long = 0;
619 my $longPaired = 0;
620 my $shortPaired = 0;
621 my $short = 0;
622
623 #standard cases..
624 if($readfile =~ /-short.? /) { $short = 1; }
625 if($readfile =~ /-long /) { $long = 1; }
626 if($readfile =~ /-shortPaired /) { $shortPaired = 1; }
627 if($readfile =~ /-longPaired /) { $longPaired = 1; }
628
629 #weird cases to cover the non-use of the short keyword (since its the default.)
630 if(!($readfile =~ /(-short.? )|(-long )|(-shortPaired )|(-longPaired )/)) { $short = 1; } #if nothing is specified, assume short.
631 if(!($readfile =~ /-short.? /) && ($readfile =~ /(-long )|(-longPaired )/)) { $short = 1; } #if long or longPaired is specified, also assum short since very unlikely to only have long...
632
633 if($short && !($long || $longPaired || $shortPaired)){
634 return "shortOpt";
635 }
636 elsif($short && $long && !($longPaired || $shortPaired)){
637 return "shortLong";
638 }
639 elsif($short && $longPaired && !$shortPaired){
640 return "longPaired";
641 }
642 elsif($short && $shortPaired && !$longPaired){
643 return "shortPaired";
644 }
645 elsif($short && $shortPaired && $longPaired){
646 return "shortLongPaired";
647 }
648 elsif($shortPaired && !$short && !$long && !$longPaired){
649 return "shortPaired";
650 }
651 else {
652 return "Unknown";
653 }
654 }
655
656 #
657 # covCutoff - the coverage cutoff optimisation routine.
658 #
659 sub covCutoff{
660
661 my $ass = shift;
662 #get the assembly score and set the current cutoff score.
663 my $ass_score = $ass->{assmscore};
664 print "In covCutOff and assembly score is: $ass_score..\n" if $interested;
665
666
667
668 sub func {
669 my $ass = shift;
670 my $cutoff = shift;
671 my $ass_score = $ass->{assmscore};
672 my $ps = $ass->{pstringg};
673 if($ps =~ /cov_cutoff/){
674 $ps =~ s/cov_cutoff\s+\d+(\.\d+)?/cov_cutoff $cutoff/;
675 }
676 else {
677 $ps .= " -cov_cutoff $cutoff";
678 }
679 $ass->{pstringg} = $ps;
680
681 print STDERR strftime("%b %e %H:%M:%S", localtime);
682 printf STDERR "\t\tSetting cov_cutoff to %.3f.\n", $cutoff;
683 print $OUT strftime("%b %e %H:%M:%S", localtime);
684 printf $OUT "\t\tSetting cov_cutoff to %.3f.\n", $cutoff;
685
686 my $worked = VelvetOpt::gwrap::objectVelvetg($ass);
687 if($worked){
688 $ass->getAssemblyDetails();
689 }
690 else {
691 die "Velvet Error in covCutoff!\n";
692 }
693 $ass_score = $ass->{assmscore};
694 print $OUT $ass->toStringNoV();
695
696 return $ass_score;
697
698 }
699
700 print STDERR strftime("%b %e %H:%M:%S", localtime), " Beginning coverage cutoff optimisation\n";
701 print $OUT strftime("%b %e %H:%M:%S", localtime), " Beginning coverage cutoff optimisation\n";
702
703 my $dir = $ass->{ass_dir};
704 $dir .= "/stats.txt";
705 #print "\tLooking for exp_cov in $dir\n";
706 my $expCov = VelvetOpt::Utils::estExpCov($dir, $ass->{hashval});
707
708 my $a = 0;
709 my $b = 0.8 * $expCov;
710 my $t = 0.618;
711 my $c = $a + $t * ($b - $a);
712 my $d = $b + $t * ($a - $b);
713 my $fc = func($ass, $c);
714 my $fd = func($ass, $d);
715
716 my $iters = 1;
717
718 printf STDERR "\t\tLooking for best cutoff score between %.3f and %.3f\n", $a, $b;
719 printf $OUT "\t\tLooking for best cutoff score between %.3f and %.3f\n", $a, $b;
720
721 while(abs($a -$b) > 1){
722 if($fc > $fd){
723 printf STDERR "\t\tMax cutoff lies between %.3f & %.3f\n", $d, $b;
724 my $absdiff = abs($fc - $fd);
725 print STDERR "\t\tfc = $fc\tfd = $fd\tabs diff = $absdiff\n";
726 printf $OUT "\t\tMax cutoff lies between %.3f & %.3f\n", $d, $b;
727 $a = $d;
728 $d = $c;
729 $fd = $fc;
730 $c = $a + $t * ($b - $a);
731 $fc = func($ass, $c);
732 }
733 else {
734 printf STDERR "\t\tMax cutoff lies between %.3f & %.3f\n", $a, $c;
735 my $absdiff = abs($fc - $fd);
736 print STDERR "\t\tfc = $fc\tfd = $fd\tabs diff = $absdiff\n";
737 printf $OUT "\t\tMax cutoff lies between %.3f & %.3f\n", $a, $c;
738 $b = $c;
739 $c = $d;
740 $fc = $fd;
741 $d = $b + $t * ($a - $b);
742 $fd = func($ass, $d);
743 }
744 $iters ++;
745 }
746
747 printf STDERR "\t\tOptimum value of cutoff is %.2f\n", $b;
748 print STDERR "\t\tTook $iters iterations\n";
749 printf $OUT "\t\tOptimum value of cutoff is %.2f\n", $b;
750 print $OUT "\t\tTook $iters iterations\n";
751
752 return 1;
753
754 }
755
756 #
757 # expCov - find the expected coverage for the assembly and run velvetg with that exp_cov.
758 #
759 sub expCov {
760
761 print STDERR strftime("%b %e %H:%M:%S", localtime), " Looking for the expected coverage\n";
762 print $OUT strftime("%b %e %H:%M:%S", localtime), " Looking for the expected coverage\n";
763
764 my $ass = shift;
765
766 #need to get the directory of the assembly and add "stats.txt" to it and then send it to
767 #the histogram methods in SlugsUtils.pm...
768 my $dir = $ass->{ass_dir};
769 $dir .= "/stats.txt";
770 my $expCov = VelvetOpt::Utils::estExpCov($dir, $ass->{hashval});
771
772 print STDERR strftime("%b %e %H:%M:%S", localtime), "\t\tExpected coverage set to $expCov\n";
773 print $OUT strftime("%b %e %H:%M:%S", localtime), "\t\tExpected coverage set to $expCov\n";
774
775 #re-write the pstringg with the new velvetg command..
776 my $vg = $ass->{pstringg};
777 if($vg =~ /exp_cov/){
778 $vg =~ s/exp_cov\s+\d+/exp_cov $expCov/;
779 }
780 else {
781 $vg .= " -exp_cov $expCov";
782 }
783
784 $ass->{pstringg} = $vg;
785
786 print $OUT $ass->toStringNoV();
787
788 }
789
790 #
791 # insLengthLong - get the Long insert length and use it in the assembly..
792 #
793 sub insLengthLong {
794 print STDERR strftime("%b %e %H:%M:%S", localtime), " Getting the long insert length\n";
795 print $OUT strftime("%b %e %H:%M:%S", localtime), " Getting the long insert length\n";
796 my $ass = shift;
797 my $len = "auto";
798 print STDERR strftime("%b %e %H:%M:%S", localtime), " Setting assembly long insert length $len\n";
799 print $OUT strftime("%b %e %H:%M:%S", localtime), " Setting assembly long insert length $len\n";
800
801 #re-write the pstringg with the new velvetg command..
802 #my $vg = $ass->{pstringg};
803 #if($vg =~ /ins_length_long/){
804 # $vg =~ s/ins_length_long\s+\d+/ins_length_long $len/;
805 #}
806 #else {
807 # $vg .= " -ins_length_long $len";
808 #}
809 }
810
811 #
812 # insLengthShort - get the short insert length and use it in the assembly..
813 #
814 sub insLengthShort {
815 print STDERR strftime("%b %e %H:%M:%S", localtime), " Setting the short insert length\n";
816 print $OUT strftime("%b %e %H:%M:%S", localtime), " Setting the short insert length\n";
817 my $ass = shift;
818 my $len = "auto";
819 print STDERR strftime("%b %e %H:%M:%S", localtime), " Setting assembly short insert length(s) to $len\n";
820 print $OUT strftime("%b %e %H:%M:%S", localtime), " Setting assembly short insert length(s) to $len\n";
821
822 #re-write the pstringg with the new velvetg command..
823 #my $vg = $ass->{pstringg};
824 #if($vg =~ /ins_length /){
825 # $vg =~ s/ins_length\s+\d+/ins_length $len/;
826 #}
827 #else {
828 # $vg .= " -ins_length $len";
829 #}
830 #$ass->{pstringg} = $vg;
831 }
832
833
834 #
835 # estMemUse - estimates the memory usage from
836 #
837 sub estMemUse {
838
839 my $max_runs = @hashvals;
840 my $totmem = 0;
841 #get the read lengths and the number of reads...
842 #need the short read filenames...
843 my ($rs, $nr) = VelvetOpt::Utils::getReadSizeNum($readfile);
844 if ($max_runs > $num_threads){
845 for(my $i = 0; $i < $num_threads; $i ++){
846 $totmem += VelvetOpt::Utils::estVelvetMemUse($rs, $genomesize, $nr, $hashvals[$i]);
847 }
848 }
849 else {
850 foreach my $h (@hashvals){
851 $totmem += VelvetOpt::Utils::estVelvetMemUse($rs, $genomesize, $nr, $h);
852 }
853 }
854 return $totmem;
855 }