| 
0
 | 
     1 #!/usr/bin/env perl
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 use strict;
 | 
| 
 | 
     4 use warnings;
 | 
| 
 | 
     5 use File::Temp;
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 @ARGV >= 3 or die "Usage: $0 <input.bam> <num processes> <output.vcf> [freebayes options]\nRuns FreeBayes 0.8.7 separately for each reference chromosome, with as many concurrent processes as specified\n"; 
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 my $in_bam = shift @ARGV;
 | 
| 
 | 
    10 my $num_procs = shift @ARGV;
 | 
| 
 | 
    11 my $out_vcf = shift @ARGV;
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 $SIG{__DIE__} = $SIG{INT} = $SIG{HUP} = $SIG{TERM} = $SIG{ABRT} = \&cleanup;
 | 
| 
 | 
    14 # Lines look some thing like "@SQ	SN:chr1	LN:249250621"
 | 
| 
 | 
    15 open(SAM_HEADERS, "samtools view -H $in_bam |")
 | 
| 
 | 
    16   or die "Cannot run samtools on $in_bam: $!\n";
 | 
| 
 | 
    17 my %seq2length;
 | 
| 
 | 
    18 my %seqname2orig_order;
 | 
| 
 | 
    19 while(<SAM_HEADERS>){
 | 
| 
 | 
    20   if(/^\@SQ\s+SN:(\S+)\s+LN:(\d+)/){
 | 
| 
 | 
    21     $seq2length{$1} = $2;
 | 
| 
 | 
    22     $seqname2orig_order{$1} = $.;
 | 
| 
 | 
    23   }
 | 
| 
 | 
    24 }
 | 
| 
 | 
    25 close(SAM_HEADERS);
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 # Make sure the index  is available and has the name FreeBayes expects
 | 
| 
 | 
    28 my $bamfile_stem = $in_bam;
 | 
| 
 | 
    29 $bamfile_stem =~ s/\.[^.]+$//; # remove last file extension, if there is one
 | 
| 
 | 
    30 if(not -e "$bamfile_stem.bai"){
 | 
| 
 | 
    31   if(not -e "$in_bam.bai"){
 | 
| 
 | 
    32     system("samtools index $in_bam") >> 8 and die "Cannot index $in_bam: samtools had exit startus $?\n"; 
 | 
| 
 | 
    33   }
 | 
| 
 | 
    34   system("ln -s $in_bam.bai $bamfile_stem.bai")>> 8 and die "Cannot link exsiting BAM index $in_bam.bai to FreeBayes required file name $bamfile_stem.bai: ln had exit status $?\n";
 | 
| 
 | 
    35 }
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 my @cmds;
 | 
| 
 | 
    38 my @tmp_outfiles;
 | 
| 
 | 
    39 my %tmpfile2orig_order;
 | 
| 
 | 
    40 # Sort contigs from largest to smallest for scheduling efficiency
 | 
| 
 | 
    41 for my $seq_name (sort {$seq2length{$b} <=> $seq2length{$a}} keys %seq2length){
 | 
| 
 | 
    42   my ($tmp_fh, $tmp_filename) = tmpnam();
 | 
| 
 | 
    43   push @cmds, "/export/achri_data/programs/freebayes0.8.7 -b $in_bam -v $tmp_filename " . join(" ", @ARGV) . " -r $seq_name:1..$seq2length{$seq_name}";
 | 
| 
 | 
    44   push @tmp_outfiles, $tmp_filename;
 | 
| 
 | 
    45   $tmpfile2orig_order{$tmp_filename} = $seqname2orig_order{$seq_name};
 | 
| 
 | 
    46 }
 | 
| 
 | 
    47 
 | 
| 
 | 
    48 open(OUT_VCF, ">$out_vcf")
 | 
| 
 | 
    49   or die "Cannot open $out_vcf for writing: $!\n";
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 run_cmds($num_procs, @cmds);
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 # Grab output header from first temp output file
 | 
| 
 | 
    54 open(H, $tmp_outfiles[0])
 | 
| 
 | 
    55   or die "Cannot open $tmp_outfiles[0] for reading: $!\n";
 | 
| 
 | 
    56 while(<H>){
 | 
| 
 | 
    57   last if not /^#/; # end of headers
 | 
| 
 | 
    58   # mod for self-referencing meta-data
 | 
| 
 | 
    59   if(/^##commandline=/){
 | 
| 
 | 
    60     print OUT_VCF "##commandline=$0 $in_bam $num_procs $out_vcf ".join(" ", @ARGV)."\n";
 | 
| 
 | 
    61   }
 | 
| 
 | 
    62   else{
 | 
| 
 | 
    63     # Otherwise verbatim
 | 
| 
 | 
    64     print OUT_VCF $_;
 | 
| 
 | 
    65   }
 | 
| 
 | 
    66 
 | 
| 
 | 
    67 }
 | 
| 
 | 
    68 close(H);
 | 
| 
 | 
    69 
 | 
| 
 | 
    70 # Concatenate the temporary results into a final outfile
 | 
| 
 | 
    71 for my $tmp_outfile (sort {$tmpfile2orig_order{$a} <=> $tmpfile2orig_order{$b}} @tmp_outfiles){
 | 
| 
 | 
    72   open(TMP_VCF, $tmp_outfile) 
 | 
| 
 | 
    73     or die "Cannot open $tmp_outfile for reading: $!\n";
 | 
| 
 | 
    74   while(<TMP_VCF>){
 | 
| 
 | 
    75     print OUT_VCF unless /^#/;
 | 
| 
 | 
    76   }
 | 
| 
 | 
    77   close(TMP_VCF);
 | 
| 
 | 
    78 }
 | 
| 
 | 
    79 close(OUT_VCF);
 | 
| 
 | 
    80 &cleanup;
 | 
| 
 | 
    81 
 | 
| 
 | 
    82 sub cleanup{
 | 
| 
 | 
    83   unlink @tmp_outfiles;
 | 
| 
 | 
    84 }
 | 
| 
 | 
    85 sub run_cmds{
 | 
| 
 | 
    86 
 | 
| 
 | 
    87   my ($max_cmds, @cmd) = @_;
 | 
| 
 | 
    88 
 | 
| 
 | 
    89   my ($num_children, $pid);
 | 
| 
 | 
    90 
 | 
| 
 | 
    91         for($num_children = 1; $num_children < $max_cmds && @cmds; $num_children++){
 | 
| 
 | 
    92         # initialize the number of child processes at 1, and increment it by one
 | 
| 
 | 
    93         #while it is less than $max_cmds
 | 
| 
 | 
    94 
 | 
| 
 | 
    95                 my $cmd = shift (@cmds);
 | 
| 
 | 
    96                 if($pid = fork) {
 | 
| 
 | 
    97                         # do nothing if parent
 | 
| 
 | 
    98                 } elsif(defined $pid) { # $pid is zero here if defined
 | 
| 
 | 
    99                         print STDERR $cmd, "\n";
 | 
| 
 | 
   100                         system $cmd;
 | 
| 
 | 
   101                         exit;
 | 
| 
 | 
   102                 } else {
 | 
| 
 | 
   103                         #weird fork error
 | 
| 
 | 
   104                         die "Can't fork: $!\n";
 | 
| 
 | 
   105                 }
 | 
| 
 | 
   106         }
 | 
| 
 | 
   107 
 | 
| 
 | 
   108         while(@cmds) {
 | 
| 
 | 
   109                 undef $pid;
 | 
| 
 | 
   110                 FORK: {
 | 
| 
 | 
   111                         my $cmd = shift (@cmds);
 | 
| 
 | 
   112                         if($pid = fork) {
 | 
| 
 | 
   113                                 # parent here
 | 
| 
 | 
   114                                 $num_children++;
 | 
| 
 | 
   115                                 wait;
 | 
| 
 | 
   116                                 $num_children--;
 | 
| 
 | 
   117                                 next;
 | 
| 
 | 
   118 
 | 
| 
 | 
   119                         } elsif(defined $pid) { # $pid is zero here if defined
 | 
| 
 | 
   120                                 print STDERR $cmd, "\n";
 | 
| 
 | 
   121                                 system $cmd;
 | 
| 
 | 
   122                                 exit;
 | 
| 
 | 
   123 
 | 
| 
 | 
   124                         } else {
 | 
| 
 | 
   125                                 #weird fork error
 | 
| 
 | 
   126                                 die "Can't fork: $!\n";
 | 
| 
 | 
   127                         }
 | 
| 
 | 
   128                 }
 | 
| 
 | 
   129         }
 | 
| 
 | 
   130         wait while $num_children--;
 | 
| 
 | 
   131 }
 | 
| 
 | 
   132 
 |