0
|
1 #!/usr/bin/env perl
|
|
2
|
|
3 use strict;
|
|
4 use warnings;
|
|
5 use IO::Handle;
|
|
6
|
|
7 # Report lines of a file that have as one of the column values a value from the pattern file
|
|
8 @ARGV == 6 or die "Usage: $0 <input.bam> <named genomic regions.bed> <file of patterns> <filtered output.bam> <matched regions.bed> <output messages.txt>\n";
|
|
9
|
|
10 die "Input BAM file $ARGV[0] does not exist\n" if not -e $ARGV[0];
|
|
11 die "Input BAM file $ARGV[0] is not readable\n" if not -r $ARGV[0];
|
|
12
|
|
13 my @alts;
|
|
14 if(-e $ARGV[2]){
|
|
15 open(PATTERNS, $ARGV[2])
|
|
16 or die "Cannot open $ARGV[1] for reading: $!\n";
|
|
17 while(<PATTERNS>){
|
|
18 chomp;
|
|
19 push @alts, $_;
|
|
20 }
|
|
21 close(PATTERNS);
|
|
22 }
|
|
23 else{ # else assume the arg is a list of names directly
|
|
24 @alts = split /\s+/, $ARGV[2];
|
|
25 }
|
|
26
|
|
27 my @regions;
|
|
28 my %seen;
|
|
29 my $regex = "^(?:".join("|", @alts).")\$";
|
|
30 open(TAB, $ARGV[1])
|
|
31 or die "Cannot open $ARGV[1] for reading: $!\n";
|
|
32 while(<TAB>){
|
|
33 chomp;
|
|
34 my @F = split /\t/, $_;
|
|
35 next unless @F > 3;
|
|
36 if($F[3] =~ /$regex/io){
|
|
37 next if $seen{"$F[0]:$F[1]-$F[2]"}++; # sometimes regions are repeated, don't send these repeats to samtools
|
|
38 push @regions, $_;
|
|
39 }
|
|
40 }
|
|
41 close(TAB);
|
|
42
|
|
43 die "No matches to desired names in the provided named genomic regions BED file, aborting filtered BAM file creation" if not @regions;
|
|
44
|
|
45 open(BED, ">$ARGV[4]")
|
|
46 or die "Cannot open $ARGV[4] for writing: $!\n";
|
|
47 print BED join("\n", @regions), "\n";
|
|
48 close(BED);
|
|
49
|
|
50 open(ERRFILE, ">$ARGV[5]") or die "Cannot open $ARGV[5] for writing: $!\n";
|
|
51 STDOUT->fdopen(\*ERRFILE, "w") or die "Cannot redirect stdout to $ARGV[5]: $!\n";
|
|
52 STDERR->fdopen(\*ERRFILE, "w") or die "Cannot redirect stderr to $ARGV[5]: $!\n";
|
|
53 system("samtools view -h -L $ARGV[4] $ARGV[0] | samtools view -S -b - > $ARGV[3]") >> 8
|
|
54 and die "Samtools failed: exit status ", ($?>>8), "\n";
|
|
55 system("samtools index $ARGV[3]");
|