2
|
1 #!/usr/bin/perl
|
|
2
|
|
3 #purpose: script which calculates the percent of genome coverage and average coverage of bases
|
|
4 #author: Ziru Zhou
|
|
5 #date: October, 2012
|
|
6
|
|
7
|
|
8 use strict;
|
|
9 use warnings;
|
|
10 use File::Basename;
|
|
11
|
|
12 #globals
|
|
13 #======================================================================
|
|
14 my $bamfile = $ARGV[0];
|
|
15 my $reffile = $ARGV[1];
|
|
16 my $outputfile = $ARGV[2];
|
|
17 my $bamname = $ARGV[3];
|
|
18 my $refname = $ARGV[4];
|
|
19
|
|
20 my $genomesize = 0;
|
|
21 my $pileupwc = 0;
|
|
22 my $pileupc4 = 0;
|
|
23
|
|
24 my $percentofcover = 0;
|
|
25 my $averagebasecoverage = 0;
|
|
26
|
|
27
|
|
28 #function declarations
|
|
29 #======================================================================
|
|
30 #get the pileupwc value
|
|
31 sub Getpileupwc()
|
|
32 {
|
|
33 #system ("samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 > tmp");
|
|
34 #$pileupwc = `cat tmp | wc -l`;
|
|
35 $pileupwc = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | wc -l`;
|
|
36 }
|
|
37
|
|
38 #get the pileupc4 value
|
|
39 sub Getpileupc4()
|
|
40 {
|
|
41 $pileupc4 = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | awk '{ total += \$1 } END { print total }'`;
|
|
42 }
|
|
43
|
|
44 #get the genomesize value
|
|
45 sub Getgenomesize()
|
|
46 {
|
|
47 $genomesize = `tail -n 1 "$reffile.fai"| cut --fields=3`;
|
|
48 }
|
|
49
|
|
50 #function to calculate the final 2 values for output
|
|
51 sub Calculate()
|
|
52 {
|
|
53 $percentofcover = ( int($pileupwc) / int($genomesize) ) * 100;
|
|
54 $averagebasecoverage = int($pileupc4) / int($pileupwc);
|
|
55 }
|
|
56
|
|
57 #function to write to output file
|
|
58 sub Output()
|
|
59 {
|
|
60 open FILE, '>'.$outputfile or die "unable to create $outputfile\n";
|
|
61
|
|
62 print FILE "\n#";
|
|
63 print FILE "\n# Generated by BAMEdit. Please send your questions/comments to modENCODE DCC at help\@modencode.org.";
|
|
64 print FILE "\n#";
|
|
65 print FILE "\n# coverage: % of bases in genome covered";
|
|
66 print FILE "\n# avg coverage: average coverage of bases covered in genome ";
|
|
67 print FILE "\n#\n";
|
|
68 print FILE "input file\t$bamname\n";
|
|
69 print FILE "genome file\t$refname\n";
|
|
70 print FILE "genome length\t$genomesize";
|
|
71 printf FILE "coverage\t%.2f\n", $percentofcover;
|
|
72 printf FILE "avg coverage\t%.2f\n", $averagebasecoverage;
|
|
73
|
|
74 close FILE;
|
|
75 }
|
|
76
|
|
77 #function to clean up, the tmp file and the .fai file
|
|
78 sub Cleanup()
|
|
79 {
|
|
80 system ("sudo rm ${reffile}.dat.fai");
|
|
81 }
|
|
82
|
|
83 #function calls
|
|
84 #======================================================================
|
|
85 Getpileupwc();
|
|
86 Getpileupc4();
|
|
87 Getgenomesize();
|
|
88 Calculate();
|
|
89 Output();
|
|
90 Cleanup();
|