annotate pileup.pl @ 12:7a51d316635a draft

Uploaded
author modencode-dcc
date Fri, 18 Jan 2013 15:55:46 -0500
parents 1ced8ed78013
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
1 #!/usr/bin/perl
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
2
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
3 #purpose: script which calculates the percent of genome coverage and average coverage of bases
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
4 #author: Ziru Zhou
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
5 #date: October, 2012
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
6
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
7
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
8 use strict;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
9 use warnings;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
10 use File::Basename;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
11
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
12 #globals
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
13 #======================================================================
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
14 my $bamfile = $ARGV[0];
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
15 my $reffile = $ARGV[1];
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
16 my $outputfile = $ARGV[2];
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
17 my $bamname = $ARGV[3];
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
18 my $refname = $ARGV[4];
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
19
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
20 my $genomesize = 0;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
21 my $pileupwc = 0;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
22 my $pileupc4 = 0;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
23
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
24 my $percentofcover = 0;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
25 my $averagebasecoverage = 0;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
26
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
27
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
28 #function declarations
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
29 #======================================================================
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
30 #get the pileupwc value
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
31 sub Getpileupwc()
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
32 {
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
33 #system ("samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 > tmp");
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
34 #$pileupwc = `cat tmp | wc -l`;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
35 $pileupwc = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | wc -l`;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
36 }
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
37
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
38 #get the pileupc4 value
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
39 sub Getpileupc4()
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
40 {
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
41 $pileupc4 = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | awk '{ total += \$1 } END { print total }'`;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
42 }
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
43
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
44 #get the genomesize value
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
45 sub Getgenomesize()
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
46 {
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
47 $genomesize = `tail -n 1 "$reffile.fai"| cut --fields=3`;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
48 }
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
49
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
50 #function to calculate the final 2 values for output
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
51 sub Calculate()
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
52 {
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
53 $percentofcover = ( int($pileupwc) / int($genomesize) ) * 100;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
54 $averagebasecoverage = int($pileupc4) / int($pileupwc);
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
55 }
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
56
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
57 #function to write to output file
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
58 sub Output()
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
59 {
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
60 open FILE, '>'.$outputfile or die "unable to create $outputfile\n";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
61
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
62 print FILE "\n#";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
63 print FILE "\n# Generated by BAMEdit. Please send your questions/comments to modENCODE DCC at help\@modencode.org.";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
64 print FILE "\n#";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
65 print FILE "\n# coverage: % of bases in genome covered";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
66 print FILE "\n# avg coverage: average coverage of bases covered in genome ";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
67 print FILE "\n#\n";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
68 print FILE "input file\t$bamname\n";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
69 print FILE "genome file\t$refname\n";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
70 print FILE "genome length\t$genomesize";
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
71 printf FILE "coverage\t%.2f\n", $percentofcover;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
72 printf FILE "avg coverage\t%.2f\n", $averagebasecoverage;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
73
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
74 close FILE;
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
75 }
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
76
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
77 #function to clean up, the tmp file and the .fai file
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
78 sub Cleanup()
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
79 {
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
80 system ("sudo rm ${reffile}.dat.fai");
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
81 }
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
82
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
83 #function calls
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
84 #======================================================================
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
85 Getpileupwc();
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
86 Getpileupc4();
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
87 Getgenomesize();
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
88 Calculate();
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
89 Output();
1ced8ed78013 Uploaded
modencode-dcc
parents:
diff changeset
90 Cleanup();