Mercurial > repos > geert-vandeweyer > coverage_report
annotate CoverageReport.pl @ 26:859999cb135b draft
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
author | geert-vandeweyer |
---|---|
date | Wed, 29 Nov 2017 08:12:27 -0500 |
parents | 6cb012c8497a |
children | 576bfc1586f9 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/perl |
2 | |
3 # load modules | |
4 use Getopt::Std; | |
5 use File::Basename; | |
6 use Number::Format; | |
7 | |
8 # number format | |
9 my $de = new Number::Format(-thousands_sep =>',',-decimal_point => '.'); | |
10 | |
11 ########## | |
12 ## opts ## | |
13 ########## | |
14 ## input files | |
15 # b : path to input (b)am file | |
16 # t : path to input (t)arget regions in BED format | |
17 ## output files | |
18 # o : report pdf (o)utput file | |
19 # z : all plots and tables in tar.g(z) format | |
20 ## entries in the report | |
21 # r : Coverage per (r)egion (boolean) | |
22 # s : (s)ubregion coverage if average < specified (plots for positions along target region) (boolean) | |
23 # S : (S)ubregion coverage for ALL failed exons => use either s OR S or you will have double plots. | |
24 # A : (A)ll exons will be plotted. | |
25 # L : (L)ist failed exons instead of plotting | |
26 # m : (m)inimal Coverage threshold | |
27 # f : fraction of average as threshold | |
28 # n : sample (n)ame. | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
29 # T : collapse overlapping Target regions. |
1 | 30 |
24
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
31 getopts('b:t:o:z:rsSALm:n:f:T', \%opts) ; |
1 | 32 |
33 # make output directory in (tmp) working dir | |
34 our $wd = "/tmp/Coverage.".int(rand(1000)); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
35 |
1 | 36 while (-d $wd) { |
37 $wd = "/tmp/Coverage.".int(rand(1000)); | |
24
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
38 |
1 | 39 } |
40 system("mkdir $wd"); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
41 $wd = "/tmp/Coverage.993"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
42 print "wd : $wd\n"; |
1 | 43 ## variables |
44 our %commandsrun = (); | |
45 | |
46 if (!exists($opts{'b'}) || !-e $opts{'b'}) { | |
47 die('Bam File not found'); | |
48 } | |
49 if (!exists($opts{'t'}) || !-e $opts{'t'}) { | |
50 die('Target File (BED) not found'); | |
51 } | |
52 | |
53 if (exists($opts{'m'})) { | |
54 $thresh = $opts{'m'}; | |
55 } | |
56 else { | |
57 $thresh = 40; | |
58 } | |
59 | |
60 if (exists($opts{'f'})) { | |
61 $frac = $opts{'f'}; | |
62 } | |
63 else { | |
64 $frac = 0.2; | |
65 } | |
66 | |
67 if (exists($opts{'o'})) { | |
68 $pdffile = $opts{'o'}; | |
69 } | |
70 else { | |
71 $pdffile = "$wd/CoverageReport.pdf"; | |
72 } | |
73 | |
74 if (exists($opts{'z'})) { | |
75 $tarfile = $opts{'z'}; | |
76 } | |
77 else { | |
78 $tarfile = "$wd/Results.tar.gz"; | |
79 } | |
80 | |
24
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
81 ## 0. Collapse overlapping target regions. |
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
82 if (defined($opts{'T'})) { |
25
6cb012c8497a
Added BED format check before collapsing regions.
geert-vandeweyer
parents:
24
diff
changeset
|
83 ## check BED format. Must have 6 cols if using this. |
6cb012c8497a
Added BED format check before collapsing regions.
geert-vandeweyer
parents:
24
diff
changeset
|
84 my $head = `head -n 1 $opts{'t'}`; |
6cb012c8497a
Added BED format check before collapsing regions.
geert-vandeweyer
parents:
24
diff
changeset
|
85 chomp; |
6cb012c8497a
Added BED format check before collapsing regions.
geert-vandeweyer
parents:
24
diff
changeset
|
86 my @c = split(/\t/,$head); |
6cb012c8497a
Added BED format check before collapsing regions.
geert-vandeweyer
parents:
24
diff
changeset
|
87 if (scalar(@c) < 6) { |
6cb012c8497a
Added BED format check before collapsing regions.
geert-vandeweyer
parents:
24
diff
changeset
|
88 die("Targets BED file must be in 6-column format for collapsings. See tool documentation for more info.\n"); |
6cb012c8497a
Added BED format check before collapsing regions.
geert-vandeweyer
parents:
24
diff
changeset
|
89 } |
24
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
90 my $targets = $opts{'t'}; |
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
91 my $tmptargets = "$wd/collapsedtargets.bed"; |
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
92 system("sort -k1,1 -k2,2n $targets > $wd/sorted.targets.bed"); |
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
93 system("bedtools merge -s -scores max -nms -i $wd/sorted.targets.bed > $tmptargets"); |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
94 open IN, $tmptargets; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
95 open OUT, ">$wd/collapsed.targets.renamed.bed"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
96 # we assume that overlapping fragments come from isoforms, not from different genes. |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
97 my %counters = (); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
98 my @genes = (); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
99 while (<IN>) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
100 chomp; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
101 my @p = split(/\t/,$_); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
102 my @g = split(/,/,$p[3]); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
103 $g[0] =~ m/(\S+)\(.*/; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
104 my $gene = $1; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
105 if (!defined($counters{$gene})) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
106 push(@genes,$gene); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
107 $counters{$gene}{'lines'} = (); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
108 $counters{$gene}{'orient'} = $p[5]; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
109 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
110 $p[3] = $gene."(COLLAPSED)"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
111 push(@{$counters{$gene}{'lines'}},\@p); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
112 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
113 close IN; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
114 foreach my $gene (@genes) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
115 if ($counters{$gene}{'orient'} eq '-') { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
116 my $idx = scalar(@{$counters{$gene}{'lines'}}) + 1; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
117 foreach my $line (@{$counters{$gene}{'lines'}}) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
118 $idx--; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
119 $line->[3] .= "|Region_$idx"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
120 print OUT join("\t",@$line)."\n"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
121 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
122 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
123 else { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
124 my $idx = 0; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
125 foreach my $line (@{$counters{$gene}{'lines'}}) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
126 $idx++; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
127 $line->[3] .= "|Region_$idx"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
128 print OUT join("\t",@$line)."\n"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
129 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
130 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
131 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
132 close OUT; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
133 $opts{'t'} = "$wd/collapsed.targets.renamed.bed"; |
24
fd788f9db899
Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents:
22
diff
changeset
|
134 } |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
135 # 1. Coverage per exon |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
136 # included in 2. |
1 | 137 |
138 # 2. Coverage per position | |
139 &SubRegionCoverage($opts{'b'}, $opts{'t'}); | |
140 our %filehash; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
141 our $tcov; |
1 | 142 if (exists($opts{'s'}) || exists($opts{'S'}) || exists($opts{'A'}) || exists($opts{'L'})) { |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
143 system("mkdir -p $wd/SplitFiles"); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
144 system("rm $wd/SplitFiles/*"); |
1 | 145 ## get position coverages |
146 ## split input files | |
147 open IN, "$wd/Targets.Position.Coverage"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
148 open BCOVSUM, ">$wd/Results/".$opts{'n'}.".Average_Region_Coverage.txt"; |
1 | 149 my $fileidx = 0; |
150 my $currreg = ''; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
151 my $elength = 0; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
152 my $esum = 0; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
153 my $eline = ""; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
154 my %out = (); |
1 | 155 while (<IN>) { |
156 my $line = $_; | |
157 chomp($line); | |
158 my @p = split(/\t/,$line); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
159 my $reg = $p[0].'-'.$p[1].'-'.$p[2]. ": $p[3]"; |
1 | 160 my $ex = $p[3]; |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
161 my $epos = $p[1]; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
162 # average exon coverage calculation. |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
163 if (!defined($out{$ex})) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
164 $out{$ex} = (); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
165 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
166 if (!defined($out{$ex}{$p[0]})) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
167 $out{$ex}{$p[0]} = (); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
168 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
169 # needs to be transcript specific ($ex) and position specific ($epos) to handle both isoforms and PAR/multiple mapping situations. |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
170 if (!defined($out{$ex}{$p[0]}{$epos})) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
171 $out{$ex}{$p[0]}{$epos}{'r'} = "$p[0]\t$p[1]\t$p[2]\t$p[3]\t$p[4]\t$p[5]"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
172 $out{$ex}{$p[0]}{$epos}{'c'} = (); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
173 |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
174 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
175 push(@{$out{$ex}{$p[0]}{$epos}{'c'}},$p[-1]); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
176 # splitting files |
1 | 177 if ($reg ne $currreg) { |
178 ## new exon open new outfile | |
179 if ($currreg ne '') { | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
180 print BCOVSUM "$eline\t".($esum/$elength)."\n"; |
1 | 181 ## filehandle is open. close it |
182 close OUT; | |
183 } | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
184 $eline = "$p[0]\t$p[1]\t$p[2]\t$p[3]\t$p[4]\t$p[5]"; #\t$p[6]\t$p[7]\t$p[8]"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
185 $esum = 0; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
186 $elength = 0; |
1 | 187 if (!exists($filehash{$reg})) { |
188 $fileidx++; | |
189 $filehash{$reg}{'idx'} = $fileidx; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
190 $filehash{$reg}{'exon'} = $reg; |
1 | 191 open OUT, ">> $wd/SplitFiles/File_$fileidx.txt"; |
192 $currreg = $reg; | |
193 } | |
194 else { | |
195 open OUT, ">> $wd/SplitFiles/File_".$filehash{$reg}{'idx'}.".txt"; | |
196 $currreg = $reg; | |
197 } | |
198 } | |
199 ## print the line to the open filehandle. | |
200 print OUT "$line\n"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
201 $esum += $p[-1]; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
202 $elength++; |
1 | 203 } |
204 close OUT; | |
205 close IN; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
206 if ($esum > 0) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
207 print BCOVSUM "$eline\t".($esum/$elength)."\n"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
208 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
209 close BCOVSUM; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
210 open OUT, ">$wd/avg.tcov.txt"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
211 |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
212 foreach my $tr_ex (sort {$a cmp $b} keys(%out)) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
213 foreach my $chr (sort {$a cmp $b} keys(%{$out{$tr_ex}})) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
214 foreach(sort {$a <=> $b} keys(%{$out{$tr_ex}{$chr}})) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
215 my ($avg,$nr,$nrcov) = GetStats(\@{$out{$tr_ex}{$chr}{$_}{'c'}}); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
216 my $frac = 0; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
217 if ($nr > 0) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
218 $frac = ($nrcov / $nr); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
219 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
220 print OUT $out{$tr_ex}{$chr}{$_}{'r'}."\t".$avg."\t".$nrcov."\t".$nr."\t".$frac."\n"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
221 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
222 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
223 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
224 close OUT; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
225 $tcov = "$wd/avg.tcov.txt"; |
1 | 226 |
227 } | |
228 ## sort output files according to targets file | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
229 my %hash = (); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
230 open IN, $tcov; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
231 while (<IN>) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
232 my @p = split(/\t/,$_) ; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
233 $hash{$p[3].':'.$p[1]} = $_; |
1 | 234 } |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
235 close IN; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
236 open OUT, ">$tcov"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
237 open IN, $opts{'t'}; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
238 while (<IN>) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
239 my @p = split(/\t/,$_) ; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
240 print OUT $hash{$p[3].':'.$p[1]}; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
241 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
242 close IN; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
243 close OUT; |
1 | 244 |
245 | |
246 #################################### | |
247 ## PROCESS RESULTS & CREATE PLOTS ## | |
248 #################################### | |
249 system("mkdir $wd/Report"); | |
250 | |
251 system("mkdir $wd/Rout"); | |
252 system("mkdir $wd/Plots"); | |
253 | |
254 $samplename = $opts{'n'}; | |
255 $samplename =~ s/_/\\_/g; | |
256 | |
257 # 0. Preamble | |
258 ## compose preamble | |
259 open OUT, ">$wd/Report/Report.tex"; | |
260 print OUT '\documentclass[a4paper,10pt]{article}'."\n"; | |
261 print OUT '\usepackage[left=2cm,top=1.5cm,right=1.5cm,bottom=2.5cm,nohead]{geometry}'."\n"; | |
262 print OUT '\usepackage{longtable}'."\n"; | |
263 print OUT '\usepackage[T1]{fontenc}'."\n"; | |
264 print OUT '\usepackage{fancyhdr}'."\n"; | |
265 print OUT '\usepackage[latin9]{inputenc}'."\n"; | |
266 print OUT '\usepackage{color}'."\n"; | |
267 print OUT '\usepackage[pdftex]{graphicx}'."\n"; | |
268 print OUT '\definecolor{grey}{RGB}{160,160,160}'."\n"; | |
269 print OUT '\definecolor{darkgrey}{RGB}{100,100,100}'."\n"; | |
270 print OUT '\definecolor{red}{RGB}{255,0,0}'."\n"; | |
271 print OUT '\definecolor{orange}{RGB}{238,118,0}'."\n"; | |
272 print OUT '\setlength\LTleft{0pt}'."\n"; | |
273 print OUT '\setlength\LTright{0pt}'."\n"; | |
274 print OUT '\begin{document}'."\n"; | |
275 print OUT '\pagestyle{fancy}'."\n"; | |
276 print OUT '\fancyhead{}'."\n"; | |
277 print OUT '\renewcommand{\footrulewidth}{0.4pt}'."\n"; | |
278 print OUT '\renewcommand{\headrulewidth}{0pt}'."\n"; | |
279 print OUT '\fancyfoot[R]{\today\hspace{2cm}\thepage\ of \pageref{endofdoc}}'."\n"; | |
280 print OUT '\fancyfoot[C]{}'."\n"; | |
281 print OUT '\fancyfoot[L]{Coverage Report for ``'.$samplename.'"}'."\n"; | |
282 print OUT '\let\oldsubsubsection=\subsubsection'."\n"; | |
283 print OUT '\renewcommand{\subsubsection}{%'."\n"; | |
284 print OUT ' \filbreak'."\n"; | |
285 print OUT ' \oldsubsubsection'."\n"; | |
286 print OUT '}'."\n"; | |
287 # main title | |
288 print OUT '\section*{Coverage Report for ``'.$samplename.'"}'."\n"; | |
289 close OUT; | |
290 | |
291 # 1. Summary Report | |
292 # Get samtools flagstat summary of BAM file | |
293 my $flagstat = `samtools flagstat $opts{'b'}`; | |
294 my @s = split(/\n/,$flagstat); | |
295 # Get number of reads mapped in total | |
296 ## updated on 2012-10-1 !! | |
297 $totalmapped = $s[2]; | |
298 $totalmapped =~ s/^(\d+)(\s.+)/$1/; | |
299 # count columns | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
300 my $head = `head -n 1 $tcov`; |
1 | 301 chomp($head); |
302 my @cols = split(/\t/,$head); | |
303 my $nrcols = scalar(@cols); | |
304 my $covcol = $nrcols - 3; | |
305 # get min/max/median/average coverage => values | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
306 my $covs = `cut -f $covcol $tcov`; |
1 | 307 my @coverages = split(/\n/,$covs); |
308 my ($eavg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@coverages); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
309 my $spec = ''; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
310 if ($totalmapped != 0 && $totalmapped ne '') { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
311 $spec = sprintf("%.1f",($ontarget / $totalmapped)*100); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
312 } |
1 | 313 # get min/max/median/average coverage => boxplot in R |
314 open OUT, ">$wd/Rout/boxplot.R"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
315 print OUT 'coverage <- read.table("'.$tcov.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; |
1 | 316 print OUT 'coverage <- coverage[,'.$covcol.']'."\n"; |
22
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents:
12
diff
changeset
|
317 print OUT 'png(file="../Plots/CoverageBoxPlot.png", bg="white", width=240, height=480,type=c("cairo"))'."\n"; |
1 | 318 print OUT 'boxplot(coverage,range=1.5,main="Target Region Coverage")'."\n"; |
319 print OUT 'graphics.off()'."\n"; | |
320 close OUT; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
321 print "Running boxplot.R : \n"; |
12
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents:
11
diff
changeset
|
322 system("cd $wd/Rout && Rscript boxplot.R"); |
1 | 323 |
324 ## global nt coverage plot | |
325 ## use perl to make histogram (lower memory) | |
326 open IN, "$wd/Targets.Position.Coverage"; | |
327 my %dens; | |
328 my $counter = 0; | |
329 my $sum = 0; | |
330 while (<IN>) { | |
331 chomp(); | |
332 my @p = split(/\t/); | |
333 $sum += $p[-1]; | |
334 $counter++; | |
335 if (defined($dens{$p[-1]})) { | |
336 $dens{$p[-1]}++; | |
337 } | |
338 else { | |
339 $dens{$p[-1]} = 1; | |
340 } | |
341 } | |
342 $avg = $sum/$counter; | |
343 close IN; | |
344 open OUT, ">$wd/Rout/hist.txt"; | |
3 | 345 if (!defined($dens{'0'})) { |
346 $dens{'0'} = 0; | |
347 } | |
1 | 348 foreach (keys(%dens)) { |
349 print OUT "$_;$dens{$_}\n"; | |
350 } | |
351 close OUT; | |
352 open OUT, ">$wd/Rout/ntplot.R"; | |
353 # read coverage hist in R to plot | |
354 print OUT 'coverage <- read.table("hist.txt" , as.is = TRUE, header=FALSE,sep=";")'."\n"; | |
355 print OUT 'mincov <- '."$thresh \n"; | |
356 print OUT "avg <- round($avg)\n"; | |
357 print OUT "colnames(coverage) <- c('cov','count')\n"; | |
358 print OUT 'coverage$cov <- coverage$cov / avg'."\n"; | |
359 print OUT 'rep <- which(coverage$cov > 1)'."\n"; | |
360 print OUT 'coverage[coverage$cov > 1,1] <- 1'."\n"; | |
361 print OUT 'values <- coverage[coverage$cov < 1,]'."\n"; | |
362 print OUT 'values <- rbind(values,c(1,sum(coverage[coverage$cov == 1,"count"])))'."\n"; | |
363 print OUT 'values <- values[order(values$cov),]'."\n"; | |
364 print OUT 'prevcount <- 0'."\n"; | |
365 # make cumulative count data frame | |
366 print OUT 'for (i in rev(values$cov)) {'."\n"; | |
367 print OUT ' values[values$cov == i,"count"] <- prevcount + values[values$cov == i,"count"]'."\n"; | |
368 print OUT ' prevcount <- values[values$cov == i,"count"]'."\n"; | |
369 print OUT '}'."\n"; | |
370 print OUT 'values$count <- values$count / (values[values$cov == 0,"count"] / 100)'."\n"; | |
371 # get some values to plot lines. | |
372 print OUT 'mincov.x <- mincov/avg'."\n"; | |
373 print OUT 'if (mincov/avg <= 1) {'."\n"; | |
374 print OUT ' ii <- which(values$cov == mincov.x)'."\n"; | |
375 print OUT ' if (length(ii) == 1) {'."\n"; | |
376 print OUT ' mincov.y <- values[ii[1],"count"]'."\n"; | |
377 print OUT ' } else {'."\n"; | |
378 print OUT ' i1 <- max(which(values$cov < mincov.x))'."\n"; | |
379 print OUT ' i2 <- min(which(values$cov > mincov.x))'."\n"; | |
380 print OUT ' mincov.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(mincov.x - values[i1,"cov"]) + values[i1,"count"]'."\n"; | |
381 print OUT ' }'."\n"; | |
382 print OUT '}'."\n"; | |
383 # open output image and create plot | |
22
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents:
12
diff
changeset
|
384 print OUT 'png(file="../Plots/CoverageNtPlot.png", bg="white", width=540, height=480,type=c("cairo"))'."\n"; |
1 | 385 print OUT 'par(xaxs="i",yaxs="i")'."\n"; |
386 print OUT 'plot(values$cov,values$count,ylim=c(0,100),pch=".",main="Cumulative Normalised Base-Coverage Plot",xlab="Normalizalised Coverage",ylab="Cumulative Nr. Of Bases")'."\n"; | |
387 print OUT 'lines(values$cov,values$count)'."\n"; | |
388 print OUT 'if (mincov.x <= 1) {'."\n"; | |
389 print OUT ' lines(c(mincov.x,mincov.x),c(0,mincov.y),lty=2,col="darkgreen")'."\n"; | |
390 print OUT ' lines(c(0,mincov.x),c(mincov.y,mincov.y),lty=2,col="darkgreen")'."\n"; | |
391 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold: '.$thresh.'x")'."\n"; | |
392 print OUT ' text(1,(91),pos=2,col="darkgreen",labels=paste("%Bases: ",round(mincov.y,2),"%",sep=""))'."\n"; | |
393 print OUT '} else {'."\n"; | |
394 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold ('.$thresh.'x) > Average")'."\n"; | |
395 print OUT ' text(1,(91),pos=2,col="darkgreen",labels="Plotting impossible")'."\n"; | |
396 print OUT '}'."\n"; | |
397 print OUT 'frac.x <- '."$frac\n"; | |
398 print OUT 'ii <- which(values$cov == frac.x)'."\n"; | |
399 print OUT 'if (length(ii) == 1) {'."\n"; | |
400 print OUT ' frac.y <- values[ii[1],"count"]'."\n"; | |
401 print OUT '} else {'."\n"; | |
402 print OUT ' i1 <- max(which(values$cov < frac.x))'."\n"; | |
403 print OUT ' i2 <- min(which(values$cov > frac.x))'."\n"; | |
404 print OUT ' frac.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(frac.x - values[i1,"cov"]) + values[i1,"count"]'."\n"; | |
405 print OUT '}'."\n"; | |
406 print OUT 'lines(c(frac.x,frac.x),c(0,frac.y),lty=2,col="red")'."\n"; | |
407 print OUT 'lines(c(0,frac.x),c(frac.y,frac.y),lty=2,col="red")'."\n"; | |
408 #iprint OUT 'text((frac.x+0.05),(frac.y - 2),pos=4,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n"; | |
409 #print OUT 'text((frac.x+0.05),(frac.y-5),pos=4,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n"; | |
410 print OUT 'text(1,86,pos=2,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n"; | |
411 print OUT 'text(1,82,pos=2,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n"; | |
412 | |
413 print OUT 'graphics.off()'."\n"; | |
414 | |
415 close OUT; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
416 print "Running ntplot.r\n"; |
12
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents:
11
diff
changeset
|
417 system("cd $wd/Rout && Rscript ntplot.R"); |
1 | 418 ## PRINT TO .TEX FILE |
419 open OUT, ">>$wd/Report/Report.tex"; | |
420 # average coverage overviews | |
421 print OUT '\subsection*{Overall Summary}'."\n"; | |
422 print OUT '{\small '; | |
423 # left : boxplot | |
424 print OUT '\begin{minipage}{0.3\linewidth}\centering'."\n"; | |
425 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageBoxPlot.png}'."\n"; | |
426 print OUT '\end{minipage}'."\n"; | |
427 # right : cum.cov.plot | |
428 print OUT '\hspace{0.6cm}'."\n"; | |
429 print OUT '\begin{minipage}{0.65\linewidth}\centering'."\n"; | |
430 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageNtPlot.png}'."\n"; | |
431 print OUT '\end{minipage} \\\\'."\n"; | |
432 ## next line | |
433 print OUT '\begin{minipage}{0.48\linewidth}'."\n"; | |
434 print OUT '\vspace{-1.2em}'."\n"; | |
435 print OUT '\begin{tabular}{ll}'."\n"; | |
436 # bam statistics | |
437 print OUT '\multicolumn{2}{l}{\textbf{\underline{Samtools Flagstat Summary}}} \\\\'."\n"; | |
438 foreach (@s) { | |
439 $_ =~ m/^(\d+)\s(.+)$/; | |
440 my $one = $1; | |
441 my $two = $2; | |
442 $two =~ s/\s\+\s0\s//; | |
443 $two = ucfirst($two); | |
444 $one =~ s/%/\\%/g; | |
445 # remove '+ 0 ' from front | |
446 $two =~ s/\+\s0\s//; | |
447 # remove trailing from end | |
448 $two =~ s/(\s\+.*)|(:.*)/\)/; | |
449 $two =~ s/%/\\%/g; | |
450 $two =~ s/>=/\$\\ge\$/g; | |
451 $two = ucfirst($two); | |
452 print OUT '\textbf{'.$two.'} & '.$one.' \\\\'."\n"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
453 |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
454 |
1 | 455 } |
456 print OUT '\end{tabular}\end{minipage}'."\n"; | |
457 print OUT '\hspace{1.5cm}'."\n"; | |
458 # target coverage statistics | |
459 print OUT '\begin{minipage}{0.4\linewidth}'."\n"; | |
460 #print OUT '\vspace{-4.8em}'."\n"; | |
461 print OUT '\begin{tabular}{ll}'."\n"; | |
462 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Region Coverage}}} \\\\'."\n"; | |
463 print OUT '\textbf{Number of Target Regions} & '.scalar(@coverages).' \\\\'."\n"; | |
464 print OUT '\textbf{Minimal Region Coverage} & '.$min.' \\\\'."\n"; | |
465 print OUT '\textbf{25\% Region Coverage} & '.$first.' \\\\'. "\n"; | |
466 print OUT '\textbf{50\% (Median) Region Coverage} & '.$med.' \\\\'. "\n"; | |
467 print OUT '\textbf{75\% Region Coverage} & '.$third.' \\\\'. "\n"; | |
468 print OUT '\textbf{Maximal Region Coverage} & '.$max.' \\\\'. "\n"; | |
469 print OUT '\textbf{Average Region Coverage} & '.int($eavg).' \\\\'. "\n"; | |
470 print OUT '\textbf{Mapped On Target} & '.$spec.' \\\\'."\n"; | |
471 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Base Coverage }}} \\\\'."\n"; | |
472 print OUT '\textbf{Number of Target Bases} & '.$counter.' \\\\'."\n"; | |
473 print OUT '\textbf{Average Base Coverage} & '.int($avg).' \\\\'. "\n"; | |
474 print OUT '\textbf{Non-Covered Bases} & '.$dens{'0'}.' \\\\'."\n"; | |
475 #print OUT '\textbf{Bases Covered $ge$ '.$frac.'xAvg.Cov} & '. | |
476 print OUT '\end{tabular}\end{minipage}}'."\n"; | |
477 close OUT; | |
478 | |
479 # 2. GLOBAL COVERAGE OVERVIEW PER GENE | |
480 @failedexons; | |
481 @allexons; | |
482 @allregions; | |
483 @failedregions; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
484 %failednames; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
485 %allnames; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
486 |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
487 if (exists($opts{'r'}) || exists($opts{'s'}) || exists($opts{'S'})|| exists($opts{'A'})) { |
1 | 488 # count columns |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
489 my $head = `head -n 1 '$tcov'`; |
1 | 490 chomp($head); |
491 my @cols = split(/\t/,$head); | |
492 my $nrcols = scalar(@cols); | |
493 my $covcol = $nrcols - 3; | |
494 # Coverage Plots for each gene => barplots in R, table here. | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
495 open IN, "$tcov"; |
1 | 496 my $currgroup = ''; |
497 my $startline = 0; | |
498 my $stopline = 0; | |
499 $linecounter = 0; | |
500 while (<IN>) { | |
501 $linecounter++; | |
502 chomp($_); | |
503 my @c = split(/\t/,$_); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
504 my $reg = $c[0].'-'.$c[1].'-'.$c[2]; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
505 push(@allregions,$reg); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
506 my $group = $reg .": ".$c[3]; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
507 #my $gene = $c[3]; |
1 | 508 ## coverage failure? |
509 if ($c[$nrcol-1] < 1 || $c[$covcol-1] < $thresh) { | |
510 push(@failedexons,$group); | |
511 push(@failedregions,$c[0].'-'.$c[1].'-'.$c[2]); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
512 $failednames{$group} = $c[0].'-'.$c[1].'-'.$c[2]; |
1 | 513 } |
514 ## store exon | |
515 push(@allexons,$group); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
516 $allnames{$group} = $c[0].'-'.$c[1].'-'.$c[2]; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
517 if (!exists($opts{'r'}) && !exists($opts{'s'}) && !exists($opts{'S'}) && exists($opts{'A'})) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
518 ## no need for barplots |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
519 next; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
520 } |
1 | 521 ## extract and check gene |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
522 my $gene = $group; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
523 $gene =~ s/^chr\S+: (\S+)[\|\s](.+)/$1/; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
524 if ($gene ne $currgroup ) { |
1 | 525 if ($currgroup ne '') { |
526 # new gene, make plot. | |
527 open OUT, ">$wd/Rout/barplot.R"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
528 print OUT 'coveragetable <- read.table("'.$tcov.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; |
1 | 529 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n"; |
530 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n"; | |
531 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n"; | |
532 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
533 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n"; | |
534 # coverage not whole target region => orange | |
535 print OUT 'covperc <- coveragetable[c('.$startline.':'.$stopline.'),'.$nrcols.']'."\n"; | |
536 print OUT 'colors[covperc<1] <- "orange"'."\n"; | |
537 # coverage below threshold => red | |
538 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n"; | |
539 | |
540 if ($stopline - $startline > 20) { | |
541 $scale = 2; | |
542 } | |
543 else { | |
544 $scale = 1; | |
545 } | |
546 my $width = 480 * $scale; | |
547 my $height = 240 * $scale; | |
22
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents:
12
diff
changeset
|
548 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; |
1 | 549 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n"; |
550 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)",ylim=ylim)'."\n"; | |
551 print OUT 'text(mp, log10(coverage) + '.(0.4/$scale).',format(coverage),xpd = TRUE,srt=90)'."\n"; | |
552 print OUT 'text(mp,par("usr")[3]-0.05,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n"; | |
553 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
554 print OUT 'graphics.off()'."\n"; | |
555 close OUT; | |
12
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents:
11
diff
changeset
|
556 system("cd $wd/Rout && Rscript barplot.R"); |
1 | 557 if ($scale == 1) { |
558 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
559 } | |
560 else { | |
561 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
562 } | |
563 | |
564 } | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
565 $currgroup = $gene; |
1 | 566 $startline = $linecounter; |
567 } | |
568 $stopline = $linecounter; | |
569 } | |
570 close IN; | |
571 if ($currgroup ne '') { | |
572 # last gene, make plot. | |
573 open OUT, ">$wd/Rout/barplot.R"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
574 print OUT 'coveragetable <- read.table("'.$tcov.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; |
1 | 575 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n"; |
576 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n"; | |
577 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n"; | |
578 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
579 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n"; | |
580 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n"; | |
581 | |
582 if ($stopline - $startline > 20) { | |
583 $scale = 2; | |
584 } | |
585 else { | |
586 $scale = 1; | |
587 } | |
588 my $width = 480 * $scale; | |
589 my $height = 240 * $scale; | |
22
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents:
12
diff
changeset
|
590 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; |
1 | 591 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n"; |
592 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)", ylim=ylim)'."\n"; | |
593 print OUT 'text(mp, log10(coverage) + log10(2),format(coverage),xpd = TRUE,srt=90)'."\n"; | |
594 print OUT 'text(mp,par("usr")[3]-0.1,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n"; | |
595 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
596 print OUT 'graphics.off()'."\n"; | |
597 close OUT; | |
12
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents:
11
diff
changeset
|
598 system("cd $wd/Rout && Rscript barplot.R"); |
1 | 599 if ($scale == 1) { |
600 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
601 } | |
602 else { | |
603 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}'); | |
604 } | |
605 } | |
606 ## print to TEX | |
607 open OUT, ">>$wd/Report/Report.tex"; | |
608 print OUT '\subsection*{Gene Summaries}'."\n"; | |
609 print OUT '\underline{Legend:} \\\\'."\n"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
610 print OUT '{\color{red}\textbf{RED:} Average coverage did not reach set threshold of '.$thresh.'} \\\\'."\n"; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
611 print OUT '{\color{orange}\textbf{ORANGE:} Coverage was incomplete for the exon (section with zero coverage found). Overruled by red.} \\\\' ."\n"; |
1 | 612 $col = 1; |
613 foreach (@small) { | |
614 if ($col > 2) { | |
615 $col = 1; | |
616 print OUT "\n"; | |
617 } | |
618 print OUT '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
619 print OUT $_."\n"; | |
620 print OUT '\end{minipage}'."\n"; | |
621 $col++; | |
622 } | |
623 ## new line | |
624 if ($col == 2) { | |
625 print OUT '\\\\'." \n"; | |
626 } | |
627 foreach(@large) { | |
628 print OUT $_."\n"; | |
629 } | |
630 close OUT; | |
631 | |
632 } | |
633 | |
634 # 3. Detailed overview of failed exons (globally failed) | |
635 if (exists($opts{'s'})) { | |
636 # count columns | |
637 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
638 chomp($head); | |
639 my @cols = split(/\t/,$head); | |
640 my $nrcols = scalar(@cols); | |
641 my $covcol = $nrcols; | |
642 my $poscol = $nrcols -1; | |
643 # tex section header | |
644 open TEX, ">>$wd/Report/Report.tex"; | |
645 print TEX '\subsection*{Failed Exon Plots}'."\n"; | |
646 $col = 1; | |
647 print TEX '\underline{NOTE:} Only exons with global coverage $<$'.$thresh.' or incomplete coverage were plotted \\\\'."\n"; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
648 foreach(sort(keys(%failednames)) ) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
649 #foreach(@failedregions) { |
1 | 650 if ($col > 2) { |
651 $col = 1; | |
652 print TEX "\n"; | |
653 } | |
654 # which exon | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
655 my $group = $_; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
656 my ($region,$name) = split(/: /,$group); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
657 #my $region = $failednames{$_}; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
658 my $exon = $filehash{$group}{'exon'}; |
1 | 659 # link exon to tmp file |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
660 my $exonfile = "$wd/SplitFiles/File_".$filehash{$group}{'idx'}.".txt"; |
1 | 661 ## determine transcript orientation and location |
662 my $firstline = `head -n 1 $exonfile`; | |
663 my @firstcols = split(/\t/,$firstline); | |
664 my $orient = $firstcols[5]; | |
665 my $genomicchr = $firstcols[0]; | |
666 my $genomicstart = $firstcols[1]; | |
667 my $genomicstop = $firstcols[2]; | |
668 if ($orient eq '+') { | |
669 $bps = $genomicstop - $genomicstart + 1; | |
670 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | |
671 } | |
672 else { | |
673 $bps = $genomicstop - $genomicstart + 1; | |
674 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | |
675 } | |
676 # print Rscript | |
677 open OUT, ">$wd/Rout/exonplot.R"; | |
678 print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
679 print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n"; | |
680 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
681 print OUT 'positions <- coveragetable[,'.$poscol.']'."\n"; | |
682 | |
683 my $width = 480 ; | |
684 my $height = 240 ; | |
685 my $exonstr = $exon; | |
686 $exonstr =~ s/\s/_/g; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
687 $exonstr =~ s/:/_/g; |
1 | 688 $exon =~ s/_/ /g; |
689 $exon =~ s/\|/ /g; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
690 $exon =~ s/chr.*: (.*)$/$1/; |
22
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents:
12
diff
changeset
|
691 print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; |
1 | 692 print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n"; |
693 if ($orient eq '-') { | |
694 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n"; | |
695 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
696 } | |
697 else { | |
698 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n"; | |
699 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
700 } | |
701 print OUT 'lines(positions,log10(coverage))'."\n"; | |
702 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
703 print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n"; | |
704 print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n"; | |
705 print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n"; | |
706 print OUT 'graphics.off()'."\n"; | |
707 close OUT; | |
708 # run R script | |
12
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents:
11
diff
changeset
|
709 system("cd $wd/Rout && Rscript exonplot.R"); |
1 | 710 # Add to .TEX |
711 print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
712 print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n"; | |
713 print TEX '\end{minipage}'."\n"; | |
714 $col++; | |
715 } | |
716 } | |
717 | |
718 ## plot failed (subregion) or all exons | |
719 if (exists($opts{'S'}) || exists($opts{'A'})) { | |
720 # count columns | |
721 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
722 chomp($head); | |
723 my @cols = split(/\t/,$head); | |
724 my $nrcols = scalar(@cols); | |
725 my $covcol = $nrcols; | |
726 my $poscol = $nrcols -1; | |
727 # tex section header | |
728 open TEX, ">>$wd/Report/Report.tex"; | |
729 print TEX '\subsection*{Failed Exon Plots}'."\n"; | |
730 if (exists($opts{'S'})) { | |
731 print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n"; | |
732 } | |
733 elsif (exists($opts{'A'})) { | |
734 print TEX '\underline{NOTE:} ALL exons are plotted, regardless of coverage \\\\'."\n"; | |
735 } | |
736 $col = 1; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
737 foreach(sort(keys(%allnames))) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
738 |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
739 #foreach(@allregions) { |
1 | 740 if ($col > 2) { |
741 $col = 1; | |
742 print TEX "\n"; | |
743 } | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
744 my $group = $_; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
745 my ($region,$name) = split(/: /,$group); |
1 | 746 # which exon |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
747 #my $region = $_; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
748 #my $region = $allnames{$_}; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
749 my $exon = $filehash{$group}{'exon'}; |
1 | 750 # grep exon to tmp file |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
751 my $exonfile = "$wd/SplitFiles/File_".$filehash{$group}{'idx'}.".txt"; |
1 | 752 ## determine transcript orientation. |
753 my $firstline = `head -n 1 $exonfile`; | |
754 my @firstcols = split(/\t/,$firstline); | |
755 my $orient = $firstcols[5]; | |
756 my $genomicchr = $firstcols[0]; | |
757 my $genomicstart = $firstcols[1]; | |
758 my $genomicstop = $firstcols[2]; | |
759 | |
760 if ($orient eq '+') { | |
761 $bps = $genomicstop - $genomicstart + 1; | |
762 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); | |
763 | |
764 } | |
765 else { | |
766 $bps = $genomicstop - $genomicstart + 1; | |
767 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); | |
768 | |
769 } | |
770 | |
771 # check if failed | |
772 if (exists($opts{'S'})) { | |
773 my $cs = `cut -f $covcol '$exonfile' `; | |
774 my @c = split(/\n/,$cs); | |
775 @c = sort { $a <=> $b } @c; | |
776 if ($c[0] >= $thresh) { | |
777 # lowest coverage > threshold => skip | |
778 next; | |
779 } | |
780 } | |
781 # print Rscript | |
782 open OUT, ">$wd/Rout/exonplot.R"; | |
783 print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n"; | |
784 print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n"; | |
785 print OUT 'coverage[coverage < 1] <- 1'."\n"; | |
786 print OUT 'positions <- coveragetable[,'.$poscol.']'."\n"; | |
787 my $width = 480 ; | |
788 my $height = 240 ; | |
789 my $exonstr = $exon; | |
790 $exonstr =~ s/\s/_/g; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
791 $exonstr =~ s/:/_/g; |
1 | 792 $exon =~ s/_/ /g; |
793 $exon =~ s/\|/ /g; | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
794 $exon =~ s/^chr.*: (.*)$/$1/; |
22
95062840f80f
Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents:
12
diff
changeset
|
795 print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n"; |
1 | 796 print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n"; |
797 if ($orient eq '-') { | |
798 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n"; | |
799 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
800 } | |
801 else { | |
802 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n"; | |
803 print OUT 'mtext("'.$subtitle.'")'."\n"; | |
804 } | |
805 | |
806 print OUT 'lines(positions,log10(coverage))'."\n"; | |
807 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n"; | |
808 print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n"; | |
809 print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n"; | |
810 print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n"; | |
811 print OUT 'graphics.off()'."\n"; | |
812 close OUT; | |
813 # run R script | |
12
86df3f847a72
Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents:
11
diff
changeset
|
814 system("cd $wd/Rout && Rscript exonplot.R"); |
1 | 815 # Add to .TEX |
816 print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n"; | |
817 print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n"; | |
818 print TEX '\end{minipage}'."\n"; | |
819 $col++; | |
820 } | |
821 } | |
822 ## list failed exons | |
823 if (exists($opts{'L'})) { | |
824 # count columns | |
825 my $head = `head -n 1 $wd/Targets.Position.Coverage`; | |
826 chomp($head); | |
827 my @cols = split(/\t/,$head); | |
828 my $nrcols = scalar(@cols); | |
829 my $covcol = $nrcols; | |
830 my $poscol = $nrcols -1; | |
831 ## hash to print | |
832 # tex section header | |
833 open TEX, ">>$wd/Report/Report.tex"; | |
834 print TEX '\subsection*{List of Failed Exons}'."\n"; | |
835 print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n"; | |
836 print TEX '{\footnotesize\begin{longtable}[l]{@{\extracolsep{\fill}}llll}'."\n".'\hline'."\n"; | |
837 print TEX '\textbf{Target Name} & \textbf{Genomic Position} & \textbf{Avg.Coverage} & \textbf{Min.Coverage} \\\\'."\n".'\hline'."\n"; | |
838 print TEX '\endhead'."\n"; | |
839 print TEX '\hline '."\n".'\multicolumn{4}{r}{{\textsl{\footnotesize Continued on next page}}} \\\\ '."\n".'\hline' ."\n". '\endfoot' . "\n". '\endlastfoot' . "\n"; | |
840 | |
841 $col = 1; | |
842 open IN, "$wd/Targets.Global.Coverage"; | |
843 while (<IN>) { | |
844 chomp($_); | |
845 my @p = split(/\t/,$_); | |
846 my $region = $p[0].'-'.$p[1].'-'.$p[2]; | |
847 my $exon = $filehash{$region}{'exon'}; | |
848 # grep exon to tmp file | |
849 my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt"; | |
850 ## determine transcript orientation. | |
851 my $firstline = `head -n 1 $exonfile`; | |
852 my @firstcols = split(/\t/,$firstline); | |
853 my $orient = $firstcols[5]; | |
854 my $genomicchr = $firstcols[0]; | |
855 my $genomicstart = $firstcols[1]; | |
856 my $genomicstop = $firstcols[2]; | |
857 | |
858 if ($orient eq '+') { | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
859 #$bps = $genomicstop - $genomicstart + 1; |
1 | 860 $subtitle = "$genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop); |
861 | |
862 } | |
863 else { | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
864 #$bps = $genomicstop - $genomicstart + 1; |
1 | 865 $subtitle = "$genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop); |
866 } | |
867 | |
868 # check if failed | |
869 my $cs = `cut -f $covcol '$exonfile' `; | |
870 my @c = split(/\n/,$cs); | |
871 my ($avg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@c); | |
872 | |
873 if ($min >= $thresh) { | |
874 # lowest coverage > threshold => skip | |
875 next; | |
876 } | |
877 | |
878 # print to .tex table | |
879 if (length($exon) > 30) { | |
880 $exon = substr($exon,0,27) . '...'; | |
881 } | |
882 $exon =~ s/_/ /g; | |
883 $exon =~ s/\|/ /g; | |
884 | |
885 print TEX "$exon & $subtitle & ".int($avg)." & $min ".'\\\\'."\n"; | |
886 } | |
887 close IN; | |
888 print TEX '\hline'."\n"; | |
889 print TEX '\end{longtable}}'."\n"; | |
890 close TEX; | |
891 } | |
892 | |
893 | |
894 ## Close document | |
895 open OUT, ">>$wd/Report/Report.tex"; | |
896 print OUT '\label{endofdoc}'."\n"; | |
897 print OUT '\end{document}'."\n"; | |
898 close OUT; | |
899 system("cd $wd/Report && pdflatex Report.tex > /dev/null 2>&1 && pdflatex Report.tex > /dev/null 2>&1 "); | |
900 | |
901 ## mv report to output file | |
902 system("cp -f $wd/Report/Report.pdf '$pdffile'"); | |
903 ##create tar.gz file | |
904 system("mkdir $wd/Results"); | |
905 system("cp -Rf $wd/Plots $wd/Results/"); | |
906 system("cp -Rf $wd/Report/ $wd/Results/"); | |
907 if (-e "$wd/Targets.Global.Coverage") { | |
908 system("cp -Rf $wd/Targets.Global.Coverage $wd/Results/"); | |
909 } | |
910 if (-e "$wd/Targets.Position.Coverage") { | |
911 system("cp -Rf $wd/Targets.Position.Coverage $wd/Results/"); | |
912 } | |
913 | |
914 system("cd $wd && tar czf '$tarfile' Results/"); | |
915 ## clean up (galaxy stores outside wd) | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
916 #system("rm -Rf $wd"); |
1 | 917 ############### |
918 ## FUNCTIONS ## | |
919 ############### | |
920 sub arraystats{ | |
921 my @array = @_; | |
922 my $count = scalar(@array); | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
923 if ($count == 0 ) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
924 return (0,0,0,0,0,0,0); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
925 } |
1 | 926 @array = sort { $a <=> $b } @array; |
927 # median | |
928 my $median = 0; | |
929 if ($count % 2) { | |
930 $median = $array[int($count/2)]; | |
931 } else { | |
932 $median = ($array[$count/2] + $array[$count/2 - 1]) / 2; | |
933 } | |
934 # average | |
935 my $sum = 0; | |
936 foreach (@array) { $sum += $_; } | |
937 my $average = $sum / $count; | |
938 # quantiles (rounded) | |
939 my $quart = int($count/4) ; | |
940 my $first = $array[$quart]; | |
941 my $third = $array[($quart*3)]; | |
942 my $min = $array[0]; | |
943 my $max = $array[($count-1)]; | |
944 return ($average,$median,$min,$max,$first,$third,$sum); | |
945 } | |
946 | |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
947 sub GetStats { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
948 my ($aref) = @_; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
949 if (scalar(@$aref) == 0) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
950 return qw/0 0/; |
1 | 951 } |
26
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
952 # median |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
953 my @s = sort {$a <=> $b } @$aref; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
954 my $nrzero = 0; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
955 my $len = scalar(@s); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
956 for (my $i = 0; $i< $len;$i++) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
957 if ($s[$i] == 0) { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
958 $nrzero++; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
959 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
960 else { |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
961 last; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
962 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
963 } |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
964 my $nrcov = $len - $nrzero; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
965 # avg |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
966 my $avg = 0; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
967 foreach (@s) { $avg += $_ }; |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
968 $avg = sprintf("%.1f",($avg / scalar(@s))); |
859999cb135b
revised routines. Better handling of collapsing, isoforms and mutiple mapppings. Barplots revised to visualise average base coverage in exon instead of total number of reads in exon
geert-vandeweyer
parents:
25
diff
changeset
|
969 return($avg,$len,$nrcov); |
1 | 970 } |
971 | |
972 | |
973 sub SubRegionCoverage { | |
974 my ($bam,$targets) = @_; | |
975 my $command = "cd $wd && coverageBed -abam $bam -b $targets -d > $wd/Targets.Position.Coverage"; | |
976 system($command); | |
977 $commandsrun{$command} = 1; | |
978 } | |
979 |