annotate CoverageReport.pl @ 24:fd788f9db899 draft

Added (default) option to collapse repetitive bed files
author geert-vandeweyer
date Thu, 12 Feb 2015 08:51:37 -0500
parents 95062840f80f
children 6cb012c8497a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/perl
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
2
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
3 # load modules
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
4 use Getopt::Std;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
5 use File::Basename;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
6 use Number::Format;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
7
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
8 # number format
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
9 my $de = Number::Format->new(-thousands_sep =>',',-decimal_point => '.'); # formatter for report numbers; direct method call instead of the ambiguous indirect-object "new Class" syntax
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
10
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
11 ##########
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
12 ## opts ##
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
13 ##########
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
14 ## input files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
15 # b : path to input (b)am file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
16 # t : path to input (t)arget regions in BED format
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
17 ## output files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
18 # o : report pdf (o)utput file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
19 # z : all plots and tables in tar.g(z) format
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
20 ## entries in the report
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
21 # r : Coverage per (r)egion (boolean)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
22 # s : (s)ubregion coverage if average < specified (plots for positions along target region) (boolean)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
23 # S : (S)ubregion coverage for ALL failed exons => use either s OR S or you will have double plots.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
24 # A : (A)ll exons will be plotted.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
25 # L : (L)ist failed exons instead of plotting
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
26 # m : (m)inimal Coverage threshold
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
27 # f : fraction of average as threshold
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
28 # n : sample (n)ame.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
29
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
30
24
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
31 getopts('b:t:o:z:rsSALm:n:f:T', \%opts) ;
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
32
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
33 # make output directory in (tmp) working dir
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
34 our $wd = "/tmp/Coverage.".int(rand(1000));
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
35 while (-d $wd) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
36 $wd = "/tmp/Coverage.".int(rand(1000));
24
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
37
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
38 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
39 mkdir($wd) or die("Could not create working directory $wd: $!"); # builtin mkdir: no shell involved, and a failure (e.g. name taken between the -d test and here) is fatal instead of silently ignored
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
40
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
41 ## variables
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
42 our %commandsrun = ();
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
43
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
44 if (!exists($opts{'b'}) || !-e $opts{'b'}) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
45 die('Bam File not found');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
46 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
47 if (!exists($opts{'t'}) || !-e $opts{'t'}) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
48 die('Target File (BED) not found');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
49 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
50
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
51 if (exists($opts{'m'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
52 $thresh = $opts{'m'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
53 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
54 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
55 $thresh = 40;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
56 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
57
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
58 if (exists($opts{'f'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
59 $frac = $opts{'f'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
60 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
61 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
62 $frac = 0.2;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
63 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
64
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
65 if (exists($opts{'o'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
66 $pdffile = $opts{'o'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
67 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
68 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
69 $pdffile = "$wd/CoverageReport.pdf";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
70 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
71
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
72 if (exists($opts{'z'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
73 $tarfile = $opts{'z'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
74 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
75 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
76 $tarfile = "$wd/Results.tar.gz";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
77 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
78
24
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
79 ## 0. Collapse overlapping target regions.
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
80 if (defined($opts{'T'})) {
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
81 my $targets = $opts{'t'};
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
82 my $tmptargets = "$wd/collapsedtargets.bed";
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
83 system('sort', '-k1,1', '-k2,2n', '-o', "$wd/sorted.targets.bed", $targets) == 0 or die("Sorting target regions failed: $?"); # list form keeps the BED path away from the shell; -o replaces the '>' redirect; stop if sort fails rather than merging an empty file
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
84 system("bedtools merge -s -scores max -nms -i $wd/sorted.targets.bed > $tmptargets");
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
85 $opts{'t'} = $tmptargets;
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
86 }
fd788f9db899 Added (default) option to collapse repetitive bed files
geert-vandeweyer
parents: 22
diff changeset
87
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
88 # 1. Global Summary => default
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
89 &GlobalSummary($opts{'b'}, $opts{'t'});
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
90
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
91 # 2. Coverage per position
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
92 &SubRegionCoverage($opts{'b'}, $opts{'t'});
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
93 our %filehash;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
94 if (exists($opts{'s'}) || exists($opts{'S'}) || exists($opts{'A'}) || exists($opts{'L'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
95 mkdir("$wd/SplitFiles") or die("Could not create $wd/SplitFiles: $!"); # per-region split files are written here; fail early if the directory cannot be made
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
96 ## get position coverages
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
97 ## split input files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
98 open(IN, '<', "$wd/Targets.Position.Coverage") or die("Could not read $wd/Targets.Position.Coverage: $!"); # 3-arg open with check: an unreadable file would otherwise silently produce no split files
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
99 my $fileidx = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
100 my $currreg = '';
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
101 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
102 my $line = $_;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
103 chomp($line);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
104 my @p = split(/\t/,$line);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
105 my $reg = $p[0].'-'.$p[1].'-'.$p[2]; #.$p[3];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
106 my $ex = $p[3];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
107 if ($reg ne $currreg) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
108 ## new exon open new outfile
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
109 if ($currreg ne '') {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
110 ## filehandle is open. close it
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
111 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
112 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
113 if (!exists($filehash{$reg})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
114 $fileidx++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
115 $filehash{$reg}{'idx'} = $fileidx;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
116 $filehash{$reg}{'exon'} = $ex;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
117 open(OUT, '>>', "$wd/SplitFiles/File_$fileidx.txt") or die("Could not write $wd/SplitFiles/File_$fileidx.txt: $!"); # first file for this region; append mode kept from the original, failure is now fatal
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
118 $currreg = $reg;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
119 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
120 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
121 open(OUT, '>>', "$wd/SplitFiles/File_".$filehash{$reg}{'idx'}.".txt") or die("Could not write split file for region $reg: $!"); # region seen before: reopen its existing file for appending
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
122 $currreg = $reg;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
123 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
124 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
125 ## print the line to the open filehandle.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
126 print OUT "$line\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
127 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
128 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
129 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
130
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
131 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
132
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
133 ## sort output files according to targets file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
134 if (exists($opts{'r'}) ) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
135 my %hash = ();
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
136 open(IN, '<', "$wd/Targets.Global.Coverage") or die("Could not read $wd/Targets.Global.Coverage: $!"); # read the per-region coverage before it is rewritten in target-file order below
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
137 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
138 my @p = split(/\t/,$_) ;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
139 $hash{$p[3]} = $_;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
140 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
141 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
142 open(OUT, '>', "$wd/Targets.Global.Coverage") or die("Could not rewrite $wd/Targets.Global.Coverage: $!"); # truncates the file just read into %hash; abort rather than lose the data silently
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
143 open(IN, '<', $opts{'t'}) or die("Could not read target file $opts{'t'}: $!"); # 3-arg open: the user-supplied path can no longer be interpreted as an open mode or pipe
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
144 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
145 (my $tline = $_) =~ s/[\r\n]+$//; my @p = split(/\t/,$tline) ; # strip the line ending first: in a 4-column BED the name is the last field and would otherwise carry "\n", so the %hash lookup on the next line would never match
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
146 print OUT $hash{$p[3]};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
147 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
148 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
149 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
150 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
151
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
152
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
153 ####################################
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
154 ## PROCESS RESULTS & CREATE PLOTS ##
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
155 ####################################
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
156 mkdir("$wd/Report") or die("Could not create $wd/Report: $!"); # directory for the LaTeX report; builtin mkdir with failure check instead of an unchecked shell call
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
157
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
158 mkdir("$wd/Rout") or die("Could not create $wd/Rout: $!"); # directory for generated R scripts and their input tables
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
159 mkdir("$wd/Plots") or die("Could not create $wd/Plots: $!"); # directory the R scripts write their PNG plots into
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
160
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
161 $samplename = $opts{'n'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
162 $samplename =~ s/([_\$&%#])/\\$1/g; # escape LaTeX special characters for the report title/footer; the original handled only "_", so names containing & % # or $ broke compilation
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
163
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
164 # 0. Preamble
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
165 ## compose preamble
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
166 open(OUT, '>', "$wd/Report/Report.tex") or die("Could not write $wd/Report/Report.tex: $!"); # start the report file; everything up to the matching close is the LaTeX preamble and title
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
167 print OUT '\documentclass[a4paper,10pt]{article}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
168 print OUT '\usepackage[left=2cm,top=1.5cm,right=1.5cm,bottom=2.5cm,nohead]{geometry}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
169 print OUT '\usepackage{longtable}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
170 print OUT '\usepackage[T1]{fontenc}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
171 print OUT '\usepackage{fancyhdr}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
172 print OUT '\usepackage[latin9]{inputenc}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
173 print OUT '\usepackage{color}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
174 print OUT '\usepackage[pdftex]{graphicx}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
175 print OUT '\definecolor{grey}{RGB}{160,160,160}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
176 print OUT '\definecolor{darkgrey}{RGB}{100,100,100}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
177 print OUT '\definecolor{red}{RGB}{255,0,0}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
178 print OUT '\definecolor{orange}{RGB}{238,118,0}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
179 print OUT '\setlength\LTleft{0pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
180 print OUT '\setlength\LTright{0pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
181 print OUT '\begin{document}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
182 print OUT '\pagestyle{fancy}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
183 print OUT '\fancyhead{}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
184 print OUT '\renewcommand{\footrulewidth}{0.4pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
185 print OUT '\renewcommand{\headrulewidth}{0pt}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
186 print OUT '\fancyfoot[R]{\today\hspace{2cm}\thepage\ of \pageref{endofdoc}}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
187 print OUT '\fancyfoot[C]{}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
188 print OUT '\fancyfoot[L]{Coverage Report for ``'.$samplename.'"}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
189 print OUT '\let\oldsubsubsection=\subsubsection'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
190 print OUT '\renewcommand{\subsubsection}{%'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
191 print OUT ' \filbreak'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
192 print OUT ' \oldsubsubsection'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
193 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
194 # main title
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
195 print OUT '\section*{Coverage Report for ``'.$samplename.'"}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
196 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
197
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
198 # 1. Summary Report
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
199 # Get samtools flagstat summary of BAM file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
200 my $flagstat = `samtools flagstat $opts{'b'}`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
201 my @s = split(/\n/,$flagstat);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
202 # Get number of reads mapped in total
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
203 ## updated on 2012-10-1 !!
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
204 ($totalmapped) = grep { /^\d+ \+ \d+ mapped \(/ } @s; $totalmapped = $s[2] if (!defined($totalmapped)); # find the "mapped" line by content, since its position in flagstat output differs between samtools versions; fall back to the original fixed index
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
205 $totalmapped =~ s/^(\d+)(\s.+)/$1/;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
206 # count columns
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
207 my $head = `head -n 1 $wd/Targets.Global.Coverage`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
208 chomp($head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
209 my @cols = split(/\t/,$head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
210 my $nrcols = scalar(@cols);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
211 my $covcol = $nrcols - 3;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
212 # get min/max/median/average coverage => values
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
213 my $covs = `cut -f $covcol $wd/Targets.Global.Coverage`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
214 my @coverages = split(/\n/,$covs);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
215 my ($eavg,$med,$min,$max,$first,$third,$ontarget) = arraystats(@coverages);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
216 my $spec = ($totalmapped > 0) ? sprintf("%.1f",($ontarget / $totalmapped)*100) : '0.0'; # percentage of mapped reads that are on target; guard against a fatal division by zero when no mapped-read count is available
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
217 # get min/max/median/average coverage => boxplot in R
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
218 open(OUT, '>', "$wd/Rout/boxplot.R") or die("Could not write $wd/Rout/boxplot.R: $!"); # R script that draws the per-region coverage boxplot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
219 print OUT 'coverage <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
220 print OUT 'coverage <- coverage[,'.$covcol.']'."\n";
22
95062840f80f Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents: 12
diff changeset
221 print OUT 'png(file="../Plots/CoverageBoxPlot.png", bg="white", width=240, height=480,type=c("cairo"))'."\n";
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
222 print OUT 'boxplot(coverage,range=1.5,main="Target Region Coverage")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
223 print OUT 'graphics.off()'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
224 close OUT;
12
86df3f847a72 Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents: 11
diff changeset
225 system("cd $wd/Rout && Rscript boxplot.R");
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
226
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
227 ## global nt coverage plot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
228 ## use perl to make histogram (lower memory)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
229 open(IN, '<', "$wd/Targets.Position.Coverage") or die("Could not read $wd/Targets.Position.Coverage: $!"); # second pass over the per-position coverage, this time to build the depth histogram
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
230 my %dens;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
231 my $counter = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
232 my $sum = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
233 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
234 chomp();
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
235 my @p = split(/\t/);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
236 $sum += $p[-1];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
237 $counter++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
238 if (defined($dens{$p[-1]})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
239 $dens{$p[-1]}++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
240 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
241 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
242 $dens{$p[-1]} = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
243 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
244 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
245 die("No per-position coverage found in $wd/Targets.Position.Coverage") if ($counter == 0); $avg = $sum/$counter; # mean depth over all target positions; an empty input previously died with a bare "Illegal division by zero"
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
246 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
247 open(OUT, '>', "$wd/Rout/hist.txt") or die("Could not write $wd/Rout/hist.txt: $!"); # "depth;count" table that ntplot.R reads
3
39c374d4cba7 Uploaded
geert-vandeweyer
parents: 1
diff changeset
248 if (!defined($dens{'0'})) {
39c374d4cba7 Uploaded
geert-vandeweyer
parents: 1
diff changeset
249 $dens{'0'} = 0;
39c374d4cba7 Uploaded
geert-vandeweyer
parents: 1
diff changeset
250 }
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
251 foreach (keys(%dens)) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
252 print OUT "$_;$dens{$_}\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
253 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
254 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
255 open(OUT, '>', "$wd/Rout/ntplot.R") or die("Could not write $wd/Rout/ntplot.R: $!"); # R script for the cumulative normalised base-coverage plot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
256 # read coverage hist in R to plot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
257 print OUT 'coverage <- read.table("hist.txt" , as.is = TRUE, header=FALSE,sep=";")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
258 print OUT 'mincov <- '."$thresh \n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
259 print OUT "avg <- round($avg)\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
260 print OUT "colnames(coverage) <- c('cov','count')\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
261 print OUT 'coverage$cov <- coverage$cov / avg'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
262 print OUT 'rep <- which(coverage$cov > 1)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
263 print OUT 'coverage[coverage$cov > 1,1] <- 1'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
264 print OUT 'values <- coverage[coverage$cov < 1,]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
265 print OUT 'values <- rbind(values,c(1,sum(coverage[coverage$cov == 1,"count"])))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
266 print OUT 'values <- values[order(values$cov),]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
267 print OUT 'prevcount <- 0'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
268 # make cumulative count data frame
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
269 print OUT 'for (i in rev(values$cov)) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
270 print OUT ' values[values$cov == i,"count"] <- prevcount + values[values$cov == i,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
271 print OUT ' prevcount <- values[values$cov == i,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
272 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
273 print OUT 'values$count <- values$count / (values[values$cov == 0,"count"] / 100)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
274 # get some values to plot lines.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
275 print OUT 'mincov.x <- mincov/avg'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
276 print OUT 'if (mincov/avg <= 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
277 print OUT ' ii <- which(values$cov == mincov.x)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
278 print OUT ' if (length(ii) == 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
279 print OUT ' mincov.y <- values[ii[1],"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
280 print OUT ' } else {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
281 print OUT ' i1 <- max(which(values$cov < mincov.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
282 print OUT ' i2 <- min(which(values$cov > mincov.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
283 print OUT ' mincov.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(mincov.x - values[i1,"cov"]) + values[i1,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
284 print OUT ' }'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
285 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
286 # open output image and create plot
22
95062840f80f Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents: 12
diff changeset
287 print OUT 'png(file="../Plots/CoverageNtPlot.png", bg="white", width=540, height=480,type=c("cairo"))'."\n";
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
288 print OUT 'par(xaxs="i",yaxs="i")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
289 print OUT 'plot(values$cov,values$count,ylim=c(0,100),pch=".",main="Cumulative Normalised Base-Coverage Plot",xlab="Normalised Coverage",ylab="Cumulative Nr. Of Bases")'."\n"; # x-axis label typo ("Normalizalised") corrected
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
290 print OUT 'lines(values$cov,values$count)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
291 print OUT 'if (mincov.x <= 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
292 print OUT ' lines(c(mincov.x,mincov.x),c(0,mincov.y),lty=2,col="darkgreen")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
293 print OUT ' lines(c(0,mincov.x),c(mincov.y,mincov.y),lty=2,col="darkgreen")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
294 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold: '.$thresh.'x")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
295 print OUT ' text(1,(91),pos=2,col="darkgreen",labels=paste("%Bases: ",round(mincov.y,2),"%",sep=""))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
296 print OUT '} else {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
297 print OUT ' text(1,(95),pos=2,col="darkgreen",labels="Threshold ('.$thresh.'x) > Average")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
298 print OUT ' text(1,(91),pos=2,col="darkgreen",labels="Plotting impossible")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
299 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
300 print OUT 'frac.x <- '."$frac\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
301 print OUT 'ii <- which(values$cov == frac.x)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
302 print OUT 'if (length(ii) == 1) {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
303 print OUT ' frac.y <- values[ii[1],"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
304 print OUT '} else {'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
305 print OUT ' i1 <- max(which(values$cov < frac.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
306 print OUT ' i2 <- min(which(values$cov > frac.x))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
307 print OUT ' frac.y <- ((values[i2,"count"] - values[i1,"count"])/(values[i2,"cov"] - values[i1,"cov"]))*(frac.x - values[i1,"cov"]) + values[i1,"count"]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
308 print OUT '}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
309 print OUT 'lines(c(frac.x,frac.x),c(0,frac.y),lty=2,col="red")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
310 print OUT 'lines(c(0,frac.x),c(frac.y,frac.y),lty=2,col="red")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
311 #print OUT 'text((frac.x+0.05),(frac.y - 2),pos=4,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
312 #print OUT 'text((frac.x+0.05),(frac.y-5),pos=4,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
313 print OUT 'text(1,86,pos=2,col="red",labels=paste(frac.x," x Avg.Cov : ",round(frac.x * avg,2),"x",sep="" ))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
314 print OUT 'text(1,82,pos=2,col="red",labels=paste("%Bases: ",round(frac.y,2),"%",sep=""))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
315
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
316 print OUT 'graphics.off()'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
317
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
318 close OUT;
12
86df3f847a72 Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents: 11
diff changeset
319 system("cd $wd/Rout && Rscript ntplot.R");
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
320 ## PRINT TO .TEX FILE
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
321 open OUT, ">>$wd/Report/Report.tex";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
322 # average coverage overviews
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
323 print OUT '\subsection*{Overall Summary}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
324 print OUT '{\small ';
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
325 # left : boxplot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
326 print OUT '\begin{minipage}{0.3\linewidth}\centering'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
327 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageBoxPlot.png}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
328 print OUT '\end{minipage}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
329 # right : cum.cov.plot
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
330 print OUT '\hspace{0.6cm}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
331 print OUT '\begin{minipage}{0.65\linewidth}\centering'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
332 print OUT '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/CoverageNtPlot.png}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
333 print OUT '\end{minipage} \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
334 ## next line
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
335 print OUT '\begin{minipage}{0.48\linewidth}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
336 print OUT '\vspace{-1.2em}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
337 print OUT '\begin{tabular}{ll}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
338 # bam statistics
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
339 print OUT '\multicolumn{2}{l}{\textbf{\underline{Samtools Flagstat Summary}}} \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
340 foreach (@s) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
341 next unless $_ =~ m/^(\d+)\s(.+)$/; # split "<count> <description>"; skip lines that do not match so $1/$2 are never stale or undefined below
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
342 my $one = $1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
343 my $two = $2;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
344 $two =~ s/\s\+\s0\s//;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
345 $two = ucfirst($two);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
346 $one =~ s/%/\\%/g;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
347 # remove '+ 0 ' from front
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
348 $two =~ s/\+\s0\s//;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
349 # remove trailing from end
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
350 $two =~ s/(\s\+.*)|(:.*)/\)/;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
351 $two =~ s/%/\\%/g;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
352 $two =~ s/>=/\$\\ge\$/g;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
353 $two = ucfirst($two);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
354 print OUT '\textbf{'.$two.'} & '.$one.' \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
355 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
356 print OUT '\end{tabular}\end{minipage}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
357 print OUT '\hspace{1.5cm}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
358 # target coverage statistics
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
359 print OUT '\begin{minipage}{0.4\linewidth}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
360 #print OUT '\vspace{-4.8em}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
361 print OUT '\begin{tabular}{ll}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
362 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Region Coverage}}} \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
363 print OUT '\textbf{Number of Target Regions} & '.scalar(@coverages).' \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
364 print OUT '\textbf{Minimal Region Coverage} & '.$min.' \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
365 print OUT '\textbf{25\% Region Coverage} & '.$first.' \\\\'. "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
366 print OUT '\textbf{50\% (Median) Region Coverage} & '.$med.' \\\\'. "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
367 print OUT '\textbf{75\% Region Coverage} & '.$third.' \\\\'. "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
368 print OUT '\textbf{Maximal Region Coverage} & '.$max.' \\\\'. "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
369 print OUT '\textbf{Average Region Coverage} & '.int($eavg).' \\\\'. "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
370 print OUT '\textbf{Mapped On Target} & '.$spec.' \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
371 print OUT '\multicolumn{2}{l}{\textbf{\underline{Target Base Coverage }}} \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
372 print OUT '\textbf{Number of Target Bases} & '.$counter.' \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
373 print OUT '\textbf{Average Base Coverage} & '.int($avg).' \\\\'. "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
374 print OUT '\textbf{Non-Covered Bases} & '.$dens{'0'}.' \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
375 #print OUT '\textbf{Bases Covered $ge$ '.$frac.'xAvg.Cov} & '.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
376 print OUT '\end{tabular}\end{minipage}}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
377 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
378
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
379 # 2. GLOBAL COVERAGE OVERVIEW PER GENE
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
380 @failedexons;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
381 @allexons;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
382 @allregions;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
383 @failedregions;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
384 if (exists($opts{'r'}) || exists($opts{'s'}) || exists($opts{'S'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
385 # count columns
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
386 my $head = `head -n 1 $wd/Targets.Global.Coverage`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
387 chomp($head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
388 my @cols = split(/\t/,$head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
389 my $nrcols = scalar(@cols);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
390 my $covcol = $nrcols - 3;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
391 # Coverage Plots for each gene => barplots in R, table here.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
392 open(IN, '<', "$wd/Targets.Global.Coverage") or die "Could not open $wd/Targets.Global.Coverage: $!\n"; # explicit read mode; fail loudly instead of silently producing an empty per-gene section
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
393 my $currgroup = '';
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
394 my $startline = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
395 my $stopline = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
396 $linecounter = 0;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
397 while (<IN>) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
398 $linecounter++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
399 chomp($_);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
400 my @c = split(/\t/,$_);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
401 push(@allregions,$c[0].'-'.$c[1].'-'.$c[2]);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
402 my $group = $c[3];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
403 ## coverage failure?
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
404 if ($c[$nrcols-1] < 1 || $c[$covcol-1] < $thresh) { # region fails when the last column (the one the R script reads as covperc) is < 1, or the coverage column is below the threshold
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
405 push(@failedexons,$group);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
406 push(@failedregions,$c[0].'-'.$c[1].'-'.$c[2]);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
407 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
408 ## store exon
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
409 push(@allexons,$group);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
410 ## extract and check gene
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
411 $group =~ s/^(\S+)[\|\s](.+)/$1/;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
412 if ($group ne $currgroup ) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
413 if ($currgroup ne '') {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
414 # new gene, make plot.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
415 open OUT, ">$wd/Rout/barplot.R";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
416 print OUT 'coveragetable <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
417 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
418 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
419 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
420 print OUT 'coverage[coverage < 1] <- 1'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
421 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
422 # coverage not whole target region => orange
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
423 print OUT 'covperc <- coveragetable[c('.$startline.':'.$stopline.'),'.$nrcols.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
424 print OUT 'colors[covperc<1] <- "orange"'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
425 # coverage below threshold => red
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
426 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
427
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
428 if ($stopline - $startline > 20) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
429 $scale = 2;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
430 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
431 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
432 $scale = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
433 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
434 my $width = 480 * $scale;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
435 my $height = 240 * $scale;
22
95062840f80f Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents: 12
diff changeset
436 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n";
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
437 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
438 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)",ylim=ylim)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
439 print OUT 'text(mp, log10(coverage) + '.(0.4/$scale).',format(coverage),xpd = TRUE,srt=90)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
440 print OUT 'text(mp,par("usr")[3]-0.05,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
441 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
442 print OUT 'graphics.off()'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
443 close OUT;
12
86df3f847a72 Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents: 11
diff changeset
444 system("cd $wd/Rout && Rscript barplot.R");
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
445 if ($scale == 1) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
446 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
447 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
448 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
449 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
450 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
451
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
452 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
453 $currgroup = $group;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
454 $startline = $linecounter;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
455 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
456 $stopline = $linecounter;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
457 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
458 close IN;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
459 if ($currgroup ne '') {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
460 # last gene, make plot.
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
461 open OUT, ">$wd/Rout/barplot.R";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
462 print OUT 'coveragetable <- read.table("../Targets.Global.Coverage",as.is=TRUE,sep="\t",header=FALSE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
463 print OUT 'coverage <- coveragetable[c('.$startline.':'.$stopline.'),'.$covcol.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
464 print OUT 'entries <- coveragetable[c('.$startline.':'.$stopline.'),4]'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
465 print OUT 'entries <- sub("\\\\S+\\\\|","",entries,perl=TRUE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
466 print OUT 'coverage[coverage < 1] <- 1'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
467 print OUT 'colors <- c(rep("grey",length(coverage)))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
468 print OUT 'covperc <- coveragetable[c('.$startline.':'.$stopline.'),'.$nrcols.']'."\n"; print OUT 'colors[covperc<1] <- "orange"'."\n"; # coverage not whole target region => orange (same as the in-loop gene plots)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
469 print OUT 'colors[coverage<'.$thresh.'] <- "red"'."\n"; # coverage below threshold => red; emitted after orange so red overrules it
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
470 if ($stopline - $startline > 20) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
471 $scale = 2;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
472 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
473 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
474 $scale = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
475 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
476 my $width = 480 * $scale;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
477 my $height = 240 * $scale;
22
95062840f80f Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents: 12
diff changeset
478 print OUT 'png(file="../Plots/Coverage_'.$currgroup.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n";
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
479 print OUT 'ylim = c(0,max(max(log10(coverage),log10('.($thresh+20).'))))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
480 print OUT 'mp <- barplot(log10(coverage),col=colors,main="Exon Coverage for '.$currgroup.'",ylab="Log10(Coverage)", ylim=ylim)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
481 print OUT 'text(mp, log10(coverage) + log10(2),format(coverage),xpd = TRUE,srt=90)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
482 print OUT 'text(mp,par("usr")[3]-0.1,labels=entries,srt=45,adj=1,xpd=TRUE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
483 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
484 print OUT 'graphics.off()'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
485 close OUT;
12
86df3f847a72 Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents: 11
diff changeset
486 system("cd $wd/Rout && Rscript barplot.R");
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
487 if ($scale == 1) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
488 push(@small,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
489 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
490 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
491 push(@large,'\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$currgroup.'.png}');
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
492 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
493 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
494 ## print to TEX
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
495 open OUT, ">>$wd/Report/Report.tex";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
496 print OUT '\subsection*{Gene Summaries}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
497 print OUT '\underline{Legend:} \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
498 print OUT '{\color{red}\textbf{RED:} Coverage did not reach set threshold of '.$thresh.'} \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
499 print OUT '{\color{orange}\textbf{ORANGE:} Coverage was incomplete for the exon. Overruled by red.} \\\\' ."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
500 $col = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
501 foreach (@small) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
502 if ($col > 2) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
503 $col = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
504 print OUT "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
505 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
506 print OUT '\begin{minipage}{0.5\linewidth}\centering'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
507 print OUT $_."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
508 print OUT '\end{minipage}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
509 $col++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
510 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
511 ## new line
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
512 if ($col == 2) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
513 print OUT '\\\\'." \n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
514 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
515 foreach(@large) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
516 print OUT $_."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
517 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
518 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
519
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
520 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
521
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
522 # 3. Detailed overview of failed exons (globally failed)
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
523 if (exists($opts{'s'})) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
524 # count columns
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
525 my $head = `head -n 1 $wd/Targets.Position.Coverage`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
526 chomp($head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
527 my @cols = split(/\t/,$head);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
528 my $nrcols = scalar(@cols);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
529 my $covcol = $nrcols;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
530 my $poscol = $nrcols -1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
531 # tex section header
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
532 open TEX, ">>$wd/Report/Report.tex";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
533 print TEX '\subsection*{Failed Exon Plots}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
534 $col = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
535 print TEX '\underline{NOTE:} Only exons with global coverage $<$'.$thresh.' or incomplete coverage were plotted \\\\'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
536 foreach(@failedregions) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
537 if ($col > 2) {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
538 $col = 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
539 print TEX "\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
540 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
541 # which exon
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
542 my $region = $_;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
543 my $exon = $filehash{$region}{'exon'};
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
544 # link exon to tmp file
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
545 my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
546 ## determine transcript orientation and location
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
547 my $firstline = `head -n 1 $exonfile`;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
548 my @firstcols = split(/\t/,$firstline);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
549 my $orient = $firstcols[5];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
550 my $genomicchr = $firstcols[0];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
551 my $genomicstart = $firstcols[1];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
552 my $genomicstop = $firstcols[2];
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
553 if ($orient eq '+') {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
554 $bps = $genomicstop - $genomicstart + 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
555 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."+".$de->format_number($genomicstop);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
556 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
557 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
558 $bps = $genomicstop - $genomicstart + 1;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
559 $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart)."-".$de->format_number($genomicstop);
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
560 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
561 # print Rscript
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
562 open OUT, ">$wd/Rout/exonplot.R";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
563 print OUT 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
564 print OUT 'coverage <- coveragetable[,'.$covcol.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
565 print OUT 'coverage[coverage < 1] <- 1'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
566 print OUT 'positions <- coveragetable[,'.$poscol.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
567
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
568 my $width = 480 ;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
569 my $height = 240 ;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
570 my $exonstr = $exon;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
571 $exonstr =~ s/\s/_/g;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
572 $exon =~ s/_/ /g;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
573 $exon =~ s/\|/ /g;
22
95062840f80f Correction to png calls to use cairo instead of x11. thanks to Eric Enns for pointing this out.
geert-vandeweyer
parents: 12
diff changeset
574 print OUT 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n";
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
575 print OUT 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
576 if ($orient eq '-') {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
577 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
578 print OUT 'mtext("'.$subtitle.'")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
579 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
580 else {
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
581 print OUT 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",sub="(Transcribed from plus strand)")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
582 print OUT 'mtext("'.$subtitle.'")'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
583 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
584 print OUT 'lines(positions,log10(coverage))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
585 print OUT 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
586 print OUT 'failedpos <- positions[coverage<'.$thresh.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
587 print OUT 'failedcov <- coverage[coverage<'.$thresh.']'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
588 print OUT 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
589 print OUT 'graphics.off()'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
590 close OUT;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
591 # run R script
12
86df3f847a72 Switched to R 3.0.2 from iuc, and moved bedtools to seperate tool_definition
geert-vandeweyer
parents: 11
diff changeset
592 system("cd $wd/Rout && Rscript exonplot.R");
1
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
593 # Add to .TEX
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
594 print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
595 print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
596 print TEX '\end{minipage}'."\n";
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
597 $col++;
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
598 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
599 }
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
600
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
## plot failed (subregion) or all exons
# Runs when -S (plot only regions with at least one position below $thresh) or
# -A (plot every region) was given.  For each region an R script is written and
# executed to render ../Plots/Coverage_<name>.png, and a half-width minipage
# including that image is appended to Report.tex (two plots per row).
if (exists($opts{'S'}) || exists($opts{'A'})) {
    # count columns: the last column of the per-position file is used as the
    # coverage, the one before it as the position (1-based R column indices).
    my $head = `head -n 1 $wd/Targets.Position.Coverage`;
    chomp($head);
    my @cols   = split(/\t/, $head);
    my $nrcols = scalar(@cols);
    my $covcol = $nrcols;
    my $poscol = $nrcols - 1;

    # tex section header
    # NOTE: TEX stays a bareword handle on purpose: re-opening it implicitly
    # closes (and flushes) a TEX handle left open by an earlier section.
    open(TEX, '>>', "$wd/Report/Report.tex")
        or die "Cannot append to $wd/Report/Report.tex: $!\n";
    print TEX '\subsection*{Failed Exon Plots}'."\n";
    if (exists($opts{'S'})) {
        print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n";
    }
    elsif (exists($opts{'A'})) {
        print TEX '\underline{NOTE:} ALL exons are plotted, regardless of coverage \\\\'."\n";
    }

    $col = 1;
    foreach my $region (@allregions) {
        # start a new row after two plots
        if ($col > 2) {
            $col = 1;
            print TEX "\n";
        }
        # which exon, and the split file holding its per-position coverage
        my $exon     = $filehash{$region}{'exon'};
        my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";

        ## determine transcript orientation and genomic span from the first row
        my $firstline    = `head -n 1 $exonfile`;
        my @firstcols    = split(/\t/, $firstline);
        my $orient       = $firstcols[5];
        my $genomicchr   = $firstcols[0];
        my $genomicstart = $firstcols[1];
        my $genomicstop  = $firstcols[2];

        # Only the separator between start and stop depends on the strand:
        # '+' for plus-strand regions, '-' for everything else.
        my $strandsep = ($orient eq '+') ? '+' : '-';
        $bps      = $genomicstop - $genomicstart + 1;
        $subtitle = "Region 0-$bps: $genomicchr:".$de->format_number($genomicstart).$strandsep.$de->format_number($genomicstop);

        # check if failed: with -S, skip regions whose lowest coverage
        # reaches the threshold
        if (exists($opts{'S'})) {
            my $cs = `cut -f $covcol '$exonfile' `;
            my @c  = sort { $a <=> $b } split(/\n/, $cs);
            next if ($c[0] >= $thresh);
        }

        # file-name-safe and display variants of the exon name
        my $width   = 480;
        my $height  = 240;
        my $exonstr = $exon;
        $exonstr =~ s/\s/_/g;
        $exon    =~ s/_/ /g;
        $exon    =~ s/\|/ /g;

        # minus-strand regions are drawn with a reversed x axis
        my $plot_tail = ($orient eq '-')
            ? 'xlim=rev(range(positions)),sub="(Transcribed from minus strand)")'
            : 'sub="(Transcribed from plus strand)")';

        # print Rscript
        open(my $rscript, '>', "$wd/Rout/exonplot.R")
            or die "Cannot write $wd/Rout/exonplot.R: $!\n";
        print {$rscript} 'coveragetable <- read.table("'.$exonfile.'",as.is=TRUE,sep="\t",header=FALSE)'."\n";
        print {$rscript} 'coverage <- coveragetable[,'.$covcol.']'."\n";
        # clamp to 1 so that log10() below never sees zero
        print {$rscript} 'coverage[coverage < 1] <- 1'."\n";
        print {$rscript} 'positions <- coveragetable[,'.$poscol.']'."\n";
        print {$rscript} 'png(file="../Plots/Coverage_'.$exonstr.'.png", bg="white", width='.$width.', height='.$height.',type=c("cairo"))'."\n";
        print {$rscript} 'ylim = c(0,log10(max(max(coverage),'.($thresh+10).')))'."\n";
        print {$rscript} 'plot(positions,log10(coverage),type="n",main="Coverage for '.$exon.'",ylab="log10(Coverage)",ylim=ylim,xlab="Position",'.$plot_tail."\n";
        print {$rscript} 'mtext("'.$subtitle.'")'."\n";
        print {$rscript} 'lines(positions,log10(coverage))'."\n";
        print {$rscript} 'abline(h=log10('.$thresh.'),lwd=4,col=rgb(255,0,0,100,maxColorValue=255))'."\n";
        print {$rscript} 'failedpos <- positions[coverage<'.$thresh.']'."\n";
        print {$rscript} 'failedcov <- coverage[coverage<'.$thresh.']'."\n";
        print {$rscript} 'points(failedpos,log10(failedcov),col="red",pch=19)'."\n";
        print {$rscript} 'graphics.off()'."\n";
        close($rscript) or die "Cannot finish writing $wd/Rout/exonplot.R: $!\n";

        # run R script
        system("cd $wd/Rout && Rscript exonplot.R");

        # Add to .TEX
        print TEX '\begin{minipage}{0.5\linewidth}\centering'."\n";
        print TEX '\includegraphics[width=\textwidth,keepaspectratio=true]{../Plots/Coverage_'.$exonstr.'.png}'."\n";
        print TEX '\end{minipage}'."\n";
        $col++;
    }
    # Flush the section now.  Without this the text printed after the last
    # system() call stays buffered and can end up behind \end{document},
    # which is appended later through a separate handle.
    close(TEX) or die "Cannot finish writing $wd/Report/Report.tex: $!\n";
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
## list failed exons
# Runs when -L was given.  Appends a longtable to Report.tex with one row per
# target whose minimum per-position coverage is below $thresh: target name,
# genomic position, truncated average coverage and minimum coverage.
if (exists($opts{'L'})) {
    # count columns: the last column of the per-position file is the coverage
    my $head = `head -n 1 $wd/Targets.Position.Coverage`;
    chomp($head);
    my @cols   = split(/\t/, $head);
    my $covcol = scalar(@cols);

    # tex section header
    # NOTE: TEX stays a bareword handle on purpose: re-opening it implicitly
    # closes (and flushes) a TEX handle left open by an earlier section.
    open(TEX, '>>', "$wd/Report/Report.tex")
        or die "Cannot append to $wd/Report/Report.tex: $!\n";
    print TEX '\subsection*{List of Failed Exons}'."\n";
    print TEX '\underline{NOTE:} ALL exons were tested for local coverage $<$'.$thresh.' \\\\'."\n";
    print TEX '{\footnotesize\begin{longtable}[l]{@{\extracolsep{\fill}}llll}'."\n".'\hline'."\n";
    print TEX '\textbf{Target Name} & \textbf{Genomic Position} & \textbf{Avg.Coverage} & \textbf{Min.Coverage} \\\\'."\n".'\hline'."\n";
    print TEX '\endhead'."\n";
    print TEX '\hline '."\n".'\multicolumn{4}{r}{{\textsl{\footnotesize Continued on next page}}} \\\\ '."\n".'\hline' ."\n". '\endfoot' . "\n". '\endlastfoot' . "\n";

    $col = 1;   # file-level counter, reset as the original code did
    open(IN, '<', "$wd/Targets.Global.Coverage")
        or die "Cannot read $wd/Targets.Global.Coverage: $!\n";
    while (my $line = <IN>) {
        chomp($line);
        my @p = split(/\t/, $line);
        # %filehash is keyed by "chr-start-stop"
        my $region   = $p[0].'-'.$p[1].'-'.$p[2];
        my $exon     = $filehash{$region}{'exon'};
        my $exonfile = "$wd/SplitFiles/File_".$filehash{$region}{'idx'}.".txt";

        ## determine transcript orientation and genomic span from the first row
        my $firstline    = `head -n 1 $exonfile`;
        my @firstcols    = split(/\t/, $firstline);
        my $orient       = $firstcols[5];
        my $genomicchr   = $firstcols[0];
        my $genomicstart = $firstcols[1];
        my $genomicstop  = $firstcols[2];

        # Only the separator between start and stop depends on the strand:
        # '+' for plus-strand regions, '-' for everything else.
        my $strandsep = ($orient eq '+') ? '+' : '-';
        $bps      = $genomicstop - $genomicstart + 1;
        $subtitle = "$genomicchr:".$de->format_number($genomicstart).$strandsep.$de->format_number($genomicstop);

        # check if failed
        my $cs = `cut -f $covcol '$exonfile' `;
        my @c  = split(/\n/, $cs);
        # A region without any coverage rows cannot be summarised; passing an
        # empty list on would abort the whole report with a division by zero.
        unless (@c) {
            warn "No per-position coverage found for region $region; not listed\n";
            next;
        }
        my ($avg, $med, $min, $max, $first, $third, $ontarget) = arraystats(@c);

        # lowest coverage reaches the threshold => not a failed exon
        next if ($min >= $thresh);

        # print to .tex table (long names are cut to 27 characters plus '...')
        if (length($exon) > 30) {
            $exon = substr($exon, 0, 27) . '...';
        }
        $exon =~ s/_/ /g;
        $exon =~ s/\|/ /g;

        print TEX "$exon & $subtitle & ".int($avg)." & $min ".'\\\\'."\n";
    }
    close IN;
    print TEX '\hline'."\n";
    print TEX '\end{longtable}}'."\n";
    close(TEX) or die "Cannot finish writing $wd/Report/Report.tex: $!\n";
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
768
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
769
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
## Close document
# An earlier section may have left the bareword TEX handle open on Report.tex
# with text still buffered.  Close it first: otherwise that text is flushed
# only at the next system() call, i.e. after \end{document} has been appended.
{
    no warnings;    # TEX may simply not be open at this point
    close(TEX);
}
open(my $report, '>>', "$wd/Report/Report.tex")
    or die "Cannot append to $wd/Report/Report.tex: $!\n";
print {$report} '\label{endofdoc}'."\n";
print {$report} '\end{document}'."\n";
close($report) or die "Cannot finish writing $wd/Report/Report.tex: $!\n";

# pdflatex is run twice (presumably so that references such as the
# endofdoc label resolve); its output is discarded.
system("cd $wd/Report && pdflatex Report.tex > /dev/null 2>&1 && pdflatex Report.tex > /dev/null 2>&1 ");

## mv report to output file
# list-form system(): no shell, so paths need no quoting
system('cp', '-f', "$wd/Report/Report.pdf", $pdffile);

##create tar.gz file
mkdir("$wd/Results");
system('cp', '-Rf', "$wd/Plots", "$wd/Results/");
system('cp', '-Rf', "$wd/Report/", "$wd/Results/");
if (-e "$wd/Targets.Global.Coverage") {
    system('cp', '-Rf', "$wd/Targets.Global.Coverage", "$wd/Results/");
}
if (-e "$wd/Targets.Position.Coverage") {
    system('cp', '-Rf', "$wd/Targets.Position.Coverage", "$wd/Results/");
}

# tar is started from inside $wd so that the archive members are stored as
# Results/...
system("cd $wd && tar czf '$tarfile' Results/");

## clean up (galaxy stores outside wd)
# never hand an empty path to rm
if (defined($wd) && length($wd)) {
    system('rm', '-Rf', $wd);
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
793 ###############
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
794 ## FUNCTIONS ##
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
795 ###############
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# arraystats(@values)
# Summary statistics for a list of numbers.
# Returns the list (average, median, min, max, first quartile, third quartile,
# sum).  The quartiles are the elements at index int(n/4) and 3*int(n/4) of the
# numerically sorted values.  An empty input yields seven zeros instead of
# dying with a division by zero.
sub arraystats{
    my @array = @_;
    my $count = scalar(@array);
    # nothing to summarise
    return (0, 0, 0, 0, 0, 0, 0) if ($count == 0);
    @array = sort { $a <=> $b } @array;
    # median: middle element, or mean of the two middle elements
    my $median = 0;
    if ($count % 2) {
        $median = $array[int($count/2)];
    } else {
        $median = ($array[$count/2] + $array[$count/2 - 1]) / 2;
    }
    # average
    my $sum = 0;
    foreach (@array) { $sum += $_; }
    my $average = $sum / $count;
    # quantiles (rounded)
    my $quart = int($count/4) ;
    my $first = $array[$quart];
    my $third = $array[($quart*3)];
    my $min = $array[0];
    my $max = $array[($count-1)];
    return ($average,$median,$min,$max,$first,$third,$sum);
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
819
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# GlobalSummary($bam, $targets)
# Runs coverageBed for the BAM file against the target BED file and writes the
# result to $wd/Targets.Global.Coverage.  The exact command string is recorded
# in %commandsrun, so an identical request is executed only once.
# Returns nothing when the command was already run, 1 otherwise.
sub GlobalSummary {
    my ($bam,$targets) = @_;

    my $command = "cd $wd && coverageBed -abam $bam -b $targets > $wd/Targets.Global.Coverage";
    # same command already executed: the output file is in place
    return if (exists($commandsrun{$command}));

    # report a failing run instead of silently leaving an unusable output file
    if (system($command) != 0) {
        warn "coverageBed failed (exit status $?): $command\n";
    }
    $commandsrun{$command} = 1;
    return 1;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
830
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# CoveragePerRegion($bam, $targets)
# Produces $wd/Targets.Global.Coverage for the BAM file and target BED file.
# This needs exactly the command that GlobalSummary builds and caches, so it
# delegates to it rather than keeping a second copy of the same code.
# Returns nothing when the command was already run, 1 otherwise.
sub CoveragePerRegion {
    my ($bam,$targets) = @_;
    return GlobalSummary($bam,$targets);
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
840
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
# SubRegionCoverage($bam, $targets)
# Runs coverageBed with -d for the BAM file against the target BED file and
# writes the result to $wd/Targets.Position.Coverage.  The command is recorded
# in %commandsrun afterwards, but it is not looked up there first, so every
# call runs coverageBed again.
# Returns 1.
sub SubRegionCoverage {
    my ($bam,$targets) = @_;
    my $command = "cd $wd && coverageBed -abam $bam -b $targets -d > $wd/Targets.Position.Coverage";
    # report a failing run instead of silently leaving an unusable output file
    if (system($command) != 0) {
        warn "coverageBed failed (exit status $?): $command\n";
    }
    $commandsrun{$command} = 1;
    return 1;
}
864d0ccfbe6f Initial Uploaded
geert-vandeweyer
parents:
diff changeset
847