annotate mutspecStat.pl @ 7:eda59b985b1c draft default tip

Uploaded
author iarc
date Mon, 13 Mar 2017 08:21:19 -0400
parents 46a10309dfe2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1 #!/usr/bin/env perl
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
3 #-----------------------------------#
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
4 # Author: Maude #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
5 # Script: mutspecStat.pl #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
6 # Last update: 09/02/17 #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
7 #-----------------------------------#
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
8
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
9 use strict;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
10 use warnings;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
11 use Getopt::Long;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
12 use Pod::Usage;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
13 use File::Basename; # my ($filename, $directories, $suffix) = fileparse($file, qr/\.[^.]*/);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
14 use File::Path;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
15 use Spreadsheet::WriteExcel;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
16
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Documentation / verbosity switches (set to 1 by GetOptions when the flag is given).
our ($verbose, $man, $help) = (0, 0, 0);

# String options; the sentinel "empty" means "not supplied on the command line".
#   $refGenome         : name of the reference genome to use
#   $output            : folder where the results are saved
#   $folder_temp       : folder where the temporary files are saved
#   $path_R_Scripts    : folder containing the R scripts
#   $path_SeqrefGenome : folder containing the fasta reference sequences
our ($refGenome, $output, $folder_temp, $path_R_Scripts, $path_SeqrefGenome) = ("empty") x 5;

# Boolean switches; they keep the value 2 unless the flag is present (GetOptions then stores 1).
#   $poolData           : pool all the samples and report on the pool as well as on each sample
#   $oneReportPerSample : generate one report for each sample
our ($poolData, $oneReportPerSample) = (2, 2);


# Parse the options and print the usage if there is a syntax error.
GetOptions(
    'verbose|v'          => \$verbose,
    'help|h'             => \$help,
    'man|m'              => \$man,
    'refGenome=s'        => \$refGenome,
    'outfile|o=s'        => \$output,
    'temp=s'             => \$folder_temp,
    'pathRscript=s'      => \$path_R_Scripts,
    'pathSeqRefGenome=s' => \$path_SeqrefGenome,
    'poolData'           => \$poolData,
    'reportSample'       => \$oneReportPerSample,
) or pod2usage(2);

# Print the usage when it was explicitly requested.
pod2usage(-verbose=>1, -exitval=>1, -output=>\*STDERR) if ($help);
pod2usage(-verbose=>2, -exitval=>1, -output=>\*STDERR) if ($man);

# Exactly one positional argument (the input folder) is expected.
# Testing "!= 1" also rejects three or more arguments, which "== 2" let through.
pod2usage(-verbose=>0, -exitval=>1, -output=>\*STDERR) if (@ARGV != 1);

our ($input) = @ARGV;


####### The input must be a folder with one or several annotated files
if(!-d $input)
{
    print STDERR "Error: The input must be a Dataset List\n";
    print STDERR "Even for 1 file, please create a Dataset List\n";
    exit(1);    # non-zero status so that callers can detect the failure
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
41
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
42
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
43 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
44 # GLOBAL VARIABLES #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
45 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Current working directory, as reported by the shell.
chomp(our $pwd = `pwd`);


# Locations of the R scripts, all under <pathRscript>/R.
our $pathRscriptChi2test    = $path_R_Scripts . "/R/chi2test_MutSpecStat_Galaxy.r";
our $pathRScriptFigs        = $path_R_Scripts . "/R/figs_MutSpecStat_Galaxy.r";
our $pathRScriptTxnSB       = $path_R_Scripts . "/R/transciptionalStrandBias.r";
our $pathRScriptMutSpectrum = $path_R_Scripts . "/R/mutationSpectra_Galaxy.r";


# Working folder receiving the files with enough mutations for calculating the statistics.
our $folderCheckedForStat = $pwd . "/folder_checked";
unless(-e $folderCheckedForStat)
{
    mkdir($folderCheckedForStat) or die "$!: $folderCheckedForStat\n";
}

# Output folder with all the results (filled in by CheckFlags).
our $folderMutAnalysis = "";

# Length of each chromosome, keyed by chromosome name.
our %chromosomes;

# Names of the columns holding the chromosome, start, ref and alt alleles (Annovar output).
our ($chr_name, $start_name, $ref_name, $alt_name) = ("Chr", "Start", "Ref", "Alt");

# Names of the Annovar annotation columns used by the report.
our $func_name       = "Func.refGene";
our $exonicFunc_name = "ExonicFunc.refGene";
our $strand_name     = "Strand";
our $context_name    = "context";

# Font and table formats, declared here and assigned elsewhere in the script.
our ($format_A10, $format_A10Boldleft, $format_A10ItalicRed) = ("") x 3;
our (
    $formatT_left, $formatT_right, $formatT_bottomRight, $formatT_bottomLeft, $formatT_bottom,
    $formatT_bottomHeader, $formatT_bottomRightHeader, $formatT_bottomHeader2, $formatT_rightHeader
);
our ($formatT_graphTitle);
our (
    $table_topleft, $table_topRight, $table_bottomleft, $table_bottomRight, $table_top, $table_right,
    $table_bottom, $table_bottomItalicRed, $table_left, $table_bottomrightHeader, $table_left2,
    $table_middleHeader, $table_middleHeader2
);

# Result of the chi2 test for the strand bias.
our %h_chi2 = ();
# Data for the NMF input.
our %h_inputNMF = ();
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
83
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
84
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
85 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
86 # MAIN #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
87 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Step 1: validate the flags and create the output and temp directories.
CheckFlags();

# Step 2: make sure the files are annotated and keep only the samples
#         having enough mutations for generating the statistics.
checkVariants();

# Step 3: retrieve the chromosomes length.
checkChrDir();

# Step 4: calculate the statistics and generate the report.
ReportMutDist();

# Step 5: remove the temporary directories.
foreach my $workDir ($folder_temp, $folderCheckedForStat)
{
    rmtree($workDir);
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
104
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
105
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
106 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
107 # FUNCTIONS #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
108 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
109
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Check the presence of the mandatory flags and create the output and temp directories.
#
# Globals read : $refGenome, $path_R_Scripts, $output, $input, $folder_temp, $pwd
# Globals set  : $folderMutAnalysis; $folder_temp when --temp was not given
# Failure      : prints a message on STDERR and exits with status 1 when a mandatory
#                flag is missing or when an input file name is too long for an Excel
#                sheet name; dies when a directory cannot be created or read.
sub CheckFlags
{
    # Check the reference genome
    if($refGenome eq "empty")
    {
        print STDERR "Missing flag !\n";
        print STDERR "You forget to specify the name for the reference genome!!!\nPlease specify it with the flag --refGenome\n";
        exit(1);
    }

    # Check the path to the R scripts (done before touching the file system so
    # that a missing flag does not leave freshly created folders behind)
    if($path_R_Scripts eq "empty")
    {
        print STDERR "Missing flag !\n";
        print STDERR "You forget to specify the path for the R scripts!!!\nPlease specify it with the flag --pathRscript\n";
        exit(1);
    }

    # If no output is specified write the result at the same place as the input folder
    my $resultRoot = ($output eq "empty") ? dirname($input) : $output;
    $folderMutAnalysis = "$resultRoot/Mutational_Analysis";
    foreach my $dir ($resultRoot, $folderMutAnalysis)
    {
        if(!-e $dir) { mkdir($dir) or die "$!: $dir\n"; }
    }

    # If no temp folder is specified create one in the current path
    if($folder_temp eq "empty")
    {
        my ($inputName) = fileparse($input, qr/\.[^.]*/);
        $folder_temp = "$pwd/TEMP_MutationalAnalysis_$inputName";
    }
    if(!-e $folder_temp) { mkdir($folder_temp) or die "$!: $folder_temp\n"; }

    # List the input files without going through the shell, so that paths
    # containing spaces or shell metacharacters are handled correctly.
    # Hidden entries are skipped, as the shell pattern "$input/*" did.
    opendir(my $dh, $input) or die "$!: $input\n";
    my @entries = sort grep { !/^\./ } readdir($dh);
    closedir($dh);

    foreach my $entry (@entries)
    {
        my $file = "$input/$entry";

        ## Verify the name of the file, must be <= 31 chars for the sheet name
        my ($sheetName) = fileparse($file, qr/\.[^.]*/);

        if(length($sheetName) > 31)
        {
            print STDERR "Error: The filename of: $file\nMust be <= 31 chars\nPlease modify it before running the script\n";
            exit(1);
        }
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
166
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Check the input file(s).
#
# Every file of the input folder must be annotated, i.e. its header line must
# contain the column named by $func_name. Files having at least one variant with
# a strand orientation (Strand column different from "NA") are copied into
# $folderCheckedForStat; the others are reported on STDOUT and left out.
#
# Globals read : $input, $func_name, $strand_name, $folderCheckedForStat
# Failure      : prints a message on STDERR and exits with status 1 when a file is
#                not annotated or when no file at all has usable variants; dies when
#                a file cannot be read or copied.
sub checkVariants
{
    # Count the number of file(s) with enough mutations (at least 1 with a strand orientation)
    my $timerFile = 0;
    my @listRemovedFile = ();

    # List the input files without going through the shell (safe with any path).
    opendir(my $dh, $input) or die "$!: $input\n";
    my @entries = sort grep { !/^\./ } readdir($dh);
    closedir($dh);

    foreach my $entry (@entries)
    {
        my $file = "$input/$entry";

        open(my $fh, "<", $file) or die "$!: $file\n";
        my $header = <$fh>;

        ### Check if the file is annotated
        # The previous test compared the output of grep with a sentinel that the
        # grep call itself had just overwritten, so it could never fail.
        if(!defined($header) || index($header, $func_name) == -1)
        {
            close $fh;
            print STDERR "Error: The input file you specify is not annotated!\nThe file concerned is: $file !!!!\nPlease first annotate your file before trying to generate the report on mutation spectra\n";
            exit(1);
        }

        ### Check if there is at least 1 mutation with a strand info
        my $strand_value = recoverNumCol($file, $strand_name);
        my $nbSBScoding = 0;

        while(my $line = <$fh>)
        {
            $line =~ s/[\r\n]+$//;
            # Limit -1 keeps trailing empty fields, so an empty Strand column is
            # still seen as a (non "NA") value exactly as before.
            my @tab = split("\t", $line, -1);

            # Blank or truncated lines have no strand column at all: skip them
            # instead of counting them as variants.
            my $strand = $tab[$strand_value];
            next if(!defined($strand));

            if($strand ne "NA")
            {
                $nbSBScoding++;
            }
        }
        close $fh;

        if($nbSBScoding != 0)
        {
            $timerFile++;
            # List form of system: no shell involved, and the result is checked
            system("cp", $file, "$folderCheckedForStat/") == 0
                or die "Can't copy $file into $folderCheckedForStat\n";
        }
        else
        {
            print STDOUT "\n\nWarning: There is no variant to compute statistics for $file\n\n";
            push(@listRemovedFile, $file);
        }
    }

    if($timerFile == 0)
    {
        print STDERR "\n\nError: No variants to compute statistics for:\n";

        foreach my $removed (@listRemovedFile)
        {
            print STDERR $removed."\n";
        }
        exit(1);
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
231
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Retrieve the chromosomes length.
#
# Scans the folder <path_SeqrefGenome>/<refGenome>_seq for fasta files named
# chr<number>.fa, chrX.fa or chrY.fa (case-insensitive) and stores in %chromosomes,
# under the key "chr<id>", the number of sequence characters found after the first
# (header) line of each file.
#
# Globals read : $path_SeqrefGenome, $refGenome
# Globals set  : %chromosomes
# A folder or a file that cannot be read is reported with a warning and skipped.
sub checkChrDir
{
    my $seqDir = "$path_SeqrefGenome/${refGenome}_seq";

    my $dh;
    if(!opendir($dh, $seqDir))
    {
        warn "Warning: can't read the reference sequences folder $seqDir: $!\n";
        return;
    }
    my @fastaNames = sort grep { !/^\./ && /\.fa$/ } readdir($dh);
    closedir($dh);

    foreach my $name (@fastaNames)
    {
        # The pattern is applied to the file name only, so a "chr" appearing in
        # a parent directory can no longer end up in the hash key.
        next unless($name =~ /chr(\d+|x|y)\.fa$/i);
        my $chrId = $1;

        my $file = "$seqDir/$name";
        my $fh;
        if(!open($fh, "<", $file))
        {
            warn "Warning: can't read $file: $!\n";
            next;
        }

        # Skip the fasta header line
        scalar(<$fh>);

        # Accumulate the length line by line instead of loading the whole sequence
        my $length = 0;
        while(my $line = <$fh>)
        {
            $line =~ s/[\r\n]+$//;
            $length += length($line);
        }
        close $fh;

        $chromosomes{"chr".$chrId} = $length;
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
247
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Calculate the statistics and generate the report.
#
# Creates a fresh <folderMutAnalysis>/Figures folder with its Chi2, WebLogo and
# Input_NMF sub-folders, then runs in sequence: recoverAnnovarAnnotation,
# CalculateStatistics, CalculateChi2, WriteStatistics and CreateLogo.
#
# Globals read : $folderMutAnalysis, $func_name, $folderCheckedForStat
# Failure      : dies when a folder cannot be created; prints a message on STDERR
#                and exits with status 1 when no functional region is recovered.
sub ReportMutDist
{
    print STDOUT "-----------------------------------------------------------------\n";
    print STDOUT "-----------------Report Mutational Analysis----------------------\n";
    print STDOUT "-----------------------------------------------------------------\n";

    # Always start from an empty Figures folder
    my $folderFigure = "$folderMutAnalysis/Figures";
    rmtree($folderFigure) if(-e $folderFigure);
    mkdir($folderFigure) or die "Can't create the directory $folderFigure\n";

    my $folderChi2    = "$folderFigure/Chi2";
    my $folderWebLogo = "$folderFigure/WebLogo";
    my $folderNMF     = "$folderFigure/Input_NMF";
    foreach my $dir ($folderChi2, $folderWebLogo, $folderNMF)
    {
        if(!-e $dir) { mkdir($dir) or die "Can't create the directory $dir\n"; }
    }


    ################################################################################################
    ###                                Calculates all the statistics                            ###
    ################################################################################################

    ########### Recover the functional region for all the samples. Allows to have the same annotations for the pie chart "Impact on protein sequence"
    my @tab_func = recoverAnnovarAnnotation($func_name);
    if(@tab_func == 0)
    {
        print STDERR "Error: the table for the functional region is empty!!!!! check $folderCheckedForStat\n$func_name\n";
        exit(1);    # non-zero status so that callers can detect the failure
    }

    ############ Calculate the different statistics present in the report
    my %h_file = ();
    CalculateStatistics(\%h_file, \@tab_func);

    ############ Calculate the chi2 for the strand bias
    CalculateChi2(\%h_file, $folderChi2);

    ############ Write the different statistics present in the report
    WriteStatistics(\%h_file, $#tab_func, $folderFigure, $folderChi2, $folderNMF);

    ############ Create logo for studying the 10 flanking bases of the mutation
    CreateLogo(\%h_file, $folderWebLogo);
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
291
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
292
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Calculate the different statistics present in the report.
#
# Parameters:
#   $refH_file : reference to the hash receiving the statistics (passed on to File2Hash)
#   $refT_func : reference to the array of functional regions (passed on to File2Hash)
#
# When $poolData is 1, the files of $folderCheckedForStat are first concatenated
# into Pool_Data.txt (header of the first file, then the data lines of every file).
# Each file of the folder, pool included, is then handed to File2Hash together with
# the indexes of its columns of interest.
#
# Globals read : $poolData, $folderCheckedForStat and the *_name column names
# Globals set  : the *_value column indexes declared with "our" below
# Failure      : dies when the folder or one of its files cannot be read or written.
sub CalculateStatistics
{
    my ($refH_file, $refT_func) = @_;

    # Kept as package variables: code outside this sub may rely on them.
    our ($chr_value, $start_value, $ref_value, $alt_value, $func_value, $exonicFunc_value, $strand_value, $contextSeq_value) = ("") x 8;

    my $poolName = "Pool_Data.txt";

    # List the visible entries of the checked folder without going through the shell
    my $listChecked = sub
    {
        opendir(my $dh, $folderCheckedForStat) or die "$!: $folderCheckedForStat\n";
        my @names = sort grep { !/^\./ } readdir($dh);
        closedir($dh);
        return @names;
    };

    # Generate the pool of all the data
    if($poolData == 1)
    {
        # A pool left by a previous run must not be pooled into itself
        my @listFile = grep { $_ ne $poolName } $listChecked->();

        my $poolPath = "$folderCheckedForStat/$poolName";
        open(my $out, ">", $poolPath) or die "$!: $poolPath\n";

        # For keeping the header only one time
        my $headerWritten = 0;

        foreach my $name (@listFile)
        {
            my $path = "$folderCheckedForStat/$name";
            open(my $in, "<", $path) or die "$!: $path\n";

            my $header = <$in>;
            if(defined($header) && !$headerWritten)
            {
                $header .= "\n" unless($header =~ /\n\z/);
                print $out $header;
                $headerWritten = 1;
            }

            while(my $line = <$in>)
            {
                # A file without final newline must not glue its last record
                # to the first record of the next file
                $line .= "\n" unless($line =~ /\n\z/);
                print $out $line;
            }
            close $in;
        }
        close($out) or die "$!: $poolPath\n";
    }

    foreach my $name ($listChecked->())
    {
        my $file = "$folderCheckedForStat/$name";

        ############ Recover the number of the columns of interest
        $chr_value        = recoverNumCol($file, $chr_name);
        $start_value      = recoverNumCol($file, $start_name);
        $ref_value        = recoverNumCol($file, $ref_name);
        $alt_value        = recoverNumCol($file, $alt_name);
        $func_value       = recoverNumCol($file, $func_name);
        $exonicFunc_value = recoverNumCol($file, $exonicFunc_name);
        $strand_value     = recoverNumCol($file, $strand_name);
        $contextSeq_value = recoverNumCol($file, $context_name);

        ############ Calculate the statistics for each file
        File2Hash($file, $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func);
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
340
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Calculate the chi2 for the strand bias.
#
# For every sample, the NonTr and Tr counts of the six mutation types are summed over all
# the functional regions, written to $folderChi2/Input_chi2_strandBias.txt, and the R script
# referenced by $pathRscriptChi2test is run on that folder.
#
# Arguments:
#   $refH_file  : reference to a hash {sample}{'6mutType'}{function}{mutation}{'NonTr'|'Tr'}
#   $folderChi2 : folder receiving the input file and where the R output is expected
#
# The script exits with a non-zero status when there is no sample to analyse or when
# $folderChi2/Output_chi2_strandBias.txt was not produced by the R script.
sub CalculateChi2
{
	my ($refH_file, $folderChi2) = @_;

	# No value for the chi2
	if(scalar (keys %{$refH_file}) == 0)
	{
		print STDERR "Error: No value for calculating the chi2 for the strand bias\n";
		exit 1;
	}

	# Correspondence between the mutation types of the hash and the labels of the chi2 input file
	my %h_alteration = (
		"C:G>A:T" => 'C>A',
		"C:G>G:C" => 'C>G',
		"C:G>T:A" => 'C>T',
		"T:A>A:T" => 'T>A',
		"T:A>C:G" => 'T>C',
		"T:A>G:C" => 'T>G',
	);

	# Strand bias for one mutation type for all the samples
	my %h_tempchi2 = ();

	foreach my $k_file (sort keys %{$refH_file})
	{
		foreach my $k_func (sort keys %{$refH_file->{$k_file}{'6mutType'}})
		{
			foreach my $k_mutation (sort keys %{$refH_file->{$k_file}{'6mutType'}{$k_func}})
			{
				# Only the six mutation types are considered
				next unless(exists $h_alteration{$k_mutation});
				my $alteration = $h_alteration{$k_mutation};

				foreach my $strand ('NonTr', 'Tr')
				{
					$h_tempchi2{$alteration}{$k_file}{$strand} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{$strand};
				}
			}
		}
	}

	# Create the input file for the chi2 test
	my $inputChi2  = "$folderChi2/Input_chi2_strandBias.txt";
	my $outputChi2 = "$folderChi2/Output_chi2_strandBias.txt";

	open(my $fh_chi2, ">", $inputChi2) or die "$!: $inputChi2\n";
	print $fh_chi2 "SampleName\tNonTr\tTr\tAlteration\n";

	foreach my $k_mutation (sort keys %h_tempchi2)
	{
		foreach my $k_file (sort keys %{$h_tempchi2{$k_mutation}})
		{
			print $fh_chi2 "$k_file\t$h_tempchi2{$k_mutation}{$k_file}{'NonTr'}\t$h_tempchi2{$k_mutation}{$k_file}{'Tr'}\t$k_mutation\n";
		}
	}
	# Buffered write errors are only reported when the handle is closed
	close($fh_chi2) or die "$!: $inputChi2\n";

	# Remove the result of a previous run: the presence of the output file is the only
	# evidence used below to decide whether the R script worked
	if(-e $outputChi2) { unlink($outputChi2) or die "Can't remove the previous result $outputChi2: $!\n"; }

	# stderr is merged into the captured output so that nothing is printed unless the test fails
	my $r_log = `Rscript $pathRscriptChi2test --folderChi2 $folderChi2 2>&1`;

	if(!-e $outputChi2)
	{
		print STDERR "Error: Chi2 test didn't work !!!\n";
		if(defined $r_log && $r_log ne "") { print STDERR $r_log; }
		exit 1;
	}
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
424
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Create a workbook of the report: set the font formats (stored in the file-level format
# variables) and add the "Sample_List" summary sheet with its header line.
# Dies when the Excel file can't be created. Returns the workbook and the summary worksheet.
sub _CreateReportWorkbook
{
	my ($path_workbook) = @_;

	# new() returns undef when the file can't be created
	my $wb = Spreadsheet::WriteExcel->new($path_workbook);
	if(!defined $wb) { die "Can't create the Excel report $path_workbook: $!\n"; }

	############## Set the variables for font formats in the Excel report
	Format_A10($wb, \$format_A10); # Text center in Arial 10
	Format_A10BoldLeft($wb, \$format_A10Boldleft); # Text on the left in Arial 10 bold
	Format_TextSection($wb, \$formatT_left, \$formatT_right, \$formatT_bottomRight, \$formatT_bottomLeft, \$formatT_bottom, \$formatT_bottomHeader, \$formatT_bottomRightHeader, \$formatT_bottomHeader2, \$formatT_rightHeader);
	Format_GraphTitle($wb, \$formatT_graphTitle);
	Format_Table($wb, \$table_topleft, \$table_topRight, \$table_bottomleft, \$table_bottomRight, \$table_top, \$table_right, \$table_bottom, \$table_bottomItalicRed, \$table_left, \$table_bottomrightHeader, \$table_left2, \$table_middleHeader, \$table_middleHeader2);
	Format_A10ItalicRed($wb, \$format_A10ItalicRed);

	############### Worksheet with a summary of the samples
	my $ws_sum = $wb->add_worksheet("Sample_List");
	$ws_sum->write(0, 0, "Samples", $format_A10); $ws_sum->write(0, 1, "Total number SBS", $format_A10); $ws_sum->write(0, 2, "Total number of Indel", $format_A10); $ws_sum->write(0, 3, "Total number of mutations", $format_A10);
	$ws_sum->set_column(0,0, 50); $ws_sum->set_column(1,1, 20); $ws_sum->set_column(2,2, 20); $ws_sum->set_column(3,3, 22);

	return ($wb, $ws_sum);
}

# Write the different statistics in the report.
#
# Depending on $oneReportPerSample, one Excel workbook is created for all the samples (2)
# or one workbook per sample (1). For each sample of $refH_file a worksheet is filled with
# the tables, the input files of the figures are written under $folderFigure and the
# figures are generated. The input matrices for NMF are written at the end.
#
# Arguments:
#   $refH_file    : reference to the hash with the statistics, indexed by sample name
#   $nb_func      : value passed to the layout subs and used to compute the first row of
#                   the table by chromosome (presumably the number of functional regions - to confirm)
#   $folderFigure : folder where the input files of the figures are saved (one sub folder by kind of figure)
#   $folderChi2   : folder that may contain Output_chi2_strandBias.txt
#   $folderNMF    : folder where Input_NMF_Count.txt and Input_NMF_Frequency.txt are written
sub WriteStatistics
{
	my ($refH_file, $nb_func, $folderFigure, $folderChi2, $folderNMF) = @_;

	# Save the different graphs in specific folders
	foreach my $subFolder ("Overall_mutation_distribution", "Impact_protein_sequence", "SBS_distribution", "Stranded_Analysis", "Trinucleotide_Sequence_Context", "Distribution_SBS_Per_Chromosomes")
	{
		my $folderGraph = "$folderFigure/$subFolder";
		if(!-e $folderGraph) { mkdir($folderGraph) or die "Can't create the directory $folderGraph\n"; }
	}


	# Workbook and worksheets (the NMF worksheets only exist in the report with all the samples)
	my $wb = ""; my $ws_sum = "";
	my ($ws_inputNMF_count, $ws_inputNMF_percent) = ("", "");

	# Create one Excel file with all the samples
	if($oneReportPerSample == 2)
	{
		($wb, $ws_sum) = _CreateReportWorkbook("$folderMutAnalysis/Report_Mutation_Spectra.xls");

		############### Write the input matrix for NMF for the count and the un-normalized frequency
		$ws_inputNMF_count   = $wb->add_worksheet("Input_NMF_Count");
		$ws_inputNMF_percent = $wb->add_worksheet("Input_NMF_Percent");
	}


	################################################ Set the Rows and columns of the different part of the report
	my $row_SumSheet              = 1;                                # Row of the current sample in the summary sheet of the report with all the samples
	my $rowStart_SBSdistrBySeg    = 48; my $colStart_SBSdistrBySeg = 0; # For the table SBS distribution by segment
	my $colStart_matrixSeqContext = 19;                               # Sequence context
	my $col_inputNMF              = 0;                                # Column of the current sample in the NMF input sheets


	## For each file
	foreach my $k_file (sort keys %{$refH_file})
	{
		print "File in process: $k_file\n";

		# Count the total of mutations for 6 mutation types on genomic strand
		my ($c_ca6_g, $c_cg6_g, $c_ct6_g, $c_ta6_g, $c_tc6_g, $c_tg6_g) = (0,0,0, 0,0,0);

		if($k_file ne "Pool_Data") { $col_inputNMF++; }

		############### Save the chi2 values into a hash table
		if(-e "$folderChi2/Output_chi2_strandBias.txt")
		{
			chi2hash("$folderChi2/Output_chi2_strandBias.txt", $k_file);
		}

		# Create one workbook for each sample
		if($oneReportPerSample == 1)
		{
			($wb, $ws_sum) = _CreateReportWorkbook("$folderMutAnalysis/Report_Mutation_Spectra-$k_file.xls");

			# Write in the Samples sheet the name and the total number of SBS.
			# The workbook contains only this sample: all the values go on the first row
			# below the header, whatever the rank of the sample.
			my $row_sample = 1;
			$ws_sum->write($row_sample, 0, "$k_file", $format_A10);
			$ws_sum->write($row_sample, 1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $format_A10);
			$ws_sum->write($row_sample, 2, $refH_file->{$k_file}{'TotalIndelGenomic'}, $format_A10);
			$ws_sum->write($row_sample, 3, $refH_file->{$k_file}{'TotalMutGenomic'}, $format_A10);
		}
		# One workbook with all the samples
		else
		{
			# Write in the Samples sheet the name and the total number of SBS
			$ws_sum->write($row_SumSheet, 0, $k_file, $format_A10);
			$ws_sum->write($row_SumSheet, 1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 2, $refH_file->{$k_file}{'TotalIndelGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 3, $refH_file->{$k_file}{'TotalMutGenomic'}, $format_A10);

			# For NMF don't consider the pool of the samples
			if($k_file ne "Pool_Data")
			{
				# Write in the input NMF sheet the name of the samples
				$ws_inputNMF_count->write(0, $col_inputNMF, $k_file);
				$ws_inputNMF_percent->write(0, $col_inputNMF, $k_file);
			}
		}

		# Calculate the correlation between the number of SBS and the size of the chromosome
		PearsonCoefficient($refH_file, $k_file);

		# Add a worksheet to the workbook
		my $ws = $wb->add_worksheet($k_file);

		# Write the titles of the different sections of the report
		WriteBorderSection($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext);

		# Write the mutation types (6 types)
		WriteHeaderSection($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext);


		# Save the figures of each samples in a different folder
		foreach my $subFolder ("Overall_mutation_distribution", "Impact_protein_sequence", "SBS_distribution", "Stranded_Analysis", "Trinucleotide_Sequence_Context")
		{
			my $folderGraphSample = "$folderFigure/$subFolder/$k_file";
			if(!-e $folderGraphSample) { mkdir($folderGraphSample) or die "Can't create the directory $folderGraphSample\n"; }
		}



		##################################################################################################################################################
		################################################################# Write the statistics ##########################################################
		##################################################################################################################################################
		my $row_SBSDistrBySegAndFunc_CG = $rowStart_SBSdistrBySeg+($nb_func*2)+16;


		######## Count of SBS by functional impact on the protein (Table 2) + Create the input for ggplot2 (pie chart with functional impact) + Create the input for ggplot2 (pie chart of SBS vs. Indels)
		writeDistrFuncImpact($ws, $refH_file, $k_file, "$folderFigure/Impact_protein_sequence/$k_file/$k_file-DistributionExoFunc.txt", "$folderFigure/Overall_mutation_distribution/$k_file/$k_file-OverallMutationDistribution.txt");


		######## Result of the chi2 for the strand bias (Table 3) + Create the input for ggplot2 (Strand bias bar graph)
		writeChi2result($wb, $ws, "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandBias.txt", $refH_file, $k_file);


		######## SBS distribution by functional region (Table 4) + Strand bias by functional region (Table 5) + Create the input for ggplot2 (SBS distribution) + Overall count and percent of SBS (Table 1)
		writeStatbyFuncRegion($refH_file, $k_file, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, \$row_SBSDistrBySegAndFunc_CG, "$folderFigure/SBS_distribution/$k_file/$k_file-SBS_distribution.txt");


		######## Distribution of SBS per chromosomes and the result of Pearson test (Table 6)
		writeDistrByChr($ws, $refH_file, $k_file, $row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg, "$folderFigure/Distribution_SBS_Per_Chromosomes/$k_file-DistributionSNVS_per_chromosome.txt");


		######## Trinucleotide sequence context on genomic strand (Panel 1)
		# Represent the trinucleotide with a heatmap with count of SBS
		my $heatmapCountggplot2   = "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapCount-Genomic.txt";
		my $heatmapPercentggplot2 = "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapPercent-Genomic.txt";
		my $triNtBarChartggplot2  = "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-MutationSpectraPercent-Genomic.txt";

		writeTriNtGenomic($ws, $refH_file, $k_file, $colStart_matrixSeqContext, $heatmapCountggplot2, $heatmapPercentggplot2, $triNtBarChartggplot2, \$c_ca6_g, \$c_cg6_g, \$c_ct6_g, \$c_ta6_g, \$c_tc6_g, \$c_tg6_g);

		# For the input matrix for NMF
		if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Sample'}}, $k_file); }


		######## Trinucleotide sequence context on the coding strand (Panel 2)
		my $triNtBarChartCodingCountggplot2   = "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignatureCount.txt";
		my $triNtBarChartCodingPercentggplot2 = "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignaturePercent.txt";

		writeTriNtCoding($ws, $rowStart_SBSdistrBySeg, $colStart_matrixSeqContext, $refH_file, $k_file, $triNtBarChartCodingCountggplot2, $triNtBarChartCodingPercentggplot2);


		######## Generate the figures and include them in the report
		createWriteFigs($ws, $rowStart_SBSdistrBySeg, $colStart_matrixSeqContext, $folderFigure, $k_file, $c_ca6_g, $c_cg6_g, $c_ct6_g, $c_ta6_g, $c_tc6_g, $c_tg6_g);


		# The report of this sample is complete: close it explicitly instead of relying
		# on the destruction of the object when the next workbook is created
		if($oneReportPerSample == 1) { $wb->close(); }

		# Next sample
		$row_SumSheet++;
	} # End $k_file

	######## Write the input matrix for NMF
	# One workbook with all the samples
	writeInputNMF($ws_inputNMF_count, $ws_inputNMF_percent, "$folderNMF/Input_NMF_Count.txt", "$folderNMF/Input_NMF_Frequency.txt");


	# Close the workbook with all the samples (the workbooks by sample are already closed).
	# $wb is still an empty string when no workbook was created.
	if($oneReportPerSample != 1 && ref($wb)) { $wb->close(); }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
606
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
607 # Create logo for representing the sequence context with n bases
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
608 sub CreateLogo
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
609 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
610 my ($refH_file, $folderWebLogo) = @_;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
611
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
612 my $folderSample = "";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
613
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
614 foreach my $k_file (sort keys %{$refH_file})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
615 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
616 $folderSample = "$folderWebLogo/$k_file";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
617 if(!-e $folderSample) { mkdir($folderSample) or die "Can't create the directory $folderSample\n"; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
618
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
619 my $test_lengthSeqContext = 0;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
620
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
621
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
622 foreach my $k_mutation (sort keys %{$refH_file->{$k_file}{'WebLogo3'}})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
623 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
624 $k_mutation =~ /(\w)>(\w)/;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
625 my ($ref, $alt) = ($1, $2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
626
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
627 open(WEBLOGO, ">", "$folderSample/$k_file-$ref$alt.fa") or die "$!: $folderSample/$k_file-$ref$alt.fa\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
628 foreach (@{$refH_file->{$k_file}{'WebLogo3'}{$k_mutation}})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
629 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
630 print WEBLOGO ">$k_file\n$_\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
631
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
632 if(length($_) < 10) { $test_lengthSeqContext = 0; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
633 else { $test_lengthSeqContext = 1; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
634 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
635 close WEBLOGO;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
636 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
637
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
638 ## Generate the logo
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
639 foreach my $fastaFile (`ls $folderSample/*.fa`)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
640 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
641 chomp($fastaFile);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
642 my ($filename, $directories, $suffix) = fileparse("$folderSample/$fastaFile", qr/\.[^.]*/);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
643
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
644 $filename =~ /(.+)\-/;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
645 my $title = $1;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
646
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
647 ## Test if there is fasta sequences for the mutation type
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
648 my $nbLigne_temp = `wc -l $fastaFile`;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
649 my @nbLigne = split(" ", $nbLigne_temp);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
650
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
651
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
652 if($nbLigne[0] == 0) { print "WARNING: No sequence for $filename\n"; next; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
653
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
654 # When the sequence context is shorter than 10 bases, the image is too small for adding a title
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
655 if($test_lengthSeqContext == 1) { system("weblogo -c classic -F png_print -U probability --title $title < $fastaFile > $folderSample/$filename-Probability.png"); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
656 else { system("weblogo -c classic -F png_print -U probability < $fastaFile > $folderSample/$filename-Probability.png"); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
657 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
658 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
659 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
660
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
661
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
662
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
663 ### Save the count of SBS for each file into a hash table
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# File2Hash: read one tab-delimited variant file and accumulate its mutation
# statistics in the shared result hash.
#
# Arguments (positional):
#   $inputFile         path of the file to parse; its first line is skipped as a header
#   $func_value        column index of the functional segment annotation
#   $exonicFunc_value  column index of the exonic function annotation
#   $chr_value         column index of the chromosome name
#   $ref_value         column index of the reference allele
#   $alt_value         column index of the alternate allele
#   $strand_value      column index of the strand ("+" or "-")
#   $contextSeq_value  column index of the flanking sequence context
#   $refH_file         hash reference filled in place; every counter is stored
#                      under the input file's base name (extension removed)
#   $refT_func         array reference of functional segment names used to
#                      initialise the '6mutType' counters
#
# Dies if the input file cannot be opened. Uses the file-level hash
# %chromosomes and delegates the per-mutation-type counting to statPerMutType().
sub File2Hash
{
	my ($inputFile, $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func) = @_;

	# Only the base name is needed: it is the key of every statistic of this file
	my ($filename) = fileparse($inputFile, qr/\.[^.]*/);

	# Initialisation of the hash
	my @tab_mutation = qw(C:G>A:T C:G>G:C C:G>T:A T:A>A:T T:A>C:G T:A>G:C);
	my @tab_aaChange = ("NonTr", "Tr", "TotalMutG");
	my @tabExoFunc   = ("frameshift insertion", "frameshift deletion", "frameshift block substitution", "frameshift substitution", "stopgain", "stoploss", "nonframeshift insertion", "nonframeshift deletion", "nonframeshift substitution", "nonframeshift block substitution", "nonsynonymous SNV", "synonymous SNV", "unknown", "NA");

	# Exonic function labels that must be rewritten to one of the @tabExoFunc names.
	# The annotations have changed after MAJ Annovar 2014Jul22 (stopgain SNV => stopgain),
	# and some inputs use "_" instead of a space. Every target must be a key of
	# 'ImpactSBS', otherwise the variant is reported as "not considered".
	my %exoFuncAlias = (
		"stopgain SNV"      => "stopgain",
		"stoploss SNV"      => "stoploss",
		"stopgain_SNV"      => "stopgain",
		"stoploss_SNV"      => "stoploss",
		"nonsynonymous_SNV" => "nonsynonymous SNV",
		"synonymous_SNV"    => "synonymous SNV",
	);

	# Mutation type (expressed on both strands) of every single base substitution
	my %mutationTypeOf = (
		"C>A" => "C:G>A:T", "G>T" => "C:G>A:T",
		"C>G" => "C:G>G:C", "G>C" => "C:G>G:C",
		"C>T" => "C:G>T:A", "G>A" => "C:G>T:A",
		"T>A" => "T:A>A:T", "A>T" => "T:A>A:T",
		"T>C" => "T:A>C:G", "A>G" => "T:A>C:G",
		"T>G" => "T:A>G:C", "A>C" => "T:A>G:C",
	);

	# Total number of SBS on the genomic strand
	$refH_file->{$filename}{'TotalSBSGenomic'} = 0;
	# Total number of Indel on the genomic strand
	$refH_file->{$filename}{'TotalIndelGenomic'} = 0;
	# Total number of SBS on the coding strand
	$refH_file->{$filename}{'TotalSBSCoding'} = 0;
	# Total number of SBS and Indel on the genomic strand
	$refH_file->{$filename}{'TotalMutGenomic'} = 0;

	# Shortcut to the statistics of this file (same hash as $refH_file->{$filename})
	my $stats = $refH_file->{$filename};


	#####################################################
	#	Initialisation of the tables and hash tables	#
	#####################################################

	## SBS by segment (6 mutation types)
	foreach my $elt_tabFunc (@$refT_func)
	{
		foreach my $elt_tabMutation (@tab_mutation)
		{
			foreach my $elt_aaChange (@tab_aaChange)
			{
				$stats->{'6mutType'}{$elt_tabFunc}{$elt_tabMutation}{$elt_aaChange} = 0;
			}
		}
	}

	## Pearson correlation
	$stats->{'SBSPerChr'}{'AllMutType'} = 0;
	# Count of SBS per chromosome foreach mutation types
	foreach my $elt_tabMutation (@tab_mutation)
	{
		foreach my $chromosome (sort keys %chromosomes) { $stats->{'SBSPerChr'}{$elt_tabMutation}{'CHR'}{$chromosome}{'chr'} = 0; }
		$stats->{'SBSPerChr'}{$elt_tabMutation}{'Pearson'} = 0;
	}
	foreach my $chromosome (sort keys %chromosomes)
	{
		$stats->{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'} = 0;
	}

	## Impact of SBS on protein
	foreach my $elt_exoFunc (@tabExoFunc)
	{
		$stats->{'ImpactSBS'}{$elt_exoFunc} = 0;
	}

	## Sequence context (genomic strand)
	my @tab_mutation2 = qw(C>A C>G C>T T>A T>C T>G);
	my @tab_context   = qw(A_A A_C A_G A_T C_A C_C C_G C_T G_A G_C G_G G_T T_A T_C T_G T_T);
	foreach my $elt_context (@tab_context)
	{
		foreach my $elt_mutation (@tab_mutation2)
		{
			$stats->{'SeqContextG'}{$elt_context}{$elt_mutation} = 0;
		}
	}

	## Sequence context (coding strand)
	my @tab_TrNonTr = qw(NonTr Tr);
	foreach my $elt_context (@tab_context)
	{
		foreach my $elt_mutation (@tab_mutation2)
		{
			foreach my $trNonTr (@tab_TrNonTr)
			{
				$stats->{'SeqContextC'}{$elt_context}{$elt_mutation}{$trNonTr} = 0;
			}
		}
	}


	#####################################################
	#				Parse the input file				#
	#####################################################

	# Three-argument open with a lexical handle: the file name can never be
	# interpreted as an open mode and the handle does not leak outside this sub
	open(my $fh_input, "<", $inputFile) or die "$!: $inputFile\n";
	my $header = <$fh_input>; # The header line is read only to be skipped
	while(my $line = <$fh_input>)
	{
		$line =~ s/[\r\n]+$//;
		my @tab = split("\t", $line);

		### Don't consider random chromosomes and chromosome M
		# NOTE(review): /M/i matches every chromosome name containing the letter "m", not only chrM
		if( ($tab[$chr_value] =~ /random/i) || ($tab[$chr_value] =~ /M/i) ) { next; }


		### Recover the trinucleotide sequence context: Extract the base just before and after the mutation
		my $contextSequence = $tab[$contextSeq_value]; $contextSequence =~ tr/a-z/A-Z/;
		my @tempContextSequence       = split("", $contextSequence);
		my $total_nbBaseContext       = $#tempContextSequence;
		my $midlle_totalNbBaseContext = $total_nbBaseContext/2; # For having the middle of the sequence
		my $before  = $midlle_totalNbBaseContext - 1; my $after = $midlle_totalNbBaseContext + 1;
		my $context = $tempContextSequence[$before]."_".$tempContextSequence[$after];

		# If the sequence context is not recovered correctly the variant is not counted
		my $contextIsInvalid = ( ($context =~ /N/) || (length($context) != 3) );


		### Recover the annotations on the impact on the protein for creating the pie chart
		my $exoFunc = "";
		# Sometimes the annotation is repeated frameshift deletion;frameshift deletion
		if($tab[$exonicFunc_value] =~ /\;/)
		{
			my @temp = split(";", $tab[$exonicFunc_value]);
			# Keep the annotation only when the first two values agree (an annotation like "x;" has a single value)
			if( (scalar(@temp) > 1) && ($temp[0] eq $temp[1]) ) { $exoFunc = $temp[0]; }
		}
		elsif(exists $exoFuncAlias{$tab[$exonicFunc_value]}) { $exoFunc = $exoFuncAlias{$tab[$exonicFunc_value]}; }
		else { $exoFunc = $tab[$exonicFunc_value]; }

		if(exists $stats->{'ImpactSBS'}{$exoFunc})
		{
			if($contextIsInvalid) { next; }

			$stats->{'ImpactSBS'}{$exoFunc}++;
			$stats->{'TotalMutGenomic'}++;
		}
		else { print "WARNING: Exonic function not considered: $exoFunc\n"; }

		#### Only SBS are considered for the statistics
		if( ($tab[$ref_value] =~ /^[ACGT]$/i) && ($tab[$alt_value] =~ /^[ACGT]$/i) )
		{
			if($contextIsInvalid) { next; }

			# Total number of SBS on the genomic strand
			$stats->{'TotalSBSGenomic'}++;

			# Total number of SBS on the coding strand with a sequence context
			# (the context is already known to be valid at this point)
			if( ($tab[$strand_value] eq "+") || ($tab[$strand_value] eq "-") )
			{
				$stats->{'TotalSBSCoding'}++;
			}
		}
		else { $stats->{'TotalIndelGenomic'}++; }

		### Number of SBS per chromosome
		my $chrNameForH = $tab[$chr_value];
		if(exists $stats->{'SBSPerChr'}{'TotalPerChr'}{$chrNameForH}{'chr'})
		{
			$stats->{'SBSPerChr'}{'TotalPerChr'}{$chrNameForH}{'chr'}++;
		}


		#### Some func value are repeated and separated by ";": keep the first one
		my $funcSegment = "";
		if($tab[$func_value] =~ /;/) { my @temp = split(";", $tab[$func_value]); $funcSegment = $temp[0]; }
		else { $funcSegment = $tab[$func_value]; }


		#####################################################
		#	Calculate the statistics for each mutation type	#
		#####################################################
		# The lookup is case sensitive: only upper case alleles are assigned to a mutation type.
		# statPerMutType() uses "next" to skip the current input line, so it must be
		# called directly from the body of this while loop (not from a nested loop).
		my $mutation = $mutationTypeOf{$tab[$ref_value].">".$tab[$alt_value]};
		if(defined $mutation)
		{
			statPerMutType($filename, \@tab, $ref_value, $alt_value, \@tempContextSequence, $before, $after, $context, $funcSegment, $mutation, $refH_file, $chrNameForH, $strand_value, $midlle_totalNbBaseContext);
		}
	}
	close $fh_input;
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
868
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
869 ### Count the number of SBS for 12 and 6 categories
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
870 sub statPerMutType
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
871 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
872 my ($filename, $refTab, $ref_value, $alt_value, $refTab_tempSeqContext, $before, $after, $context, $funcSegment, $mutation, $refH_file, $chrNameForH, $strand_value, $midlle_totalNbBaseContext) = @_;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
873
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
874 my @tab = @$refTab;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
875 my @tempContextSequence = @$refTab_tempSeqContext;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
876
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
877
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
878 # Split the mutations
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
879 $mutation =~ /(\w)\:(\w)\>(\w)\:(\w)/;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
880 my ($ref1, $ref2, $alt1, $alt2) = ($1, $2, $3, $4);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
881
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
882 # Count the total number of mutations
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
883 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
884
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
885 # Pearson correlation
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
886 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
887 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
888 $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
889 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
890
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
891 #### Sequence context - 6 mutation types - genomic strand
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
892 my $mutationSeqContext6mutType = "$ref1>$alt1";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
893 # We want to express the mutation in C> or T>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
894 if( ($tab[$ref_value] eq $ref2) && ($tab[$alt_value] eq $alt2) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
895 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
896 my $base3 = complement($tempContextSequence[$before]);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
897 my $base5 = complement($tempContextSequence[$after]);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
898 my $context_reverse = $base5."_".$base3;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
899 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
900 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
901 $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
902 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
903 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
904 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
905 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
906 $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
907 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
908
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
909
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
910 #### Strand analysis C>N and T>N on NonTr strand
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
911 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq $ref1)&&($tab[$alt_value] eq $alt1))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq $ref2)&&($tab[$alt_value] eq $alt2))) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
912 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
913 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
914 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
915 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
916 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
917
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
918 # C>A With the sequence context (C>N and T>N on strand +)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
919 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq $ref1)&&($tab[$alt_value] eq $alt1)) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
920 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
921 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{$mutationSeqContext6mutType}{'NonTr'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
922 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
923 $refH_file->{$filename}{'SeqContextC'}{$context}{$mutationSeqContext6mutType}{'NonTr'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
924 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
925 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
926 # C>A With the sequence context (G>N and A>N on strand -)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
927 else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
928 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
929 my $base3 = complement($tempContextSequence[$before]);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
930 my $base5 = complement($tempContextSequence[$after]);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
931 my $context_reverse = $base5."_".$base3;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
932
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
933 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{$mutationSeqContext6mutType}{'NonTr'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
934 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
935 $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{$mutationSeqContext6mutType}{'NonTr'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
936 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
937 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
938 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
939
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
940 #### Strand analysis C>N and T>N on Tr strand
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
941 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq $ref1)&&($tab[$alt_value] eq $alt1))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq $ref2)&&($tab[$alt_value] eq $alt2))) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
942 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
943 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
944 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
945 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
946 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
947
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
948 # C>N and T>N With the sequence context (strand -)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
949 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq $ref1)&&($tab[$alt_value] eq $alt1)) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
950 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
951 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{$mutationSeqContext6mutType}{'Tr'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
952 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
953 $refH_file->{$filename}{'SeqContextC'}{$context}{$mutationSeqContext6mutType}{'Tr'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
954 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
955 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
956 # C>N and T>N with the sequence context (strand +)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
957 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq $ref2)&&($tab[$alt_value] eq $alt2)) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
958 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
959 my $base3 = complement($tempContextSequence[$before]);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
960 my $base5 = complement($tempContextSequence[$after]);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
961 my $context_reverse = $base5."_".$base3;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
962 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{$mutationSeqContext6mutType}{'Tr'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
963 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
964 $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{$mutationSeqContext6mutType}{'Tr'}++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
965 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
966 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
967 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
968
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
969 #### WebLogo-3
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
970 if(($tab[$ref_value] eq $ref1) && ($tab[$alt_value] eq $alt1))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
971 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
972 # For the logo all the sequences must have the same length
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
973 if(scalar(@tempContextSequence) == 2) { next; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
974 my ($contextTemp1, $contextTemp2) = ("", "");
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
975 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
976 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
977 my $context = $contextTemp1.$ref1.$contextTemp2;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
978 push(@{$refH_file->{$filename}{'WebLogo3'}{$mutationSeqContext6mutType}}, $context);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
979 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
980 else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
981 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
982
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
983 if(scalar(@tempContextSequence) == 2) { next; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
984 my ($contextTemp1, $contextTemp2) = ("", "");
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
985 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
986 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
987 my $context = $contextTemp1.$ref1.$contextTemp2; $context = reverse $context;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
988 push(@{$refH_file->{$filename}{'WebLogo3'}{$mutationSeqContext6mutType}}, $context);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
989 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
990 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
991
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Calculate the correlation between the number of SBS and the size of the chromosome.
#
# Arguments:
#   $refH_file : reference to the hash holding the statistics of the samples
#   $filename  : key of the sample to process in $refH_file
#
# The results are stored in place:
#   $refH_file->{$filename}{'SBSPerChr'}{<mutation type>}{'Pearson'} : Pearson per mutation type
#   $refH_file->{$filename}{'SBSPerChr'}{'AllMutType'}               : Pearson for all mutation types
#
# The chromosome sizes are read from the file-level hash %chromosomes (chromosome => size).
# Returns the Pearson coefficient computed for all mutation types.
sub PearsonCoefficient
{
	# NOTE(review): kept as package variables ("our") exactly like the previous
	# version; confirm nothing else reads them before switching to "my".
	our ($refH_file, $filename) = @_;

	# Same autovivification as dereferencing the full path directly
	my $refH_SBSPerChr = ($refH_file->{$filename}{'SBSPerChr'} ||= {});

	#### Pearson per mutation type
	foreach my $k_mutation (sort keys %{$refH_SBSPerChr})
	{
		# These keys are not mutation types
		if( ($k_mutation eq "AllMutType") || ($k_mutation eq "TotalPerChr") || ($k_mutation eq "ChrSize") ) { next; }

		my $refH_countPerChr = ($refH_SBSPerChr->{$k_mutation}{'CHR'} ||= {});
		$refH_SBSPerChr->{$k_mutation}{'Pearson'} = pearsonChrSizeVsSBS($refH_countPerChr);
	}

	#### Pearson for all mutation types
	my $refH_totalPerChr = ($refH_SBSPerChr->{'TotalPerChr'} ||= {});
	my $correlation      = pearsonChrSizeVsSBS($refH_totalPerChr);
	$refH_SBSPerChr->{'AllMutType'} = $correlation;

	return $correlation;
}

# Build the 2D array (chromosome size, count of SBS) and compute its Pearson coefficient.
#
# $refH_countPerChr is a hash {chromosome}{'chr'} => count of SBS.
# Returns 0 when all the counts are equal to zero (or when there is no chromosome),
# otherwise the value returned by correlation().
sub pearsonChrSizeVsSBS
{
	my ($refH_countPerChr) = @_;

	# Rows are stored from index 1: $x->[row][1] is the chromosome size and
	# $x->[row][2] is the count of SBS
	my $x      = [];
	my $row    = 0;
	my $nbZero = 0;

	# Sorted keys: deterministic order and no dependency on the state of the each() iterator
	foreach my $chromosome (sort keys %chromosomes)
	{
		my $count = $refH_countPerChr->{$chromosome}{'chr'};
		if(! defined $count) { $count = 0; } # A chromosome without any SBS counts for zero

		$row++;
		$x->[$row][1] = $chromosomes{$chromosome};
		$x->[$row][2] = $count;
		if($count == 0) { $nbZero++; }
	}

	# The correlation is not defined if all the variables are equal to zero
	if($nbZero == $row) { return 0; }

	return correlation($x);
}

# Pearson coefficient of a 2D array whose rows start at index 1
# (column 1 = x, column 2 = y). Returns a string formatted with two decimals.
sub correlation
{
	my ($x) = @_;
	my ($mean_x, $mean_y) = mean($x);
	# Each sum of squares is centred on the mean of its own column(s)
	my $ssxx = ss($x, $mean_x, $mean_x, 1, 1);
	my $ssyy = ss($x, $mean_y, $mean_y, 2, 2);
	my $ssxy = ss($x, $mean_x, $mean_y, 1, 2);
	my $correl  = correl($ssxx, $ssyy, $ssxy);
	my $xcorrel = sprintf("%.2f", $correl);
	return($xcorrel);
}

# Mean of column 1 and of column 2 over all the rows (index 1 to the last one).
# Returns (0, 0) for an array without any row instead of dividing by zero.
sub mean
{
	my ($x) = @_;
	my $num = scalar(@{$x}) - 1; # Index 0 is never used
	if($num < 1) { return(0, 0); }

	my $sum_x = 0;
	my $sum_y = 0;
	for (my $i = 1; $i < scalar(@{$x}); ++$i)
	{
		$sum_x += $x->[$i][1];
		$sum_y += $x->[$i][2];
	}
	my $mu_x = $sum_x / $num;
	my $mu_y = $sum_y / $num;
	return($mu_x, $mu_y);
}

### ss = sum of squared (deviations to the mean)
# Sums ($x->[row][$one] - $mean_x) * ($x->[row][$two] - $mean_y) over all the rows
# (index 1 to the last one).
sub ss
{
	my ($x, $mean_x, $mean_y, $one, $two) = @_;
	my $sum = 0;
	for (my $i = 1; $i < scalar(@{$x}); ++$i)
	{
		$sum += ($x->[$i][$one]-$mean_x)*($x->[$i][$two]-$mean_y);
	}
	return $sum;
}

# Combine the sums of squares into the correlation coefficient.
# Returns 0 when one of the two variables has no variance.
sub correl
{
	my ($ssxx, $ssyy, $ssxy) = @_;

	my ($sign, $correl) = (0, 0);
	if(abs($ssxy) == 0) { $sign = 0; }
	else                { $sign = $ssxy/abs($ssxy); }

	if( ($ssxx==0) || ($ssyy==0) ) { $correl = 0; }
	else                           { $correl = $sign*sqrt($ssxy*$ssxy/($ssxx*$ssyy)); }

	return $correl;
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1107
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Save the output of the chi2 into a hash table for writing the results in the Excel file.
#
# Arguments:
#   $outputChi2 : path to the tab-delimited file containing the result of the chi2 test
#   $k_file     : name of the sample; only the lines of this sample are saved
#
# For each line of the sample the file-level hash %h_chi2 is filled with the keys
# 'NonTr', 'Tr', 'p-value', 'ConfInt' and 'FDR' under {sample name}{mutation type}.
# Dies if the file can't be opened.
sub chi2hash
{
	my ($outputChi2, $k_file) = @_;

	# Three-argument open with a lexical filehandle: the name of the file is never
	# interpreted as an open mode and no global handle is shared with other functions
	open(my $fh_chi2, '<', $outputChi2) or die "$!: $outputChi2\n";
	my $header = <$fh_chi2>;
	# Strand_Bias($tab[0])	NonTr-Tr($tab[1])	Proportion($tab[2])	P-val-Chi2($tab[3])	FDR($tab[4])	Confidence Interval($tab[5])	Mutation_Type($tab[6])	SampleName($tab[7])
	while(my $line = <$fh_chi2>) # Lexical variable: the caller's $_ is left untouched
	{
		$line =~ s/[\r\n]+$//;
		my @tab = split("\t", $line);

		# Empty or truncated lines don't have a sample name
		if(! defined $tab[7])   { next; }
		if($tab[7] ne $k_file)  { next; }

		my ($sample, $mutType) = ($tab[7], $tab[6]);

		if($tab[1] eq "NA")
		{
			$h_chi2{$sample}{$mutType}{'NonTr'} = "NA";
			$h_chi2{$sample}{$mutType}{'Tr'}    = "NA";
		}
		else
		{
			my ($nonTr, $tr) = split("-", $tab[1]);

			$h_chi2{$sample}{$mutType}{'NonTr'} = $nonTr;
			$h_chi2{$sample}{$mutType}{'Tr'}    = $tr;
		}

		$h_chi2{$sample}{$mutType}{'p-value'} = $tab[3];
		$h_chi2{$sample}{$mutType}{'ConfInt'} = $tab[5];

		# For the pool data the FDR isn't calculated so replace the NA (=Missing values) by "-"
		if($sample eq "Pool_Data") { $h_chi2{$sample}{$mutType}{'FDR'} = "-"; }
		else                       { $h_chi2{$sample}{$mutType}{'FDR'} = $tab[4]; }
	}
	close $fh_chi2;
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1153
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
### Complement bases (for the sequence context)
# Takes one base and returns its complement: A <-> T and C <-> G (upper case only).
# Any other value gives an empty (false) result.
sub complement
{
	my ($base) = @_;

	my %pairing = ("A" => "T", "C" => "G", "G" => "C", "T" => "A");

	if(exists $pairing{$base}) { return $pairing{$base}; }

	# Not one of the four bases: empty string, which is also false
	return !1;
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1162
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
### Recover the functional region for all the samples. Allows to have the same annotations for the pie chart "Impact on protein sequence"
#
# Argument:
#   $func_name : name of the column containing the functional annotation
#
# Every file present in the folder $folderCheckedForStat (file-level variable) is read.
# Returns the sorted list of the distinct annotations found in the column $func_name.
# When a value contains several annotations separated by ";" only the first one is kept.
sub recoverAnnovarAnnotation
{
	my ($func_name) = @_;

	my %hash = ();

	# The input is a folder. The folder is read directly instead of calling "ls" through
	# a shell: a path containing spaces or special characters is handled correctly
	my @files = ();
	if(opendir(my $dh_folder, $folderCheckedForStat))
	{
		# Hidden files are ignored as they were by "ls folder/*"
		@files = map { "$folderCheckedForStat/$_" } sort grep { $_ !~ /^\./ } readdir($dh_folder);
		closedir($dh_folder);
	}
	else
	{
		# As before, a folder that can't be listed gives an empty list of annotations
		print STDERR "Warning recoverAnnovarAnnotation(): can't read the folder $folderCheckedForStat: $!\n";
	}

	foreach my $file (@files)
	{
		# Only regular files can be parsed
		if(! -f $file) { next; }

		my $AV_annotation_value = recoverNumCol($file, $func_name);

		open(my $fh_file, '<', $file) or die "$!: $file\n";
		my $header = <$fh_file>;
		while(my $line = <$fh_file>) # Lexical variable: the caller's $_ is left untouched
		{
			$line =~ s/[\r\n]+$//;
			my @tab = split("\t", $line);

			# Some files can have an empty line at the end and WE DON'T WANT to consider it
			if(! defined $tab[0]) { next; }

			# A line without value for the annotation is saved as an empty annotation
			my $funcValue = $tab[$AV_annotation_value];
			if(! defined $funcValue) { $funcValue = ""; }

			# Some func value are repeated and separated by ";"
			my $funcSegment = $funcValue;
			if($funcValue =~ /;/)
			{
				($funcSegment) = split(";", $funcValue);
				if(! defined $funcSegment) { $funcSegment = ""; }
			}

			$hash{$funcSegment} = "";
		}
		close $fh_file;
	}

	my @tab_AVAnnotation = sort keys %hash;
	return @tab_AVAnnotation;
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1202
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Recover the number (0-based) of a column from its name.
#
# Arguments:
#   $input          : path to a tab-delimited file whose first line contains the name of the columns
#   $name_of_column : name of the column to look for (exact match)
#
# Returns the index of the first column having this name.
# Dies if the file can't be opened. Prints an error on STDERR and exits with the
# status 3 if the column isn't present in the header.
sub recoverNumCol
{
	my ($input, $name_of_column) = @_;

	# Three-argument open with a lexical filehandle: the name of the file is used as it is
	# (no mode or whitespace interpretation) and no global handle is shared
	open(my $fh_input, '<', $input) or die "recoverNumCol: $!: $input\n";
	# For having the name of the columns
	my $search_header = <$fh_input>;
	close $fh_input;

	# An empty file doesn't have any header and so doesn't have any column
	my @tab_search_header = ();
	if(defined $search_header)
	{
		$search_header =~ s/[\r\n]+$//;
		@tab_search_header = split("\t", $search_header);
	}

	# The number of the column
	for(my $i=0; $i<=$#tab_search_header; $i++)
	{
		if($tab_search_header[$i] eq $name_of_column) { return $i; }
	}

	print STDERR "Error recoverNumCol(): the column named $name_of_column doesn't exits in the input file $input!!!!!\n";
	exit 3;
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1220
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1221
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1222
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1223 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1224 # Functions for writing in the Excel report #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1225 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1226 # Write the header for the six mutation types
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1227 sub WriteHeaderSection
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1228 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1229 our ($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext) = @_;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1230
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1231 our ($format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG, $format_TG2, $format_LeftHeader, $format_RightHeader, $format_LeftHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1232 Format_Header($wb, \$format_CA, \$format_CG, \$format_CT, \$format_TA, \$format_TC, \$format_TG, \$format_TG2, \$format_LeftHeader, \$format_RightHeader, \$format_LeftHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1233
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1234 our ($format_LeftCA, $format_LeftCG, $format_LeftCT, $format_LeftTA, $format_LeftTC, $format_LeftTG, $format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1235 Format_HeaderSBSDistrBySegAndFunc($wb, \$format_LeftCA, \$format_LeftCG, \$format_LeftCT, \$format_LeftTA, \$format_LeftTC, \$format_LeftTG, \$format_RightCA, \$format_RightCG, \$format_RightCT, \$format_RightTA, \$format_RightTC, \$format_RightTG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1236
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1237 our $format_A11Bold = ""; Format_A11Bold($wb, \$format_A11Bold); # Arial 11 bold and center
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1238 our $format_A11BoldLeft = ""; Format_A11BoldLeft($wb, \$format_A11BoldLeft); # Arial 11 bold and left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1239
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1240 our ($format_header12CA, $format_header12CG, $format_header12CT, $format_header12TA, $format_header12TC, $format_header12TG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1241 Format_Header12MutType($wb, \$format_header12CA, \$format_header12CG, \$format_header12CT, \$format_header12TA, \$format_header12TC, \$format_header12TG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1242
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1243 ## Header for SBS distribution by segment
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1244 HeaderMutTypeSBSDistrBySeg();
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1245
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1246 ## Header for strand bias by function
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1247 $ws->set_column($colStart_SBSdistrBySeg+5, $colStart_SBSdistrBySeg+5, 11);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1248
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1249 my $row = $rowStart_SBSdistrBySeg+$nb_func+10; my $col = $colStart_SBSdistrBySeg;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1250 $ws->write($row, $col+1, ' ', $format_CA); $ws->write($row, $col+2, "C>A", $format_CA); $ws->write($row, $col+3, ' ', $format_CA);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1251 $ws->write($row, $col+5, ' ', $format_CG); $ws->write($row, $col+6, "C>G", $format_CG); $ws->write($row, $col+7, ' ', $format_CG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1252 $ws->write($row, $col+9, ' ', $format_CT); $ws->write($row, $col+10, "C>T", $format_CT); $ws->write($row, $col+11, ' ', $format_RightCT);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1253
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1254 $row = $rowStart_SBSdistrBySeg+($nb_func*2)+14;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1255 $ws->write($row, $col+1, ' ', $format_TA); $ws->write($row, $col+2, "T>A", $format_TA); $ws->write($row, $col+3, ' ', $format_TA);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1256 $ws->write($row, $col+5, ' ', $format_TC); $ws->write($row, $col+6, "T>C", $format_TC); $ws->write($row, $col+7, ' ', $format_TC);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1257 $ws->write($row, $col+9, ' ', $format_TG2); $ws->write($row, $col+10, "T>G", $format_TG2); $ws->write($row, $col+11, ' ', $format_RightTG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1258
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1259 $ws->set_row($rowStart_SBSdistrBySeg+$nb_func+11, 18); $ws->set_row($rowStart_SBSdistrBySeg+($nb_func*2)+15, 18);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1260 $ws->set_column($colStart_SBSdistrBySeg+5, $colStart_SBSdistrBySeg+5, 13); $ws->set_column($colStart_SBSdistrBySeg+9, $colStart_SBSdistrBySeg+9, 13);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1261
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1262 for(my $i=$rowStart_SBSdistrBySeg+$nb_func+10; $i<=$rowStart_SBSdistrBySeg+($nb_func*2)+14; $i+=$nb_func+4)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1263 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1264 $ws->write($i+1, $colStart_SBSdistrBySeg, 'Segment', $format_LeftHeader); $ws -> write($i+1, $colStart_SBSdistrBySeg+1, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+2, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+3, 'Tr', $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1265 $ws -> write($i+1, $colStart_SBSdistrBySeg+5, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+6, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+7, 'Tr', $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1266 $ws -> write($i+1, $colStart_SBSdistrBySeg+9, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+10, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+11, 'Tr', $format_RightHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1267 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1268
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1269
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1270 ## Header for Counts of SBS per chromosome and mutation type
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1271 HeaderCountSBSPerChr();
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1272
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1273 ## Header for the short sequence context
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1274 HeaderShortTriNtContext();
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1275
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1276 ## Header for the 12 mutation types with the sequence context (coding strand)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1277 HeaderLongTriNtContext();
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1278
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Write the header of the table "SBS distribution by segment".
# First line: the six mutation types (each one covers two columns).
# Second line: "Segment", "Total SBS" then "%" and "#" for each mutation type.
# Uses the variables declared in WriteHeaderSection ($ws, $rowStart_SBSdistrBySeg,
# $colStart_SBSdistrBySeg and the formats).
sub HeaderMutTypeSBSDistrBySeg
{
	my $rowMutType = $rowStart_SBSdistrBySeg+2;
	my $rowColName = $rowStart_SBSdistrBySeg+3;
	my $firstCol   = $colStart_SBSdistrBySeg+2;  # First column of the mutation types
	my $lastCol    = $colStart_SBSdistrBySeg+13; # Last column of the table

	## The six mutation types
	$ws->set_row($rowMutType, 18);
	my @mutTypes = ( ["C:G>A:T", $format_CA], ["C:G>G:C", $format_CG], ["C:G>T:A", $format_CT], ["T:A>A:T", $format_TA], ["T:A>C:G", $format_TC], ["T:A>G:C", $format_TG] );
	my $colMutType = $firstCol;
	foreach my $refT_mutType (@mutTypes)
	{
		my ($label, $format) = @{$refT_mutType};
		$ws->write($rowMutType, $colMutType, $label, $format); $ws->write_blank($rowMutType, $colMutType+1, $format);
		$colMutType += 2;
	}

	## The name of the columns
	$ws->write($rowColName, $colStart_SBSdistrBySeg, "Segment", $format_LeftHeader); $ws->set_column($colStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, 13); $ws->set_row($rowColName, 18);
	$ws->write($rowColName, $colStart_SBSdistrBySeg+1, "Total SBS", $format_A11Bold); $ws->set_column($colStart_SBSdistrBySeg+1, $colStart_SBSdistrBySeg+1, 11);

	for(my $col=$firstCol; $col<$lastCol; $col+=2)
	{
		$ws->write($rowColName, $col, "%", $format_A11Bold);
		# The last "#" closes the table on the right. Its column is relative to the start
		# of the table like all the other ones (it was hard-coded to 13)
		my $format_count = ($col+1 == $lastCol) ? $format_RightHeader : $format_A11Bold;
		$ws->write($rowColName, $col+1, "#", $format_count);
	}
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1298
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1299 sub HeaderCountSBSPerChr # Write the header cells of the per-chromosome SBS table. Takes no arguments: $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func and the $format_* objects are not declared in this sub and must come from the enclosing scope (declaration not visible here).
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1300 { # Every row below is $rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2) plus a small offset: +8 is the header row, +7 the row just above it
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1301 $ws->set_column(3,3, 10); $ws->set_column(4,4, 10); # Width 10 for columns 3 and 4 (absolute indices, not offsets from $colStart_SBSdistrBySeg)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1302 $ws->set_row($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, 18); # Height 18 for the header row
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1303 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+7, $colStart_SBSdistrBySeg+1, "Pearson", $format_A11Bold); # Label one row above the header row, in the same column as the Size header
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1304 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg, "Chr", $format_LeftHeader); # Header row, columns +0 to +2: chromosome, size, total SBS
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1305 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+1, "Size", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1306 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+2, "All SBS", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1307 # Header row, columns +3 to +8: one column per mutation type, each written with its own $format_XY
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1308 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+3, "C:G>A:T", $format_CA);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1309 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+4, "C:G>G:C", $format_CG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1310 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+5, "C:G>T:A", $format_CT);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1311 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+6, "T:A>A:T", $format_TA);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1312 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+7, "T:A>C:G", $format_TC);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1313 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+8, "T:A>G:C", $format_TG);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1314 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1315
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1316 sub HeaderShortTriNtContext # Write the fixed labels of the 6-mutation-type trinucleotide panel at absolute rows 2, 3 and 25, with columns relative to $colStart_matrixSeqContext. Takes no arguments; $ws and the $format_* objects come from the enclosing scope (declaration not visible here).
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1317 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1318 ### GENOMIC STRAND: count matrix title on row 2, its six mutation-type headers on row 3 at columns +4 to +9
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1319 $ws->write(2, $colStart_matrixSeqContext, 'Count matrix', $format_LeftHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1320 $ws->write(3, $colStart_matrixSeqContext+4, 'C>A', $format_CA); $ws->write(3, $colStart_matrixSeqContext+5, 'C>G', $format_CG); $ws->write(3, $colStart_matrixSeqContext+6, 'C>T', $format_CT); $ws->write(3, $colStart_matrixSeqContext+7, 'T>A', $format_TA); $ws->write(3, $colStart_matrixSeqContext+8, 'T>C', $format_TC); $ws->write(3, $colStart_matrixSeqContext+9, 'T>G', $format_TG2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1321 # Frequency matrix: same two rows, title at column +11 and mutation-type headers at columns +14 to +19 (ten columns right of the count headers)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1322 $ws->write(2, $colStart_matrixSeqContext+11, 'Frequency matrix', $format_A11BoldLeft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1323 $ws->write(3, $colStart_matrixSeqContext+14, 'C>A', $format_CA); $ws->write(3, $colStart_matrixSeqContext+15, 'C>G', $format_CG); $ws->write(3, $colStart_matrixSeqContext+16, 'C>T', $format_CT); $ws->write(3, $colStart_matrixSeqContext+17, 'T>A', $format_TA); $ws->write(3, $colStart_matrixSeqContext+18, 'T>C', $format_TC); $ws->write(3, $colStart_matrixSeqContext+19, 'T>G', $format_TG2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1324
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1325 ### Sequence context with a bar graph: only the caption is written here (row 25, column +10)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1326 $ws->write(25, $colStart_matrixSeqContext+10, "Mutation spectra frequency", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1327 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1328
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1329 sub HeaderLongTriNtContext # Write the headers of the stranded (NonTr/Tr) trinucleotide panel: a count matrix starting at row $rowStart_SBSdistrBySeg+3 and a frequency matrix starting at row +24, with columns relative to $colStart_matrixSeqContext. Takes no arguments; $ws and the $format_* objects come from the enclosing scope (declaration not visible here).
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1330 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1331 $ws->set_row($rowStart_SBSdistrBySeg+3, 15); $ws->set_row($rowStart_SBSdistrBySeg+4, 15); $ws->set_row($rowStart_SBSdistrBySeg+5, 15); # Height 15 for the three header rows: title (+3), mutation type (+4), strand (+5)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1332 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_matrixSeqContext, "Count matrix", $format_LeftHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1333 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+1, "C>A", $format_CA); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+2, $format_CA); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+1, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+2, "Tr", $format_A11Bold); # Pattern repeated for each mutation type: label on the odd column, blank cell with the same format on the next column, then NonTr and Tr one row below
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1334 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+3, "C>G", $format_CG); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+4, $format_CG); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+3, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+4, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1335 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+5, "C>T", $format_CT); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+6, $format_CT); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+5, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+6, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1336 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+7, "T>A", $format_TA); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+8, $format_TA); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+7, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+8, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1337 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+9, "T>C", $format_TC); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+10, $format_TC); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+9, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+10, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1338 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+11, "T>G", $format_TG2); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+12, $format_TG2); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+11, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+12, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1339
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1340 # Same three-row header again, 21 rows lower (+24, +25, +26), for the frequency matrix
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1341 $ws->set_row($rowStart_SBSdistrBySeg+24, 15); $ws->set_row($rowStart_SBSdistrBySeg+25, 15); $ws->set_row($rowStart_SBSdistrBySeg+26, 15);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1342 $ws->write($rowStart_SBSdistrBySeg+24, $colStart_matrixSeqContext, "Frequency matrix", $format_LeftHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1343 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+1, "C>A", $format_CA); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+2, $format_CA); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+1, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+2, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1344 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+3, "C>G", $format_CG); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+4, $format_CG); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+3, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+4, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1345 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+5, "C>T", $format_CT); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+6, $format_CT); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+5, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+6, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1346 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+7, "T>A", $format_TA); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+8, $format_TA); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+7, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+8, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1347 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+9, "T>C", $format_TC); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+10, $format_TC); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+9, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+10, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1348 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+11, "T>G", $format_TG2); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+12, $format_TG2); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+11, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+12, "Tr", $format_A11Bold);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1349 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1350 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1351 # Write the section titles (Tables 4 to 6, Panels 1 and 2) together with the border cells that frame each section of the report
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1352 sub WriteBorderSection # Arguments, in order: workbook, worksheet, first row and first column of the SBS-by-segment table, number of functional regions, first column of the sequence-context matrices
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1353 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1354 our ($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext) = @_; # Stored in package variables (our) because the nested subs below are called without arguments and read these names directly
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1355
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1356 our ($format_topLeft, $format_topRight, $format_bottomLeft, $format_bottomRight, $format_top, $format_right, $format_bottom, $format_left); # One format per border position, shared with the nested subs
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1357 Format_section($wb, \$format_topLeft, \$format_topRight, \$format_bottomLeft, \$format_bottomRight, \$format_top, \$format_right, \$format_bottom, \$format_left); # Receives references to the eight variables; presumably creates the formats in $wb and stores them through the references (Format_section is defined elsewhere, TODO confirm)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1358
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1359 TableSBSDistrBySeg(); # Table 4
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1360 TableStrandBiasBySegment(); # Table 5
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1361 CountSBSPerChr(); # Table 6
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1362 ShortTriNtContext(); # Panel 1: 6 mutation types
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1363 LongTriNtContext(); # Panel 2: 12 mutation types
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1364
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1365 sub TableSBSDistrBySeg # Write the title of Table 4 and some of its border cells, anchored at ($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg) and spanning columns +0 to +13. Only the cells listed below are written; no other edge cell is handled in this sub.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1366 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1367 # Top-Left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1368 $ws->write($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, "Table 4. SBS distribution by functional region", $format_topLeft); $ws->set_row($rowStart_SBSdistrBySeg, 18); # Set the height of the row to 0.25"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1369 # Top (NOTE(review): the loop runs up to column +13, the same cell the Top-Right write below targets again; confirm whether +12 was intended)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1370 for(my $i=1; $i<=13; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg+$i, $format_top); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1371 # Top-Right
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1372 $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg+13, $format_topRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1373 # Right (only the row directly under the title)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1374 $ws->write_blank($rowStart_SBSdistrBySeg+1, $colStart_SBSdistrBySeg+13, $format_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1375 # Bottom-left (row offset depends on the number of functional regions: $nb_func+5)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1376 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+5, $colStart_SBSdistrBySeg, $format_bottomLeft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1377 # Left (rows +1 and +2 only)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1378 $ws->write_blank($rowStart_SBSdistrBySeg+1, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg, $format_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1379 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1380
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1381 sub TableStrandBiasBySegment # Write the title of Table 5 and some of its border cells. The title row is $rowStart_SBSdistrBySeg+$nb_func+8 and the table spans columns +0 to +11; all row offsets grow with $nb_func.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1382 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1383 # Top-Left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1384 $ws->write($rowStart_SBSdistrBySeg+$nb_func+8, $colStart_SBSdistrBySeg, "Table 5. Strand bias by functional region", $format_topLeft); $ws->set_row($rowStart_SBSdistrBySeg+$nb_func+8, 18); # Set the height of the row to 0.25"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1385 # Top (columns +1 to +10)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1386 for(my $i=1; $i<=10; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+8, $colStart_SBSdistrBySeg+$i, $format_top); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1387 # Top-Right
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1388 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+8, $colStart_SBSdistrBySeg+11, $format_topRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1389 # Right (two rows only: $nb_func+9 and ($nb_func*2)+13)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1390 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+9, $colStart_SBSdistrBySeg+11, $format_right); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*2)+13, $colStart_SBSdistrBySeg+11, $format_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1391 # Left (four rows: $nb_func+9, $nb_func+10, ($nb_func*2)+13 and ($nb_func*2)+14)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1392 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+9, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+10, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*2)+13, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*2)+14, $colStart_SBSdistrBySeg, $format_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1393 # Bottom (row ($nb_func*3)+16, columns +4 and +8 only)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1394 $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*3)+16, $colStart_SBSdistrBySeg+4, $format_bottom); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*3)+16, $colStart_SBSdistrBySeg+8, $format_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1395 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1396
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1397 sub CountSBSPerChr # Write the title of Table 6 and some of its border cells. Every row is $rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2) plus an offset; the table spans columns +0 to +8. The bottom row depends on the file-level $refGenome.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1398 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1399 #### Top-Left (title on the +4 row)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1400 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, $colStart_SBSdistrBySeg, "Table 6. SBS distribution per chromosome", $format_topLeft); $ws->set_row($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, 18); # Set the height of the row to 0.25"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1401 #### Top (columns +1 to +7)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1402 for(my $i=1; $i<8; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, $colStart_SBSdistrBySeg+$i, $format_top); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1403 #### Top-Right
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1404 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, $colStart_SBSdistrBySeg+8, $format_topRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1405 #### Right (rows +5 and +6 only)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1406 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+5, $colStart_SBSdistrBySeg+8, $format_right); $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+6, $colStart_SBSdistrBySeg+8, $format_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1407
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1408 #### Bottom-Right (bottom row offset is 33, 30 or 31; these differ by the same amounts as the chromosome counts 24, 21 and 22 quoted below. If $refGenome matches none of hg, mm, rn, no bottom cell is written at all)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1409 # Human genome = 24 chromosomes
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1410 if($refGenome =~ /hg/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg+8, $format_bottomRight); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1411 # Mouse genome = 21 chromosomes
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1412 if($refGenome =~ /mm/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg+8, $format_bottomRight); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1413 # Rat genome = 22 chromosomes
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1414 if($refGenome =~ /rn/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg+8, $format_bottomRight); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1415
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1416 #### Bottom (column +1 and columns +3 to +7 on the genome-specific bottom row; column +2 is not written here)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1417 if($refGenome =~ /hg/)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1418 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1419 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg+1, $format_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1420 for(my $i=3; $i<=7; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg+$i, $format_bottom); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1421 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1422 if($refGenome =~ /mm/)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1423 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1424 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg+1, $format_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1425 for(my $i=3; $i<=7; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg+$i, $format_bottom); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1426 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1427 if($refGenome =~ /rn/)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1428 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1429 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg+1, $format_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1430 for(my $i=3; $i<=7; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg+$i, $format_bottom); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1431 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1432
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1433 #### Left (rows +5, +6 and +7)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1434 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+5, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+6, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+7, $colStart_SBSdistrBySeg, $format_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1435
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1436 #### Bottom-left (same genome-specific row offsets as Bottom-Right)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1437 if($refGenome =~ /hg/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg, $format_bottomLeft); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1438 if($refGenome =~ /mm/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg, $format_bottomLeft); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1439 if($refGenome =~ /rn/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg, $format_bottomLeft); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1440 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1441
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1442 sub ShortTriNtContext # Write the title of Panel 1 and its border cells. Rows are absolute (0 to 38); columns run from $colStart_matrixSeqContext to $colStart_matrixSeqContext+20.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1443 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1444 my $format_headerSection = $wb->add_format(valign => 'left', bold => 1, font => 'Arial', size => 12); # NOTE(review): this format is created and configured but never passed to any write call in this sub
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1445 $format_headerSection->set_left(2); $format_headerSection->set_left_color('blue');
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1446
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1447 # Top-left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1448 $ws->write(0, $colStart_matrixSeqContext, "Panel 1. Trinucleotide sequence context of SBS on the genomic sequence", $format_topLeft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1449 # Top (columns +1 to +19)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1450 for(my $i=1; $i<=19; $i++) { $ws->write_blank(0, $colStart_matrixSeqContext+$i, $format_top); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1451 # Top-right
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1452 $ws->write_blank(0, $colStart_matrixSeqContext+20, $format_topRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1453 # Right (NOTE(review): the loop runs up to row 37, the same cell the Bottom-right write below targets again; confirm whether 36 was intended)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1454 for(my $i=1; $i<=37; $i++) { $ws->write_blank($i, $colStart_matrixSeqContext+20, $format_right); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1455 # Bottom-right
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1456 $ws->write_blank(37, $colStart_matrixSeqContext+20, $format_bottomRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1457 # Bottom (written on row 38, one row below the corner cells, using $format_top rather than $format_bottom)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1458 for(my $i=1; $i<=19; $i++) { $ws->write_blank(38, $colStart_matrixSeqContext+$i, $format_top); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1459 # Bottom-left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1460 $ws->write_blank(37, $colStart_matrixSeqContext, $format_bottomLeft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1461 # Left (row 1, then rows 3 to 36; row 2 is skipped here, it is the cell where HeaderShortTriNtContext writes the Count matrix title)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1462 $ws->write(1, $colStart_matrixSeqContext, "", $format_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1463 for(my $i=3; $i<=36; $i++) { $ws->write_blank($i, $colStart_matrixSeqContext, $format_left); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1464 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1465
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1466 sub LongTriNtContext # Write the title of Panel 2 and its border cells. The top, right and left cells are relative to $rowStart_SBSdistrBySeg; the bottom cells use absolute rows 91 and 92. Columns run from $colStart_matrixSeqContext to $colStart_matrixSeqContext+29.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1467 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1468 # Top-left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1469 $ws->write($rowStart_SBSdistrBySeg, $colStart_matrixSeqContext, "Panel 2. Stranded analysis of trinucleotide sequence context of SBS", $format_topLeft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1470 # Top (columns +1 to +28)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1471 for(my $i=1; $i<=28; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_matrixSeqContext+$i, $format_top); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1472 # Top-right
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1473 $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_matrixSeqContext+29, $format_topRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1474 # Right (rows +1 to +42 below the title row)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1475 for(my $i=1; $i<=42; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+$i, $colStart_matrixSeqContext+29, $format_right); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1476 # Bottom-right (NOTE(review): absolute row 91, unlike the relative rows above; it lines up with the Right loop only for one value of $rowStart_SBSdistrBySeg, confirm against the caller)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1477 $ws->write_blank(91, $colStart_matrixSeqContext+29, $format_bottomRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1478 # Bottom (absolute row 92, columns +13 to +28 only, using $format_top rather than $format_bottom)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1479 for(my $i=13; $i<=28; $i++) { $ws->write_blank(92, $colStart_matrixSeqContext+$i, $format_top); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1480 # Bottom-left (absolute row 91)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1481 $ws->write_blank(91, $colStart_matrixSeqContext, $format_bottomLeft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1482 # Left (rows +1, +2, +4, +5, then +22, +23, +25, +26; rows +3 and +24 are skipped here, they are the cells where HeaderLongTriNtContext writes the Count matrix and Frequency matrix titles)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1483 $ws->write_blank($rowStart_SBSdistrBySeg+1, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext, $format_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1484 $ws->write_blank($rowStart_SBSdistrBySeg+22, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+23, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext, $format_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1485 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1486 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1487
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1488 ############################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1489 # Write count of SBS by functional impact on the protein (Table 2) + Create the input for ggplot2 (pie chart with functional impact) + Create the input for ggplot2 (pie chart of SBS vs. Indels)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1490 sub writeDistrFuncImpact
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1491 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1492 my ($ws, $refH_file, $sample, $funcImpactggplot2, $overallDistrggplot2) = @_;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1493
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1494 # Set the row for the table 2
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1495 my $lImpactSBS = 31;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1496 my ($deletion, $insertion) = (0, 0);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1497
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1498
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1499 $ws->write(29, 6, "Table 2. Frequency and counts of functional impact", $format_A10Boldleft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1500 $ws->set_column(6, 6, 13); $ws->set_column(10, 10, 15);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1501 $ws->write(30, 6, "RefSeq gene", $table_topleft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1502 $ws->write(30, 7, "", $table_top);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1503 $ws->write(30, 8, "Percent", $table_top);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1504 $ws->write(30, 9, "Count", $table_topRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1505
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1506
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1507
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1508 open(IMPACTSBS, ">", $funcImpactggplot2) or die "$!: $funcImpactggplot2\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1509 print IMPACTSBS "AA_Change\tCount\tPercent\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1510
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1511 # Pie chart with the distribution of SBS vs Indel
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1512 open(SBSINDEL, ">", $overallDistrggplot2) or die "$!: $overallDistrggplot2\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1513 print SBSINDEL "Variant_type\tCount\tPercent\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1514
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1515
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1516 foreach my $k_exoFunc(sort keys %{$refH_file->{$sample}{'ImpactSBS'}})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1517 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1518 my $percent = ($refH_file->{$sample}{'ImpactSBS'}{$k_exoFunc} / $refH_file->{$sample}{'TotalMutGenomic'})*100;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1519 $percent = sprintf("%.2f", $percent);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1520
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1521 if($k_exoFunc eq "NA")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1522 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1523 print IMPACTSBS "Not_Applicable\t$percent\t$refH_file->{$sample}{'ImpactSBS'}{$k_exoFunc}\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1524 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1525 else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1526 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1527 my $temp = $k_exoFunc;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1528 $temp =~ s/ /_/g;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1529 print IMPACTSBS "$temp\t$percent\t$refH_file->{$sample}{'ImpactSBS'}{$k_exoFunc}\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1530 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1531
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1532 $ws->write($lImpactSBS, 6, $k_exoFunc, $table_left2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1533 $ws->write($lImpactSBS, 8, $percent, $format_A10);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1534 $ws->write($lImpactSBS, 9, $refH_file->{$sample}{'ImpactSBS'}{$k_exoFunc}, $table_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1535
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1536 $lImpactSBS++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1537
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1538 # Pie chart with the distribution of SBS vs Indel
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1539 if($k_exoFunc =~ /deletion/i) { $deletion += $refH_file->{$sample}{'ImpactSBS'}{$k_exoFunc}; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1540 elsif($k_exoFunc =~ /insertion/i) { $insertion += $refH_file->{$sample}{'ImpactSBS'}{$k_exoFunc}; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1541 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1542 close IMPACTSBS;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1543
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1544 $ws->write($lImpactSBS, 9, $refH_file->{$sample}{'TotalMutGenomic'}, $table_bottomrightHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1545 $ws->write($lImpactSBS, 6, "", $table_bottomleft); $ws->write($lImpactSBS, 7, "", $table_bottom); $ws->write($lImpactSBS, 8, "", $table_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1546
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1547 # Pie chart with the distribution of SBS vs Indel
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1548 my $percentSBSIndel = ($deletion/$refH_file->{$sample}{'TotalMutGenomic'})*100; $percentSBSIndel = sprintf("%.2f", $percentSBSIndel);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1549 print SBSINDEL "Deletion\t$deletion\t$percentSBSIndel\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1550 $percentSBSIndel = ($insertion/$refH_file->{$sample}{'TotalMutGenomic'})*100; $percentSBSIndel = sprintf("%.2f", $percentSBSIndel);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1551 print SBSINDEL "Insertion\t$insertion\t$percentSBSIndel\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1552 $percentSBSIndel = ($refH_file->{$sample}{TotalSBSGenomic}/$refH_file->{$sample}{'TotalMutGenomic'})*100; $percentSBSIndel = sprintf("%.2f", $percentSBSIndel);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1553 print SBSINDEL "SBS\t$refH_file->{$sample}{TotalSBSGenomic}\t$percentSBSIndel\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1554 close SBSINDEL;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1555 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1556
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1557
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1558 ############################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Write the result of the chi2 for the strand bias (Table 3)
#
# Writes the title and column headers of the two strand bias tables (title on
# rows 28 and 39), creates the tab-separated input file used to plot the strand
# bias with ggplot2 and delegates the table rows to writeRatioSB (rows 30 to 35).
#
# Arguments:
#   $wb                : workbook, used to register the italic red format of the warning message
#   $ws                : worksheet in which Table 3 is written
#   $strandBiasggplot2 : path of the tab-separated file created as input for ggplot2
#   $refH_file         : hash reference handed over unchanged to writeRatioSB
#   $k_file            : first-level key of %h_chi2 (also handed over to writeRatioSB)
#
# Dies when the ggplot2 input file can not be opened or closed.
sub writeChi2result
{
	my ($wb, $ws, $strandBiasggplot2, $refH_file, $k_file) = @_;

	# Mutation types in table order: [label used in %h_chi2 and in the first table, label used in the second table]
	my @mutations = (["C>A", "G>T"], ["C>G", "G>C"], ["C>T", "G>A"], ["T>A", "A>T"], ["T>C", "A>G"], ["T>G", "A>C"]);

	# Define the header of the table 3 (same title and column names for both tables)
	$ws->set_column(11, 11, 13); $ws->set_column(16, 16, 15); $ws->set_column(17, 17, 10);
	my @headers = ("Mutation Type", "Non-Tr/Tr", "Non-Tr", "Tr", "P-value", "FDR q value", "95% CI");
	foreach my $rowTitle (28, 39)
	{
		$ws->write($rowTitle, 11, "Table 3. Significance of the strand biases", $format_A10Boldleft);
		foreach my $i (0 .. $#headers)
		{
			# First and last columns carry the corner borders of the table
			my $format = $table_top;
			if($i == 0)            { $format = $table_topleft; }
			elsif($i == $#headers) { $format = $table_topRight; }
			$ws->write($rowTitle+1, 11+$i, $headers[$i], $format);
		}
	}


	# Define the count on non-transcribed and transcribed strand
	# When one of the two counts is missing or "NA" both counts are set to zero
	my %count = ();
	foreach my $ref_mut (@mutations)
	{
		my $mut   = $ref_mut->[0];
		my $nonTr = $h_chi2{$k_file}{$mut}{'NonTr'};
		my $tr    = $h_chi2{$k_file}{$mut}{'Tr'};

		if( (! defined $nonTr) || (! defined $tr) || ($nonTr eq "NA") || ($tr eq "NA") )
		{
			$nonTr = 0;
			$tr    = 0;
		}
		$count{$mut}{'NonTr'} = $nonTr;
		$count{$mut}{'Tr'}    = $tr;
	}


	# Create an input for representing the strand bias with ggplot2
	open(my $fh_sb, ">", $strandBiasggplot2) or die "$!: $strandBiasggplot2\n";
	print $fh_sb "Alteration\tStrand\tCount\n";
	foreach my $ref_mut (@mutations)
	{
		my $mut = $ref_mut->[0];
		print $fh_sb $mut."\tNonTranscribed\t".$count{$mut}{'NonTr'}."\n".$mut."\tTranscribed\t".$count{$mut}{'Tr'}."\n";
	}
	# Buffered write errors only show up when the handle is closed
	close($fh_sb) or die "$!: $strandBiasggplot2\n";


	### Calculate the ratio for the strand bias and write it on the Excel file (Table 3)
	foreach my $i (0 .. $#mutations)
	{
		my ($mut1, $mut2) = @{$mutations[$i]};

		# The last line of the table uses the formats drawing the bottom border
		my @formats = ($format_A10, $table_left, $table_middleHeader, $table_right);
		if($i == $#mutations) { @formats = ($table_bottom, $table_bottomleft, $table_middleHeader2, $table_bottomRight); }

		writeRatioSB($count{$mut1}{'NonTr'}, $count{$mut1}{'Tr'}, $ws, 30+$i, $refH_file, $k_file, $mut1, $mut2, @formats);
	}


	### Write a warning message when NonTr+Tr < 10
	my $format_italic_red = $wb->add_format(font=>'Arial', size=>10, italic=>1, color => 'red');

	my $nbLowCount = grep { ($count{$_->[0]}{'NonTr'} + $count{$_->[0]}{'Tr'}) < 10 } @mutations;
	if($nbLowCount > 0)
	{
		$ws->write(36, 11, "Warning message: chi-squared approximation may be incorrect because the number of SBS", $format_italic_red);
		$ws->write(37, 11, "on Non-transcribed and transcribed strand is lower than 10", $format_italic_red);
	}
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Write values in Table 3 (Sub function of writeChi2result)
#
# Writes one line in each of the two strand bias tables: the line for $mut1 on
# $row and the line for $mut2 on $row+11. The second line uses the inverse ratio
# and swaps the two counts; p-value, FDR and confidence interval are the ones
# stored in %h_chi2 for $mut1 on both lines.
#
# Arguments:
#   $count_NonTr     : count on the non-transcribed strand
#   $count_Tr        : count on the transcribed strand
#   $ws              : worksheet in which the lines are written
#   $row             : row of the $mut1 line
#   $refH_file       : not used by this function, kept so existing callers keep working
#   $k_file          : first-level key of %h_chi2
#   $mut1            : mutation type of the first line, also second-level key of %h_chi2
#   $mut2            : mutation type of the second line
#   $formatText      : format of the counts, the p-value and the FDR
#   $formatTextMut1  : format of the mutation type
#   $formatTextRatio : format of the ratio
#   $formatTable     : format of the confidence interval (last column)
sub writeRatioSB
{
	my ($count_NonTr, $count_Tr, $ws, $row, $refH_file, $k_file, $mut1, $mut2, $formatText, $formatTextMut1, $formatTextRatio, $formatTable) = @_;

	# The ratios stay at zero when one of the counts is null (no division by zero)
	my ($ratio_mut1, $ratio_mut2) = (0, 0);
	if( ($count_NonTr!=0) && ($count_Tr!=0) )
	{
		$ratio_mut1 = sprintf("%.2f", $count_NonTr/$count_Tr);
		$ratio_mut2 = sprintf("%.2f", $count_Tr/$count_NonTr);
	}

	# The p-value cell is the same on both lines: define once its content and its format
	my $hasPvalue = exists $h_chi2{$k_file}{$mut1}{'p-value'};
	my $pvalue    = $h_chi2{$k_file}{$mut1}{'p-value'};
	$pvalue = "" unless defined $pvalue;

	my @pvalueCell = ($pvalue, $formatText);
	# Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
	if( ($count_NonTr+$count_Tr) < 10 )
	{
		# T>G is the last line of the table: use the formats drawing the bottom border
		my $isLastLine = ($mut1 eq "T>G");

		if(! $hasPvalue)
		{
			# No p-value available: empty cell written without any format
			@pvalueCell = ("");
		}
		elsif($pvalue eq "NA")
		{
			@pvalueCell = ($pvalue, $isLastLine ? $table_bottom : $formatText);
		}
		else
		{
			@pvalueCell = ($pvalue, $isLastLine ? $table_bottomItalicRed : $format_A10ItalicRed);
		}
	}

	# One entry per table: [row, mutation type, ratio, count in column 13, count in column 14]
	my @lines = (
		[$row,    $mut1, $ratio_mut1, $count_NonTr, $count_Tr],    # C>N and T>N
		[$row+11, $mut2, $ratio_mut2, $count_Tr,    $count_NonTr], # G>N and A>N
	);

	foreach my $ref_line (@lines)
	{
		my ($rowLine, $mutation, $ratio, $countCol13, $countCol14) = @{$ref_line};

		$ws->write($rowLine, 11, $mutation, $formatTextMut1);
		$ws->write($rowLine, 12, $ratio, $formatTextRatio);
		$ws->write($rowLine, 13, $countCol13, $formatText);
		$ws->write($rowLine, 14, $countCol14, $formatText);

		$ws->write_string($rowLine, 15, @pvalueCell);

		$ws->write($rowLine, 16, $h_chi2{$k_file}{$mut1}{'FDR'}, $formatText);
		$ws->write($rowLine, 17, $h_chi2{$k_file}{$mut1}{'ConfInt'}, $formatTable);
	}
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1752
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1753
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1754 ############################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1755 # SBS distribution by functional region (Table 4) & Strand bias by functional region (Table 5) & Overall count and percent of SBS (Table 1)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1756 sub writeStatbyFuncRegion
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1757 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1758 my ($refH_file, $sample, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $ref_RowSBSDistrBySegAndFuncCG, $mutDistrggplot2) = @_;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1759
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1760 my $row_SBSdistrBySeg = $rowStart_SBSdistrBySeg+4;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1761 my $row_SBSDistrBySegAndFunc_CA = $rowStart_SBSdistrBySeg + $nb_func + 12;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1762 my $rowEndCG_SBSDistrBySegAndFunc_CG = $$ref_RowSBSDistrBySegAndFuncCG + $nb_func;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1763 my $row_SBSDistrBySegAndFunc_CT = $rowStart_SBSdistrBySeg + ($nb_func*3) + 20;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1764 my $colTable4 = 0;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1765
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1766 my ($count_ca, $count_cg, $count_ct, $count_ta, $count_tc, $count_tg) = (0,0,0,0,0,0);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1767
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1768
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1769 ## 6 mutation types by segment
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1770 foreach my $k_func (sort keys %{$refH_file->{$sample}{'6mutType'}})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1771 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1772 my $totalSBS_bySegment = 0;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1773
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1774 # Write the functional region for the section SBS distribution by segment (Table 4)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1775 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg, $k_func, $formatT_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1776 # Write the exonic func for the section strand bias by segment (Table 5)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1777 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg, $k_func, $formatT_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1778 # Write the last functional element in the table
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1779 if($$ref_RowSBSDistrBySegAndFuncCG == $rowEndCG_SBSDistrBySegAndFunc_CG)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1780 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1781 $ws->write($$ref_RowSBSDistrBySegAndFuncCG, $colStart_SBSdistrBySeg, $k_func, $formatT_bottomLeft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1782 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1783 else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1784 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1785 $ws->write($$ref_RowSBSDistrBySegAndFuncCG, $colStart_SBSdistrBySeg, $k_func, $formatT_left);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1786 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1787
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1788 foreach my $k_mutation (sort keys %{$refH_file->{$sample}{'6mutType'}{$k_func}})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1789 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1790 ### Write the count of SBS per mutation on genomic (Table 4) and coding strand (Table 5)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1791 if($k_mutation eq "C:G>A:T")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1792 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1793 writeCountSBS($refH_file, $sample, $k_func, $k_mutation, $ws, $row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg, $colStart_SBSdistrBySeg+3, $row_SBSdistrBySeg, $rowEndCG_SBSDistrBySegAndFunc_CG, \$count_ca);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1794 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1795 if($k_mutation eq "C:G>G:C")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1796 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1797 writeCountSBS($refH_file, $sample, $k_func, $k_mutation, $ws, $row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+4, $colStart_SBSdistrBySeg+5, $row_SBSdistrBySeg, $rowEndCG_SBSDistrBySegAndFunc_CG, \$count_cg);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1798 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1799 if($k_mutation eq "C:G>T:A")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1800 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1801 writeCountSBS($refH_file, $sample, $k_func, $k_mutation, $ws, $row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+8, $colStart_SBSdistrBySeg+7, $row_SBSdistrBySeg, $rowEndCG_SBSDistrBySegAndFunc_CG, \$count_ct);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1802 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1803 if($k_mutation eq "T:A>A:T")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1804 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1805 writeCountSBS($refH_file, $sample, $k_func, $k_mutation, $ws, $$ref_RowSBSDistrBySegAndFuncCG, $colStart_SBSdistrBySeg, $colStart_SBSdistrBySeg+9, $row_SBSdistrBySeg, $rowEndCG_SBSDistrBySegAndFunc_CG, \$count_ta);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1806 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1807 if($k_mutation eq "T:A>C:G")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1808 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1809 writeCountSBS($refH_file, $sample, $k_func, $k_mutation, $ws, $$ref_RowSBSDistrBySegAndFuncCG, $colStart_SBSdistrBySeg+4, $colStart_SBSdistrBySeg+11, $row_SBSdistrBySeg, $rowEndCG_SBSDistrBySegAndFunc_CG, \$count_tc);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1810 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1811 if($k_mutation eq "T:A>G:C")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1812 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1813 writeCountSBS($refH_file, $sample, $k_func, $k_mutation, $ws, $$ref_RowSBSDistrBySegAndFuncCG, $colStart_SBSdistrBySeg+8, $colStart_SBSdistrBySeg+13, $row_SBSdistrBySeg, $rowEndCG_SBSDistrBySegAndFunc_CG, \$count_tg);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1814 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1815
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1816 # Calculate the total number of SBS on the genomic strand for each mutation types by exonic region
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1817 $totalSBS_bySegment += $refH_file->{$sample}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1818 } # End $k_mutation
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1819
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1820 $row_SBSDistrBySegAndFunc_CA++; $$ref_RowSBSDistrBySegAndFuncCG++; #$row_SBSDistrBySegAndFunc_CT++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1821
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1822 # Write the percent by exonic region
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1823 writePercentSBS($refH_file, $sample, $k_func, "C:G>A:T", $row_SBSdistrBySeg, $colStart_SBSdistrBySeg+2, $ws, $totalSBS_bySegment);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1824 writePercentSBS($refH_file, $sample, $k_func, "C:G>G:C", $row_SBSdistrBySeg, $colStart_SBSdistrBySeg+4, $ws, $totalSBS_bySegment);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1825 writePercentSBS($refH_file, $sample, $k_func, "C:G>T:A", $row_SBSdistrBySeg, $colStart_SBSdistrBySeg+6, $ws, $totalSBS_bySegment);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1826 writePercentSBS($refH_file, $sample, $k_func, "T:A>A:T", $row_SBSdistrBySeg, $colStart_SBSdistrBySeg+8, $ws, $totalSBS_bySegment);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1827 writePercentSBS($refH_file, $sample, $k_func, "T:A>C:G", $row_SBSdistrBySeg, $colStart_SBSdistrBySeg+10, $ws, $totalSBS_bySegment);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1828 writePercentSBS($refH_file, $sample, $k_func, "T:A>G:C", $row_SBSdistrBySeg, $colStart_SBSdistrBySeg+12, $ws, $totalSBS_bySegment);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1829
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1830 # Write the count of SBS by segment
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1831 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+1, $totalSBS_bySegment, $format_A10);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1832
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1833 $row_SBSdistrBySeg++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1834 } # End $k_func
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1835
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1836
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1837 # Write the total number of SBS on the genomic strand (Table 4)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1838 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+1, $refH_file->{$sample}{'TotalSBSGenomic'}, $formatT_bottomHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1839
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1840
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1841 ##### Calculate the total percentages by mutation type
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1842 my $percent_ca = ($count_ca / $refH_file->{$sample}{'TotalSBSGenomic'}) * 100; $percent_ca = sprintf("%.2f", $percent_ca);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1843 my $percent_cg = ($count_cg / $refH_file->{$sample}{'TotalSBSGenomic'}) * 100; $percent_cg = sprintf("%.2f", $percent_cg);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1844 my $percent_ct = ($count_ct / $refH_file->{$sample}{'TotalSBSGenomic'}) * 100; $percent_ct = sprintf("%.2f", $percent_ct);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1845 my $percent_ta = ($count_ta / $refH_file->{$sample}{'TotalSBSGenomic'}) * 100; $percent_ta = sprintf("%.2f", $percent_ta);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1846 my $percent_tc = ($count_tc / $refH_file->{$sample}{'TotalSBSGenomic'}) * 100; $percent_tc = sprintf("%.2f", $percent_tc);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1847 my $percent_tg = ($count_tg / $refH_file->{$sample}{'TotalSBSGenomic'}) * 100; $percent_tg = sprintf("%.2f", $percent_tg);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1848
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1849
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1850 # Write the total percentage (Table 4)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1851 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+2, $percent_ca, $formatT_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1852 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+3, $count_ca, $formatT_bottomHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1853 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+4, $percent_cg, $formatT_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1854 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+5, $count_cg, $formatT_bottomHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1855 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+6, $percent_ct, $formatT_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1856 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+7, $count_ct, $formatT_bottomHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1857 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+8, $percent_ta, $formatT_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1858 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+9, $count_ta, $formatT_bottomHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1859 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+10, $percent_tc, $formatT_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1860 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+11, $count_tc, $formatT_bottomHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1861 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+12, $percent_tg, $formatT_bottom);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1862 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+13, $count_tg, $formatT_bottomRightHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1863
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1864
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1865
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1866 # Overall distribution of SBS (Table 1)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1867 $ws->write(0, 0, "Graph 1. SBS distribution", $formatT_graphTitle); $ws->set_row(0, 18);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1868 $ws->write(29, 0, "Table 1. Frequency and counts of all SBS", $format_A10Boldleft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1869 $ws->write(30, 0, "Mutation type", $table_topleft);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1870 $ws->write(30, 1, "Percentage", $table_top);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1871 $ws->write(30, 2, "Count", $table_topRight);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1872 $ws->write(31, 0, "C:G>A:T", $table_left); $ws->write(31, 1, $percent_ca, $format_A10); $ws->write(31, 2, $count_ca, $table_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1873 $ws->write(32, 0, "C:G>G:C", $table_left); $ws->write(32, 1, $percent_cg, $format_A10); $ws->write(32, 2, $count_cg, $table_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1874 $ws->write(33, 0, "C:G>T:A", $table_left); $ws->write(33, 1, $percent_ct, $format_A10); $ws->write(33, 2, $count_ct, $table_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1875 $ws->write(34, 0, "T:A>A:T", $table_left); $ws->write(34, 1, $percent_ta, $format_A10); $ws->write(34, 2, $count_ta, $table_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1876 $ws->write(35, 0, "T:A>C:G", $table_left); $ws->write(35, 1, $percent_tc, $format_A10); $ws->write(35, 2, $count_tc, $table_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1877 $ws->write(36, 0, "T:A>G:C", $table_left); $ws->write(36, 1, $percent_tg, $format_A10); $ws->write(36, 2, $count_tg, $table_right);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1878 $ws->write(37, 0, "", $table_bottomleft); $ws->write(37, 1, "", $table_bottom); $ws->write(37, 2, $refH_file->{$sample}{'TotalSBSGenomic'}, $table_bottomrightHeader);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1879
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1880 # Create an input for ggplot2 for representing the distribution of SBS for each mutation types (Figure 1)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1881 open(DISTRSBS, ">", $mutDistrggplot2) or die "$!: $mutDistrggplot2\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1882 print DISTRSBS "Mutation_Type\tCount\tPercentage\tSample\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1883 print DISTRSBS "C:G>A:T\t$count_ca\t$percent_ca\t$sample\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1884 print DISTRSBS "C:G>G:C\t$count_cg\t$percent_cg\t$sample\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1885 print DISTRSBS "C:G>T:A\t$count_ct\t$percent_ct\t$sample\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1886 print DISTRSBS "T:A>A:T\t$count_ta\t$percent_ta\t$sample\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1887 print DISTRSBS "T:A>C:G\t$count_tc\t$percent_tc\t$sample\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1888 print DISTRSBS "T:A>G:C\t$count_tg\t$percent_tg\t$sample\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1889 close DISTRSBS;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1890 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Write the percentage in table 4 of the Excel report (Sub function of writeStatbyFuncRegion)
#
# Arguments (positional):
#   $refH_file - hash reference; the count is read from
#                $refH_file->{$k_file}{'6mutType'}{$k_func}{$mutation}{'TotalMutG'}
#   $k_file    - first-level key of $refH_file
#   $k_func    - key under '6mutType'
#   $mutation  - mutation type key (e.g. "C:G>A:T")
#   $row, $col - cell coordinates handed to $ws->write
#   $ws        - worksheet object; only its write() method is called
#   $totalSBS  - denominator of the percentage
#
# The cell receives 0 when the count or the denominator is undefined or zero,
# otherwise (count / $totalSBS) * 100 formatted with two decimals.
# $format_A10 is not declared in this sub: it comes from the enclosing file scope.
sub writePercentSBS
{
    my ($refH_file, $k_file, $k_func, $mutation, $row, $col, $ws, $totalSBS) = @_;

    my $count = $refH_file->{$k_file}{'6mutType'}{$k_func}{$mutation}{'TotalMutG'};

    my $percent = 0;

    # Guard the denominator as well as the numerator: testing only the count
    # leaves a non-zero count divided by a zero (or undefined) total, which
    # aborts the script with "Illegal division by zero".
    if( defined($count) && ($count != 0) && defined($totalSBS) && ($totalSBS != 0) )
    {
        $percent = sprintf("%.2f", ($count / $totalSBS) * 100);
    }

    $ws->write($row, $col, $percent, $format_A10);
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Write the count in table 4 and table 5 of the Excel report (Sub function of writeStatbyFuncRegion)
#
# Arguments (positional):
#   $refH_file  - hash reference; the values 'NonTr', 'Tr' and 'TotalMutG' are read
#                 from $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}
#   $k_file, $k_func, $k_mutation - keys used in the lookup above
#   $ws         - worksheet object; only its write() method is called
#   $row_SBSDistrBySegAndFunc_CA      - row used for the Table 5 cells
#   $colStart_SBSdistrBySeg           - base column for the Table 5 cells (+1, +2, +3)
#   $colTable4, $row_SBSdistrBySeg    - column / row used for the Table 4 cell
#   $rowEndCG_SBSDistrBySegAndFunc_CG - when equal to the Table 5 row, the "bottom"
#                                       formats are used instead of the regular ones
#   $refCount   - scalar reference incremented by 'TotalMutG' for the six known
#                 mutation types
#
# The $format*/$formatT_* variables are not declared here: they come from the
# enclosing file scope.
sub writeCountSBS
{
    my ($refH_file, $k_file, $k_func, $k_mutation, $ws, $row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg, $colTable4, $row_SBSdistrBySeg, $rowEndCG_SBSDistrBySegAndFunc_CG, $refCount) = @_;

    # Read the three values once; every cell below is filled from these copies.
    my $nonTr     = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
    my $tr        = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
    my $totalMutG = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};

    # Ratio NonTr / Tr, reported as 0.00 as soon as one of the two counts is zero
    my $ratio = 0;
    if( ($nonTr != 0) && ($tr != 0) )
    {
        $ratio = $nonTr / $tr;
    }
    $ratio = sprintf("%.2f", $ratio);

    # The row matching $rowEndCG_SBSDistrBySegAndFunc_CG gets the "bottom" formats
    my ($cellFormat, $edgeFormat) = ($format_A10, $formatT_right);
    if($row_SBSDistrBySegAndFunc_CA == $rowEndCG_SBSDistrBySegAndFunc_CG)
    {
        ($cellFormat, $edgeFormat) = ($formatT_bottom, $formatT_bottomRight);
    }

    # Table 5: ratio, then the count of SBS on the NonTr and Tr strand
    $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+1, $ratio, $cellFormat);
    $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+2, $nonTr, $cellFormat);
    $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+3, $tr,    $cellFormat);

    # For these two mutation types the Tr cell is written a second time with the edge format
    if( ($k_mutation eq "C:G>T:A") || ($k_mutation eq "T:A>G:C") )
    {
        $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+3, $tr, $edgeFormat);
    }

    # Add the total number of SBS (genomic strand) to the caller's counter,
    # only for the six mutation types handled by the report
    my %isKnownMutation = map { $_ => 1 } ("C:G>A:T", "C:G>G:C", "C:G>T:A", "T:A>A:T", "T:A>C:G", "T:A>G:C");
    if($isKnownMutation{$k_mutation})
    {
        $$refCount += $totalMutG;
    }

    # Table 4: count by exonic region; the cell of "T:A>G:C" is written again with the right-edge format
    $ws->write($row_SBSdistrBySeg, $colTable4, $totalMutG, $format_A10);
    if($k_mutation eq "T:A>G:C")
    {
        $ws->write($row_SBSdistrBySeg, $colTable4, $totalMutG, $formatT_right);
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1987
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1988
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1989 ############################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# SBS distribution by chromosomes (Table 6)
#
# Fills the worksheet cells of the per-chromosome table and writes the same
# figures to a tab-separated text file.
#
# Arguments (positional):
#   $ws                - worksheet object; only its write() method is called
#   $refH_file         - hash reference; values are read from
#                        $refH_file->{$sample}{'SBSPerChr'} and
#                        $refH_file->{$sample}{'TotalSBSGenomic'}
#   $sample            - first-level key of $refH_file
#   $row, $col         - anchor cell: Pearson values go to row $row+6, the
#                        per-chromosome rows start at $row+8
#   $distrByChrggplot2 - path of the text file to create
#
# Dies when the text file cannot be opened or cannot be closed cleanly.
# %chromosomes and the $format*/$formatT_* variables are not declared here:
# they come from the enclosing file scope.
sub writeDistrByChr
{
    my ($ws, $refH_file, $sample, $row, $col, $distrByChrggplot2) = @_;

    # Column order of the six mutation types, in the worksheet and in the text file
    my @mutTypes = ("C:G>A:T", "C:G>G:C", "C:G>T:A", "T:A>A:T", "T:A>C:G", "T:A>G:C");

    # For the HTML report
    # Lexical handle: closed automatically if the sub dies before the explicit close
    open(my $fh_SBSPerChr, ">", $distrByChrggplot2) or die "$!: $distrByChrggplot2\n";

    my @pearson = ();
    foreach my $mutType (@mutTypes)
    {
        push(@pearson, $refH_file->{$sample}{'SBSPerChr'}{$mutType}{'Pearson'});
    }
    print {$fh_SBSPerChr} join("\t", "", "Pearson", $refH_file->{$sample}{'SBSPerChr'}{'AllMutType'}, @pearson), "\n";
    print {$fh_SBSPerChr} "Chr\tSize\tAll SBS\tC:G>A:T\tC:G>G:C\tC:G>T:A\tT:A>A:T\tT:A>C:G\tT:A>G:C\n";

    my $row_SBSPerChr = $row + 8;

    # Write the Pearson coefficient (the last column closes the table on the right)
    foreach my $i (0 .. $#mutTypes)
    {
        my $format = ($i == $#mutTypes) ? $formatT_right : $format_A10;
        $ws->write($row+6, $col+3+$i, $pearson[$i], $format);
    }

    # Write the chromosome number and their sizes / Write count SBS per chromosomes
    # NOTE(review): "sort keys" is the default string sort, so the rows follow string order - confirm this is the intended order
    my $line = 0;
    foreach my $chromosome (sort keys %chromosomes)
    {
        my $currentRow = $row_SBSPerChr + $line;
        my $totalChr   = $refH_file->{$sample}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'};

        $ws->write($currentRow, $col,   $chromosome,               $formatT_left);
        $ws->write($currentRow, $col+1, $chromosomes{$chromosome}, $format_A10);
        $ws->write($currentRow, $col+2, $totalChr,                 $format_A10);

        # Write the count per mutation
        my @countByMut = ();
        foreach my $i (0 .. $#mutTypes)
        {
            my $count  = $refH_file->{$sample}{'SBSPerChr'}{$mutTypes[$i]}{'CHR'}{$chromosome}{'chr'};
            my $format = ($i == $#mutTypes) ? $formatT_right : $format_A10;
            $ws->write($currentRow, $col+3+$i, $count, $format);
            push(@countByMut, $count);
        }

        # For the HTML report
        print {$fh_SBSPerChr} join("\t", $chromosome, $chromosomes{$chromosome}, $totalChr, @countByMut), "\n";

        $line++;
    }

    # Write the Pearson coefficient for the total number of SBS
    $ws->write($row+6, $col+2, $refH_file->{$sample}{'SBSPerChr'}{'AllMutType'}, $format_A10);
    # The total goes on the row right after the last chromosome
    $ws->write($row_SBSPerChr+scalar(keys %chromosomes), $col+2, $refH_file->{$sample}{'TotalSBSGenomic'}, $formatT_bottomHeader);

    print {$fh_SBSPerChr} "\t\t$refH_file->{$sample}{'TotalSBSGenomic'}\n";

    # Buffered write errors (e.g. disk full) only surface at close: do not ignore them
    close($fh_SBSPerChr) or die "$!: $distrByChrggplot2\n";
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2044
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2045
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2046 ############################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2047 # Trinucleotide sequence context on genomic strand (Panel 1)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2048 sub writeTriNtGenomic
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2049 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2050 my ($ws, $refH_file, $sample, $col, $heatmapCountggplot2, $heatmapPercentggplot2, $triNtBarChartggplot2, $ref_c_ca6_g, $ref_c_cg6_g, $ref_c_ct6_g, $ref_c_ta6_g, $ref_c_tc6_g, $ref_c_tg6_g) = @_;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2051
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2052 # Initialise the row of the panel 1
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2053 my $row_SeqContext6 = 4;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2054 # Percent total of mutations for 6 mutation types on genomic strand
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2055 my ($p_ca6_g, $p_cg6_g, $p_ct6_g, $p_ta6_g, $p_tc6_g, $p_tg6_g) = (0,0,0, 0,0,0);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2056 my $maxValue = 0; # For the heatmap
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2057
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2058 # For checking if the total number of SBS is correct
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2059 my $total_SBS_genomic = 0;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2060
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2061
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2062 open(HEATMAPCGENOMIC, ">", $heatmapCountggplot2) or die "$!: $heatmapCountggplot2\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2063 print HEATMAPCGENOMIC "\tC>A\tC>G\tC>T\tT>A\tT>C\tT>G\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2064 open(HEATMAPPGENOMIC, ">", $heatmapPercentggplot2) or die "$!: $heatmapPercentggplot2\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2065 print HEATMAPPGENOMIC "\tC>A\tC>G\tC>T\tT>A\tT>C\tT>G\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2066
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2067 ## Bar plot NMF like
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2068 open(BARPLOTNMFLIKE, ">", $triNtBarChartggplot2) or die "$!: $triNtBarChartggplot2\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2069 print BARPLOTNMFLIKE "alteration\tcontext\tvalue\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2070
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2071
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2072 foreach my $k_context (sort keys %{$refH_file->{$sample}{'SeqContextG'}})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2073 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2074 if( ($k_context =~ /N/) || (length($k_context) != 3) ) { next; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2075
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2076 # Write the context: 6 mut type on genomic strand
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2077 $ws->write($row_SeqContext6 , $col+3, $k_context, $format_A10);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2078 $ws->write($row_SeqContext6 , $col+13, $k_context, $format_A10);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2079
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2080 # Count for the heatmap
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2081 print HEATMAPCGENOMIC $k_context."\t";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2082 print HEATMAPPGENOMIC $k_context."\t";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2083
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2084 foreach my $k_mutation (sort keys %{$refH_file->{$sample}{'SeqContextG'}{$k_context}})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2085 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2086 # For checking the total number of SBS
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2087 $total_SBS_genomic += $refH_file->{$sample}{'SeqContextG'}{$k_context}{$k_mutation};
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2088
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2089 # Calculate the percentages
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2090 my $percent = 0;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2091 if($refH_file->{$sample}{'SeqContextG'}{$k_context}{$k_mutation} == 0) { $percent = 0; }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2092 else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2093 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2094 $percent = ($refH_file->{$sample}{'SeqContextG'}{$k_context}{$k_mutation} / $refH_file->{$sample}{'TotalSBSGenomic'}) * 100;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2095 $percent = sprintf("%.2f", $percent);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2096 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2097
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2098 # For representing the sequence context with a bar plot (NMF like style)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2099 print BARPLOTNMFLIKE $k_mutation,"\t", $k_context,"\t", $percent,"\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2100
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2101 # Write the count for the heatmap
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2102 print HEATMAPCGENOMIC $refH_file->{$sample}{'SeqContextG'}{$k_context}{$k_mutation}."\t";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2103 print HEATMAPPGENOMIC "$percent\t";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2104
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2105
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2106 # For NMF input
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2107 my $count = $refH_file->{$sample}{'SeqContextG'}{$k_context}{$k_mutation};
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2108 if($sample ne "Pool_Data") { push(@{$h_inputNMF{'Count'}{$k_context}{$k_mutation}}, $count); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2109 if($sample ne "Pool_Data") { push(@{$h_inputNMF{'Percent'}{$k_context}{$k_mutation}}, $percent); }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2110
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2111
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2112 if($k_mutation eq "C>A")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2113 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2114 triNtByMut($ws, $row_SeqContext6, $col+4, $col+14, $refH_file, $sample, $k_context, $k_mutation, $percent, $maxValue, $ref_c_ca6_g, \$p_ca6_g);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2115 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2116 elsif($k_mutation eq "C>G")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2117 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2118 triNtByMut($ws, $row_SeqContext6, $col+5, $col+15, $refH_file, $sample, $k_context, $k_mutation, $percent, $maxValue, $ref_c_cg6_g, \$p_cg6_g);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2119 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2120 elsif($k_mutation eq "C>T")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2121 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2122 triNtByMut($ws, $row_SeqContext6, $col+6, $col+16, $refH_file, $sample, $k_context, $k_mutation, $percent, $maxValue, $ref_c_ct6_g, \$p_ct6_g);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2123 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2124 elsif($k_mutation eq "T>A")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2125 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2126 triNtByMut($ws, $row_SeqContext6, $col+7, $col+17, $refH_file, $sample, $k_context, $k_mutation, $percent, $maxValue, $ref_c_ta6_g, \$p_ta6_g);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2127 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2128 elsif($k_mutation eq "T>C")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2129 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2130 triNtByMut($ws, $row_SeqContext6, $col+8, $col+18, $refH_file, $sample, $k_context, $k_mutation, $percent, $maxValue, $ref_c_tc6_g, \$p_tc6_g);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2131 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2132 elsif($k_mutation eq "T>G")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2133 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2134 triNtByMut($ws, $row_SeqContext6, $col+9, $col+19, $refH_file, $sample, $k_context, $k_mutation, $percent, $maxValue, $ref_c_tg6_g, \$p_tg6_g);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2135 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2136 else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2137 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2138 print STDERR "Error: Mutation type not considered for: $k_mutation\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2139 exit;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2140 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2141 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2142 $row_SeqContext6++;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2143
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2144 print HEATMAPCGENOMIC "\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2145 print HEATMAPPGENOMIC "\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2146 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2147 close HEATMAPCGENOMIC; close HEATMAPPGENOMIC;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2148 close BARPLOTNMFLIKE;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2149
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2150
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2151 # Write the total number of SBS per mutation type: COUNT
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2152 $ws->write($row_SeqContext6, $col+4, $$ref_c_ca6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2153 $ws->write($row_SeqContext6, $col+5, $$ref_c_cg6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2154 $ws->write($row_SeqContext6, $col+6, $$ref_c_ct6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2155 $ws->write($row_SeqContext6, $col+7, $$ref_c_ta6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2156 $ws->write($row_SeqContext6, $col+8, $$ref_c_tc6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2157 $ws->write($row_SeqContext6, $col+9, $$ref_c_tg6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2158 if($total_SBS_genomic != $refH_file->{$sample}{'TotalSBSGenomic'})
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2159 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2160 print STDERR "Error in the calculation of the total number of SBS on the genomic strand!!!!\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2161 print STDERR "From hash table $refH_file->{$sample}{'TotalSBSGenomic'}\tVS\t$total_SBS_genomic\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2162 exit;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2163 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2164
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2165 # Write the total number of SBS per mutation type: PERCENT
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2166 $ws->write($row_SeqContext6, $col+14, $p_ca6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2167 $ws->write($row_SeqContext6, $col+15, $p_cg6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2168 $ws->write($row_SeqContext6, $col+16, $p_ct6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2169 $ws->write($row_SeqContext6, $col+17, $p_ta6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2170 $ws->write($row_SeqContext6, $col+18, $p_tc6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2171 $ws->write($row_SeqContext6, $col+19, $p_tg6_g, $formatT_bottomHeader2);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2172
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2173 my $totalPercent_genomic = $p_ca6_g + $p_cg6_g + $p_ct6_g + $p_ta6_g + $p_tc6_g + $p_tg6_g;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2174 $totalPercent_genomic = sprintf("%.0f", $totalPercent_genomic);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2175
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2176 if($totalPercent_genomic != 100)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2177 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2178 print STDERR "Error in the calculation of the total percentages on the genomic strand!!!\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2179 print STDERR "The total is equal to=\t$totalPercent_genomic\n";
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2180 exit;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2181 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2182 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Trinucleotide count and percentage by mutation type (Sub function of writeTriNtGenomic)
#
# Writes, for one trinucleotide context and one mutation type, the number of SBS
# in the count table and the given percentage in the percentage table, then adds
# both values to the running totals referenced by the caller.
#
# Arguments (positional):
#   $ws            worksheet object (only its write() method is called)
#   $line          row used for both the count cell and the percentage cell
#   $colCount      column of the count cell
#   $colPercent    column of the percentage cell
#   $refH_file     reference to the hash holding the statistics of every sample
#   $sample        sample key in $refH_file
#   $context       sequence context key under {'SeqContextG'}
#   $mutation      mutation type key under the context
#   $percent       percentage to write (computed by the caller)
#   $maxValue      current maximum percentage (received by value, see the note below)
#   $refTotalCount    scalar reference: running total of the counts
#   $refTotalPercent  scalar reference: running total of the percentages
sub triNtByMut
{
    my ($ws, $line, $colCount, $colPercent, $refH_file, $sample, $context, $mutation, $percent, $maxValue, $refTotalCount, $refTotalPercent) = @_;

    # Number of SBS recorded for this context and this mutation type
    my $nbSBS = $refH_file->{$sample}{'SeqContextG'}{$context}{$mutation};

    ### COUNT
    $ws->write($line, $colCount, $nbSBS, $format_A10);

    ### PERCENTAGE
    $ws->write($line, $colPercent, $percent, $format_A10);

    # For the heatmap
    # NOTE(review): $maxValue is a private copy of the caller's argument, so this
    # update is not visible outside this sub - confirm whether a reference was intended.
    $maxValue = $percent if($percent >= $maxValue);

    # For the total amount per mutation types
    $$refTotalCount   += $nbSBS;
    $$refTotalPercent += $percent;
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2201
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2202
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
############################################################################################################
# Trinucleotide sequence context on coding strand (Panel 2)
#
# For every 3-letter sequence context of the sample (contexts containing an N
# are skipped) and every mutation type found under it, writes the number of SBS
# on the non-transcribed and transcribed strands in the count table (first row:
# $row+6) and the corresponding percentages of 'TotalSBSCoding' in the
# percentage table (first row: $row+27). The same values are written into two
# tab-delimited files (one for the counts, one for the percentages). A row of
# totals per mutation type and strand is added under each table.
#
# Arguments (positional):
#   $ws         worksheet object (only its write() method is called)
#   $row, $col  top-left anchor of the panel in the worksheet
#   $refH_file  reference to the hash holding the statistics of every sample
#   $sample     sample key in $refH_file
#   $triNtBarChartCodingCountggplot2    path of the output file with the counts
#   $triNtBarChartCodingPercentggplot2  path of the output file with the percentages
#
# Dies if an output file cannot be opened or closed. Prints an error and exits
# when the summed counts differ from 'TotalSBSCoding' or when the summed
# percentages do not round to 100.
sub writeTriNtCoding
{
    my ($ws, $row, $col, $refH_file, $sample, $triNtBarChartCodingCountggplot2, $triNtBarChartCodingPercentggplot2) = @_;

    # Initialise the row
    my $row_SeqContext12        = $row+6;
    my $row_SeqContext12Percent = $row+27;

    # The 6 mutation types and the offset (from $col) of their non-transcribed
    # column; the transcribed column is always the next one.
    my @mutTypes = ("C>A", "C>G", "C>T", "T>A", "T>C", "T>G");
    my %colOffset;
    @colOffset{@mutTypes} = (1, 3, 5, 7, 9, 11);

    # Total count and percent calculated for the strand bias
    my (%totalCount, %totalPercent);
    foreach my $mut (@mutTypes)
    {
        $totalCount{$mut}   = { 'NonTr' => 0, 'Tr' => 0 };
        $totalPercent{$mut} = { 'NonTr' => 0, 'Tr' => 0 };
    }

    # For checking if the total number of SBS is correct
    my $total_SBS_coding = 0;

    # Lexical filehandles: they cannot collide with a handle of the same name
    # opened elsewhere in the script and are released when the sub returns.
    open(my $fhCount, ">", $triNtBarChartCodingCountggplot2) or die "$!: $triNtBarChartCodingCountggplot2\n";
    print {$fhCount} "MutationTypeContext\tStrand\tValue\tSample\n";
    open(my $fhPercent, ">", $triNtBarChartCodingPercentggplot2) or die "$!: $triNtBarChartCodingPercentggplot2\n";
    print {$fhPercent} "MutationTypeContext\tStrand\tValue\tSample\n";

    foreach my $k_context (sort keys %{$refH_file->{$sample}{'SeqContextC'}})
    {
        if( ($k_context =~ /N/) || (length($k_context) != 3) ) { next; }

        # Write the context: 12 mut type on coding strand
        $ws->write($row_SeqContext12, $col, $k_context, $formatT_left);
        $ws->write($row_SeqContext12Percent, $col, $k_context, $formatT_left);

        foreach my $k_mutation (sort keys %{$refH_file->{$sample}{'SeqContextC'}{$k_context}})
        {
            my $nbNonTr = $refH_file->{$sample}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
            my $nbTr    = $refH_file->{$sample}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};

            # Percent: 12 mut type on coding strand
            # (a zero count stays at 0 and is never divided by the total)
            my ($percent_NonTr, $percent_Tr) = (0, 0);

            if($nbNonTr != 0)
            {
                $percent_NonTr = ( $nbNonTr / $refH_file->{$sample}{'TotalSBSCoding'} ) * 100;
            }
            if($nbTr != 0)
            {
                $percent_Tr = ( $nbTr / $refH_file->{$sample}{'TotalSBSCoding'} ) * 100;
            }

            # Counts
            print {$fhCount} "$k_mutation:$k_context\tNonTranscribed\t$nbNonTr\t$sample\n";
            print {$fhCount} "$k_mutation:$k_context\tTranscribed\t$nbTr\t$sample\n";

            # Percentages (rounded to 2 decimals before being written and summed)
            $percent_NonTr = sprintf("%.2f", $percent_NonTr);
            $percent_Tr    = sprintf("%.2f", $percent_Tr);
            print {$fhPercent} "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$sample\n";
            print {$fhPercent} "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$sample\n";

            # Calculate the total number for each mutation types
            # (a mutation type outside the 6 expected ones is not written in the
            # tables but still counts in $total_SBS_coding below)
            if(exists $colOffset{$k_mutation})
            {
                triNtByMutCoding($refH_file, $sample, $k_context, $k_mutation, $ws, $row_SeqContext12, $col+$colOffset{$k_mutation}, $row_SeqContext12Percent, \$totalCount{$k_mutation}{'NonTr'}, \$totalCount{$k_mutation}{'Tr'}, $percent_NonTr, $percent_Tr);

                $totalPercent{$k_mutation}{'NonTr'} += $percent_NonTr;
                $totalPercent{$k_mutation}{'Tr'}    += $percent_Tr;
            }

            # For checking if the total number of SBS is correct
            $total_SBS_coding += $nbNonTr + $nbTr;
        }
        $row_SeqContext12++; $row_SeqContext12Percent++;
    }
    # Buffered write errors only surface at close time, so check it
    close($fhCount)   or die "$!: $triNtBarChartCodingCountggplot2\n";
    close($fhPercent) or die "$!: $triNtBarChartCodingPercentggplot2\n";


    ## Write the total of each mutation types: 12 mut type on coding strand
    foreach my $mut (@mutTypes)
    {
        my $colNonTr = $col+$colOffset{$mut};
        $ws->write($row_SeqContext12, $colNonTr,   $totalCount{$mut}{'NonTr'}, $formatT_bottomHeader2);
        $ws->write($row_SeqContext12, $colNonTr+1, $totalCount{$mut}{'Tr'},    $formatT_bottomHeader2);
    }
    # Write the total percentages of each mutation types: 12 mut type on coding strand
    foreach my $mut (@mutTypes)
    {
        my $colNonTr = $col+$colOffset{$mut};
        $ws->write($row_SeqContext12Percent, $colNonTr,   $totalPercent{$mut}{'NonTr'}, $formatT_bottomHeader);
        $ws->write($row_SeqContext12Percent, $colNonTr+1, $totalPercent{$mut}{'Tr'},    $formatT_bottomHeader);
    }

    # NOTE(review): both error paths below call exit without a status, i.e. the
    # script terminates with status 0 even though an error was reported - confirm
    # whether a non-zero status is wanted.
    if($total_SBS_coding == $refH_file->{$sample}{'TotalSBSCoding'})
    {
        $ws->write($row_SeqContext12, $col+13, $refH_file->{$sample}{'TotalSBSCoding'}, $formatT_bottomHeader2);
    }
    else
    {
        print STDERR "Error: in the calculation of the total number of SBS on the coding strand!!!!\n";
        print STDERR "From hash table $refH_file->{$sample}{'TotalSBSCoding'}\tVS\t$total_SBS_coding\n";
        exit;
    }

    # Sum of the 12 total percentages, rounded to the nearest integer
    my $totalP_SBS_coding = 0;
    foreach my $mut (@mutTypes)
    {
        $totalP_SBS_coding += $totalPercent{$mut}{'NonTr'};
        $totalP_SBS_coding += $totalPercent{$mut}{'Tr'};
    }
    $totalP_SBS_coding = sprintf("%.0f", $totalP_SBS_coding);

    if($totalP_SBS_coding != 100)
    {
        print STDERR "Error: The percentages for the trinucleotide sequence context on the coding strand for 12 mutation types is not equal to 100!!!\n$totalP_SBS_coding\n";
        exit;
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Trinucleotide count and percentage by mutation type on Coding strand (Sub function of writeTriNtCoding)
#
# For one sequence context and one mutation type, adds the non-transcribed and
# transcribed counts to the totals referenced by the caller, then writes the two
# counts on row $rowCount and the two percentages on row $rowPercent. The
# non-transcribed value goes in column $col, the transcribed one in $col+1.
#
# Arguments (positional):
#   $refH_file   reference to the hash holding the statistics of every sample
#   $sample      sample key in $refH_file
#   $context     sequence context key under {'SeqContextC'}
#   $mutation    mutation type key under the context
#   $ws          worksheet object (only its write() method is called)
#   $rowCount    row of the count cells
#   $col         column of the non-transcribed cells
#   $rowPercent  row of the percentage cells
#   $refTotalNonTr, $refTotalTr  scalar references: running totals of the counts
#   $percent_NonTr, $percent_Tr  percentages to write (computed by the caller)
sub triNtByMutCoding
{
    my ($refH_file, $sample, $context, $mutation, $ws, $rowCount, $col, $rowPercent, $refTotalNonTr, $refTotalTr, $percent_NonTr, $percent_Tr) = @_;

    # Number of SBS on each strand for this context and this mutation type
    my $nbNonTr = $refH_file->{$sample}{'SeqContextC'}{$context}{$mutation}{'NonTr'};
    my $nbTr    = $refH_file->{$sample}{'SeqContextC'}{$context}{$mutation}{'Tr'};

    # Update the totals owned by the caller
    $$refTotalNonTr += $nbNonTr;
    $$refTotalTr    += $nbTr;

    # The transcribed strand is always written next to the non-transcribed one
    my $colTr = $col+1;

    # COUNT : 12 mutation type (stranded bar graph)
    $ws->write($rowCount, $col,   $nbNonTr, $format_A10);
    $ws->write($rowCount, $colTr, $nbTr,    $format_A10);

    ## PERCENT : 12 mutation type (stranded bar graph)
    $ws->write($rowPercent, $col,   $percent_NonTr, $format_A10);
    $ws->write($rowPercent, $colTr, $percent_Tr,    $format_A10);
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2364
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2365
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
############################################################################################################
# Create and write the figures on the Excel report
#
# Runs the R scripts that generate the figures of one sample, then inserts the
# resulting PNG files (read from $folder_temp) in the worksheet.
#
# Arguments (positional):
#   $ws            worksheet object (write() and insert_image() are called)
#   $row, $col     anchor used to position the trinucleotide context figures
#   $folderFigure  folder passed to the R scripts for the figures and their input files
#   $sample        sample name, used to build the file names
#   $c_ca6_g, $c_cg6_g, $c_ct6_g, $c_ta6_g, $c_tc6_g, $c_tg6_g
#                  six values passed as-is to the mutation spectrum R script
#
# Rscript is started without going through a shell, so folders or sample names
# containing spaces or shell metacharacters are passed intact as single
# arguments. A warning is printed on STDERR when Rscript cannot be started or
# ends with a non-zero status.
sub createWriteFigs
{
    my ($ws, $row, $col, $folderFigure, $sample, $c_ca6_g, $c_cg6_g, $c_ct6_g, $c_ta6_g, $c_tc6_g, $c_tg6_g) = @_;

    # Run "Rscript @args" and discard what it prints on its standard output
    my $runRscript = sub
    {
        my @args = @_;

        # List form of the pipe open: the command is executed directly, no shell involved
        my $pid = open(my $fhRscript, "-|", "Rscript", @args);
        if(! defined $pid)
        {
            print STDERR "Warning: cannot run Rscript $args[0]: $!\n";
            return;
        }

        # Drain the output so that Rscript never blocks on a full pipe
        while(my $line = <$fhRscript>) { }

        # close() waits for Rscript and returns false when its status is not 0
        if(! close($fhRscript))
        {
            print STDERR "Warning: Rscript $args[0] failed (wait status: $?)\n";
        }
        return;
    };

    ######## Create figures
    # Bar chart for SBS distribution (Figure 1)
    # Pie chart for Impact on protein sequence (Figure 2)
    # Stranded distribution of SBS (Figure 3)
    # Heatmaps for trinucleotide context
    $runRscript->($pathRScriptFigs, "--folderFigure", $folderFigure, "--folderTemp", $folder_temp, "--filename", $sample);

    # Bar chart for trinucleotide context on coding strand (one for the counts, one for the percentages)
    foreach my $type ("Count", "Percent")
    {
        my $prefix = "$sample-StrandedSignature$type";
        $runRscript->($pathRScriptTxnSB, "$folderFigure/Stranded_Analysis/$sample/$prefix.txt", "$folderFigure/Stranded_Analysis/$sample/$prefix", "$folder_temp/$prefix", $type);
    }

    # Bar plot for representing the sequence context (NMF like style)
    $runRscript->($pathRScriptMutSpectrum, "$folderFigure/Trinucleotide_Sequence_Context/$sample/$sample-MutationSpectraPercent-Genomic.txt", $sample, "$folderFigure/Trinucleotide_Sequence_Context/$sample", $folder_temp, $c_ca6_g, $c_cg6_g, $c_ct6_g, $c_ta6_g, $c_tc6_g, $c_tg6_g);


    ######## Write the figures in the Excel report
    # Bar chart for SBS distribution (Figure 1)
    $ws->insert_image(1, 0, "$folder_temp/$sample-SBS_distribution-Report.png", 0, 0, .2, .2);

    # Impact of the SBS on the protein (Figure 2)
    $ws->write(0, 6, "Graph 2. Impact on protein sequence", $formatT_graphTitle);
    $ws->insert_image(1, 6, "$folder_temp/$sample-DistributionExoFunc-Report.png", 0, 0, .2, .2);

    # Stranded distribution of SBS (Figure 3)
    $ws->write(0, 11, "Graph 3. Stranded distribution of SBS", $formatT_graphTitle);
    $ws->insert_image(1, 11, "$folder_temp/$sample-StrandBias-Report.png", 0, 0, .2, .2);

    ## Trinucleotide context on coding strand (the inserted images are scaled by 0.16 in both directions)
    $ws->insert_image($row+3, $col+15, "$folder_temp/$sample-StrandedSignatureCount-Report.png", 0, 0, .16, .16);
    $ws->insert_image($row+24, $col+15, "$folder_temp/$sample-StrandedSignaturePercent-Report.png", 0, 0, .16, .16);

    # Heatmap for the sequence context on the genomic strand (6 mutation types)
    $ws->insert_image(4, $col, "$folder_temp/$sample-HeatmapCount-Genomic-Report.png");
    $ws->insert_image(4, $col+10, "$folder_temp/$sample-HeatmapPercent-Genomic-Report.png");

    # Bar plot for the sequence context on the genomic strand (6 mutation types)
    $ws->insert_image(27, $col+3, "$folder_temp/$sample-MutationSpectraPercent-Genomic-Report.png");
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2411
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2412
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2413 ############################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2414 # Write NMF input for count and percentages in the Excel report
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Write the NMF input matrices (counts and un-normalized percentages).
#
# Two tab-separated text files are always produced. The same values are also
# written into the two Excel worksheets, but only when $oneReportPerSample == 2
# (all the samples are in the same workbook).
#
# Parameters:
#   $ws_inputNMF_count   - worksheet receiving the counts
#   $ws_inputNMF_percent - worksheet receiving the percentages
#   $outCount            - path of the text file for the counts
#   $outPercent          - path of the text file for the percentages
#
# Reads the file-level %h_inputNMF ('Sample', 'Count', 'Percent') and
# $oneReportPerSample.
#
# Dies when an output file cannot be opened or closed, or when a context key
# does not have the expected "<base>_<base>" shape.
sub writeInputNMF
{
    my ($ws_inputNMF_count, $ws_inputNMF_percent, $outCount, $outPercent) = @_;

    # Lexical handles: they cannot collide with bareword handles used elsewhere
    open(my $fh_count, ">", $outCount) or die "$!: $outCount\n";       # with the count
    open(my $fh_percent, ">", $outPercent) or die "$!: $outPercent\n"; # with the frequency un-normalized

    # Header line: an empty first cell, then one column per sample
    foreach my $k_sample (@{$h_inputNMF{'Sample'}})
    {
        print {$fh_count} "\t$k_sample";
        print {$fh_percent} "\t$k_sample";
    }
    print {$fh_count} "\n";
    print {$fh_percent} "\n";

    # Write the input in the Excel report, only when all the samples are in the same workbook
    my $in_workbook = ($oneReportPerSample == 2);

    # Each kind of value goes to its own text file and its own worksheet
    my %destination = (
        'Count'   => [$fh_count,   $ws_inputNMF_count],
        'Percent' => [$fh_percent, $ws_inputNMF_percent],
    );

    # Data rows start at row 1 of the worksheets; row 0 is not written here
    my $row_inputNMF = 1;
    foreach my $k_context (sort keys %{$h_inputNMF{'Count'}})
    {
        # Fail loudly on a malformed key instead of silently reusing the
        # captures left over from a previous successful match.
        my ($base5, $base3) = $k_context =~ /(\w)_(\w)/
            or die "Unexpected sequence context '$k_context' in the NMF input\n";

        foreach my $k_mutation (sort keys %{$h_inputNMF{'Count'}{$k_context}})
        {
            my $contextNMF = $base5."[$k_mutation]".$base3;

            foreach my $kind ('Count', 'Percent')
            {
                my ($fh, $ws) = @{$destination{$kind}};

                $ws->write($row_inputNMF, 0, $contextNMF) if $in_workbook;
                print {$fh} $contextNMF, "\t";

                # Every value is followed by a tab, including the last one: the
                # line layout of the text files is kept exactly as before.
                my $col_inputNMF = 1;
                foreach my $value (@{$h_inputNMF{$kind}{$k_context}{$k_mutation}})
                {
                    print {$fh} "$value\t";
                    $ws->write($row_inputNMF, $col_inputNMF, $value) if $in_workbook;
                    $col_inputNMF++;
                }
                print {$fh} "\n";
            }
            $row_inputNMF++;
        }
    }

    # Buffered write errors only surface at close time, so check it
    close($fh_percent) or die "$!: $outPercent\n";
    close($fh_count) or die "$!: $outCount\n";
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2472
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2473
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2474 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2475 # Define format and background colors for the Excel report #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2476 ######################################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2477 # Font: Arial size 10
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_A10
{
    # $wb: workbook used to create the format; $format: scalar reference that receives it
    my ($wb, $format) = @_;

    my $fmt = $wb->add_format(font => 'Arial', size => 10);
    ${$format} = $fmt;
    $fmt->set_align('center');
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2483 # Font: Arial size 11 bold and center
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_A11Bold
{
    # $wb: workbook used to create the format; $format: scalar reference that receives it
    my ($wb, $format) = @_;

    my $fmt = $wb->add_format(font => 'Arial', size => 11, bold => 1);
    ${$format} = $fmt;
    $fmt->set_align('center');
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2489 # Font: Arial size 10 italic red and center
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_A10ItalicRed
{
    # $wb: workbook used to create the format; $format: scalar reference that receives it
    my ($wb, $format) = @_;

    my $fmt = $wb->add_format(font => 'Arial', size => 10, italic => 1, color => 'red');
    ${$format} = $fmt;
    $fmt->set_align('center');
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2495 # Font: Arial size 11 bold and left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_A11BoldLeft
{
    # $wb: workbook used to create the format; $format: scalar reference that receives it
    my ($wb, $format) = @_;

    # NOTE(review): the 'valign' property with the value 'left' is kept exactly
    # as it was; confirm against the Spreadsheet::WriteExcel documentation that
    # it produces the intended left alignment.
    my @properties = (valign => 'left', font => 'Arial', size => 11, bold => 1);
    ${$format} = $wb->add_format(@properties);
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2501 # Font: Arial size 10 bold and left
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_A10BoldLeft
{
    # $wb: workbook used to create the format; $format: scalar reference that receives it
    my ($wb, $format) = @_;

    # NOTE(review): the 'valign' property with the value 'left' is kept exactly
    # as it was; confirm against the Spreadsheet::WriteExcel documentation that
    # it produces the intended left alignment.
    my @properties = (valign => 'left', font => 'Arial', size => 10, bold => 1);
    ${$format} = $wb->add_format(@properties);
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2507 # Define the format of the border of the section (for delimiting the different section of the report)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_section
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives one of the eight section-border formats (four corners, four edges).
    my ($wb, $format_topLeft, $format_topRight, $format_bottomLeft, $format_bottomRight, $format_top, $format_right, $format_bottom, $format_left) = @_;

    # Create a format from the given properties, then give it a blue border
    # (border style 2) on each listed side, in the order the sides are listed.
    my $bordered = sub {
        my ($properties, @sides) = @_;
        my $fmt = $wb->add_format(@{$properties});
        foreach my $side (@sides)
        {
            my ($set_border, $set_color) = ("set_$side", "set_${side}_color");
            $fmt->$set_border(2);
            $fmt->$set_color('blue');
        }
        return $fmt;
    };

    # The corners also carry the text properties; the plain edges carry none
    my @corner = (valign => 'left', bold => 1, font => 'Arial', size => 12);
    my @edge   = ();

    ${$format_topLeft}     = $bordered->(\@corner, 'top', 'left');
    ${$format_topRight}    = $bordered->(\@corner, 'top', 'right');
    ${$format_bottomLeft}  = $bordered->(\@corner, 'bottom', 'left');
    ${$format_bottomRight} = $bordered->(\@corner, 'bottom', 'right');

    ${$format_top}    = $bordered->(\@edge, 'top');
    ${$format_right}  = $bordered->(\@edge, 'right');
    ${$format_bottom} = $bordered->(\@edge, 'bottom');
    ${$format_left}   = $bordered->(\@edge, 'left');
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2533 # Define the header
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_Header
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives one header format.
    my ($wb, $format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG, $format_TG2, $format_LeftHeader, $format_RightHeader, $format_LeftHeader2) = @_;

    # Both palettes are registered on the workbook
    my ($blue, $black, $red, $gray, $green, $pink);
    Color($wb, \$blue, \$black, \$red, \$gray, \$green, \$pink);

    my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
    BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);

    # White bold text on the given background, centered across the selection
    my $mutation_header = sub {
        my ($background) = @_;
        my $fmt = $wb->add_format(bg_color => $background, font => 'Arial', bold => 1, size => 11, color => 'white');
        $fmt->set_align('center');
        $fmt->set_center_across();
        return $fmt;
    };

    my @colored = (
        [$format_CA, $blue],
        [$format_CG, $black],
        [$format_CT, $red],
        [$format_TA, $gray],
        [$format_TC, $green],
    );
    foreach my $pair (@colored)
    {
        my ($target, $background) = @{$pair};
        ${$target} = $mutation_header->($background);
    }

    # NOTE(review): unlike the five formats above, this one takes its pink from
    # BackgroundColor() rather than from Color(); kept as it was.
    my $fmt_TG = $mutation_header->($bgColor_pink);
    ${$format_TG} = $fmt_TG;
    $fmt_TG->set_right(2);
    $fmt_TG->set_right_color('blue');

    ${$format_TG2} = $mutation_header->($pink);

    # Bold header text without a background color
    my @plain = (bold => 1, font => 'Arial', size => 11);

    my $left_header = $wb->add_format(@plain);
    ${$format_LeftHeader} = $left_header;
    $left_header->set_align('center');
    $left_header->set_left(2);
    $left_header->set_left_color('blue');

    # Same as the previous one, but without the centered alignment
    my $left_header2 = $wb->add_format(@plain);
    ${$format_LeftHeader2} = $left_header2;
    $left_header2->set_left(2);
    $left_header2->set_left_color('blue');

    my $right_header = $wb->add_format(@plain);
    ${$format_RightHeader} = $right_header;
    $right_header->set_align('center');
    $right_header->set_right(2);
    $right_header->set_right_color('blue');
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2559 # Define the header for the part "Strand bias by segment"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_HeaderSBSDistrBySegAndFunc
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives one format: six with a left border, then six with a right border.
    my ($wb, $format_LeftCA, $format_LeftCG, $format_LeftCT, $format_LeftTA, $format_LeftTC, $format_LeftTG, $format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG) = @_;

    my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
    BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);

    # Background colors in the order CA, CG, CT, TA, TC, TG
    my @backgrounds = ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);

    # Output references per bordered side, in the same order as @backgrounds
    my %targets = (
        left  => [$format_LeftCA, $format_LeftCG, $format_LeftCT, $format_LeftTA, $format_LeftTC, $format_LeftTG],
        right => [$format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG],
    );

    foreach my $side ('left', 'right')
    {
        my ($set_border, $set_color) = ("set_$side", "set_${side}_color");
        foreach my $i (0 .. $#backgrounds)
        {
            my $fmt = $wb->add_format(bg_color => $backgrounds[$i], font => 'Arial', bold => 1, size => 11, color => 'white');
            ${ $targets{$side}[$i] } = $fmt;
            $fmt->set_align('center');
            $fmt->$set_border(2);
            $fmt->$set_color('blue');
        }
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2582 # Define the header for the part "Trinucleotide sequence context on the coding strand"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_Header12MutType
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives the header format of one mutation type.
    my ($wb, $format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG) = @_;

    my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
    BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);

    # One centered, white-on-color bold format per mutation type
    my @headers = (
        [$format_CA, $bgColor_blue],
        [$format_CG, $bgColor_black],
        [$format_CT, $bgColor_red],
        [$format_TA, $bgColor_gray],
        [$format_TC, $bgColor_green],
        [$format_TG, $bgColor_pink],
    );
    foreach my $header (@headers)
    {
        my ($target, $background) = @{$header};
        my $fmt = $wb->add_format(bg_color => $background, font => 'Arial', bold => 1, size => 11, color => 'white');
        ${$target} = $fmt;
        $fmt->set_align('center');
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2597 # Define the format for the text that needs a section border
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_TextSection
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives one format: five for plain text, then four for gray headers.
    my ($wb, $formatT_left, $formatT_right, $formatT_bottomRight, $formatT_bottomLeft, $formatT_bottom, $formatT_bottomHeader, $formatT_bottomRightHeader, $formatT_bottomHeader2, $formatT_rightHeader) = @_;

    # Give a format a blue border (border style 2) on each listed side, in order
    my $add_borders = sub {
        my ($fmt, @sides) = @_;
        foreach my $side (@sides)
        {
            my ($set_border, $set_color) = ("set_$side", "set_${side}_color");
            $fmt->$set_border(2);
            $fmt->$set_color('blue');
        }
        return $fmt;
    };

    # Plain text cells
    my @text = (valign => 'center', font => 'Arial', size => 10);
    ${$formatT_left}        = $add_borders->($wb->add_format(@text), 'left');
    ${$formatT_right}       = $add_borders->($wb->add_format(@text), 'right');
    ${$formatT_bottomRight} = $add_borders->($wb->add_format(@text), 'bottom', 'right');
    ${$formatT_bottomLeft}  = $add_borders->($wb->add_format(@text), 'bottom', 'left');
    ${$formatT_bottom}      = $add_borders->($wb->add_format(@text), 'bottom');

    # Header cells: bold, centered, on a custom gray (230, 230, 230) registered at palette index 54
    my $light_gray = $wb->set_custom_color(54, 230, 230, 230);
    my $gray_header = sub {
        my (@sides) = @_;
        my $fmt = $wb->add_format(bg_color => $light_gray, font => 'Arial', bold => 1, size => 11);
        $fmt->set_align('center');
        return $add_borders->($fmt, @sides);
    };

    ${$formatT_bottomHeader}      = $gray_header->('bottom');
    ${$formatT_bottomRightHeader} = $gray_header->('bottom', 'right');
    ${$formatT_bottomHeader2}     = $gray_header->();
    ${$formatT_rightHeader}       = $gray_header->('right');
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2632 # Define the format for the graphs titles
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_GraphTitle
{
    # $wb: workbook used to create the format; $formatT_graphTitle: scalar reference that receives it
    my ($wb, $formatT_graphTitle) = @_;

    my @title = (font => 'Arial', size => 12, bold => 1);
    ${$formatT_graphTitle} = $wb->add_format(@title);
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2639 # Define the format of the border of the tables
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Format_Table
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives one of the thirteen table formats.
    my ($wb, $table_topleft, $table_topRight, $table_bottomleft, $table_bottomRight, $table_top, $table_right, $table_bottom, $table_bottomItalicRed, $table_left, $table_bottomrightHeader, $table_left2, $table_middleHeader, $table_middleHeader2) = @_;

    # Create a format from the given properties, then give it a border
    # (border style 1) on each listed side, in the order the sides are listed.
    my $cell = sub {
        my ($properties, @sides) = @_;
        my $fmt = $wb->add_format(@{$properties});
        foreach my $side (@sides)
        {
            my $set_border = "set_$side";
            $fmt->$set_border(1);
        }
        return $fmt;
    };

    my @bold  = (valign => 'center', bold => 1, font => 'Arial', size => 10);
    my @plain = (valign => 'center', font => 'Arial', size => 10);

    # Corners
    ${$table_topleft}     = $cell->(\@bold, 'top', 'left');
    ${$table_topRight}    = $cell->(\@bold, 'top', 'right');
    ${$table_bottomleft}  = $cell->(\@bold, 'bottom', 'left');
    ${$table_bottomRight} = $cell->(\@plain, 'bottom', 'right');

    # Edges
    ${$table_top}             = $cell->(\@bold, 'top');
    ${$table_right}           = $cell->(\@plain, 'right');
    ${$table_bottom}          = $cell->(\@plain, 'bottom');
    ${$table_bottomItalicRed} = $cell->([@plain, italic => 1, color => 'red'], 'bottom');
    ${$table_left}            = $cell->(\@bold, 'left');

    # Header cells on a custom gray (230, 230, 230) registered at palette index 54
    my $light_gray = $wb->set_custom_color(54, 230, 230, 230);
    my @gray_header = (bg_color => $light_gray, font => 'Arial', bold => 1, size => 10);

    ${$table_bottomrightHeader} = $cell->(\@gray_header, 'bottom', 'right');

    ${$table_left2} = $cell->([valign => 'left', font => 'Arial', size => 10], 'left');

    ${$table_middleHeader}  = $cell->([valign => 'center', @gray_header]);
    ${$table_middleHeader2} = $cell->([valign => 'center', @gray_header], 'bottom');
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2663 # Define the color
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
sub Color
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives the value returned by set_custom_color() for that color.
    my ($wb, $blue, $black, $red, $gray, $green, $pink) = @_;

    # [ target reference, palette index, red, green, blue ]
    my @palette = (
        [$blue,  40,   0,   0, 204], # C:G>A:T in blue
        [$black, 41,   0,   0,   0], # C:G>G:C in black
        [$red,   42, 255,   0,   0], # C:G>T:A in red
        [$gray,  43, 205, 205, 205], # T:A>A:T in light gray
        [$green, 44,   0, 204,  51], # T:A>C:G in green
        [$pink,  45, 255, 192, 203], # T:A>G:C in pink
    );

    foreach my $entry (@palette)
    {
        my ($target, @index_and_rgb) = @{$entry};
        ${$target} = $wb->set_custom_color(@index_and_rgb);
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
# Define the background colors (same RGB values as in Color, on palette indexes 48 to 53)
sub BackgroundColor
{
    # $wb is the workbook; every other argument is a scalar reference that
    # receives the value returned by set_custom_color() for that color.
    my ($wb, $bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink) = @_;

    # [ target reference, palette index, red, green, blue ]
    my @palette = (
        [$bgColor_blue,  48,   0,   0, 204],
        [$bgColor_black, 49,   0,   0,   0],
        [$bgColor_red,   50, 255,   0,   0],
        [$bgColor_gray,  51, 205, 205, 205],
        [$bgColor_green, 52,   0, 204,  51],
        [$bgColor_pink,  53, 255, 192, 203],
    );

    foreach my $entry (@palette)
    {
        my ($target, @index_and_rgb) = @{$entry};
        ${$target} = $wb->set_custom_color(@index_and_rgb);
    }
}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2686
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2687
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2688
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2689
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2690 =head1 NAME
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2691
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2692 mutSpec-Stat
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2693
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2694 =head1 SYNOPSIS
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2695
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2696 mutSpecstat.pl [arguments] <query-file>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2697
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2698 <query-file> a folder with one or multiple VCFs
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2699
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2700 Arguments:
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2701 -h, --help print help message
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2702 -m, --man print complete documentation
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2703 -v, --verbose use verbose output
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2704 --refGenome the reference genome to use (human, mouse or rat genomes)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2705 -o, --outfile <string> output directory for the result. If none is specified, the result will be written in the same directory as the input file
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2706 --temp <string> the path for saving the temporary files
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2707 --pathSeqRefGenome the path to the fasta reference sequences
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2708 --poolData generate the pool of all the samples (optional)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2709 --reportSample generate a report for each sample (optional)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2710
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2711
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2712 Function: automatically run a pipeline and calculate various statistics on mutations
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2713
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2714 Example: mutSpecstat.pl --refGenome hg19 --outfile output_directory --temp path_to_temporary_directory --pathRscript path_to_R_scripts --pathSeqRefGenome path_fasta_ref_seq --poolData --reportSample inputFolder
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2715
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2716 Version: 02-2017 (February 2017)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2717
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2718
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2719 =head1 OPTIONS
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2720
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2721 =over 8
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2722
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2723 =item B<--help>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2724
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2725 print a brief usage message and detailed explanation of options.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2726
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2727 =item B<--man>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2728
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2729 print the complete manual of the program.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2730
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2731 =item B<--verbose>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2732
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2733 use verbose output.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2734
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2735 =item B<--refGenome>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2736
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2737 the reference genome to use, could be human, mouse or rat genomes.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2738
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2739 =item B<--outfile>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2740
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2741 the directory of output file names. If it is not specified, the same directory as the input file is used.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2742
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2743 =item B<--temp>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2744
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2745 the path for saving temporary files generated by the script.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2746 If none is specified, a temporary folder is created in the same directory where the script is running.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2747 The temporary folder is deleted when the script finishes.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2748
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2749 =item B<--pathSeqRefGenome>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2750
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2751 The path to the fasta reference sequences
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2752
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2753 =item B<--poolData only for the report>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2754
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2755 calculate the statistics on the pool of all the data passed as input
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2756
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2757 =item B<--reportSample only for the report>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2758
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2759 generate a report for each sample
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2760
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2761 =head1 DESCRIPTION
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2762
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2763 mutSpecstat is a Perl script for calculating various statistics on mutations.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2764 An Excel report containing the mutation type distribution per functional region, the strand bias and the sequence context on genomic and coding sequence is created.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2765 The different statistics are illustrated using ggplot2.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2766
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2767 =cut