0
|
1 #!/usr/bin/env perl
|
|
2
|
|
3 #-----------------------------------#
|
|
4 # Author: Maude #
|
|
5 # Script: mutspecStat.pl #
|
|
6 # Last update: 09/04/16 #
|
|
7 #-----------------------------------#
|
|
8
|
|
9 use strict;
|
|
10 use warnings;
|
|
11 use Getopt::Long;
|
|
12 use Pod::Usage;
|
|
13 use File::Basename; # my ($filename, $directories, $suffix) = fileparse($file, qr/\.[^.]*/);
|
|
14 use File::Path;
|
|
15 use Statistics::R;
|
|
16 use Spreadsheet::WriteExcel;
|
|
17
|
|
18 our ($verbose, $man, $help) = (0, 0, 0); # Parse options and print usage if there is a syntax error, or if usage was explicitly requested.
|
|
19 our ($refGenome, $output, $folder_temp, $path_R_Scripts, $path_SeqrefGenome) = ("empty", "empty", "empty", "empty", "empty"); # The reference genome to use; The path for saving the result; The path for saving the temporary files; The path to R scripts; The path to the fasta reference sequences
|
|
20 our ($poolData, $oneReportPerSample) = (2, 2); # If a folder is pass as input file pool all the data and generate the report on the pool and for each samples; # Generate one report for each samples
|
|
21
|
|
22
|
|
23 GetOptions('verbose|v'=>\$verbose, 'help|h'=>\$help, 'man|m'=>\$man, 'refGenome=s'=>\$refGenome, 'outfile|o=s' => \$output, 'pathTemporary|temp=s' => \$folder_temp, 'pathRscript=s' => \$path_R_Scripts, 'pathSeqRefGenome=s' => \$path_SeqrefGenome, 'poolData' => \$poolData, 'reportSample' => \$oneReportPerSample) or pod2usage(2);
|
|
24
|
|
25 our ($input) = @ARGV;
|
|
26
|
|
27 pod2usage(-verbose=>1, -exitval=>1, -output=>\*STDERR) if ($help);
|
|
28 pod2usage(-verbose=>2, -exitval=>1, -output=>\*STDERR) if ($man);
|
|
29 pod2usage(-verbose=>0, -exitval=>1, -output=>\*STDERR) if(@ARGV == 0); # No argument is pass to the command line print the usage of the script
|
|
30 pod2usage(-verbose=>0, -exitval=>1, -output=>\*STDERR) if(@ARGV == 2); # Only one argument is expected to be pass to @ARGV (the input)
|
|
31
|
|
32
|
|
33
|
|
34 ######################################################################################################################################################
|
|
35 # GLOBAL VARIABLES #
|
|
36 ######################################################################################################################################################
|
|
37 # Recover the current path
|
|
38 our $pwd = `pwd`;
|
|
39 chomp($pwd);
|
|
40
|
|
41 # Path to R scripts
|
|
42 our $pathRScriptTxnSB = "$path_R_Scripts/R/transciptionalStrandBias.r";
|
|
43 our $pathRScriptMutSpectrum = "$path_R_Scripts/R/mutationSpectra_Galaxy.r";
|
|
44
|
|
45 our $folderMutAnalysis = "";
|
|
46 our @pathInput = split("/", $input);
|
|
47
|
|
48 # Hash table with the length of each chromosomes
|
|
49 our %chromosomes;
|
|
50
|
|
51 ######################################################################################################################################################
|
|
52 # MAIN #
|
|
53 ######################################################################################################################################################
|
|
54 # Check the presence of the flags and create the output and temp directories
|
|
55 CheckFlags();
|
|
56
|
|
57 # Retrieve chromosomes length
|
|
58 checkChrDir();
|
|
59
|
|
60
|
|
61 print "-----------------------------------------------------------------\n";
|
|
62 print "-----------------Report Mutational Analysis----------------------\n";
|
|
63 print"-----------------------------------------------------------------\n";
|
|
64
|
|
65 # First check if the file is annotated or not
|
|
66 CheckAnnotationFile($input);
|
|
67
|
|
68 # Calculate the statistics and generate the report
|
|
69 my @colInfoAV = qw(Chr Start Ref Alt);
|
|
70 ReportMutDist($input, $folderMutAnalysis, $folder_temp, \@colInfoAV, $refGenome);
|
|
71
|
|
72 # Remove the temporary directory
|
|
73 rmtree($folder_temp);
|
|
74
|
|
75
|
|
76 ######################################################################################################################################################
|
|
77 # FUNCTIONS #
|
|
78 ######################################################################################################################################################
|
|
79
|
|
80 # Check the presence of the flags and create the output and temp directories
|
|
81 sub CheckFlags
|
|
82 {
|
|
83 # Check the reference genome
|
|
84 if($refGenome eq "empty") { print STDERR "You forget to specify the name for the reference genome!!!\nPlease specify it with the flag --refGenome\n"; exit; }
|
|
85
|
|
86 # If no output is specified write the result as the same place as the input file
|
|
87 if($output eq "empty")
|
|
88 {
|
|
89 my $folderRes = "";
|
|
90 for(my $i=0; $i<$#pathInput; $i++) { $folderRes .= "$pathInput[$i]/"; }
|
|
91
|
|
92 $folderMutAnalysis = "$folderRes/Mutational_Analysis";
|
|
93 if(!-e $folderMutAnalysis) { mkdir($folderMutAnalysis) or die "$!: $folderMutAnalysis\n"; }
|
|
94 }
|
|
95 else
|
|
96 {
|
|
97 if(!-e $output) { mkdir($output) or die "$!: $output\n"; }
|
|
98
|
|
99 $folderMutAnalysis = "$output/Mutational_Analysis";
|
|
100 if(!-e $folderMutAnalysis) { mkdir($folderMutAnalysis) or die "$!: $folderMutAnalysis\n"; }
|
|
101 }
|
|
102
|
|
103 # If no temp folder is specified write the result in the current path
|
|
104 if($folder_temp eq "empty") { $folder_temp = "$pwd/TEMP_MutationalAnalysis_$pathInput[$#pathInput]"; }
|
|
105 if(!-e $folder_temp) { mkdir($folder_temp) or die "$!: $folder_temp\n"; }
|
|
106
|
|
107 # Check the path to the R scripts
|
|
108 if($path_R_Scripts eq "empty") { print STDERR "You forget to specify the path for the R scripts!!!\nPlease specify it with the flag --pathRscript\n"; exit; }
|
|
109
|
|
110
|
|
111 # The input is a folder
|
|
112 if(-d $input) { foreach my $file (`ls $input`) { CheckLengthFilename("$input/$file"); } }
|
|
113 # The input is one file
|
|
114 else { CheckLengthFilename($input); }
|
|
115 }
|
|
116 # Check the length of the file, must be < 32 characters for the Excel sheet
|
|
117 sub CheckLengthFilename
|
|
118 {
|
|
119 my ($inputFile) = @_;
|
|
120
|
|
121 ## Verify the name of file, must be <= 31 chars for the sheet name
|
|
122 my ($filename, $directories, $suffix) = fileparse($inputFile, qr/\.[^.]*/);
|
|
123
|
|
124 if(length($filename) > 31) { print STDERR "The file: $inputFile must be <= 31 chars\nPlease modify it before running the script\n"; exit; }
|
|
125 }
|
|
126
|
|
127 # Retrieve chromosomes length
|
|
128 sub checkChrDir
|
|
129 {
|
|
130 my @files = `ls $path_SeqrefGenome/$refGenome"_seq"/*.fa`;
|
|
131 foreach my $file (@files)
|
|
132 {
|
|
133 if ($file !~ /chr(\d+|x|y)\.fa/i){next;}
|
|
134 open(FILE,$file);
|
|
135 <FILE>;
|
|
136 my $seq="";
|
|
137 while (<FILE>){ chomp; $seq.=$_;}
|
|
138 $file =~ /chr(.*)\.fa/;
|
|
139 $chromosomes{"chr".$1}=length($seq);
|
|
140 }
|
|
141 }
|
|
142
|
|
143 # Check if the file is annotated or not
|
|
144 sub CheckAnnotationFile
|
|
145 {
|
|
146 my ($inputFile) = @_;
|
|
147
|
|
148 # A folder is pass in argument
|
|
149 if(-d $inputFile)
|
|
150 {
|
|
151 foreach my $file (`ls $inputFile`)
|
|
152 {
|
|
153 chomp($file);
|
|
154
|
|
155 open(F1, "$inputFile/$file") or die "$!: $inputFile/$file\n";
|
|
156 my $search_header = <F1>; $search_header =~ s/[\r\n]+$//; my @tab_search_header = split("\t",$search_header);
|
|
157 close F1;
|
|
158 # The number of the column
|
|
159 my $value_of_column_NB = "toto";
|
|
160 for(my $i=0; $i<=$#tab_search_header; $i++)
|
|
161 {
|
|
162 if($tab_search_header[$i] eq "Func.refGene") { $value_of_column_NB = $i; }
|
|
163 }
|
|
164 if($value_of_column_NB eq "toto") { print STDERR "Error the input file you specify is not annotated! $inputFile/$file !!!!\nPlease first annotate your file before trying to generate the report on mutation patterns\n"; exit; }
|
|
165 }
|
|
166 }
|
|
167 else
|
|
168 {
|
|
169 open(F1, $inputFile) or die "$!: $inputFile\n";
|
|
170 my $search_header = <F1>; $search_header =~ s/[\r\n]+$//; my @tab_search_header = split("\t",$search_header);
|
|
171 close F1;
|
|
172 # The number of the column
|
|
173 my $value_of_column_NB = "toto";
|
|
174 for(my $i=0; $i<=$#tab_search_header; $i++)
|
|
175 {
|
|
176 if($tab_search_header[$i] eq "Func.refGene") { $value_of_column_NB = $i; }
|
|
177 }
|
|
178 if($value_of_column_NB eq "toto") { print STDERR "Error the input file you specify is not annotated! $inputFile !!!!\nPlease first annotate your file before trying to generate the report on mutation patterns\n"; exit; }
|
|
179 }
|
|
180 }
|
|
181
|
|
182 # Calculate the statistics and generate the report
|
|
183 sub ReportMutDist
|
|
184 {
|
|
185 our ($input, $output, $folder_temp, $refTab_colInfo, $refGenome) = @_;
|
|
186
|
|
187 my @column = @$refTab_colInfo;
|
|
188
|
|
189 our ($chr_name, $start_name, $ref_name, $alt_name) = split(/,/, join(',', @column)); # Separe each element
|
|
190
|
|
191 our $func_name = "Func.refGene";
|
|
192 our $exonicFunc_name = "ExonicFunc.refGene";
|
|
193 our $strand_name = "Strand";
|
|
194 our $context_name = "context";
|
|
195
|
|
196 my $folderFigure = "$output/Figures";
|
|
197 if(-e $folderFigure) { rmtree($folderFigure); mkdir($folderFigure) or die "Can't create the directory $folderFigure\n"; }
|
|
198 else { mkdir($folderFigure) or die "Can't create the directory $folderFigure\n"; }
|
|
199 my $folderChi2 = "$folderFigure/Chi2";
|
|
200 if(!-e $folderChi2) { mkdir($folderChi2) or die "Can't create the directory $folderChi2\n"; }
|
|
201 my $folderWebLogo = "$folderFigure/WebLogo";
|
|
202 if(!-e $folderWebLogo) { mkdir($folderWebLogo) or die "Can't create the directory $folderWebLogo\n"; }
|
|
203 my $folderNMF = "$folderFigure/Input_NMF";
|
|
204 if(!-e $folderNMF) { mkdir($folderNMF) or die "Can't create the directory $folderNMF\n"; }
|
|
205
|
|
206 ################################################################################################
|
|
207 ### Calculates all the statistics ###
|
|
208 ################################################################################################
|
|
209 ############ Recover Annovar annotations (for having the save number of functional regions for each samples)
|
|
210 my @tab_func = recoverAnnovarAnnotation($input, $func_name);
|
|
211 if(@tab_func == 0) { print STDERR "Error the table for the functional region is empty!!!!! check $input $func_name\n"; exit; }
|
|
212
|
|
213 ############ Calculate the different statistics present in the report
|
|
214 my %h_file = ();
|
|
215 CalculateStatistics(\%h_file, \@tab_func);
|
|
216
|
|
217 ############ Calculate the chi2 for the strand bias
|
|
218 CalculateChi2(\%h_file, $folderChi2);
|
|
219
|
|
220 ############ Write the different statistics present in the report
|
|
221 WriteStatistics(\%h_file, $#tab_func, $folderFigure, $folderChi2, $folderNMF);
|
|
222
|
|
223 ############ Create logo for studying the 10 flanking bases of the mutation
|
|
224 CreateLogo(\%h_file, $folderWebLogo);
|
|
225
|
|
226
|
|
227 ################### Subroutines for generating the report for the mutational analysis
|
|
228 sub recoverAnnovarAnnotation
|
|
229 {
|
|
230 my ($input, $AV_annotation) = @_;
|
|
231
|
|
232 my %hash = ();
|
|
233
|
|
234 # The input is a folder
|
|
235 if(-d $input)
|
|
236 {
|
|
237 foreach my $file (`ls $input`)
|
|
238 {
|
|
239 $file =~ s/[\r\n]+$//;
|
|
240 my $AV_annotation_value = recoverNumCol("$input/$file", $AV_annotation);
|
|
241
|
|
242 open(F1, "$input/$file") or die "$!: $input/$file\n";
|
|
243 my $header = <F1>;
|
|
244 while(<F1>)
|
|
245 {
|
|
246 $_ =~ s/[\r\n]+$//;
|
|
247 my @tab = split("\t", $_);
|
|
248
|
|
249 # Some files can have an empty line at the end and WE DON'T WANT to consider it
|
|
250 if(! defined $tab[0]) { next; }
|
|
251 # Some func value are repeated and separated by ";"
|
|
252 my $funcSegment = "";
|
|
253 if($tab[$AV_annotation_value] =~ /;/) { my @temp = split(";", $tab[$AV_annotation_value]); $funcSegment = $temp[0]; }
|
|
254 else { $funcSegment = $tab[$AV_annotation_value]; }
|
|
255
|
|
256 $hash{$funcSegment} = "";
|
|
257 }
|
|
258 close F1;
|
|
259 }
|
|
260 my @tab_AVAnnotation = ();
|
|
261 foreach my $k (sort keys %hash) { push(@tab_AVAnnotation, $k); }
|
|
262 return @tab_AVAnnotation;
|
|
263 }
|
|
264 # The input is a file
|
|
265 else
|
|
266 {
|
|
267 my $AV_annotation_value = recoverNumCol($input, $AV_annotation);
|
|
268
|
|
269 open(F1, $input) or die "$!: $input\n";
|
|
270 my $header = <F1>;
|
|
271 while(<F1>)
|
|
272 {
|
|
273 $_ =~ s/[\r\n]+$//;
|
|
274 my @tab = split("\t", $_);
|
|
275
|
|
276 # Some func value are repeated and separated by ";"
|
|
277 my $funcSegment = "";
|
|
278 if($tab[$AV_annotation_value] =~ /;/) { my @temp = split(";", $tab[$AV_annotation_value]); $funcSegment = $temp[0]; }
|
|
279 else { $funcSegment = $tab[$AV_annotation_value]; }
|
|
280
|
|
281 $hash{$funcSegment} = "";
|
|
282 }
|
|
283 close F1;
|
|
284 my @tab_AVAnnotation = ();
|
|
285 foreach my$k (sort keys %hash) { push(@tab_AVAnnotation, $k); }
|
|
286 return @tab_AVAnnotation;
|
|
287 }
|
|
288 }
|
|
289 # Calculate the different statistics present in the report
|
|
290 sub CalculateStatistics
|
|
291 {
|
|
292 my ($refH_file, $refT_func) = @_;
|
|
293
|
|
294 my ($chr_value, $start_value, $ref_value, $alt_value, $func_value, $exonicFunc_value, $strand_value, $contextSeq_value) = ("", "", "", "", "", "", "", "", "", "");
|
|
295
|
|
296 # If the input is a folder
|
|
297 if(-d $input)
|
|
298 {
|
|
299 my $folderPool = "$folder_temp/Pool";
|
|
300 if(!-e $folderPool) { mkdir($folderPool) or die "Can't create the directory $folderPool\n"; }
|
|
301
|
|
302 # Copy each sample
|
|
303 foreach my $file (`ls $input`) { chomp($file); system("cp $input/$file $folderPool"); }
|
|
304
|
|
305 # Generate the pool of all the data
|
|
306 if($poolData == 1)
|
|
307 {
|
|
308 my @listFile = `ls $input`;
|
|
309
|
|
310 # For keeping the header only one time
|
|
311 chomp($listFile[0]);
|
|
312 system("cp $input/$listFile[0] $folderPool/Pool_Data.txt");
|
|
313
|
|
314 open(OUT, ">>", "$folderPool/Pool_Data.txt") or die "$!: $folderPool/Pool_Data.txt\n";
|
|
315
|
|
316 for(my $i=1; $i<=$#listFile; $i++)
|
|
317 {
|
|
318 chomp($listFile[$i]);
|
|
319 open(F1, "$input/$listFile[$i]") or die "$!: $input/$listFile[$i]\n";
|
|
320 my $header = <F1>;
|
|
321 while(<F1>) { print OUT $_; }
|
|
322 close F1;
|
|
323 }
|
|
324 close OUT;
|
|
325 }
|
|
326
|
|
327 foreach my $file (`ls $folderPool`)
|
|
328 {
|
|
329 chomp($file);
|
|
330 ############ Recover the number of the columns of interest
|
|
331 $chr_value = recoverNumCol("$folderPool/$file", $chr_name);
|
|
332 $start_value = recoverNumCol("$folderPool/$file", $start_name);
|
|
333 $ref_value = recoverNumCol("$folderPool/$file", $ref_name);
|
|
334 $alt_value = recoverNumCol("$folderPool/$file", $alt_name);
|
|
335 $func_value = recoverNumCol("$folderPool/$file", $func_name);
|
|
336 $exonicFunc_value = recoverNumCol("$folderPool/$file", $exonicFunc_name);
|
|
337 $strand_value = recoverNumCol("$folderPool/$file", $strand_name);
|
|
338 $contextSeq_value = recoverNumCol("$folderPool/$file", $context_name);
|
|
339 ############ Recover the number of the columns of interest
|
|
340
|
|
341 ############ Control the annotated file pass in argument
|
|
342 ## Check if the files have variants
|
|
343 my $nbLines_originalFile = `wc -l $folderPool/$file`; $nbLines_originalFile =~ /(\d+) /;
|
|
344 if($1==1) { print STDERR "\n\nNo line in the file $folderPool/$file\n\n"; exit; }
|
|
345 ## Check if there is variant with strand information. If not the rest of the script generates errors
|
|
346 my $testFile = 0;
|
|
347 CheckVariantReport("$folderPool/$file", $strand_value, \$testFile);
|
|
348 if($testFile==0) { print STDERR "\n\nNo strand information for the file $folderPool/$file\n\n"; exit; }
|
|
349 ############ Control the annotated file pass in argument
|
|
350
|
|
351 ############ Calculate the statistics
|
|
352 File2Hash("$folderPool/$file", $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func);
|
|
353 }
|
|
354 }
|
|
355 # If the input is a file
|
|
356 else
|
|
357 {
|
|
358 ############ Recover the number of the columns of interest
|
|
359 $chr_value = recoverNumCol($input, $chr_name);
|
|
360 $start_value = recoverNumCol($input, $start_name);
|
|
361 $ref_value = recoverNumCol($input, $ref_name);
|
|
362 $alt_value = recoverNumCol($input, $alt_name);
|
|
363 $func_value = recoverNumCol($input, $func_name);
|
|
364 $exonicFunc_value = recoverNumCol($input, $exonicFunc_name);
|
|
365 $strand_value = recoverNumCol($input, $strand_name);
|
|
366 $contextSeq_value = recoverNumCol($input, $context_name);
|
|
367 ############ Recover the number of the columns of interest
|
|
368
|
|
369 ############ Control the annotated file pass in argument
|
|
370 ## Check if the files have variants
|
|
371 my $nbLines_originalFile = `wc -l $input`; $nbLines_originalFile =~ /(\d+) /;
|
|
372 if($1==1) { print STDERR "\n\nNo line in the file $input\n\n"; exit; }
|
|
373 ## Check if there is variant with strand information. If not the rest of the script generates errors
|
|
374 my $testFile = 0;
|
|
375 CheckVariantReport($input, $strand_value, \$testFile);
|
|
376 if($testFile==0) { print STDERR "\n\nNo strand information for the file $input\n\n"; exit; }
|
|
377 ############ Control the annotated file pass in argument
|
|
378
|
|
379 ############ Calculate the statistics
|
|
380 File2Hash($input, $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func);
|
|
381 }
|
|
382 }
|
|
383 # Check if there is at least one variant with a strand information
|
|
384 sub CheckVariantReport
|
|
385 {
|
|
386 my ($file, $strand_value, $refS_testFile) = @_;
|
|
387
|
|
388 open(F1, $file) or die "$!: $file\n";
|
|
389 my $header = <F1>;
|
|
390 while(<F1>)
|
|
391 {
|
|
392 $_ =~ s/[\r\n]+$//;
|
|
393 my @tab = split("\t", $_);
|
|
394
|
|
395 if( ($tab[$strand_value] eq "+") || ($tab[$strand_value] eq "-") ) { $$refS_testFile++; }
|
|
396 }
|
|
397 close F1;
|
|
398 }
|
|
399 # Convert the annotated VCF into a hash table
|
|
400 sub File2Hash
|
|
401 {
|
|
402 my ($inputFile, $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func) = @_;
|
|
403 my ($filename, $directories, $suffix) = fileparse($inputFile, qr/\.[^.]*/);
|
|
404
|
|
405 # Initialisation of the hash
|
|
406 my @tab_mutation = qw(C:G>A:T C:G>G:C C:G>T:A T:A>A:T T:A>C:G T:A>G:C);
|
|
407 my @tab_aaChange = ("NonTr", "Tr", "TotalMutG");
|
|
408 my @tabExoFunc = ("frameshift insertion", "frameshift deletion", "frameshift block substitution", "frameshift substitution", "stopgain", "stoploss", "nonframeshift insertion", "nonframeshift deletion", "nonframeshift substitution", "nonframeshift block substitution", "nonsynonymous SNV", "synonymous SNV", "unknown", "NA");
|
|
409
|
|
410 # Total number of SBS on the genomic strand
|
|
411 $refH_file->{$filename}{'TotalSBSGenomic'} = 0;
|
|
412 # Total number of Indel on the genomic strand
|
|
413 $refH_file->{$filename}{'TotalIndelGenomic'} = 0;
|
|
414 # Total number of SBS on the coding strand
|
|
415 $refH_file->{$filename}{'TotalSBSCoding'} = 0;
|
|
416 # Total number of SBS and Indel on the genomic strand
|
|
417 $refH_file->{$filename}{'TotalMutGenomic'} = 0;
|
|
418
|
|
419 #####################################
|
|
420 # SBS by segment (6 mutation types) #
|
|
421 #####################################
|
|
422 foreach my $elt_tabFunc (@$refT_func)
|
|
423 {
|
|
424 foreach my $elt_tabMutation (@tab_mutation)
|
|
425 {
|
|
426 foreach my $elt_aaChange (@tab_aaChange)
|
|
427 {
|
|
428 $refH_file->{$filename}{'6mutType'}{$elt_tabFunc}{$elt_tabMutation}{$elt_aaChange} = 0;
|
|
429 }
|
|
430 }
|
|
431 }
|
|
432
|
|
433 #######################
|
|
434 # Pearson correlation #
|
|
435 #######################
|
|
436 $refH_file->{$filename}{'SBSPerChr'}{'AllMutType'} = 0;
|
|
437 # Count of SBS per chromosome foreach mutation types
|
|
438 foreach my $elt_tabMutation (@tab_mutation)
|
|
439 {
|
|
440 foreach my $chromosome (sort keys %chromosomes){ $refH_file->{$filename}{'SBSPerChr'}{$elt_tabMutation}{'CHR'}{$chromosome}{'chr'} = 0;}
|
|
441 $refH_file->{$filename}{'SBSPerChr'}{$elt_tabMutation}{'Pearson'} = 0;
|
|
442 }
|
|
443
|
|
444 foreach my $chromosome (sort keys %chromosomes){
|
|
445 $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'}=0;
|
|
446 }
|
|
447
|
|
448 ############################
|
|
449 # Impact of SBS on protein #
|
|
450 ############################
|
|
451 foreach my $elt_exoFunc (@tabExoFunc)
|
|
452 {
|
|
453 $refH_file->{$filename}{'ImpactSBS'}{$elt_exoFunc} = 0;
|
|
454 }
|
|
455
|
|
456 #####################################
|
|
457 # Sequence context (genomic strand) #
|
|
458 #####################################
|
|
459 my @tab_mutation2 = qw(C>A C>G C>T T>A T>C T>G);
|
|
460 my @tab_context = qw(A_A A_C A_G A_T C_A C_C C_G C_T G_A G_C G_G G_T T_A T_C T_G T_T);
|
|
461 foreach my $elt_context (@tab_context)
|
|
462 {
|
|
463 foreach my $elt_mutation3 (@tab_mutation2)
|
|
464 {
|
|
465 $refH_file->{$filename}{'SeqContextG'}{$elt_context}{$elt_mutation3} = 0;
|
|
466 }
|
|
467 }
|
|
468
|
|
469 ####################################
|
|
470 # Sequence context (coding strand) #
|
|
471 ####################################
|
|
472 my @tab_TrNonTr = qw(NonTr Tr);
|
|
473 foreach my $elt_context (@tab_context)
|
|
474 {
|
|
475 foreach my $elt_mutation2 (@tab_mutation2)
|
|
476 {
|
|
477 foreach my $trNonTr (@tab_TrNonTr)
|
|
478 {
|
|
479 $refH_file->{$filename}{'SeqContextC'}{$elt_context}{$elt_mutation2}{$trNonTr} = 0;
|
|
480 }
|
|
481 }
|
|
482 }
|
|
483
|
|
484 open(F1,$inputFile) or die "$!: $inputFile\n";
|
|
485 my $header = <F1>;
|
|
486 while(<F1>)
|
|
487 {
|
|
488 $_ =~ s/[\r\n]+$//;
|
|
489 my @tab = split("\t", $_);
|
|
490
|
|
491 # Random chromosome and chromosome M
|
|
492 if( ($tab[$chr_value] =~ /random/i) || ($tab[$chr_value] =~ /M/i) ) { next; }
|
|
493
|
|
494 ############################################## Extract the base just before and after the mutation ##############################################
|
|
495 my $context = "";
|
|
496 my $contextSequence = $tab[$contextSeq_value]; $contextSequence =~ tr/a-z/A-Z/;
|
|
497 my @tempContextSequence = split("", $contextSequence);
|
|
498 my $total_nbBaseContext = $#tempContextSequence;
|
|
499 my $midlle_totalNbBaseContext = $total_nbBaseContext/2; # For having the middle of the sequence
|
|
500 my $before = $midlle_totalNbBaseContext - 1; my $after = $midlle_totalNbBaseContext + 1;
|
|
501 $context = $tempContextSequence[$before]."_".$tempContextSequence[$after];
|
|
502 ############################################## Extract the base just before and after the mutation ##############################################
|
|
503
|
|
504
|
|
505 ############################################################### Impact on protein ###############################################################
|
|
506 my $exoFunc = "";
|
|
507 # Sometimes the annotation is repeated frameshift deletion;frameshift deletion
|
|
508 if($tab[$exonicFunc_value] =~ /\;/)
|
|
509 {
|
|
510 my @temp = split(";", $tab[$exonicFunc_value]);
|
|
511 if($temp[0] eq $temp[1]) { $exoFunc = $temp[0]; }
|
|
512 }
|
|
513 # The annotations have changed after MAJ Annovar 2014Jul22 (stopgain SNV => stopgain)
|
|
514 elsif($tab[$exonicFunc_value] eq "stopgain SNV") { $exoFunc = "stopgain"; }
|
|
515 elsif($tab[$exonicFunc_value] eq "stoploss SNV") { $exoFunc = "stoploss"; }
|
|
516 elsif($tab[$exonicFunc_value] eq "nonsynonymous_SNV") { $exoFunc = "nonsynonymous SNV"; }
|
|
517 elsif($tab[$exonicFunc_value] eq "stopgain_SNV") { $exoFunc = "stopgain SNV"; }
|
|
518 elsif($tab[$exonicFunc_value] eq "synonymous_SNV") { $exoFunc = "synonymous SNV"; }
|
|
519 else { $exoFunc = $tab[$exonicFunc_value]; }
|
|
520
|
|
521 if(exists $refH_file->{$filename}{'ImpactSBS'}{$exoFunc})
|
|
522 {
|
|
523 # If the sequence context if not recovered correctly don't considered the variants
|
|
524 if( ($context =~ /N/) || (length($context) != 3) ) { next; }
|
|
525
|
|
526 $refH_file->{$filename}{'ImpactSBS'}{$exoFunc}++;
|
|
527 $refH_file->{$filename}{'TotalMutGenomic'}++;
|
|
528 }
|
|
529 else { print "WARNING: Exonic function not considered: $exoFunc\n"; }
|
|
530 ############################################################### Impact on protein ###############################################################
|
|
531
|
|
532 ################################################### Only SBS are considered for the statistics ##################################################
|
|
533 if( ($tab[$ref_value] =~ /^[ACGT]$/i) && ($tab[$alt_value] =~ /^[ACGT]$/i) )
|
|
534 {
|
|
535 # If the sequence context if not recovered correctly don't considered the variants
|
|
536 if( ($context =~ /N/) || (length($context) != 3) ) { next; }
|
|
537
|
|
538 # Total number of SBS on the genomic strand
|
|
539 $refH_file->{$filename}{'TotalSBSGenomic'}++;
|
|
540
|
|
541 # Total number of SBS on the coding strand with a sequence context
|
|
542 if( ($tab[$strand_value] eq "+") || ($tab[$strand_value] eq "-") )
|
|
543 {
|
|
544 if( ($context ne "NA") && (($context =~ /N/) || (length($context) != 3)) ) { next; }
|
|
545 $refH_file->{$filename}{'TotalSBSCoding'}++;
|
|
546 }
|
|
547 }
|
|
548 else { $refH_file->{$filename}{'TotalIndelGenomic'}++; }
|
|
549 ################################################### Only SBS are considered for the statistics ##################################################
|
|
550
|
|
551 # Number of SBS per chromosome: remove the "chr"
|
|
552 my $chrNameForH=$tab[$chr_value];
|
|
553 if(exists $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chrNameForH}{'chr'}++; }
|
|
554
|
|
555
|
|
556 ################################################### Some func value are repeated and separated by ";" ##################################################
|
|
557 my $funcSegment = "";
|
|
558 if($tab[$func_value] =~ /;/) { my @temp = split(";", $tab[$func_value]); $funcSegment = $temp[0]; }
|
|
559 else { $funcSegment = $tab[$func_value]; }
|
|
560
|
|
561
|
|
562 ############################################################### MUTATION C> #############################################
|
|
563 ###################################### C:G>A:T
|
|
564 if( (($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "A")) || ( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "T") ) )
|
|
565 {
|
|
566 my $mutation = "C:G>A:T";
|
|
567 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
|
|
568
|
|
569 # Pearson correlation
|
|
570 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
|
|
571
|
|
572 # Sequence context - 6 mutation types - genomic strand
|
|
573 my $mutationSeqContext6mutType = "C>A";
|
|
574 # We want to express the mutation in C>
|
|
575 if( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "T") )
|
|
576 {
|
|
577 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
578 my $context_reverse = $base5."_".$base3;
|
|
579 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
|
|
580 }
|
|
581 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
|
|
582
|
|
583 # Strand analysis C>A on NonTr strand
|
|
584 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "T"))) )
|
|
585 {
|
|
586 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
|
|
587
|
|
588 # C>A With the sequence context (C>A strand = +)
|
|
589 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A")) )
|
|
590 {
|
|
591 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'NonTr'}++; }
|
|
592 }
|
|
593 # C>A With the sequence context (G>T strand = -)
|
|
594 else
|
|
595 {
|
|
596 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
597 my $context_reverse = $base5."_".$base3;
|
|
598 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'NonTr'}++; }
|
|
599 }
|
|
600 }
|
|
601 # Strand analysis C>A on Tr strand
|
|
602 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "T"))) )
|
|
603 {
|
|
604 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
|
|
605
|
|
606 # C>A With the sequence context (C>A strand = -)
|
|
607 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A")) )
|
|
608 {
|
|
609 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'Tr'}) { { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'Tr'}++; } }
|
|
610 }
|
|
611 # C>A with the sequence context (G>T strand = +)
|
|
612 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "T")) )
|
|
613 {
|
|
614 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
615 my $context_reverse = $base5."_".$base3;
|
|
616 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'Tr'}++; }
|
|
617 }
|
|
618 }
|
|
619 # WebLogo-3
|
|
620 if(($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "A"))
|
|
621 {
|
|
622 # For the logo all the sequences must have the same length
|
|
623 if(scalar(@tempContextSequence) == 2) { next; }
|
|
624 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
625 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
|
|
626 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
|
|
627 my $context = $contextTemp1."C".$contextTemp2;
|
|
628 push(@{$refH_file->{$filename}{'WebLogo3'}{'CA'}}, $context);
|
|
629 }
|
|
630 else
|
|
631 {
|
|
632 if(scalar(@tempContextSequence) == 2) { next; }
|
|
633 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
634 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
|
|
635 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
|
|
636 my $context = $contextTemp1."C".$contextTemp2; $context = reverse $context;
|
|
637 push(@{$refH_file->{$filename}{'WebLogo3'}{'CA'}}, $context);
|
|
638 }
|
|
639 }
|
|
640 ###################################### C:G>G:C
|
|
641 if( (($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "G")) || ( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "C") ) )
|
|
642 {
|
|
643 my $mutation = "C:G>G:C";
|
|
644 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
|
|
645
|
|
646 # Pearson correlation
|
|
647 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
|
|
648
|
|
649 # Sequence context - 6 mutation types - genomic strand
|
|
650 my $mutationSeqContext6mutType = "C>G";
|
|
651 # We want to express the mutation in C>
|
|
652 if( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "C") )
|
|
653 {
|
|
654 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
655 my $context_reverse = $base5."_".$base3;
|
|
656 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
|
|
657 }
|
|
658 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
|
|
659
|
|
660 # Strand analysis C>G on NonTr strand
|
|
661 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "G"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "C"))) )
|
|
662 {
|
|
663 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
|
|
664
|
|
665 # C>G with the sequence context (C>G strand = +)
|
|
666 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "G")) )
|
|
667 {
|
|
668 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>G'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>G'}{'NonTr'}++; }
|
|
669 }
|
|
670 # C>G with the sequence context (G>C strand = -)
|
|
671 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "C")) )
|
|
672 {
|
|
673 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
674 my $context_reverse = $base5."_".$base3;
|
|
675 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>G'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>G'}{'NonTr'}++; }
|
|
676 }
|
|
677 }
|
|
678 # Strand analysis C>G on Tr strand
|
|
679 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "G"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "C"))) )
|
|
680 {
|
|
681 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
|
|
682
|
|
683 # C>G with the sequence context (C>G strand = -)
|
|
684 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "G")) )
|
|
685 {
|
|
686 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>G'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>G'}{'Tr'}++; }
|
|
687 }
|
|
688 # C>G with the sequence context (G>C strand = +)
|
|
689 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "C")) )
|
|
690 {
|
|
691 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
692 my $context_reverse = $base5."_".$base3;
|
|
693 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>G'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>G'}{'Tr'}++; }
|
|
694 }
|
|
695 }
|
|
696 # WebLogo-3
|
|
697 if(($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "G"))
|
|
698 {
|
|
699 if(scalar(@tempContextSequence) == 2) { next; }
|
|
700 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
701 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
|
|
702 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
|
|
703 my $context = $contextTemp1."C".$contextTemp2;
|
|
704 push(@{$refH_file->{$filename}{'WebLogo3'}{'CG'}}, $context);
|
|
705 }
|
|
706 else
|
|
707 {
|
|
708 if(scalar(@tempContextSequence) == 2) { next; }
|
|
709 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
710 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
|
|
711 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
|
|
712 my $context = $contextTemp1."C".$contextTemp2; $context = reverse $context;
|
|
713 push(@{$refH_file->{$filename}{'WebLogo3'}{'CG'}}, $context);
|
|
714 }
|
|
715 }
|
|
716 ###################################### C:G>T:A
|
|
717 if( (($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "T")) || ( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "A") ) )
|
|
718 {
|
|
719 my $mutation = "C:G>T:A";
|
|
720 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
|
|
721
|
|
722 # Pearson correlation
|
|
723 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
|
|
724
|
|
725 # Sequence context - 6 mutation types - genomic strand
|
|
726 my $mutationSeqContext6mutType = "C>T";
|
|
727 # We want to express the mutation in C>
|
|
728 if( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "A") )
|
|
729 {
|
|
730 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
731 my $context_reverse = $base5."_".$base3;
|
|
732 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
|
|
733 }
|
|
734 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
|
|
735
|
|
736 # Strand analysis C>T on NonTr strand
|
|
737 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "T"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "A"))) )
|
|
738 {
|
|
739 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
|
|
740
|
|
741 # C>T with the sequence context (C>T strand = +)
|
|
742 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "T")) )
|
|
743 {
|
|
744 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>T'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>T'}{'NonTr'}++; }
|
|
745 }
|
|
746 # C>T with the sequence context (G>A strand = -)
|
|
747 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "A")) )
|
|
748 {
|
|
749 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
750 my $context_reverse = $base5."_".$base3;
|
|
751 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>T'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>T'}{'NonTr'}++; }
|
|
752 }
|
|
753 }
|
|
754 # Strand analysis C>T on Tr strand
|
|
755 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "T"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "A"))) )
|
|
756 {
|
|
757 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
|
|
758
|
|
759 # C>T with the sequence context (C>T strand = -)
|
|
760 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "T")) )
|
|
761 {
|
|
762 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>T'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>T'}{'Tr'}++; }
|
|
763 }
|
|
764 # C>T with the sequence context (G>A strand = +)
|
|
765 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "A")) )
|
|
766 {
|
|
767 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
768 my $context_reverse = $base5."_".$base3;
|
|
769 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>T'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>T'}{'Tr'}++; }
|
|
770 }
|
|
771 }
|
|
772 # WebLogo-3
|
|
773 if(($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "T"))
|
|
774 {
|
|
775 if(scalar(@tempContextSequence) == 2) { next; }
|
|
776 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
777 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
|
|
778 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
|
|
779 my $context = $contextTemp1."C".$contextTemp2;
|
|
780 push(@{$refH_file->{$filename}{'WebLogo3'}{'CT'}}, $context);
|
|
781 }
|
|
782 else
|
|
783 {
|
|
784 if(scalar(@tempContextSequence) == 2) { next; }
|
|
785 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
786 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
|
|
787 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
|
|
788 my $context = $contextTemp1."C".$contextTemp2; $context = reverse $context;
|
|
789 push(@{$refH_file->{$filename}{'WebLogo3'}{'CT'}}, $context);
|
|
790 }
|
|
791 }
|
|
792
|
|
793 ############################################################### MUTATION T> #############################################
|
|
794 ###################################### T:A>A:T
|
|
795 if( (($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "A")) || ( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "T") ) )
|
|
796 {
|
|
797 my $mutation = "T:A>A:T";
|
|
798 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
|
|
799
|
|
800 # Pearson correlation
|
|
801 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
|
|
802
|
|
803 # Sequence context - 6 mutation types - genomic strand
|
|
804 my $mutationSeqContext6mutType = "T>A";
|
|
805 # We want to express the mutation in T>
|
|
806 if( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "T") )
|
|
807 {
|
|
808 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
809 my $context_reverse = $base5."_".$base3;
|
|
810 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
|
|
811 }
|
|
812 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
|
|
813
|
|
814 # Strand analysis T>A on NonTr stand
|
|
815 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "A"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "T"))) )
|
|
816 {
|
|
817 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
|
|
818
|
|
819 # T>A with the sequence context (T>A strand = +)
|
|
820 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "A")) )
|
|
821 {
|
|
822 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>A'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>A'}{'NonTr'}++; }
|
|
823 }
|
|
824 # T>A with the sequence context (A>T strand = -)
|
|
825 else
|
|
826 {
|
|
827 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
828 my $context_reverse = $base5."_".$base3;
|
|
829 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>A'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>A'}{'NonTr'}++; }
|
|
830 }
|
|
831 }
|
|
832 # Strand analysis T>A on Tr strand
|
|
833 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "A"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "T"))) )
|
|
834 {
|
|
835 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
|
|
836
|
|
837 # T>A <ith the sequence context (T>A strand = -)
|
|
838 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "A")) )
|
|
839 {
|
|
840 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>A'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>A'}{'Tr'}++; }
|
|
841 }
|
|
842 # T>A with the sequence context (A>T strand = +)
|
|
843 else
|
|
844 {
|
|
845 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
846 my $context_reverse = $base5."_".$base3;
|
|
847 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>A'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>A'}{'Tr'}++; }
|
|
848 }
|
|
849 }
|
|
850 # WebLogo-3
|
|
851 if(($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "A"))
|
|
852 {
|
|
853 if(scalar(@tempContextSequence) == 2) { next; }
|
|
854 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
855 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
|
|
856 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
|
|
857 my $context = $contextTemp1."T".$contextTemp2;
|
|
858 push(@{$refH_file->{$filename}{'WebLogo3'}{'TA'}}, $context);
|
|
859 }
|
|
860 else
|
|
861 {
|
|
862 if(scalar(@tempContextSequence) == 2) { next; }
|
|
863 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
864 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
|
|
865 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
|
|
866 my $context = $contextTemp1."T".$contextTemp2; $context = reverse $context;
|
|
867 push(@{$refH_file->{$filename}{'WebLogo3'}{'TA'}}, $context);
|
|
868 }
|
|
869 }
|
|
870 ###################################### T:A>C:G
|
|
871 if( (($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "C")) || ( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "G")) )
|
|
872 {
|
|
873 my $mutation = "T:A>C:G";
|
|
874 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
|
|
875
|
|
876 # Pearson correlation
|
|
877 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
|
|
878
|
|
879 # Sequence context - 6 mutation types - genomic strand
|
|
880 my $mutationSeqContext6mutType = "T>C";
|
|
881 # We want to express the mutation in T>
|
|
882 if( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "T") )
|
|
883 {
|
|
884 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
885 my $context_reverse = $base5."_".$base3;
|
|
886 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
|
|
887 }
|
|
888 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
|
|
889
|
|
890 # Strand analysis T>C on NonTr strand
|
|
891 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "C"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "G"))) )
|
|
892 {
|
|
893 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
|
|
894
|
|
895 # T>C (T>C strand = +)
|
|
896 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "C")) )
|
|
897 {
|
|
898 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>C'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>C'}{'NonTr'}++; }
|
|
899 }
|
|
900 # T>C (A>G strand = -)
|
|
901 else
|
|
902 {
|
|
903 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
904 my $context_reverse = $base5."_".$base3;
|
|
905 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>C'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>C'}{'NonTr'}++; }
|
|
906 }
|
|
907 }
|
|
908 # Strand analysis T>C on Tr strand
|
|
909 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "C"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "G"))) )
|
|
910 {
|
|
911 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
|
|
912
|
|
913 # T>C (T>C strand = -)
|
|
914 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "C")) )
|
|
915 {
|
|
916 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>C'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>C'}{'Tr'}++; }
|
|
917 }
|
|
918 # T>C (A>G strand = +)
|
|
919 else
|
|
920 {
|
|
921 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
922 my $context_reverse = $base5."_".$base3;
|
|
923 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>C'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>C'}{'Tr'}++; }
|
|
924 }
|
|
925 }
|
|
926 # WebLogo-3
|
|
927 if(($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "C"))
|
|
928 {
|
|
929 if(scalar(@tempContextSequence) == 2) { next; }
|
|
930 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
931 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
|
|
932 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
|
|
933 my $context = $contextTemp1."T".$contextTemp2; $context = reverse $context;
|
|
934 push(@{$refH_file->{$filename}{'WebLogo3'}{'TC'}}, $context);
|
|
935 }
|
|
936 else
|
|
937 {
|
|
938 if(scalar(@tempContextSequence) == 2) { next; }
|
|
939 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
940 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
|
|
941 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
|
|
942 my $context = $contextTemp1."T".$contextTemp2;
|
|
943 push(@{$refH_file->{$filename}{'WebLogo3'}{'TC'}}, $context);
|
|
944 }
|
|
945 }
|
|
946 ###################################### T:A>G:C
|
|
947 if( (($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "G")) || ( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "C")) )
|
|
948 {
|
|
949 my $mutation = "T:A>G:C";
|
|
950 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
|
|
951
|
|
952 # Pearson correlation
|
|
953 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
|
|
954
|
|
955 # Sequence context - 6 mutation types - genomic strand
|
|
956 my $mutationSeqContext6mutType = "T>G";
|
|
957 # We want to express the mutation in T>
|
|
958 if( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "T") )
|
|
959 {
|
|
960 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
961 my $context_reverse = $base5."_".$base3;
|
|
962 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
|
|
963 }
|
|
964 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
|
|
965
|
|
966 # Strand analysis T>G on NonTr strand
|
|
967 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "C"))) )
|
|
968 {
|
|
969 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
|
|
970
|
|
971 # T>G (T>G strand = +)
|
|
972 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G")) )
|
|
973 {
|
|
974 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'NonTr'}++; }
|
|
975 }
|
|
976 # T>G (A>C strand = -)
|
|
977 else
|
|
978 {
|
|
979 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
980 my $context_reverse = $base5."_".$base3;
|
|
981 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'NonTr'}++; }
|
|
982 }
|
|
983 }
|
|
984 # Strand analysis T>G on Tr strand
|
|
985 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "C"))) )
|
|
986 {
|
|
987 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
|
|
988
|
|
989 # T>G (T>G strand = -)
|
|
990 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G")) )
|
|
991 {
|
|
992 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'Tr'}++; }
|
|
993 }
|
|
994 # T>G (A>C strand = +)
|
|
995 else
|
|
996 {
|
|
997 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
|
|
998 my $context_reverse = $base5."_".$base3;
|
|
999 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'Tr'}++; }
|
|
1000 }
|
|
1001 }
|
|
1002 # WebLogo-3
|
|
1003 if(($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "G"))
|
|
1004 {
|
|
1005 if(scalar(@tempContextSequence) == 2) { next; }
|
|
1006 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
1007 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
|
|
1008 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
|
|
1009 my $context = $contextTemp1."T".$contextTemp2; $context = reverse $context;
|
|
1010 push(@{$refH_file->{$filename}{'WebLogo3'}{'TG'}}, $context);
|
|
1011 }
|
|
1012 else
|
|
1013 {
|
|
1014 if(scalar(@tempContextSequence) == 2) { next; }
|
|
1015 my ($contextTemp1, $contextTemp2) = ("", "");
|
|
1016 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
|
|
1017 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
|
|
1018 my $context = $contextTemp1."T".$contextTemp2;
|
|
1019 push(@{$refH_file->{$filename}{'WebLogo3'}{'TG'}}, $context);
|
|
1020 }
|
|
1021 }
|
|
1022 }
|
|
1023 close F1;
|
|
1024 }
|
|
1025 # Write the different statistics in the report
|
|
1026 sub WriteStatistics
|
|
1027 {
|
|
1028 my ($refH_file, $nb_func, $folderFigure, $folderChi2, $folderNMF) = @_;
|
|
1029
|
|
1030 # Save the different graphs in specific folder instead of in a general one.
|
|
1031 if(!-e "$folderFigure/Overall_mutation_distribution") { mkdir("$folderFigure/Overall_mutation_distribution") or die "Can't create the directory $folderFigure/Overall_mutation_distribution\n"; }
|
|
1032 if(!-e "$folderFigure/Impact_protein_sequence") { mkdir("$folderFigure/Impact_protein_sequence") or die "Can't create the directory $folderFigure/Impact_protein_sequence\n"; }
|
|
1033 if(!-e "$folderFigure/SBS_distribution") { mkdir("$folderFigure/SBS_distribution") or die "Can't create the directory $folderFigure/SBS_distribution\n"; }
|
|
1034 if(!-e "$folderFigure/Stranded_Analysis") { mkdir("$folderFigure/Stranded_Analysis") or die "Can't create the directory $folderFigure/Stranded_Analysis\n"; }
|
|
1035 if(!-e "$folderFigure/Trinucleotide_Sequence_Context") { mkdir("$folderFigure/Trinucleotide_Sequence_Context") or die "Can't create the directory $folderFigure/Trinucleotide_Sequence_Context\n"; }
|
|
1036 if(!-e "$folderFigure/Distribution_SBS_Per_Chromosomes") { mkdir("$folderFigure/Distribution_SBS_Per_Chromosomes") or die "Can't create the directory $folderFigure/Distribution_SBS_Per_Chromosomes\n"; }
|
|
1037
|
|
1038
|
|
1039 # Create a workbook with all the samples
|
|
1040 my $wb = ""; my $ws_sum = ""; my %h_chi2 = ();
|
|
1041 my ($ws_inputNMF_count, $ws_inputNMF_percent) = ("", "");
|
|
1042 ############### Define the format
|
|
1043 my ($format_A10, $format_A10Boldleft, $format_A10ItalicRed) = ("", "", "");
|
|
1044 my ($formatT_left, $formatT_right, $formatT_bottomRight, $formatT_bottomLeft, $formatT_bottom, $formatT_bottomHeader, $formatT_bottomRightHeader, $formatT_bottomHeader2, $formatT_rightHeader);
|
|
1045 my ($formatT_graphTitle);
|
|
1046 my ($table_topleft, $table_topRight, $table_bottomleft, $table_bottomRight, $table_top, $table_right, $table_bottom, $table_bottomItalicRed, $table_left, $table_bottomrightHeader, $table_left2, $table_middleHeader, $table_middleHeader2);
|
|
1047
|
|
1048 if($oneReportPerSample == 2)
|
|
1049 {
|
|
1050 $wb = Spreadsheet::WriteExcel->new("$output/Report_Mutation_Spectra.xls");
|
|
1051
|
|
1052 ############### Define the format
|
|
1053 Format_A10($wb, \$format_A10); # Text center in Arial 10
|
|
1054 Format_A10BoldLeft($wb, \$format_A10Boldleft); # Text on the left in Arial 10 bold
|
|
1055 Format_TextSection($wb, \$formatT_left, \$formatT_right, \$formatT_bottomRight, \$formatT_bottomLeft, \$formatT_bottom, \$formatT_bottomHeader, \$formatT_bottomRightHeader, \$formatT_bottomHeader2, \$formatT_rightHeader);
|
|
1056 Format_GraphTitle($wb, \$formatT_graphTitle);
|
|
1057 Format_Table($wb, \$table_topleft, \$table_topRight, \$table_bottomleft, \$table_bottomRight, \$table_top, \$table_right, \$table_bottom, \$table_bottomItalicRed, \$table_left, \$table_bottomrightHeader, \$table_left2, \$table_middleHeader, \$table_middleHeader2);
|
|
1058 Format_A10ItalicRed($wb, \$format_A10ItalicRed);
|
|
1059
|
|
1060
|
|
1061 ############### Worksheet with a summary of the samples
|
|
1062 $ws_sum = $wb->add_worksheet("Sample_List");
|
|
1063 $ws_sum->write(0, 0, "Samples", $format_A10); $ws_sum->write(0, 1, "Total number SBS", $format_A10); $ws_sum->write(0, 2, "Total number of Indel", $format_A10); $ws_sum->write(0, 3, "Total number of mutations", $format_A10);
|
|
1064 $ws_sum->set_column(0,0, 50); $ws_sum->set_column(1,1, 20); $ws_sum->set_column(2,2, 20); $ws_sum->set_column(3,3, 22);
|
|
1065
|
|
1066 ############### Save the chi2 values into a hash table
|
|
1067 if(-e "$folderChi2/Output_chi2_strandBias.txt")
|
|
1068 {
|
|
1069 open(F1, "$folderChi2/Output_chi2_strandBias.txt") or die "$!: $folderChi2/Output_chi2_strandBias.txt\n";
|
|
1070 my $header = <F1>; # Strand_Bias($tab[0]) NonTr-Tr($tab[1]) Proportion($tab[2]) P-val-Chi2($tab[3]) FDR($tab[4]) Confidence Interval($tab[5]) Mutation_Type($tab[6]) SampleName($tab[7])
|
|
1071 while(<F1>)
|
|
1072 {
|
|
1073 $_ =~ s/[\r\n]+$//;
|
|
1074 my @tab = split("\t", $_);
|
|
1075
|
|
1076 $h_chi2{$tab[7]}{$tab[6]}{'p-value'} = $tab[3]; $h_chi2{$tab[7]}{$tab[6]}{'ConfInt'} = $tab[5];
|
|
1077
|
|
1078 # For the pool data the FDR isn't calculated so replace the NA (=Missing values) by "-"
|
|
1079 if($tab[7] eq "Pool_Data") { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = "-"; }
|
|
1080 else { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = $tab[4]; }
|
|
1081 }
|
|
1082 close F1;
|
|
1083 }
|
|
1084 ############### Write the input matrix for NMF for the count and the un-normalized frequency
|
|
1085 $ws_inputNMF_count = $wb->add_worksheet("Input_NMF_Count");
|
|
1086 $ws_inputNMF_percent = $wb->add_worksheet("Input_NMF_Percent");
|
|
1087 }
|
|
1088
|
|
1089
|
|
1090 ################################################ Set the Rows and columns of the different part of the report ################################################
|
|
1091 my $row_SumSheet = 1; # First row for the summary sheet of the report
|
|
1092 my $rowStart_SBSdistrBySeg = 48; my $colStart_SBSdistrBySeg = 0; # For the table SBS distribution by segment
|
|
1093 my $colStart_matrixSeqContext = 19; # Sequence context
|
|
1094 my $col_inputNMF = 0; # Write the names of the samples with at least 33 SBS
|
|
1095
|
|
1096 # For NMF input
|
|
1097 my %h_inputNMF = ();
|
|
1098
|
|
1099 ## For each file
|
|
1100 foreach my $k_file (sort keys $refH_file)
|
|
1101 {
|
|
1102 print "File in process: $k_file\n";
|
|
1103 if($k_file ne "Pool_Data") { $col_inputNMF++; }
|
|
1104
|
|
1105 # Create one workbook for each sample
|
|
1106 if($oneReportPerSample == 1)
|
|
1107 {
|
|
1108 $wb = Spreadsheet::WriteExcel->new("$output/Report_Mutation_Spectra-$k_file.xls");
|
|
1109
|
|
1110 ############### Define the format
|
|
1111 Format_A10($wb, \$format_A10); # Text center in Arial 10
|
|
1112 Format_A10BoldLeft($wb, \$format_A10Boldleft); # Text on the left in Arial 10 bold
|
|
1113 Format_TextSection($wb, \$formatT_left, \$formatT_right, \$formatT_bottomRight, \$formatT_bottomLeft, \$formatT_bottom, \$formatT_bottomHeader, \$formatT_bottomRightHeader, \$formatT_bottomHeader2, \$formatT_rightHeader);
|
|
1114 Format_GraphTitle($wb, \$formatT_graphTitle);
|
|
1115 Format_Table($wb, \$table_topleft, \$table_topRight, \$table_bottomleft, \$table_bottomRight, \$table_top, \$table_right, \$table_bottom, \$table_bottomItalicRed, \$table_left, \$table_bottomrightHeader, \$table_left2, \$table_middleHeader, \$table_middleHeader2);
|
|
1116 Format_A10ItalicRed($wb, \$format_A10ItalicRed);
|
|
1117
|
|
1118
|
|
1119 ############### Worksheet with a summary of the samples
|
|
1120 $ws_sum = $wb->add_worksheet("Sample_List");
|
|
1121 $ws_sum->write(0, 0, "Samples", $format_A10); $ws_sum->write(0, 1, "Total number SBS", $format_A10); $ws_sum->write(0, 2, "Total number of Indel", $format_A10); $ws_sum->write(0, 3, "Total number of mutations", $format_A10);
|
|
1122 $ws_sum->set_column(0,0, 50); $ws_sum->set_column(1,1, 20); $ws_sum->set_column(2,2, 20); $ws_sum->set_column(3,3, 22);
|
|
1123 # Write in the Samples sheet the name and the total number of SBS
|
|
1124 $ws_sum->write(1, 0, "$k_file", $format_A10);
|
|
1125 $ws_sum->write(1, 1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $format_A10); $ws_sum->write(1, 2, $refH_file->{$k_file}{'TotalIndelGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 3, $refH_file->{$k_file}{'TotalMutGenomic'}, $format_A10);
|
|
1126
|
|
1127 ############### Save the chi2 values into a hash table
|
|
1128 if(-e "$folderChi2/Output_chi2_strandBias.txt")
|
|
1129 {
|
|
1130 open(F1, "$folderChi2/Output_chi2_strandBias.txt") or die "$!: $folderChi2/Output_chi2_strandBias.txt\n";
|
|
1131 my $header = <F1>; # Strand_Bias($tab[0]) NonTr-Tr($tab[1]) Proportion($tab[2]) P-val-Chi2($tab[3]) FDR($tab[4]) Confidence Interval($tab[5]) Mutation_Type($tab[6]) SampleName($tab[7])
|
|
1132 while(<F1>)
|
|
1133 {
|
|
1134 $_ =~ s/[\r\n]+$//;
|
|
1135 my @tab = split("\t", $_);
|
|
1136
|
|
1137 if($tab[7] eq $k_file)
|
|
1138 {
|
|
1139 $h_chi2{$tab[7]}{$tab[6]}{'p-value'} = $tab[3]; $h_chi2{$tab[7]}{$tab[6]}{'ConfInt'} = $tab[5];
|
|
1140
|
|
1141 # For the pool data the FDR isn't calculated so replace the NA (=Missing values) by "-"
|
|
1142 if($tab[7] eq "Pool_Data") { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = "-"; }
|
|
1143 else { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = $tab[4]; }
|
|
1144 }
|
|
1145 }
|
|
1146 close F1;
|
|
1147 }
|
|
1148
|
|
1149 ############### Write the input matrix for NMF
|
|
1150 if($k_file ne "Pool_Data")
|
|
1151 {
|
|
1152 # For NMF don't consider the pool of the samples
|
|
1153 $ws_inputNMF_count = $wb->add_worksheet("Input_NMF_Count");
|
|
1154 $ws_inputNMF_percent = $wb->add_worksheet("Input_NMF_Percent");
|
|
1155 # Write in the input NMF sheet the name of the samples
|
|
1156 $ws_inputNMF_count->write(0, 1, $k_file);
|
|
1157 $ws_inputNMF_percent->write(0, 1, $k_file);
|
|
1158 }
|
|
1159 }
|
|
1160 # One workbook with all the samples
|
|
1161 else
|
|
1162 {
|
|
1163 # Write in the Samples sheet the name and the total number of SBS
|
|
1164 $ws_sum->write($row_SumSheet, 0, $k_file, $format_A10);
|
|
1165 $ws_sum->write($row_SumSheet, 1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 2, $refH_file->{$k_file}{'TotalIndelGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 3, $refH_file->{$k_file}{'TotalMutGenomic'}, $format_A10);
|
|
1166
|
|
1167 # For NMF don't consider the pool of the samples
|
|
1168 if($k_file ne "Pool_Data")
|
|
1169 {
|
|
1170 # Write in the input NMF sheet the name of the samples
|
|
1171 $ws_inputNMF_count->write(0, $col_inputNMF, $k_file);
|
|
1172 $ws_inputNMF_percent->write(0, $col_inputNMF, $k_file);
|
|
1173 }
|
|
1174 }
|
|
1175
|
|
1176 # Count of SBS per chromosome
|
|
1177 PearsonCoefficient($refH_file, $k_file);
|
|
1178
|
|
1179 # Add a worksheet to the workbook
|
|
1180 my $ws = $wb->add_worksheet($k_file);
|
|
1181
|
|
1182 # Write the titles of the different sections of the report
|
|
1183 WriteBoderSection($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext);
|
|
1184
|
|
1185 # Write the mutation types (6 types)
|
|
1186 WriteHeaderSection($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext);
|
|
1187
|
|
1188
|
|
1189 # Save the figures of each samples in a different folder
|
|
1190 if(!-e "$folderFigure/Overall_mutation_distribution/$k_file") { mkdir("$folderFigure/Overall_mutation_distribution/$k_file") or die "Can't create the directory $folderFigure/Overall_mutation_distribution/$k_file\n"; }
|
|
1191 if(!-e "$folderFigure/Impact_protein_sequence/$k_file") { mkdir("$folderFigure/Impact_protein_sequence/$k_file") or die "Can't create the directory $folderFigure/Impact_protein_sequence/$k_file\n"; }
|
|
1192 if(!-e "$folderFigure/SBS_distribution/$k_file") { mkdir("$folderFigure/SBS_distribution/$k_file") or die "Can't create the directory $folderFigure/SBS_distribution\n"; }
|
|
1193 if(!-e "$folderFigure/Stranded_Analysis/$k_file") { mkdir("$folderFigure/Stranded_Analysis/$k_file") or die "Can't create the directory $folderFigure/Stranded_Analysis/$k_file\n"; }
|
|
1194 if(!-e "$folderFigure/Trinucleotide_Sequence_Context/$k_file") { mkdir("$folderFigure/Trinucleotide_Sequence_Context/$k_file") or die "Can't create the directory $folderFigure/Trinucleotide_Sequence_Context/$k_file\n"; }
|
|
1195
|
|
1196
|
|
1197
|
|
1198 ###########################################################################################################################################################
|
|
1199 ################################################################# Write the statistics ###################################################################
|
|
1200 ###########################################################################################################################################################
|
|
1201 my ($ca_genomique, $cg_genomique, $ct_genomique, $ta_genomique, $tc_genomique, $tg_genomique) = (0,0,0,0,0,0);
|
|
1202 my ($ca_NonTr, $ca_Tr, $cg_NonTr, $cg_Tr, $ct_NonTr, $ct_Tr, $ta_NonTr, $ta_Tr, $tc_NonTr, $tc_Tr, $tg_NonTr, $tg_Tr) = (0,0,0,0,0,0, 0,0,0,0,0,0);
|
|
1203
|
|
1204 my $row_SBSdistrBySeg = $rowStart_SBSdistrBySeg+4;
|
|
1205 my $row_SBSDistrBySegAndFunc_CA = $rowStart_SBSdistrBySeg+$nb_func+12;
|
|
1206 my $row_SBSDistrBySegAndFunc_CG = $rowStart_SBSdistrBySeg+($nb_func*2)+16; my $rowEndCG_SBSDistrBySegAndFunc_CG = $row_SBSDistrBySegAndFunc_CG+$nb_func;
|
|
1207 my $row_SBSDistrBySegAndFunc_CT = $rowStart_SBSdistrBySeg+($nb_func*3)+20;
|
|
1208
|
|
1209 ## 6 mutation types by segment
|
|
1210 foreach my $k_func (sort keys $refH_file->{$k_file}{'6mutType'})
|
|
1211 {
|
|
1212 my $totalSBS_bySegment = 0;
|
|
1213
|
|
1214 # Write the functional region for the section SBS distribution by segment
|
|
1215 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg, $k_func, $formatT_left);
|
|
1216
|
|
1217 # Write the exonic func for the section strand bias by segment
|
|
1218 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg, $k_func, $formatT_left);
|
|
1219
|
|
1220 if($row_SBSDistrBySegAndFunc_CG == $rowEndCG_SBSDistrBySegAndFunc_CG)
|
|
1221 {
|
|
1222 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg, $k_func, $formatT_bottomLeft);
|
|
1223 }
|
|
1224 else
|
|
1225 {
|
|
1226 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg, $k_func, $formatT_left);
|
|
1227 }
|
|
1228
|
|
1229 foreach my $k_mutation (sort keys $refH_file->{$k_file}{'6mutType'}{$k_func})
|
|
1230 {
|
|
1231 if($k_mutation eq "C:G>A:T")
|
|
1232 {
|
|
1233 # Write the ratio NonTr(CA)/Tr(GT)
|
|
1234 my $ratioSB = 0;
|
|
1235 if( ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} == 0) || ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'} == 0) ) { $ratioSB = 0; }
|
|
1236 else { $ratioSB = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}; }
|
|
1237 $ratioSB = sprintf("%.2f", $ratioSB);
|
|
1238 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+1, $ratioSB, $format_A10);
|
|
1239
|
|
1240 # Write the count of SBS in the NonTr and Tr strand
|
|
1241 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+2, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $format_A10);
|
|
1242 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+3, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $format_A10);
|
|
1243
|
|
1244 # Calculate the total number of SBS per mut type (genomic strand)
|
|
1245 $ca_genomique += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
|
|
1246 # Calculate the total number of SBS by NonTr / Tr strand
|
|
1247 $ca_NonTr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}; $ca_Tr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
1248
|
|
1249 # Write the count by exonic region
|
|
1250 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+3, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'}, $format_A10);
|
|
1251 }
|
|
1252 if($k_mutation eq "C:G>G:C")
|
|
1253 {
|
|
1254 # Write the ratio NonTr(CG)/Tr(GC)
|
|
1255 my $ratioSB = 0;
|
|
1256 if( ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} == 0) || ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'} == 0) ) { $ratioSB = 0; }
|
|
1257 else { $ratioSB = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}; }
|
|
1258 $ratioSB = sprintf("%.2f", $ratioSB);
|
|
1259 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+5, $ratioSB, $format_A10);
|
|
1260
|
|
1261 # Write the count of SBS in the NonTr and Tr strand
|
|
1262 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+6, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $format_A10);
|
|
1263 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+7, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $format_A10);
|
|
1264
|
|
1265 # Calculate the total number of SBS per mut type (genomic strand)
|
|
1266 $cg_genomique += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
|
|
1267 # Calculate the total number of SBS by NonTr / Tr strand
|
|
1268 $cg_NonTr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}; $cg_Tr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
1269
|
|
1270 # Write the count by exonic region
|
|
1271 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+5, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'}, $format_A10);
|
|
1272 }
|
|
1273 if($k_mutation eq "C:G>T:A")
|
|
1274 {
|
|
1275 # Write the ratio NonTr(CT)/Tr(GA)
|
|
1276 my $ratioSB = 0;
|
|
1277 if( ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} == 0) || ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'} == 0) ) { $ratioSB = 0; }
|
|
1278 else { $ratioSB = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}; }
|
|
1279 $ratioSB = sprintf("%.2f", $ratioSB);
|
|
1280 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+9, $ratioSB, $format_A10);
|
|
1281
|
|
1282 # Write the count of SBS in the NonTr and Tr strand
|
|
1283 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+10, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $format_A10);
|
|
1284 $ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg+11, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $formatT_right);
|
|
1285
|
|
1286 # Calculate the total number of SBS per mut type (genomic strand)
|
|
1287 $ct_genomique += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
|
|
1288 # Calculate the total number of SBS by NonTr / Tr strand
|
|
1289 $ct_NonTr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}; $ct_Tr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
1290
|
|
1291 # Write the count by exonic region
|
|
1292 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+7, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'}, $format_A10);
|
|
1293 }
|
|
1294 if($k_mutation eq "T:A>A:T")
|
|
1295 {
|
|
1296 # Write the ratio NonTr(AT)/Tr(TA)
|
|
1297 my $ratioSB = 0;
|
|
1298 if( ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} == 0) || ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'} == 0) ) { $ratioSB = 0; }
|
|
1299 else { $ratioSB = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}; }
|
|
1300 $ratioSB = sprintf("%.2f", $ratioSB);
|
|
1301
|
|
1302
|
|
1303 if($row_SBSDistrBySegAndFunc_CG == $rowEndCG_SBSDistrBySegAndFunc_CG)
|
|
1304 {
|
|
1305 # Write the ratio NonTr(AC)/Tr(TG)
|
|
1306 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+1, $ratioSB, $formatT_bottom);
|
|
1307 # Write the count of SBS in the NonTr and Tr strand
|
|
1308 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+2, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $formatT_bottom);
|
|
1309 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+3, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $formatT_bottom);
|
|
1310 }
|
|
1311 else
|
|
1312 {
|
|
1313 # Write the ratio NonTr(AC)/Tr(TG)
|
|
1314 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+1, $ratioSB, $format_A10);
|
|
1315 # Write the count of SBS in the NonTr and Tr strand
|
|
1316 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+2, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $format_A10);
|
|
1317 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+3, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $format_A10);
|
|
1318 }
|
|
1319
|
|
1320
|
|
1321 # Calculate the total number of SBS per mut type (genomic strand)
|
|
1322 $ta_genomique += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
|
|
1323 # Calculate the total number of SBS by NonTr / Tr strand
|
|
1324 $ta_NonTr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}; $ta_Tr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
1325
|
|
1326 # Write the count by exonic region
|
|
1327 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+9, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'}, $format_A10);
|
|
1328 }
|
|
1329 if($k_mutation eq "T:A>C:G")
|
|
1330 {
|
|
1331 # Write the ratio NonTr(AG)/Tr(TC)
|
|
1332 my $ratioSB = 0;
|
|
1333 if( ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} == 0) || ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'} == 0) ) { $ratioSB = 0; }
|
|
1334 else { $ratioSB = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}; }
|
|
1335 $ratioSB = sprintf("%.2f", $ratioSB);
|
|
1336
|
|
1337 if($row_SBSDistrBySegAndFunc_CG == $rowEndCG_SBSDistrBySegAndFunc_CG)
|
|
1338 {
|
|
1339 # Write the ratio NonTr(AC)/Tr(TG)
|
|
1340 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+5, $ratioSB, $formatT_bottom);
|
|
1341 # Write the count of SBS in the NonTr and Tr strand
|
|
1342 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+6, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $formatT_bottom);
|
|
1343 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+7, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $formatT_bottom);
|
|
1344 }
|
|
1345 else
|
|
1346 {
|
|
1347 # Write the ratio NonTr(AC)/Tr(TG)
|
|
1348 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+5, $ratioSB, $format_A10);
|
|
1349 # Write the count of SBS in the NonTr and Tr strand
|
|
1350 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+6, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $format_A10);
|
|
1351 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+7, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $format_A10);
|
|
1352 }
|
|
1353
|
|
1354 # Calculate the total number of SBS per mut type (genomic strand)
|
|
1355 $tc_genomique += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
|
|
1356 # Calculate the total number of SBS by NonTr / Tr strand
|
|
1357 $tc_NonTr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}; $tc_Tr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
1358
|
|
1359 # Write the count by exonic region
|
|
1360 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+11, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'}, $format_A10);
|
|
1361 }
|
|
1362 if($k_mutation eq "T:A>G:C")
|
|
1363 {
|
|
1364 # Calculate the ratio for the strand bias
|
|
1365 my $ratioSB = 0;
|
|
1366 if( ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} == 0) || ($refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'} == 0) ) { $ratioSB = 0; }
|
|
1367 else { $ratioSB = $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}; }
|
|
1368 $ratioSB = sprintf("%.2f", $ratioSB);
|
|
1369
|
|
1370 if($row_SBSDistrBySegAndFunc_CG == $rowEndCG_SBSDistrBySegAndFunc_CG)
|
|
1371 {
|
|
1372 # Write the ratio NonTr(AC)/Tr(TG)
|
|
1373 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+9, $ratioSB, $formatT_bottom);
|
|
1374 # Write the count of SBS in the NonTr and Tr strand
|
|
1375 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+10, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $formatT_bottom);
|
|
1376 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+11, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $formatT_bottomRight);
|
|
1377 }
|
|
1378 else
|
|
1379 {
|
|
1380 # Write the ratio NonTr(AC)/Tr(TG)
|
|
1381 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+9, $ratioSB, $format_A10);
|
|
1382 # Write the count of SBS in the NonTr and Tr strand
|
|
1383 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+10, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}, $format_A10);
|
|
1384 $ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg+11, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'}, $formatT_right);
|
|
1385 }
|
|
1386
|
|
1387 # Calculate the total number of SBS per mut type (genomic strand)
|
|
1388 $tg_genomique += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
|
|
1389 # Calculate the total number of SBS by NonTr / Tr strand
|
|
1390 $tg_NonTr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'}; $tg_Tr += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
1391
|
|
1392 # Write the count by exonic region
|
|
1393 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+13, $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'}, $formatT_right);
|
|
1394 }
|
|
1395
|
|
1396 # Calculate the total number of SBS on the genomic strand for each mutation types by exonic region
|
|
1397 $totalSBS_bySegment += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'TotalMutG'};
|
|
1398 } # End $k_mutation
|
|
1399
|
|
1400 $row_SBSDistrBySegAndFunc_CA++; $row_SBSDistrBySegAndFunc_CG++; $row_SBSDistrBySegAndFunc_CT++;
|
|
1401
|
|
1402 # Write the percent by exonic region
|
|
1403 my $percent_ca = 0;
|
|
1404 if($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>A:T'}{'TotalMutG'} == 0) { $percent_ca = 0; }
|
|
1405 else { $percent_ca = ($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>A:T'}{'TotalMutG'} / $totalSBS_bySegment ) * 100; $percent_ca = sprintf("%.2f", $percent_ca); }
|
|
1406 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+2, $percent_ca, $format_A10);
|
|
1407 my $percent_cg = 0;
|
|
1408 if($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>A:T'}{'TotalMutG'} == 0) { $percent_cg = 0; }
|
|
1409 else { $percent_cg = ($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>G:C'}{'TotalMutG'} / $totalSBS_bySegment ) * 100; $percent_cg = sprintf("%.2f", $percent_cg); }
|
|
1410 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+4, $percent_cg, $format_A10);
|
|
1411 my $percent_ct = 0;
|
|
1412 if($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>A:T'}{'TotalMutG'} == 0) { $percent_ct = 0; }
|
|
1413 else { $percent_ct = ($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>T:A'}{'TotalMutG'} / $totalSBS_bySegment ) * 100; $percent_ct = sprintf("%.2f", $percent_ct); }
|
|
1414 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+6, $percent_ct, $format_A10);
|
|
1415 my $percent_ta = 0;
|
|
1416 if($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>A:T'}{'TotalMutG'} == 0) { $percent_ta = 0; }
|
|
1417 else { $percent_ta = ($refH_file->{$k_file}{'6mutType'}{$k_func}{'T:A>A:T'}{'TotalMutG'} / $totalSBS_bySegment ) * 100; $percent_ta = sprintf("%.2f", $percent_ta); }
|
|
1418 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+8, $percent_ta, $format_A10);
|
|
1419 my $percent_tc = 0;
|
|
1420 if($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>A:T'}{'TotalMutG'} == 0) { $percent_tc = 0; }
|
|
1421 else { $percent_tc = ($refH_file->{$k_file}{'6mutType'}{$k_func}{'T:A>C:G'}{'TotalMutG'} / $totalSBS_bySegment ) * 100; $percent_tc = sprintf("%.2f", $percent_tc); }
|
|
1422 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+10, $percent_tc, $format_A10);
|
|
1423 my $percent_tg = 0;
|
|
1424 if($refH_file->{$k_file}{'6mutType'}{$k_func}{'C:G>A:T'}{'TotalMutG'} == 0) { $percent_tg = 0; }
|
|
1425 else { $percent_tg = ($refH_file->{$k_file}{'6mutType'}{$k_func}{'T:A>G:C'}{'TotalMutG'} / $totalSBS_bySegment ) * 100; $percent_tg = sprintf("%.2f", $percent_tg); }
|
|
1426 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+12, $percent_tg, $format_A10);
|
|
1427
|
|
1428 # Write the count of SBS by segment
|
|
1429 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+1, $totalSBS_bySegment, $format_A10);
|
|
1430
|
|
1431 $row_SBSdistrBySeg++;
|
|
1432 } # End $k_func
|
|
1433
|
|
1434 # Write the total number of SBS on the genomic strand
|
|
1435 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $formatT_bottomHeader);
|
|
1436
|
|
1437 # Write the total and the percentage of SBS for each mutation types and save it to a text file
|
|
1438 open(DISTRSBS, ">", "$folderFigure/SBS_distribution/$k_file/$k_file-SBS_distribution.txt") or die "$!: $folderFigure/SBS_distribution/$k_file/$k_file-SBS_distribution.txt\n";
|
|
1439 print DISTRSBS "Mutation_Type\tCount\tPercentage\tSample\n";
|
|
1440 my $percent_ca = 0;
|
|
1441 if($ca_genomique == 0) { $percent_ca = 0; }
|
|
1442 else { $percent_ca = ($ca_genomique/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ca = sprintf("%.2f", $percent_ca); }
|
|
1443 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+2, $percent_ca, $formatT_bottom); print DISTRSBS "C:G>A:T\t$ca_genomique\t$percent_ca\t$k_file\n";
|
|
1444 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+3, $ca_genomique, $formatT_bottomHeader);
|
|
1445 my $percent_cg = 0;
|
|
1446 if($cg_genomique == 0) { $percent_cg = 0; }
|
|
1447 else { $percent_cg = ($cg_genomique/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_cg = sprintf("%.2f", $percent_cg); }
|
|
1448 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+4, $percent_cg, $formatT_bottom); print DISTRSBS "C:G>G:C\t$cg_genomique\t$percent_cg\t$k_file\n";
|
|
1449 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+5, $cg_genomique, $formatT_bottomHeader);
|
|
1450 my $percent_ct = 0;
|
|
1451 if($ct_genomique == 0) { $percent_ct = 0; }
|
|
1452 else { $percent_ct = ($ct_genomique/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ct = sprintf("%.2f", $percent_ct); }
|
|
1453 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+6, $percent_ct, $formatT_bottom); print DISTRSBS "C:G>T:A\t$ct_genomique\t$percent_ct\t$k_file\n";
|
|
1454 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+7, $ct_genomique, $formatT_bottomHeader);
|
|
1455 my $percent_ta = 0;
|
|
1456 if($ta_genomique == 0) { $percent_ta = 0; }
|
|
1457 else { $percent_ta = ($ta_genomique/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ta = sprintf("%.2f", $percent_ta); }
|
|
1458 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+8, $percent_ta, $formatT_bottom); print DISTRSBS "T:A>A:T\t$ta_genomique\t$percent_ta\t$k_file\n";
|
|
1459 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+9, $ta_genomique, $formatT_bottomHeader);
|
|
1460 my $percent_tc = 0;
|
|
1461 if($tc_genomique == 0) { $percent_tc = 0; }
|
|
1462 else { $percent_tc = ($tc_genomique/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_tc = sprintf("%.2f", $percent_tc); }
|
|
1463 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+10, $percent_tc, $formatT_bottom); print DISTRSBS "T:A>C:G\t$tc_genomique\t$percent_tc\t$k_file\n";
|
|
1464 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+11, $tc_genomique, $formatT_bottomHeader);
|
|
1465 my $percent_tg = 0;
|
|
1466 if($tg_genomique == 0) { $percent_tg = 0; }
|
|
1467 else { $percent_tg = ($tg_genomique/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_tg = sprintf("%.2f", $percent_tg); }
|
|
1468 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+12, $percent_tg, $formatT_bottom); print DISTRSBS "T:A>G:C\t$tg_genomique\t$percent_tg\t$k_file\n";
|
|
1469 $ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+13, $tg_genomique, $formatT_bottomRightHeader);
|
|
1470 close DISTRSBS;
|
|
1471
|
|
1472 ###########################################################################################################################################################
|
|
1473 ################################################################### Write Strand BIAS #####################################################################
|
|
1474 ###########################################################################################################################################################
|
|
1475 # Write the SB for each mutation type (table 3)
|
|
1476 $ws->write(28, 11, "Table 3. Significance of the strand biases", $format_A10Boldleft);
|
|
1477 $ws->set_column(11, 11, 13); $ws->set_column(16, 16, 15); $ws->set_column(17, 17, 10);
|
|
1478 $ws->write(29, 11, "Mutation Type", $table_topleft); $ws->write(29, 12, "Non-Tr/Tr", $table_top); $ws->write(29, 13, "Non-Tr", $table_top); $ws->write(29, 14, "Tr", $table_top); $ws->write(29, 15, "P-value", $table_top); $ws->write(29, 16, "FDR q value", $table_top); $ws->write(29, 17, "95% CI", $table_topRight);
|
|
1479
|
|
1480 $ws->write(39, 11, "Table 3. Significance of the strand biases", $format_A10Boldleft);
|
|
1481 $ws->write(40, 11, "Mutation Type", $table_topleft); $ws->write(40, 12, "Non-Tr/Tr", $table_top); $ws->write(40, 13, "Non-Tr", $table_top); $ws->write(40, 14, "Tr", $table_top); $ws->write(40, 15, "P-value", $table_top); $ws->write(40, 16, "FDR q value", $table_top); $ws->write(40, 17, "95% CI", $table_topRight);
|
|
1482
|
|
1483 # For ggplot2
|
|
1484 open(SB, ">", "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandBias.txt") or die "$!: $folderFigure/Stranded_Analysis/$k_file/$k_file-StrandBias.txt\n";
|
|
1485 print SB "Alteration\tStrand\tCount\n";
|
|
1486
|
|
1487
|
|
1488 #-----------------------------------------------------------------------------------------------------#
|
|
1489 my ($ratio_ca, $ratio_gt, $percent_ca_NonTr, $percent_ca_Tr) = (0, 0, 0, 0, 0);
|
|
1490 if( ($ca_NonTr==0) || ($ca_Tr==0) ) { $ratio_ca = 0; $ratio_gt = 0; $percent_ca_NonTr = 0; $percent_ca_Tr = 0; }
|
|
1491 else
|
|
1492 {
|
|
1493 $ratio_ca = $ca_NonTr/$ca_Tr; $ratio_ca = sprintf("%.2f", $ratio_ca);
|
|
1494 $ratio_gt = $ca_Tr/$ca_NonTr; $ratio_gt = sprintf("%.2f", $ratio_gt);
|
|
1495 $percent_ca_NonTr = ($ca_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ca_Tr = ($ca_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
|
|
1496 }
|
|
1497 print SB "C>A\tNonTranscribed\t$ca_NonTr\n", "C>A\tTranscribed\t$ca_Tr\n";
|
|
1498 # C>A
|
|
1499 $ws->write(30, 11, "C>A", $table_left); $ws->write(30, 12, $ratio_ca, $table_middleHeader); $ws->write(30, 13, $ca_NonTr, $format_A10); $ws->write(30, 14, $ca_Tr, $format_A10);
|
|
1500 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
|
|
1501 if(($ca_NonTr+$ca_Tr)< 10)
|
|
1502 {
|
|
1503 if($h_chi2{$k_file}{'C>A'}{'p-value'} eq "NA") { $ws->write_string(30, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
|
|
1504 else { $ws->write_string(30, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10ItalicRed); }
|
|
1505 }
|
|
1506 else { $ws->write_string(30, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
|
|
1507 $ws->write(30, 16, $h_chi2{$k_file}{'C>A'}{'FDR'}, $format_A10); $ws->write(30, 17, $h_chi2{$k_file}{'C>A'}{'ConfInt'}, $table_right);
|
|
1508 # G>T
|
|
1509 $ws->write(41, 11, "G>T", $table_left); $ws->write(41, 12, $ratio_gt, $table_middleHeader); $ws->write(41, 13, $ca_Tr, $format_A10); $ws->write(41, 14, $ca_NonTr, $format_A10);
|
|
1510 if(($ca_NonTr+$ca_Tr)< 10)
|
|
1511 {
|
|
1512 if($h_chi2{$k_file}{'C>A'}{'p-value'} eq "NA") { $ws->write_string(41, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
|
|
1513 else { $ws->write_string(41, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10ItalicRed); }
|
|
1514 }
|
|
1515 else { $ws->write_string(41, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
|
|
1516 $ws->write(41, 16, $h_chi2{$k_file}{'C>A'}{'FDR'}, $format_A10); $ws->write(41, 17, $h_chi2{$k_file}{'C>A'}{'ConfInt'}, $table_right);
|
|
1517
|
|
1518 #-----------------------------------------------------------------------------------------------------#
|
|
1519 my ($ratio_cg, $ratio_gc, $percent_cg_NonTr, $percent_cg_Tr) = (0, 0, 0, 0, 0);
|
|
1520 if( ($cg_NonTr==0) || ($cg_Tr==0) ) { $ratio_cg = 0; $ratio_gc = 0; $percent_cg_NonTr = 0; $percent_cg_Tr = 0; }
|
|
1521 else
|
|
1522 {
|
|
1523 $ratio_cg = $cg_NonTr/$cg_Tr; $ratio_cg = sprintf("%.2f", $ratio_cg);
|
|
1524 $ratio_gc = $cg_Tr/$cg_NonTr; $ratio_gc = sprintf("%.2f", $ratio_gc);
|
|
1525 $percent_cg_NonTr = ($cg_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_cg_Tr = ($cg_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
|
|
1526 }
|
|
1527 print SB "C>G\tNonTranscribed\t$cg_NonTr\n", "C>G\tTranscribed\t$cg_Tr\n";
|
|
1528 # C>G
|
|
1529 $ws->write(31, 11, "C>G", $table_left); $ws->write(31, 12, $ratio_cg, $table_middleHeader); $ws->write(31, 13, $cg_NonTr, $format_A10); $ws->write(31, 14, $cg_Tr, $format_A10);
|
|
1530 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
|
|
1531 if(($cg_NonTr+$cg_Tr)< 10)
|
|
1532 {
|
|
1533 if($h_chi2{$k_file}{'C>G'}{'p-value'} eq "NA") { $ws->write_string(31, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
|
|
1534 else { $ws->write_string(31, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10ItalicRed); }
|
|
1535 }
|
|
1536 else { $ws->write_string(31, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
|
|
1537 $ws->write(31, 16, $h_chi2{$k_file}{'C>G'}{'FDR'}, $format_A10); $ws->write(31, 17, $h_chi2{$k_file}{'C>G'}{'ConfInt'}, $table_right);
|
|
1538 # G>C
|
|
1539 $ws->write(42, 11, "G>C", $table_left); $ws->write(42, 12, $ratio_gc, $table_middleHeader); $ws->write(42, 13, $cg_Tr, $format_A10); $ws->write(42, 14, $cg_NonTr, $format_A10);
|
|
1540 if(($cg_NonTr+$cg_Tr)< 10)
|
|
1541 {
|
|
1542 if($h_chi2{$k_file}{'C>G'}{'p-value'} eq "NA") { $ws->write_string(42, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
|
|
1543 else { $ws->write_string(42, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10ItalicRed); }
|
|
1544 }
|
|
1545 else { $ws->write_string(42, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
|
|
1546 $ws->write(42, 16, $h_chi2{$k_file}{'C>G'}{'FDR'}, $format_A10); $ws->write(42, 17, $h_chi2{$k_file}{'C>G'}{'ConfInt'}, $table_right);
|
|
1547
|
|
1548 #-----------------------------------------------------------------------------------------------------#
|
|
1549 my ($ratio_ct, $ratio_ga, $percent_ct_NonTr, $percent_ct_Tr) = (0, 0, 0, 0, 0);
|
|
1550 if( ($ct_NonTr==0) || ($ct_Tr==0) ) { $ratio_ct = 0; $ratio_ga = 0; $percent_ct_NonTr = 0; $percent_ct_Tr = 0; }
|
|
1551 else
|
|
1552 {
|
|
1553 $ratio_ct = $ct_NonTr/$ct_Tr; $ratio_ct = sprintf("%.2f", $ratio_ct);
|
|
1554 $ratio_ga = $ct_Tr/$ct_NonTr; $ratio_ga = sprintf("%.2f", $ratio_ga);
|
|
1555 $percent_ct_NonTr = ($ct_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ct_Tr = ($ct_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
|
|
1556 }
|
|
1557 print SB "C>T\tNonTranscribed\t$ct_NonTr\n", "C>T\tTranscribed\t$ct_Tr\n";
|
|
1558 # C>T
|
|
1559 $ws->write(32, 11, "C>T", $table_left); $ws->write(32, 12, $ratio_ct, $table_middleHeader); $ws->write(32, 13, $ct_NonTr, $format_A10); $ws->write(32, 14, $ct_Tr, $format_A10);
|
|
1560 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
|
|
1561 if(($ct_NonTr+$ct_Tr)< 10)
|
|
1562 {
|
|
1563 if($h_chi2{$k_file}{'C>T'}{'p-value'} eq "NA") { $ws->write_string(32, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
|
|
1564 else { $ws->write_string(32, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10ItalicRed); }
|
|
1565 }
|
|
1566 else { $ws->write_string(32, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
|
|
1567 $ws->write(32, 16, $h_chi2{$k_file}{'C>T'}{'FDR'}, $format_A10); $ws->write(32, 17, $h_chi2{$k_file}{'C>T'}{'ConfInt'}, $table_right);
|
|
1568 # G>A
|
|
1569 $ws->write(43, 11, "G>A", $table_left); $ws->write(43, 12, $ratio_ga, $table_middleHeader); $ws->write(43, 13, $ct_Tr, $format_A10); $ws->write(43, 14, $ct_NonTr, $format_A10);
|
|
1570 if(($ct_NonTr+$ct_Tr)< 10)
|
|
1571 {
|
|
1572 if($h_chi2{$k_file}{'C>T'}{'p-value'} eq "NA") { $ws->write_string(43, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
|
|
1573 else { $ws->write_string(43, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10ItalicRed); }
|
|
1574 }
|
|
1575 else { $ws->write_string(43, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
|
|
1576 $ws->write(43, 16, $h_chi2{$k_file}{'C>T'}{'FDR'}, $format_A10); $ws->write(43, 17, $h_chi2{$k_file}{'C>T'}{'ConfInt'}, $table_right);
|
|
1577
|
|
1578 #-----------------------------------------------------------------------------------------------------#
|
|
1579 my ($ratio_ta, $ratio_at, $percent_ta_NonTr, $percent_ta_Tr) = (0, 0, 0, 0, 0);
|
|
1580 if( ($ta_NonTr==0) || ($ta_Tr==0) ) { $ratio_ta = 0; $ratio_at = 0; $percent_ta_NonTr = 0; $percent_ta_Tr = 0; }
|
|
1581 else
|
|
1582 {
|
|
1583 $ratio_ta = $ta_NonTr/$ta_Tr; $ratio_ta = sprintf("%.2f", $ratio_ta);
|
|
1584 $ratio_at = $ta_Tr/$ta_NonTr; $ratio_at = sprintf("%.2f", $ratio_at);
|
|
1585 $percent_ta_NonTr = ($ta_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ta_Tr = ($ta_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
|
|
1586 }
|
|
1587 print SB "T>A\tNonTranscribed\t$ta_NonTr\n", "T>A\tTranscribed\t$ta_Tr\n";
|
|
1588 # T>A
|
|
1589 $ws->write(33, 11, "T>A", $table_left); $ws->write(33, 12, $ratio_ta, $table_middleHeader); $ws->write(33, 13, $ta_NonTr, $format_A10); $ws->write(33, 14, $ta_Tr, $format_A10);
|
|
1590 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
|
|
1591 if(($ta_NonTr+$ta_Tr)< 10)
|
|
1592 {
|
|
1593 if($h_chi2{$k_file}{'T>A'}{'p-value'} eq "NA") { $ws->write_string(33, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
|
|
1594 else { $ws->write_string(33, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10ItalicRed); }
|
|
1595 }
|
|
1596 else { $ws->write_string(33, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
|
|
1597 $ws->write(33, 16, $h_chi2{$k_file}{'T>A'}{'FDR'}, $format_A10); $ws->write(33, 17, $h_chi2{$k_file}{'T>A'}{'ConfInt'}, $table_right);
|
|
1598 # A>T
|
|
1599 $ws->write(44, 11, "A>T", $table_left); $ws->write(44, 12, $ratio_at, $table_middleHeader); $ws->write(44, 13, $ta_Tr, $format_A10); $ws->write(44, 14, $ta_NonTr, $format_A10);
|
|
1600 if(($ta_NonTr+$ta_Tr)< 10)
|
|
1601 {
|
|
1602 if($h_chi2{$k_file}{'T>A'}{'p-value'} eq "NA") { $ws->write_string(44, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
|
|
1603 else { $ws->write_string(44, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10ItalicRed); }
|
|
1604 }
|
|
1605 else { $ws->write_string(44, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
|
|
1606 $ws->write(44, 16, $h_chi2{$k_file}{'T>A'}{'FDR'}, $format_A10); $ws->write(44, 17, $h_chi2{$k_file}{'T>A'}{'ConfInt'}, $table_right);
|
|
1607
|
|
1608 #-----------------------------------------------------------------------------------------------------#
|
|
1609 my ($ratio_tc, $ratio_ag, $percent_tc_NonTr, $percent_tc_Tr) = (0, 0, 0, 0, 0);
|
|
1610 if( ($tc_NonTr==0) || ($tc_Tr==0) ) { $ratio_tc = 0; $ratio_ag = 0; $percent_tc_NonTr = 0; $percent_tc_Tr = 0; }
|
|
1611 else
|
|
1612 {
|
|
1613 $ratio_tc = $tc_NonTr/$tc_Tr; $ratio_tc = sprintf("%.2f", $ratio_tc);
|
|
1614 $ratio_ag = $tc_Tr/$tc_NonTr; $ratio_ag = sprintf("%.2f", $ratio_ag);
|
|
1615 $percent_tc_NonTr = ($tc_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_tc_Tr = ($tc_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
|
|
1616 }
|
|
1617 print SB "T>C\tNonTranscribed\t$tc_NonTr\n", "T>C\tTranscribed\t$tc_Tr\n";
|
|
1618 # T>C
|
|
1619 $ws->write(34, 11, "T>C", $table_left); $ws->write(34, 12, $ratio_tc, $table_middleHeader); $ws->write(34, 13, $tc_NonTr, $format_A10); $ws->write(34, 14, $tc_Tr, $format_A10);
|
|
1620 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
|
|
1621 if(($tc_NonTr+$tc_Tr)< 10)
|
|
1622 {
|
|
1623 if($h_chi2{$k_file}{'T>C'}{'p-value'} eq "NA") { $ws->write_string(34, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
|
|
1624 else { $ws->write_string(34, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10ItalicRed); }
|
|
1625 }
|
|
1626 else { $ws->write_string(34, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
|
|
1627 $ws->write(34, 16, $h_chi2{$k_file}{'T>C'}{'FDR'}, $format_A10); $ws->write(34, 17, $h_chi2{$k_file}{'T>C'}{'ConfInt'}, $table_right);
|
|
1628 # A>G
|
|
1629 $ws->write(45, 11, "A>G", $table_left); $ws->write(45, 12, $ratio_ag, $table_middleHeader); $ws->write(45, 13, $tc_Tr, $format_A10); $ws->write(45, 14, $tc_NonTr, $format_A10);
|
|
1630 if(($tc_NonTr+$tc_Tr)< 10)
|
|
1631 {
|
|
1632 if($h_chi2{$k_file}{'T>C'}{'p-value'} eq "NA") { $ws->write_string(45, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
|
|
1633 else { $ws->write_string(45, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10ItalicRed); }
|
|
1634 }
|
|
1635 else { $ws->write_string(45, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
|
|
1636 $ws->write(45, 16, $h_chi2{$k_file}{'T>C'}{'FDR'}, $format_A10); $ws->write(45, 17, $h_chi2{$k_file}{'T>C'}{'ConfInt'}, $table_right);
|
|
1637
|
|
1638 #-----------------------------------------------------------------------------------------------------#
|
|
1639 my ($ratio_tg, $ratio_ac, $percent_tg_NonTr, $percent_tg_Tr) = (0, 0, 0, 0, 0);
|
|
1640 if( ($tg_NonTr==0) || ($tg_Tr==0) ) { $ratio_tg = 0; $ratio_ac = 0; $percent_tg_NonTr = 0; $percent_tg_Tr = 0; }
|
|
1641 else
|
|
1642 {
|
|
1643 $ratio_tg = $tg_NonTr/$tg_Tr; $ratio_tg = sprintf("%.2f", $ratio_tg);
|
|
1644 $ratio_ac = $tg_Tr/$tg_NonTr; $ratio_ac = sprintf("%.2f", $ratio_ac);
|
|
1645 $percent_tg_NonTr = ($tg_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_tg_Tr = ($tg_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
|
|
1646 }
|
|
1647 print SB "T>G\tNonTranscribed\t$tg_NonTr\n", "T>G\tTranscribed\t$tg_Tr\n";
|
|
1648 # T>G
|
|
1649 $ws->write(35, 11, "T>G", $table_bottomleft); $ws->write(35, 12, $ratio_tg, $table_middleHeader2); $ws->write(35, 13, $tg_NonTr, $table_bottom); $ws->write(35, 14, $tg_Tr, $table_bottom);
|
|
1650 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
|
|
1651 if(($tg_NonTr+$tg_Tr)< 10)
|
|
1652 {
|
|
1653 if($h_chi2{$k_file}{'T>G'}{'p-value'} eq "NA") { $ws->write_string(35, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
|
|
1654 else { $ws->write_string(35, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottomItalicRed); }
|
|
1655 }
|
|
1656 else { $ws->write_string(35, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
|
|
1657 $ws->write(35, 16, $h_chi2{$k_file}{'T>G'}{'FDR'}, $table_bottom); $ws->write(35, 17, $h_chi2{$k_file}{'T>G'}{'ConfInt'}, $table_bottomRight);
|
|
1658 # A>C
|
|
1659 $ws->write(46, 11, "A>C", $table_bottomleft); $ws->write(46, 12, $ratio_ac, $table_middleHeader2); $ws->write(46, 13, $tg_Tr, $table_bottom); $ws->write(46, 14, $tg_NonTr, $table_bottom);
|
|
1660 if(($tg_NonTr+$tg_Tr)< 10)
|
|
1661 {
|
|
1662 if($h_chi2{$k_file}{'T>G'}{'p-value'} eq "NA") { $ws->write_string(46, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
|
|
1663 else { $ws->write_string(46, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottomItalicRed); }
|
|
1664 }
|
|
1665 else { $ws->write_string(46, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
|
|
1666 $ws->write(46, 16, $h_chi2{$k_file}{'T>G'}{'FDR'}, $table_bottom); $ws->write(46, 17, $h_chi2{$k_file}{'T>G'}{'ConfInt'}, $table_bottomRight);
|
|
1667
|
|
1668 ### Write a warning message when NonTr+Tr < 10
|
|
1669 my $format_italic_red = $wb->add_format(font=>'Arial', size=>10, italic=>1, color => 'red');
|
|
1670
|
|
1671 if( (($ca_NonTr+$ca_Tr)< 10) || (($cg_NonTr+$cg_Tr)< 10) || (($ct_NonTr+$ct_Tr)< 10) || (($ta_NonTr+$ta_Tr)< 10) || (($tc_NonTr+$tc_Tr)< 10) || (($tg_NonTr+$tg_Tr)< 10) )
|
|
1672 {
|
|
1673 $ws->write(36, 11, "Warning message: chi-squared approximation may be incorrect because the number of SBS", $format_italic_red);
|
|
1674 $ws->write(37, 11, "on Non-transcribed and transcribed strand is lower than 10", $format_italic_red);
|
|
1675 }
|
|
1676
|
|
1677 close SB;
|
|
1678
|
|
1679
|
|
1680 ###########################################################################################################################################################
|
|
1681 ################################################################### Write SBS Per Chr #####################################################################
|
|
1682 ###########################################################################################################################################################
|
|
1683 # For the HTML report
|
|
1684 open(SBSPerChr, ">", "$folderFigure/Distribution_SBS_Per_Chromosomes/$k_file-DistributionSNVS_per_chromosome.txt") or die "$!: $folderFigure/Distribution_SBS_Per_Chromosomes/$k_file-DistributionSNVS_per_chromosome.txt\n";
|
|
1685 print SBSPerChr "\tPearson\t$refH_file->{$k_file}{'SBSPerChr'}{'AllMutType'}\t", $refH_file->{$k_file}{'SBSPerChr'}{"C:G>A:T"}{'Pearson'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"C:G>G:C"}{'Pearson'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"C:G>T:A"}{'Pearson'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"T:A>A:T"}{'Pearson'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"T:A>C:G"}{'Pearson'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"T:A>G:C"}{'Pearson'},"\n";
|
|
1686 print SBSPerChr "Chr\tSize\tAll SBS\tC:G>A:T\tC:G>G:C\tC:G>T:A\tT:A>A:T\tT:A>C:G\tT:A>G:C\n";
|
|
1687
|
|
1688 my $row_SBSPerChr = $row_SBSDistrBySegAndFunc_CG + 8; # Line 158
|
|
1689
|
|
1690 # Write the Pearson coefficient
|
|
1691 $ws->write($row_SBSDistrBySegAndFunc_CG+6, $colStart_SBSdistrBySeg+3, $refH_file->{$k_file}{'SBSPerChr'}{"C:G>A:T"}{'Pearson'}, $format_A10);
|
|
1692 $ws->write($row_SBSDistrBySegAndFunc_CG+6, $colStart_SBSdistrBySeg+4, $refH_file->{$k_file}{'SBSPerChr'}{"C:G>G:C"}{'Pearson'}, $format_A10);
|
|
1693 $ws->write($row_SBSDistrBySegAndFunc_CG+6, $colStart_SBSdistrBySeg+5, $refH_file->{$k_file}{'SBSPerChr'}{"C:G>T:A"}{'Pearson'}, $format_A10);
|
|
1694 $ws->write($row_SBSDistrBySegAndFunc_CG+6, $colStart_SBSdistrBySeg+6, $refH_file->{$k_file}{'SBSPerChr'}{"T:A>A:T"}{'Pearson'}, $format_A10);
|
|
1695 $ws->write($row_SBSDistrBySegAndFunc_CG+6, $colStart_SBSdistrBySeg+7, $refH_file->{$k_file}{'SBSPerChr'}{"T:A>C:G"}{'Pearson'}, $format_A10);
|
|
1696 $ws->write($row_SBSDistrBySegAndFunc_CG+6, $colStart_SBSdistrBySeg+8, $refH_file->{$k_file}{'SBSPerChr'}{"T:A>G:C"}{'Pearson'}, $formatT_right);
|
|
1697
|
|
1698 # Write the chromosome number and their sizes / Write the total of SBS per chromosome
|
|
1699 my $line=0;
|
|
1700
|
|
1701 foreach my $chromosome (sort keys %chromosomes)
|
|
1702 {
|
|
1703 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg, $chromosome, $formatT_left);
|
|
1704 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+1, $chromosomes{$chromosome}, $format_A10);
|
|
1705 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+2, $refH_file->{$k_file}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'}, $format_A10);
|
|
1706
|
|
1707 # Write the count per mutation
|
|
1708 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+3, $refH_file->{$k_file}{'SBSPerChr'}{"C:G>A:T"}{'CHR'}{$chromosome}{'chr'}, $format_A10);
|
|
1709 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+4, $refH_file->{$k_file}{'SBSPerChr'}{"C:G>G:C"}{'CHR'}{$chromosome}{'chr'}, $format_A10);
|
|
1710 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+5, $refH_file->{$k_file}{'SBSPerChr'}{"C:G>T:A"}{'CHR'}{$chromosome}{'chr'}, $format_A10);
|
|
1711 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+6, $refH_file->{$k_file}{'SBSPerChr'}{"T:A>A:T"}{'CHR'}{$chromosome}{'chr'}, $format_A10);
|
|
1712 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+7, $refH_file->{$k_file}{'SBSPerChr'}{"T:A>C:G"}{'CHR'}{$chromosome}{'chr'}, $format_A10);
|
|
1713 $ws->write($row_SBSPerChr+($line), $colStart_SBSdistrBySeg+8, $refH_file->{$k_file}{'SBSPerChr'}{"T:A>G:C"}{'CHR'}{$chromosome}{'chr'}, $formatT_right);
|
|
1714
|
|
1715
|
|
1716 # For the HTML report
|
|
1717 print SBSPerChr "$chromosome\t", $chromosomes{$chromosome},"\t", $refH_file->{$k_file}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"C:G>A:T"}{'CHR'}{$chromosome}{'chr'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"C:G>G:C"}{'CHR'}{$chromosome}{'chr'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"C:G>T:A"}{'CHR'}{$chromosome}{'chr'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"T:A>A:T"}{'CHR'}{$chromosome}{'chr'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"T:A>C:G"}{'CHR'}{$chromosome}{'chr'},"\t", $refH_file->{$k_file}{'SBSPerChr'}{"T:A>G:C"}{'CHR'}{$chromosome}{'chr'},"\n";
|
|
1718 $line++;
|
|
1719 }
|
|
1720
|
|
1721 # Write the Pearson coefficient for the total number of SBS
|
|
1722 $ws->write($row_SBSDistrBySegAndFunc_CG+6, $colStart_SBSdistrBySeg+2, $refH_file->{$k_file}{'SBSPerChr'}{'AllMutType'}, $format_A10);
|
|
1723 $ws->write($row_SBSPerChr+(keys %chromosomes), $colStart_SBSdistrBySeg+2, $refH_file->{$k_file}{'TotalSBSGenomic'}, $formatT_bottomHeader);
|
|
1724
|
|
1725 print SBSPerChr "\t\t$refH_file->{$k_file}{'TotalSBSGenomic'}\n";
|
|
1726 close SBSPerChr;
|
|
1727
|
|
1728
|
|
1729
|
|
1730 ###########################################################################################################################################################
|
|
1731 ####################################################################### Impact on protein #################################################################
|
|
1732 ###########################################################################################################################################################
|
|
1733 $ws->write(29, 6, "Table 2. Frequency and counts of functional impact", $format_A10Boldleft);
|
|
1734 $ws->set_column(6, 6, 13); $ws->set_column(10, 10, 15);
|
|
1735 $ws->write(30, 6, "RefSeq gene", $table_topleft); $ws->write(30, 7, "", $table_top); $ws->write(30, 8, "Percent", $table_top); $ws->write(30, 9, "Count", $table_topRight);
|
|
1736 my $lImpactSBS = 31;
|
|
1737 open(IMPACTSBS, ">", "$folderFigure/Impact_protein_sequence/$k_file/$k_file-DistributionExoFunc.txt") or die "$!: $folderFigure/Impact_protein_sequence/$k_file/$k_file-DistributionExoFunc.txt\n";
|
|
1738 print IMPACTSBS "AA_Change\tCount\tPercent\n";
|
|
1739
|
|
1740 # Pie chart with the distribution of SBS vs Indel
|
|
1741 open(SBSINDEL, ">", "$folderFigure/Overall_mutation_distribution/$k_file/$k_file-OverallMutationDistribution.txt") or die "$!: $folderFigure/Overall_mutation_distribution/$k_file/$k_file-OverallMutationDistribution.txt\n";
|
|
1742 print SBSINDEL "Variant_type\tCount\tPercent\n";
|
|
1743 my ($deletion, $insertion) = (0, 0);
|
|
1744
|
|
1745
|
|
1746 foreach my $k_exoFunc(sort keys $refH_file->{$k_file}{'ImpactSBS'})
|
|
1747 {
|
|
1748 my $percent = ($refH_file->{$k_file}{'ImpactSBS'}{$k_exoFunc} / $refH_file->{$k_file}{'TotalMutGenomic'})*100;
|
|
1749 $percent = sprintf("%.2f", $percent);
|
|
1750
|
|
1751 if($k_exoFunc eq "NA") { print IMPACTSBS "Not_Applicable\t$percent\t$refH_file->{$k_file}{'ImpactSBS'}{$k_exoFunc}\n"; }
|
|
1752 else { my $temp = $k_exoFunc; $temp =~ s/ /_/g; print IMPACTSBS "$temp\t$percent\t$refH_file->{$k_file}{'ImpactSBS'}{$k_exoFunc}\n"; }
|
|
1753
|
|
1754 $ws->write($lImpactSBS, 6, $k_exoFunc, $table_left2); $ws->write($lImpactSBS, 8, $percent, $format_A10); $ws->write($lImpactSBS, 9, $refH_file->{$k_file}{'ImpactSBS'}{$k_exoFunc}, $table_right);
|
|
1755 $lImpactSBS++;
|
|
1756
|
|
1757 # Pie chart with the distribution of SBS vs Indel
|
|
1758 if($k_exoFunc =~ /deletion/i) { $deletion += $refH_file->{$k_file}{'ImpactSBS'}{$k_exoFunc}; }
|
|
1759 elsif($k_exoFunc =~ /insertion/i) { $insertion += $refH_file->{$k_file}{'ImpactSBS'}{$k_exoFunc}; }
|
|
1760 }
|
|
1761 close IMPACTSBS;
|
|
1762 $ws->write($lImpactSBS, 9, $refH_file->{$k_file}{'TotalMutGenomic'}, $table_bottomrightHeader);
|
|
1763 $ws->write($lImpactSBS, 6, "", $table_bottomleft); $ws->write($lImpactSBS, 7, "", $table_bottom); $ws->write($lImpactSBS, 8, "", $table_bottom);
|
|
1764
|
|
1765 # Pie chart with the distribution of SBS vs Indel
|
|
1766 my $percentSBSIndel = ($deletion/$refH_file->{$k_file}{'TotalMutGenomic'})*100; $percentSBSIndel = sprintf("%.2f", $percentSBSIndel);
|
|
1767 print SBSINDEL "Deletion\t$deletion\t$percentSBSIndel\n";
|
|
1768 $percentSBSIndel = ($insertion/$refH_file->{$k_file}{'TotalMutGenomic'})*100; $percentSBSIndel = sprintf("%.2f", $percentSBSIndel);
|
|
1769 print SBSINDEL "Insertion\t$insertion\t$percentSBSIndel\n";
|
|
1770 $percentSBSIndel = ($refH_file->{$k_file}{TotalSBSGenomic}/$refH_file->{$k_file}{'TotalMutGenomic'})*100; $percentSBSIndel = sprintf("%.2f", $percentSBSIndel);
|
|
1771 print SBSINDEL "SBS\t$refH_file->{$k_file}{TotalSBSGenomic}\t$percentSBSIndel\n";
|
|
1772 close SBSINDEL;
|
|
1773
|
|
1774 ###########################################################################################################################################################
|
|
1775 ######################################################## SEQUENCE CONTEXT ON GENOMIC STRAND ###############################################################
|
|
1776 ###########################################################################################################################################################
|
|
1777 my $row_SeqContext6 = 4;
|
|
1778 # Count the total of mutations for 6 mutation types on genomic strand
|
|
1779 my ($c_ca6_g, $c_cg6_g, $c_ct6_g, $c_ta6_g, $c_tc6_g, $c_tg6_g) = (0,0,0, 0,0,0);
|
|
1780 my ($p_ca6_g, $p_cg6_g, $p_ct6_g, $p_ta6_g, $p_tc6_g, $p_tg6_g) = (0,0,0, 0,0,0);
|
|
1781 my $maxValue = 0; # For the heatmap
|
|
1782
|
|
1783 # For checking if the total number of SBS is correct
|
|
1784 my $total_SBS_genomic = 0;
|
|
1785
|
|
1786
|
|
1787 open(HEATMAPCGENOMIC, ">", "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapCount-Genomic.txt") or die "$!: $folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapCount-Genomic.txt\n";
|
|
1788 print HEATMAPCGENOMIC "\tC>A\tC>G\tC>T\tT>A\tT>C\tT>G\n";
|
|
1789 open(HEATMAPPGENOMIC, ">", "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapPercent-Genomic.txt") or die "$!: $folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapPercent-Genomic.txt\n";
|
|
1790 print HEATMAPPGENOMIC "\tC>A\tC>G\tC>T\tT>A\tT>C\tT>G\n";
|
|
1791
|
|
1792 ## Bar plot NMF like
|
|
1793 open(BARPLOTNMFLIKE, ">", "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-MutationSpectraPercent-Genomic.txt") or die "$!: $folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-MutationSpectraPercent-Genomic.txt\n";
|
|
1794 print BARPLOTNMFLIKE "alteration\tcontext\tvalue\n";
|
|
1795
|
|
1796 foreach my $k_context (sort keys $refH_file->{$k_file}{'SeqContextG'})
|
|
1797 {
|
|
1798 if( ($k_context =~ /N/) || (length($k_context) != 3) ) { next; }
|
|
1799
|
|
1800 # Write the context: 6 mut type on genomic strand
|
|
1801 $ws->write($row_SeqContext6 , $colStart_matrixSeqContext+3, $k_context, $format_A10); $ws->write($row_SeqContext6 , $colStart_matrixSeqContext+13, $k_context, $format_A10);
|
|
1802
|
|
1803 foreach my $k_mutation (sort keys $refH_file->{$k_file}{'SeqContextG'}{$k_context})
|
|
1804 {
|
|
1805 # For checking the total number of SBS
|
|
1806 $total_SBS_genomic += $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1807
|
|
1808 # Calculate the percentages
|
|
1809 my $percent = 0;
|
|
1810 if($refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation} == 0) { $percent = 0; }
|
|
1811 else
|
|
1812 {
|
|
1813 $percent = ($refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation} / $refH_file->{$k_file}{'TotalSBSGenomic'}) * 100;
|
|
1814 $percent = sprintf("%.2f", $percent);
|
|
1815 }
|
|
1816
|
|
1817 # For representing the sequence context with a bar plot (NMF like style)
|
|
1818 print BARPLOTNMFLIKE $k_mutation,"\t", $k_context,"\t", $percent,"\n";
|
|
1819
|
|
1820 if($k_mutation eq "C>A")
|
|
1821 {
|
|
1822 ### COUNT
|
|
1823 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+4, $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}, $format_A10);
|
|
1824 # Write the count for the heatmap
|
|
1825 print HEATMAPCGENOMIC "$k_context\t$refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}\t";
|
|
1826
|
|
1827 ### PERCENTAGE
|
|
1828 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+14, $percent, $format_A10);
|
|
1829 print HEATMAPPGENOMIC "$k_context\t$percent\t";
|
|
1830
|
|
1831 # For NMF input
|
|
1832 my $count = $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1833 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Count'}{$k_context}{'C>A'}}, $count); }
|
|
1834 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Percent'}{$k_context}{'C>A'}}, $percent); }
|
|
1835
|
|
1836 # For the heatmap
|
|
1837 if($percent >= $maxValue) { $maxValue = $percent; }
|
|
1838
|
|
1839 # For the total amount per mutation types
|
|
1840 $c_ca6_g += $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1841 $p_ca6_g += $percent;
|
|
1842 }
|
|
1843 if($k_mutation eq "C>G")
|
|
1844 {
|
|
1845 ### COUNT
|
|
1846 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+5, $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}, $format_A10);
|
|
1847 # Write the count for the heatmap
|
|
1848 print HEATMAPCGENOMIC "$refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}\t";
|
|
1849
|
|
1850 ### PERCENTAGE
|
|
1851 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+15, $percent, $format_A10);
|
|
1852 print HEATMAPPGENOMIC "$percent\t";
|
|
1853
|
|
1854 # For NMF input
|
|
1855 my $count = $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1856 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Count'}{$k_context}{'C>G'}}, $count); }
|
|
1857 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Percent'}{$k_context}{'C>G'}}, $percent); }
|
|
1858
|
|
1859 # For the heatmap
|
|
1860 if($percent >= $maxValue) { $maxValue = $percent; }
|
|
1861
|
|
1862 # For the total amount per mutation types
|
|
1863 $c_cg6_g += $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1864 $p_cg6_g += $percent;
|
|
1865 }
|
|
1866 if($k_mutation eq "C>T")
|
|
1867 {
|
|
1868 ### COUNT
|
|
1869 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+6, $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}, $format_A10);
|
|
1870 # Write the count for the heatmap
|
|
1871 print HEATMAPCGENOMIC "$refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}\t";
|
|
1872
|
|
1873 ### PERCENTAGE
|
|
1874 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+16, $percent, $format_A10);
|
|
1875 print HEATMAPPGENOMIC "$percent\t";
|
|
1876
|
|
1877 # For NMF input
|
|
1878 my $count = $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1879 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Count'}{$k_context}{'C>T'}}, $count); }
|
|
1880 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Percent'}{$k_context}{'C>T'}}, $percent); }
|
|
1881
|
|
1882 # For the heatmap
|
|
1883 if($percent >= $maxValue) { $maxValue = $percent; }
|
|
1884
|
|
1885 # For the total amount per mutation types
|
|
1886 $c_ct6_g += $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1887 $p_ct6_g += $percent;
|
|
1888 }
|
|
1889 if($k_mutation eq "T>A")
|
|
1890 {
|
|
1891 ### COUNT
|
|
1892 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+7, $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}, $format_A10);
|
|
1893 # Write the count for the heatmap
|
|
1894 print HEATMAPCGENOMIC "$refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}\t";
|
|
1895
|
|
1896 ### PERCENTAGE
|
|
1897 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+17, $percent, $format_A10);
|
|
1898 print HEATMAPPGENOMIC "$percent\t";
|
|
1899
|
|
1900 # For NMF input
|
|
1901 my $count = $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1902 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Count'}{$k_context}{'T>A'}}, $count); }
|
|
1903 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Percent'}{$k_context}{'T>A'}}, $percent); }
|
|
1904
|
|
1905 # For the heatmap
|
|
1906 if($percent >= $maxValue) { $maxValue = $percent; }
|
|
1907
|
|
1908 # For the total amount per mutation types
|
|
1909 $c_ta6_g += $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1910 $p_ta6_g += $percent;
|
|
1911 }
|
|
1912 if($k_mutation eq "T>C")
|
|
1913 {
|
|
1914 ### COUNT
|
|
1915 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+8, $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}, $format_A10);
|
|
1916 # Write the count for the heatmap
|
|
1917 print HEATMAPCGENOMIC "$refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}\t";
|
|
1918
|
|
1919 ### PERCENTAGE
|
|
1920 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+18, $percent, $format_A10);
|
|
1921 print HEATMAPPGENOMIC "$percent\t";
|
|
1922
|
|
1923 # For NMF input
|
|
1924 my $count = $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1925 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Count'}{$k_context}{'T>C'}}, $count); }
|
|
1926 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Percent'}{$k_context}{'T>C'}}, $percent); }
|
|
1927
|
|
1928 # For the heatmap
|
|
1929 if($percent >= $maxValue) { $maxValue = $percent; }
|
|
1930
|
|
1931 # For the total amount per mutation types
|
|
1932 $c_tc6_g += $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1933 $p_tc6_g += $percent;
|
|
1934 }
|
|
1935 if($k_mutation eq "T>G")
|
|
1936 {
|
|
1937 ### COUNT
|
|
1938 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+9, $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}, $format_A10);
|
|
1939 # Write the count for the heatmap
|
|
1940 print HEATMAPCGENOMIC "$refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation}\n";
|
|
1941
|
|
1942 ### PERCENTAGE
|
|
1943 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+19, $percent, $format_A10);
|
|
1944 print HEATMAPPGENOMIC "$percent\n";
|
|
1945
|
|
1946 # For NMF input
|
|
1947 my $count = $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1948 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Count'}{$k_context}{'T>G'}}, $count); }
|
|
1949 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Percent'}{$k_context}{'T>G'}}, $percent); }
|
|
1950
|
|
1951 # For the heatmap
|
|
1952 if($percent >= $maxValue) { $maxValue = $percent; }
|
|
1953
|
|
1954 # For the total amount per mutation types
|
|
1955 $c_tg6_g += $refH_file->{$k_file}{'SeqContextG'}{$k_context}{$k_mutation};
|
|
1956 $p_tg6_g += $percent;
|
|
1957 }
|
|
1958 }
|
|
1959 $row_SeqContext6++;
|
|
1960 }
|
|
1961 close HEATMAPCGENOMIC; close HEATMAPPGENOMIC;
|
|
1962 close BARPLOTNMFLIKE;
|
|
1963
|
|
1964
|
|
1965 # Write the total number of SBS per mutation type: COUNT
|
|
1966 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+4, $c_ca6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+5, $c_cg6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+6, $c_ct6_g, $formatT_bottomHeader2);
|
|
1967 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+7, $c_ta6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+8, $c_tc6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+9, $c_tg6_g, $formatT_bottomHeader2);
|
|
1968 if($total_SBS_genomic != $refH_file->{$k_file}{'TotalSBSGenomic'}) { print STDERR "Error in the calculation of the total number of SBS on the genomic strand!!!!\nFrom hash table $refH_file->{$k_file}{'TotalSBSGenomic'}\tVS\t$total_SBS_genomic\n"; exit; }
|
|
1969
|
|
1970 # Write the total number of SBS per mutation type: PERCENT
|
|
1971 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+14, $p_ca6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+15, $p_cg6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+16, $p_ct6_g, $formatT_bottomHeader2);
|
|
1972 $ws->write($row_SeqContext6, $colStart_matrixSeqContext+17, $p_ta6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+18, $p_tc6_g, $formatT_bottomHeader2); $ws->write($row_SeqContext6, $colStart_matrixSeqContext+19, $p_tg6_g, $formatT_bottomHeader2);
|
|
1973 my $totalPercent_genomic = $p_ca6_g + $p_cg6_g + $p_ct6_g + $p_ta6_g + $p_tc6_g + $p_tg6_g; $totalPercent_genomic = sprintf("%.0f", $totalPercent_genomic);
|
|
1974 if($totalPercent_genomic != 100) { print STDERR "Error in the calculation of the total percentages on the genomic strand!!!\nThe total is equal to=\t$totalPercent_genomic\n"; exit; }
|
|
1975
|
|
1976
|
|
1977 #----------------------------------------------------------------------------------------------------------------------------------------------------------------#
|
|
1978 # For the input matrix for NMF
|
|
1979 if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Sample'}}, $k_file); }
|
|
1980
|
|
1981
|
|
1982 ###########################################################################################################################################################
|
|
1983 ######################################################## SEQUENCE CONTEXT ON CODING STRAND ###############################################################
|
|
1984 ###########################################################################################################################################################
|
|
1985 my $row_SeqContext12 = $rowStart_SBSdistrBySeg+6; my $row_SeqContext12Percent = $rowStart_SBSdistrBySeg+27;
|
|
1986 # Reset the total count and percent calculated for the strand bias
|
|
1987 ($ca_NonTr, $ca_Tr, $cg_NonTr, $cg_Tr, $ct_NonTr, $ct_Tr, $ta_NonTr, $ta_Tr, $tc_NonTr, $tc_Tr, $tg_NonTr, $tg_Tr) = (0,0,0, 0,0,0, 0,0,0, 0,0,0);
|
|
1988 ($percent_ca_NonTr, $percent_ca_Tr, $percent_cg_NonTr, $percent_cg_Tr, $percent_ct_NonTr, $percent_ct_Tr, $percent_ta_NonTr, $percent_ta_Tr, $percent_tc_NonTr, $percent_tc_Tr, $percent_tg_NonTr, $percent_tg_Tr) = (0,0,0, 0,0,0, 0,0,0, 0,0,0);
|
|
1989
|
|
1990 # For checking if the total number of SBS is correct
|
|
1991 my $total_SBS_coding = 0;
|
|
1992
|
|
1993 open(COUNT, ">", "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignatureCount.txt") or die "$!: $folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignatureCount.txt\n";
|
|
1994 print COUNT "MutationTypeContext\tStrand\tValue\tSample\n";
|
|
1995 open(PERCENT, ">", "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignaturePercent.txt") or die "$!: $folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignaturePercent.txt\n";
|
|
1996 print PERCENT "MutationTypeContext\tStrand\tValue\tSample\n";
|
|
1997
|
|
1998 foreach my $k_context (sort keys $refH_file->{$k_file}{'SeqContextC'})
|
|
1999 {
|
|
2000 if( ($k_context =~ /N/) || (length($k_context) != 3) ) { next; }
|
|
2001
|
|
2002 # Write the context: 12 mut type on coding strand
|
|
2003 $ws->write($row_SeqContext12 , $colStart_matrixSeqContext, $k_context, $formatT_left); $ws->write($row_SeqContext12Percent , $colStart_matrixSeqContext, $k_context, $formatT_left);
|
|
2004
|
|
2005 foreach my $k_mutation (sort keys $refH_file->{$k_file}{'SeqContextC'}{$k_context})
|
|
2006 {
|
|
2007 # Percent: 12 mut type on coding strand
|
|
2008 my ($percent_NonTr, $percent_Tr) = (0, 0);
|
|
2009 if($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'} == 0) { $percent_NonTr = 0; }
|
|
2010 else { $percent_NonTr = ( $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'TotalSBSCoding'} ) * 100 }
|
|
2011 if($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'} == 0) { $percent_Tr = 0; }
|
|
2012 else { $percent_Tr = ( $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'} / $refH_file->{$k_file}{'TotalSBSCoding'} ) * 100 }
|
|
2013
|
|
2014
|
|
2015 # Calculate the total number for each mutation types
|
|
2016 if($k_mutation eq "C>A")
|
|
2017 {
|
|
2018 $ca_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
|
|
2019 $ca_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
|
|
2020
|
|
2021 # COUNT : 12 mutation type (stranded bar graph)
|
|
2022 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+1, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
|
|
2023 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
|
|
2024 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+2, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
|
|
2025 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
|
|
2026
|
|
2027 ## PERCENT : 12 mutation type (stranded bar graph)
|
|
2028 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2029 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_ca_NonTr += $percent_NonTr;
|
|
2030 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2031 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_ca_Tr += $percent_Tr;
|
|
2032 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
|
|
2033 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
|
|
2034
|
|
2035 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+1, $percent_NonTr, $format_A10);
|
|
2036 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+2, $percent_Tr, $format_A10);
|
|
2037 }
|
|
2038 if($k_mutation eq "C>G")
|
|
2039 {
|
|
2040 $cg_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
|
|
2041 $cg_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
|
|
2042
|
|
2043 # COUNT : 12 mutation type (stranded bar graph)
|
|
2044 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+3, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
|
|
2045 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
|
|
2046 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+4, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
|
|
2047 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
|
|
2048
|
|
2049 ## PERCENT : 12 mutation type (stranded bar graph)
|
|
2050 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2051 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_cg_NonTr += $percent_NonTr;
|
|
2052 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2053 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_cg_Tr += $percent_Tr;
|
|
2054 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
|
|
2055 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
|
|
2056
|
|
2057 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+3, $percent_NonTr, $format_A10);
|
|
2058 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+4, $percent_Tr, $format_A10);
|
|
2059 }
|
|
2060 if($k_mutation eq "C>T")
|
|
2061 {
|
|
2062 $ct_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
|
|
2063 $ct_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
|
|
2064
|
|
2065 # COUNT : 12 mutation type (stranded bar graph)
|
|
2066 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+5, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
|
|
2067 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
|
|
2068 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+6, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
|
|
2069 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
|
|
2070
|
|
2071 ## PERCENT : 12 mutation type (stranded bar graph)
|
|
2072 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2073 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_ct_NonTr += $percent_NonTr;
|
|
2074 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2075 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_ct_Tr += $percent_Tr;
|
|
2076 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
|
|
2077 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
|
|
2078
|
|
2079 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+5, $percent_NonTr, $format_A10);
|
|
2080 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+6, $percent_Tr, $format_A10);
|
|
2081 }
|
|
2082 if($k_mutation eq "T>A")
|
|
2083 {
|
|
2084 $ta_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
|
|
2085 $ta_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
|
|
2086
|
|
2087 # COUNT : 12 mutation type (stranded bar graph)
|
|
2088 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+7, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
|
|
2089 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
|
|
2090 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+8, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
|
|
2091 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
|
|
2092
|
|
2093 ## PERCENT : 12 mutation type (stranded bar graph)
|
|
2094 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2095 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_ta_NonTr += $percent_NonTr;
|
|
2096 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2097 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_ta_Tr += $percent_Tr;
|
|
2098 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
|
|
2099 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
|
|
2100
|
|
2101 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+7, $percent_NonTr, $format_A10);
|
|
2102 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+8, $percent_Tr, $format_A10);
|
|
2103 }
|
|
2104 if($k_mutation eq "T>C")
|
|
2105 {
|
|
2106 $tc_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
|
|
2107 $tc_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
|
|
2108
|
|
2109 # COUNT : 12 mutation type (stranded bar graph)
|
|
2110 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+9, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
|
|
2111 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
|
|
2112 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+10, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
|
|
2113 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
|
|
2114
|
|
2115 ## PERCENT : 12 mutation type (stranded bar graph)
|
|
2116 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2117 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_tc_NonTr += $percent_NonTr;
|
|
2118 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2119 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_tc_Tr += $percent_Tr;
|
|
2120 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
|
|
2121 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
|
|
2122
|
|
2123 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+9, $percent_NonTr, $format_A10);
|
|
2124 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+10, $percent_Tr, $format_A10);
|
|
2125 }
|
|
2126 if($k_mutation eq "T>G")
|
|
2127 {
|
|
2128 $tg_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
|
|
2129 $tg_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
|
|
2130
|
|
2131 # COUNT : 12 mutation type (stranded bar graph)
|
|
2132 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+11, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
|
|
2133 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
|
|
2134 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+12, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
|
|
2135 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
|
|
2136
|
|
2137 ## PERCENT : 12 mutation type (stranded bar graph)
|
|
2138 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2139 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_tg_NonTr += $percent_NonTr;
|
|
2140 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
|
|
2141 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_tg_Tr += $percent_Tr;
|
|
2142 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
|
|
2143 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
|
|
2144
|
|
2145 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+11, $percent_NonTr, $format_A10);
|
|
2146 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+12, $percent_Tr, $format_A10);
|
|
2147 }
|
|
2148
|
|
2149 # For checking if the total number of SBS is correct
|
|
2150 $total_SBS_coding += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'} + $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
|
|
2151 }
|
|
2152 $row_SeqContext12++; $row_SeqContext12Percent++;
|
|
2153 }
|
|
2154 close COUNT; close PERCENT;
|
|
2155
|
|
2156 ## Write the total of each mutation types: 12 mut type on coding strand
|
|
2157 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+1, $ca_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+2, $ca_Tr, $formatT_bottomHeader2);
|
|
2158 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+3, $cg_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+4, $cg_Tr, $formatT_bottomHeader2);
|
|
2159 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+5, $ct_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+6, $ct_Tr, $formatT_bottomHeader2);
|
|
2160 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+7, $ta_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+8, $ta_Tr, $formatT_bottomHeader2);
|
|
2161 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+9, $tc_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+10, $tc_Tr, $formatT_bottomHeader2);
|
|
2162 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+11, $tg_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+12, $tg_Tr, $formatT_bottomHeader2);
|
|
2163 # Write the total percentages of each mutation types: 12 mut type on coding strand
|
|
2164 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+1, $percent_ca_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+2, $percent_ca_Tr, $formatT_bottomHeader);
|
|
2165 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+3, $percent_cg_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+4, $percent_cg_Tr, $formatT_bottomHeader);
|
|
2166 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+5, $percent_ct_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+6, $percent_ct_Tr, $formatT_bottomHeader);
|
|
2167 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+7, $percent_ta_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+8, $percent_ta_Tr, $formatT_bottomHeader);
|
|
2168 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+9, $percent_tc_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+10, $percent_tc_Tr, $formatT_bottomHeader);
|
|
2169 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+11, $percent_tg_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+12, $percent_tg_Tr, $formatT_bottomHeader);
|
|
2170
|
|
2171 if($total_SBS_coding == $refH_file->{$k_file}{'TotalSBSCoding'}) { $ws->write($row_SeqContext12, $colStart_matrixSeqContext+13, $refH_file->{$k_file}{'TotalSBSCoding'}, $formatT_bottomHeader2) }
|
|
2172 else { print STDERR "Error in the calculation of the total number of SBS on the coding strand!!!!\nFrom hash table $refH_file->{$k_file}{'TotalSBSCoding'}\tVS\t$total_SBS_coding\n"; exit; }
|
|
2173
|
|
2174
|
|
2175 my $totalP_SBS_coding = $percent_ca_NonTr + $percent_ca_Tr + $percent_cg_NonTr + $percent_cg_Tr + $percent_ct_NonTr + $percent_ct_Tr + $percent_ta_NonTr + $percent_ta_Tr + $percent_tc_NonTr + $percent_tc_Tr + $percent_tg_NonTr + $percent_tg_Tr; $totalP_SBS_coding = sprintf("%.0f", $totalP_SBS_coding);
|
|
2176 if($totalP_SBS_coding != 100) { print STDERR "The percentages for the trinucleotide sequence context on the coding strand for 12 mutation types is not equal to 100!!!\n$totalP_SBS_coding\n"; exit; }
|
|
2177
|
|
2178
|
|
2179 ###########################################################################################################################################################
|
|
2180 ################################################################### GRAPHS & TABLES #######################################################################
|
|
2181 ###########################################################################################################################################################
|
|
2182 Create_Graph($folderFigure, $k_file, $maxValue);
|
|
2183
|
|
2184 ## Distribution of SBS into the Excel report (Figure 1 + Table 1)
|
|
2185 $ws->write(0, 0, "Graph 1. SBS distribution", $formatT_graphTitle); $ws->set_row(0, 18);
|
|
2186 $ws->insert_image(1, 0, "$folder_temp/$k_file-SBS_distribution-Report.png", 0, 0, .2, .2);
|
|
2187 $ws->write(29, 0, "Table 1. Frequency and counts of all SBS", $format_A10Boldleft);
|
|
2188 $ws->write(30, 0, "Mutation type", $table_topleft); $ws->write(30, 1, "Percentage", $table_top); $ws->write(30, 2, "Count", $table_topRight);
|
|
2189 $ws->write(31, 0, "C:G>A:T", $table_left); $ws->write(31, 1, $percent_ca, $format_A10); $ws->write(31, 2, $ca_genomique, $table_right);
|
|
2190 $ws->write(32, 0, "C:G>G:C", $table_left); $ws->write(32, 1, $percent_cg, $format_A10); $ws->write(32, 2, $cg_genomique, $table_right);
|
|
2191 $ws->write(33, 0, "C:G>T:A", $table_left); $ws->write(33, 1, $percent_ct, $format_A10); $ws->write(33, 2, $ct_genomique, $table_right);
|
|
2192 $ws->write(34, 0, "T:A>A:T", $table_left); $ws->write(34, 1, $percent_ta, $format_A10); $ws->write(34, 2, $ta_genomique, $table_right);
|
|
2193 $ws->write(35, 0, "T:A>C:G", $table_left); $ws->write(35, 1, $percent_tc, $format_A10); $ws->write(35, 2, $tc_genomique, $table_right);
|
|
2194 $ws->write(36, 0, "T:A>G:C", $table_left); $ws->write(36, 1, $percent_tg, $format_A10); $ws->write(36, 2, $tg_genomique, $table_right);
|
|
2195 $ws->write(37, 0, "", $table_bottomleft); $ws->write(37, 1, "", $table_bottom); $ws->write(37, 2, $refH_file->{$k_file}{'TotalSBSGenomic'}, $table_bottomrightHeader);
|
|
2196
|
|
2197 ## Impact of the SBS on the protein
|
|
2198 $ws->write(0, 6, "Graph 2. Impact on protein sequence", $formatT_graphTitle);
|
|
2199 $ws->insert_image(1, 6, "$folder_temp/$k_file-DistributionExoFunc-Report.png", 0, 0, .2, .2);
|
|
2200
|
|
2201 ## Strand Bias
|
|
2202 $ws->write(0, 11, "Graph 3. Stranded distribution of SBS", $formatT_graphTitle);
|
|
2203 $ws->insert_image(1, 11, "$folder_temp/$k_file-StrandBias-Report.png", 0, 0, .2, .2);
|
|
2204
|
|
2205 ## Stranded signature (Scale the inserted image: width x 0.7, height x 0.8)
|
|
2206 $ws->insert_image($rowStart_SBSdistrBySeg+3, $colStart_matrixSeqContext+15, "$folder_temp/$k_file-StrandedSignatureCount-Report.png", 0, 0, .16, .16);
|
|
2207 $ws->insert_image($rowStart_SBSdistrBySeg+24, $colStart_matrixSeqContext+15, "$folder_temp/$k_file-StrandedSignaturePercent-Report.png", 0, 0, .16, .16);
|
|
2208
|
|
2209
|
|
2210 # Heatamp for the sequence context on the genomic strand (6 mutation types)
|
|
2211 $ws->insert_image(4, $colStart_matrixSeqContext, "$folder_temp/$k_file-HeatmapCount-Genomic-Report.png");
|
|
2212 $ws->insert_image(4, $colStart_matrixSeqContext+10, "$folder_temp/$k_file-HeatmapPercent-Genomic-Report.png");
|
|
2213
|
|
2214
|
|
2215 ## Bar plot for representing the sequence context (NMF like style)
|
|
2216 `Rscript $pathRScriptMutSpectrum $folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-MutationSpectraPercent-Genomic.txt $k_file $folderFigure/Trinucleotide_Sequence_Context/$k_file $folder_temp $c_ca6_g $c_cg6_g $c_ct6_g $c_ta6_g $c_tc6_g $c_tg6_g 2>&1`;
|
|
2217
|
|
2218 # Bar plot for the sequence context on the genomic strand (6 mutation types)
|
|
2219 $ws->insert_image(27, $colStart_matrixSeqContext+3, "$folder_temp/$k_file-MutationSpectraPercent-Genomic-Report.png");
|
|
2220
|
|
2221 # Next sample
|
|
2222 $row_SumSheet++;
|
|
2223 } # End $k_file
|
|
2224
|
|
2225 #----------------------------------------------------------------------------------------------------------------------------------------------------------------#
|
|
2226 # Write the input matrix for NMF
|
|
2227 open(OUTINPUTNMFC, ">", "$folderNMF/Input_NMF_Count.txt") or die "$!: $folderNMF/Input_NMF_Count.txt\n"; # with the count
|
|
2228 open(OUTINPUTNMFP, ">", "$folderNMF/Input_NMF_Frequency.txt") or die "$!: $folderNMF/Input_NMF_Frequency.txt\n"; # With the frequency un-normalized
|
|
2229
|
|
2230 foreach my $k_sample (@{$h_inputNMF{'Sample'}}) { print OUTINPUTNMFC "\t$k_sample"; print OUTINPUTNMFP "\t$k_sample"; }
|
|
2231 print OUTINPUTNMFC "\n"; print OUTINPUTNMFP "\n";
|
|
2232
|
|
2233 my $row_inputNMF = 1;
|
|
2234 foreach my $k_context (sort keys $h_inputNMF{'Count'})
|
|
2235 {
|
|
2236 $k_context =~ /(\w)_(\w)/; my ($base5, $base3) = ($1, $2);
|
|
2237 foreach my $k_mutation (sort keys $h_inputNMF{'Count'}{$k_context})
|
|
2238 {
|
|
2239 my ($col_inputNMF_Count, $col_inputNMF_Percent) = (1, 1);
|
|
2240 my $contextNMF = $base5."[$k_mutation]".$base3;
|
|
2241 $ws_inputNMF_count->write($row_inputNMF, 0, $contextNMF); $ws_inputNMF_percent->write($row_inputNMF, 0, $contextNMF);
|
|
2242 print OUTINPUTNMFC $contextNMF,"\t"; print OUTINPUTNMFP $contextNMF,"\t";
|
|
2243
|
|
2244 foreach (@{$h_inputNMF{'Count'}{$k_context}{$k_mutation}}) { print OUTINPUTNMFC "$_\t"; } print OUTINPUTNMFC "\n";
|
|
2245 foreach (@{$h_inputNMF{'Percent'}{$k_context}{$k_mutation}}) { print OUTINPUTNMFP "$_\t"; } print OUTINPUTNMFP "\n";
|
|
2246
|
|
2247 foreach (@{$h_inputNMF{'Count'}{$k_context}{$k_mutation}})
|
|
2248 {
|
|
2249 # print "\t$k_context\t$k_mutation\t";
|
|
2250 # print "\t$row_inputNMF\t$col_inputNMF_Count\t$_\n";
|
|
2251 $ws_inputNMF_count->write($row_inputNMF, $col_inputNMF_Count, $_); $col_inputNMF_Count++;
|
|
2252 }
|
|
2253 foreach (@{$h_inputNMF{'Percent'}{$k_context}{$k_mutation}}) { $ws_inputNMF_percent->write($row_inputNMF, $col_inputNMF_Percent, $_); $col_inputNMF_Percent++; }
|
|
2254 $row_inputNMF++;
|
|
2255 }
|
|
2256 }
|
|
2257 close OUTINPUTNMFP; close OUTINPUTNMFC;
|
|
2258
|
|
2259
|
|
2260 # Close the workbook
|
|
2261 $wb->close();
|
|
2262 }
|
|
2263 # Calculate the chi2 for the strand bias
|
|
2264 sub CalculateChi2
|
|
2265 {
|
|
2266 my ($refH_file, $folderChi2) = @_;
|
|
2267
|
|
2268 # No value for the chi2
|
|
2269 if(scalar (keys $refH_file) == 0) { print STDERR "No value for calculating the chi2 for the strand bias\n"; exit; }
|
|
2270
|
|
2271 # Strand bias for one mutation type for all the samples
|
|
2272 my %h_tempchi2 = ();
|
|
2273 my ($ca_NonTr, $ca_Tr, $cg_NonTr, $cg_Tr, $ct_NonTr, $ct_Tr, $ta_NonTr, $ta_Tr, $tc_NonTr, $tc_Tr, $tg_NonTr, $tg_Tr) = (0,0,0,0,0,0, 0,0,0,0,0,0);
|
|
2274
|
|
2275 my $nb_file = 0;
|
|
2276
|
|
2277 foreach my $k_file (sort keys $refH_file)
|
|
2278 {
|
|
2279 $nb_file++;
|
|
2280 foreach my $k_func (sort keys $refH_file->{$k_file}{'6mutType'})
|
|
2281 {
|
|
2282 foreach my $k_mutation (sort keys $refH_file->{$k_file}{'6mutType'}{$k_func})
|
|
2283 {
|
|
2284 if($k_mutation eq "C:G>A:T")
|
|
2285 {
|
|
2286 $h_tempchi2{'C>A'}{$k_file}{'NonTr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
|
|
2287 $h_tempchi2{'C>A'}{$k_file}{'Tr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
2288 }
|
|
2289 if($k_mutation eq "C:G>G:C")
|
|
2290 {
|
|
2291 $h_tempchi2{'C>G'}{$k_file}{'NonTr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
|
|
2292 $h_tempchi2{'C>G'}{$k_file}{'Tr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
2293 }
|
|
2294 if($k_mutation eq "C:G>T:A")
|
|
2295 {
|
|
2296 $h_tempchi2{'C>T'}{$k_file}{'NonTr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
|
|
2297 $h_tempchi2{'C>T'}{$k_file}{'Tr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
2298 }
|
|
2299 if($k_mutation eq "T:A>A:T")
|
|
2300 {
|
|
2301 $h_tempchi2{'T>A'}{$k_file}{'NonTr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
|
|
2302 $h_tempchi2{'T>A'}{$k_file}{'Tr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
2303 }
|
|
2304 if($k_mutation eq "T:A>C:G")
|
|
2305 {
|
|
2306 $h_tempchi2{'T>C'}{$k_file}{'NonTr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
|
|
2307 $h_tempchi2{'T>C'}{$k_file}{'Tr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
2308 }
|
|
2309 if($k_mutation eq "T:A>G:C")
|
|
2310 {
|
|
2311 $h_tempchi2{'T>G'}{$k_file}{'NonTr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
|
|
2312 $h_tempchi2{'T>G'}{$k_file}{'Tr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
|
|
2313 }
|
|
2314 }
|
|
2315 }
|
|
2316 }
|
|
2317
|
|
2318 # Create the input file for NMF
|
|
2319 open(CHI2, ">", "$folderChi2/Input_chi2_strandBias.txt") or die "$!: $folderChi2/Input_chi2_strandBias.txt\n";
|
|
2320 print CHI2 "SampleName\tNonTr\tTr\tAlteration\n";
|
|
2321
|
|
2322 foreach my $k_mutation (sort keys %h_tempchi2)
|
|
2323 {
|
|
2324 foreach my $k_file (sort keys $h_tempchi2{$k_mutation})
|
|
2325 {
|
|
2326 print CHI2 "$k_file\t$h_tempchi2{$k_mutation}{$k_file}{'NonTr'}\t$h_tempchi2{$k_mutation}{$k_file}{'Tr'}\t$k_mutation\n";
|
|
2327 }
|
|
2328 }
|
|
2329 close CHI2;
|
|
2330
|
|
2331
|
|
2332 # Open the connection with R
|
|
2333 my $R = Statistics::R->new() or die "Impossible to create a communication bridge with R\n";
|
|
2334
|
|
2335 $R->send(qq`## Load the data. There is one column with the mutation type and the sample name but it's just for knowing what is corresponding to each line. The two columns with the number of variant per strand would be sufficient.
|
|
2336 strBias<-read.delim("$folderChi2/Input_chi2_strandBias.txt", dec=".");`);
|
|
2337 $R->send(q`# Chi2
|
|
2338 pValChi2 <- c() # First I create an empty vector and then I apply a for on the data load
|
|
2339 pValChi2_round <- c() # Empty vector with the rounded p-values
|
|
2340 confInt <- c() # Empty vector for the confident interval
|
|
2341 proportion <- c() # Empty vector for the proportion of NonTr compared to the (NonTr+Tr)
|
|
2342 sampleSize <- c() # Empty vector for the count of samples in NonTr and Tr
|
|
2343 # For Pool_Data save the p-values in a different vector for not having them for the FDR
|
|
2344 pValChi2_PoolData <- c()
|
|
2345 pValChi2_PoolData_Round <- c()
|
|
2346
|
|
2347 j = 1 # Timer for pValChi2_PoolData vector
|
|
2348 k = 1 # Timer for pValChi2
|
|
2349
|
|
2350 for(i in 1:nrow(strBias))
|
|
2351 {
|
|
2352 if(! sum(strBias[i,2:3]) == 0)
|
|
2353 {
|
|
2354 # For Pool_Data
|
|
2355 if(strBias[i,1] == "Pool_Data")
|
|
2356 {
|
|
2357 pValChi2_PoolData[j] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
|
|
2358 j <- j+1
|
|
2359 }
|
|
2360 # For the other sample(s)
|
|
2361 else
|
|
2362 {
|
|
2363 # Calculate the p-value
|
|
2364 pValChi2[k] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
|
|
2365 k <- k+1
|
|
2366 }
|
|
2367
|
|
2368 # Calculate the confidence interval
|
|
2369 temp <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$conf.int
|
|
2370 confInt[i] <- paste0("[", round(temp[1],2), "-", round(temp[2],2), "]") # Same as paste(sep="")
|
|
2371
|
|
2372 # Save the proportion
|
|
2373 proportion[i] <- strBias[i,2] / sum(strBias[i,2:3])
|
|
2374
|
|
2375 # Save the sample size (count on NonTr and Tr)
|
|
2376 sampleSize[i] <- paste(strBias[i,2], strBias[i,3], sep="-")
|
|
2377 } else
|
|
2378 {
|
|
2379 if(strBias[i,1] == "Pool_Data")
|
|
2380 {
|
|
2381 pValChi2_PoolData[j] <- NA
|
|
2382 pValChi2_PoolData_Round[j] <- NA
|
|
2383 j <- j+1
|
|
2384 }
|
|
2385 else
|
|
2386 {
|
|
2387 # Not enough effective for the test
|
|
2388 pValChi2[k] <- NA
|
|
2389 confInt[k] <- NA
|
|
2390 proportion[k] <- NA
|
|
2391 sampleSize[k] <- NA
|
|
2392 pValChi2_round[k] <- NA
|
|
2393 k <- k+1
|
|
2394 }
|
|
2395 }
|
|
2396 }
|
|
2397 # Adjust with FDR
|
|
2398 FDR<-p.adjust(pValChi2, method="BH")
|
|
2399
|
|
2400 # Rount the p-value
|
|
2401 for(i in 1:nrow(strBias))
|
|
2402 {
|
|
2403 if( (! is.na(pValChi2[i])) && (pValChi2[i] < 0.0001) )
|
|
2404 {
|
|
2405 pValChi2_round[i] <- format(pValChi2[i], scientific=T, digits=3)
|
|
2406 } else if(! is.na(pValChi2[i]))
|
|
2407 {
|
|
2408 pValChi2_round[i] <- as.character(round(pValChi2[i], 3))
|
|
2409 }
|
|
2410 }
|
|
2411
|
|
2412 # The option for the pool is specified
|
|
2413 if(!is.null(pValChi2_PoolData))
|
|
2414 {
|
|
2415 # Round the p-value for Pool_Data
|
|
2416 for(i in 1:6)
|
|
2417 {
|
|
2418 if( (! is.na(pValChi2_PoolData[i])) && (pValChi2_PoolData[i] < 0.0001) )
|
|
2419 {
|
|
2420 pValChi2_PoolData_Round[i] <- format(pValChi2_PoolData[i], scientific=T, digits=3)
|
|
2421 } else if(! is.na(pValChi2_PoolData[i]))
|
|
2422 {
|
|
2423 pValChi2_PoolData_Round[i] <- as.character(round(pValChi2_PoolData[i], 3))
|
|
2424 }
|
|
2425 }
|
|
2426 }
|
|
2427
|
|
2428
|
|
2429 # I create a dataframe for add what I want
|
|
2430 outputChi2 <- data.frame(round(strBias[,2]/strBias[,3], digits=2), sampleSize, round(proportion, 3), confInt)
|
|
2431 outputChi2$Mut.type <- strBias$Alteration
|
|
2432 outputChi2$SampleName <- strBias$SampleName
|
|
2433 colnames(outputChi2)[1:6]<-c("Strand_Bias", "NonTr-Tr", "Proportion", "Confidence Interval", "Mutation_Type", "SampleName")
|
|
2434
|
|
2435 # Transform the data frame into a matrix for adding the p-value for the samples and Pool_Data
|
|
2436 matrix <- as.matrix(outputChi2)
|
|
2437 tempColPValFDR <- matrix(, nrow=length(sampleSize), ncol = 2) # Create an empty matrix with 2 columns for adding the p-value and the FDR
|
|
2438 matrix <- cbind(matrix, tempColPValFDR)
|
|
2439 j = 1 # Timer for all the sample
|
|
2440 k = 1 # Timer for Pool_Data
|
|
2441 for(i in 1:nrow(matrix))
|
|
2442 {
|
|
2443 if(matrix[i,6] == "Pool_Data")
|
|
2444 {
|
|
2445 matrix[i,7] <- pValChi2_PoolData_Round[k]
|
|
2446 matrix[i,8] <- "NA" # No FDR for Pool_Data
|
|
2447 k = k+1
|
|
2448 }
|
|
2449 else
|
|
2450 {
|
|
2451 matrix[i,7] <- pValChi2_round[j]
|
|
2452 matrix[i,8] <- round(FDR[j], 3)
|
|
2453 j = j+1
|
|
2454 }
|
|
2455 }
|
|
2456 # Reorder the columns
|
|
2457 matrix <- cbind(matrix[,1:3], matrix[,7], matrix[,8], matrix[,4:6])
|
|
2458 colnames(matrix)[4] <- "P-val-Chi2"
|
|
2459 colnames(matrix)[5] <- "FDR"`);
|
|
2460
|
|
2461 $R->send(qq`# Export the file
|
|
2462 # dec=".": Set the separator for the decimal by "."
|
|
2463 write.table(matrix,file="$folderChi2/Output_chi2_strandBias.txt",quote = FALSE,sep="\t",row.names = FALSE,dec=".");`);
|
|
2464
|
|
2465 # Stop the connection with R
|
|
2466 $R->stop();
|
|
2467 }
|
|
2468 # Pearson correlation
|
|
2469 sub PearsonCoefficient
|
|
2470 {
|
|
2471 our ($refH_file, $filename) = @_;
|
|
2472
|
|
2473 #### Calculate the Pearson coefficient
|
|
2474 my @total_SBS = (); # Pearson for all mutation types
|
|
2475
|
|
2476 # Create a 2D array
|
|
2477 foreach my $k_mutation (sort keys $refH_file->{$filename}{'SBSPerChr'})
|
|
2478 {
|
|
2479 my $x = [];
|
|
2480 my $correlation = 0;
|
|
2481
|
|
2482 if($k_mutation eq "AllMutType") { next; }
|
|
2483 elsif($k_mutation eq "TotalPerChr") { next; }
|
|
2484 elsif($k_mutation eq "ChrSize") { next; }
|
|
2485 else
|
|
2486 {
|
|
2487 my $testZero = 0; # The correlation function doesn't works if all the variables are equal to zero
|
|
2488 # generate an anonymous 2D array where $x->[1] is the row
|
|
2489 # $x->[1][1] is the value in row 1 column 1 and $x->[1][2] is the value of row 1 column 2
|
|
2490 # once you build the entire array, pass it to the correlation subroutine
|
|
2491 my $i=1;
|
|
2492 while ( my ($chromosome, $lenght) = each (%chromosomes))
|
|
2493 {
|
|
2494 $x->[$i][1] = $lenght; # First column contains the chromosome size
|
|
2495 $x->[$i][2] = $refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'CHR'}{$chromosome}{'chr'}; # Second column contains the count of SBS
|
|
2496 if($refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'CHR'}{$chromosome}{'chr'}==0) { $testZero++; }
|
|
2497 $i++;
|
|
2498 }
|
|
2499 if( $testZero == keys $refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'CHR'} ) { $correlation = 0; }
|
|
2500 # Pass the 2D array to the correlation subroutine
|
|
2501 else { $correlation = correlation($x); }
|
|
2502
|
|
2503 $refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'Pearson'} = $correlation; # Pearson per mutation type
|
|
2504 }
|
|
2505 }
|
|
2506
|
|
2507 #generate an anonymous 2D array for all mutation type
|
|
2508 my $testZero = 0;
|
|
2509 my $x = [];
|
|
2510 my $correlation = 0;
|
|
2511 my $i=1;
|
|
2512 while ( my ($chromosome, $lenght) = each (%chromosomes))
|
|
2513 {
|
|
2514 $x->[$i][1] = $lenght;
|
|
2515 $x->[$i][2] = $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'};
|
|
2516 $i++;
|
|
2517 }
|
|
2518 if($testZero == keys $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}) { $correlation = 0; }
|
|
2519 else { $correlation = correlation($x); }
|
|
2520 # Pass the 2D array to the correlation subroutine
|
|
2521 $refH_file->{$filename}{'SBSPerChr'}{'AllMutType'} = $correlation;
|
|
2522
|
|
2523 sub correlation
|
|
2524 {
|
|
2525 my ($x) = @_;
|
|
2526 my ($mean_x,$mean_y) = mean($x);
|
|
2527 my $ssxx=ss($x,$mean_x,$mean_y,1,1);
|
|
2528 my $ssyy=ss($x,$mean_x,$mean_y,2,2);
|
|
2529 my $ssxy=ss($x,$mean_x,$mean_y,1,2);
|
|
2530 my $correl=correl($ssxx,$ssyy,$ssxy);;
|
|
2531 my $xcorrel=sprintf("%.2f",$correl);
|
|
2532 return($xcorrel);
|
|
2533
|
|
2534 sub mean
|
|
2535 {
|
|
2536 my ($x)=@_;
|
|
2537 my $num = scalar(@{$x}) - 2;
|
|
2538 my $sum_x = '0';
|
|
2539 my $sum_y = '0';
|
|
2540 for (my $i = 2; $i < scalar(@{$x}); ++$i)
|
|
2541 {
|
|
2542 $sum_x += $x->[$i][1];
|
|
2543 $sum_y += $x->[$i][2];
|
|
2544 }
|
|
2545 my $mu_x = $sum_x / $num;
|
|
2546 my $mu_y = $sum_y / $num;
|
|
2547 return($mu_x,$mu_y);
|
|
2548 }
|
|
2549
|
|
2550 ### ss = sum of squared (deviations to the mean)
|
|
2551 sub ss
|
|
2552 {
|
|
2553 my ($x,$mean_x,$mean_y,$one,$two)=@_;
|
|
2554 my $sum = '0';
|
|
2555 for (my $i=2;$i<scalar(@{$x});++$i)
|
|
2556 {
|
|
2557 $sum += ($x->[$i][$one]-$mean_x)*($x->[$i][$two]-$mean_y);
|
|
2558 }
|
|
2559 return $sum;
|
|
2560 }
|
|
2561
|
|
2562 sub correl
|
|
2563 {
|
|
2564 my($ssxx,$ssyy,$ssxy)=@_;
|
|
2565
|
|
2566 my ($sign, $correl) = (0,0);
|
|
2567 if(abs($ssxy) == 0) { $sign = 0 }
|
|
2568 else { $sign=$ssxy/abs($ssxy); }
|
|
2569
|
|
2570 if( ($ssxx==0) || ($ssyy==0) ) { $correl = 0 }
|
|
2571 else { $correl=$sign*sqrt($ssxy*$ssxy/($ssxx*$ssyy)); }
|
|
2572
|
|
2573 return $correl;
|
|
2574 }
|
|
2575 }
|
|
2576 }
|
|
2577 # Complement bases (for the sequence context)
|
|
2578 sub complement
|
|
2579 {
|
|
2580 if($_[0] eq "A") { return "T"; }
|
|
2581 if($_[0] eq "C") { return "G"; }
|
|
2582 if($_[0] eq "G") { return "C"; }
|
|
2583 if($_[0] eq "T") { return "A"; }
|
|
2584 }
|
|
2585 # Create and write some graphics
|
|
2586 sub Create_Graph
|
|
2587 {
|
|
2588 our ($folderFigure, $filename, $maxValue) = @_;
|
|
2589
|
|
2590 # Open the connection with R
|
|
2591 my $R = Statistics::R->new() or die "Impossible to create a communication bridge with R\n";
|
|
2592 $R->startR() ;
|
|
2593 # Load the Library
|
|
2594 $R->send(q`library(ggplot2)`);
|
|
2595 $R->send(q`library(gplots)`);
|
|
2596 $R->send(q`library(gtable)`);
|
|
2597
|
|
2598
|
|
2599 $R->send(qq`##########################################
|
|
2600 ## OVERALL MUTATION DISTRIBUTION ##
|
|
2601 ##########################################
|
|
2602 distrMut <- read.table("$folderFigure/Overall_mutation_distribution/$filename/$filename-OverallMutationDistribution.txt", header=T)`);
|
|
2603 $R->send(q`# Add the count of each category in the legend
|
|
2604 distrMut$Legend[[1]] <- paste0(distrMut$Variant_type[[1]], " (", distrMut$Count[[1]], ")")
|
|
2605 distrMut$Legend[[2]] <- paste0(distrMut$Variant_type[[2]], " (", distrMut$Count[[2]], ")")
|
|
2606 distrMut$Legend[[3]] <- paste0(distrMut$Variant_type[[3]], " (", distrMut$Count[[3]], ")")`);
|
|
2607
|
|
2608 $R->send(qq`# Base plot
|
|
2609 pie <- ggplot(distrMut, aes(x=factor(""), fill=Legend, weight=Count)) + geom_bar(width=1) + coord_polar(theta="y") + scale_x_discrete("", breaks=NULL) + scale_y_continuous("", breaks=NULL) + labs(fill="")
|
|
2610 # Background of the plot entire white
|
|
2611 pie <- pie + theme(panel.grid.major = element_line(colour="white"), panel.grid.minor = element_line(colour="white"), panel.background = element_rect(fill="white"))
|
|
2612 # Legend on right in 3 rows
|
|
2613 pie <- pie + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3))
|
|
2614 # Change the color and the title of the legend
|
|
2615 pie <- pie + scale_fill_brewer("Variant type", palette="Set1")
|
|
2616 # Remove all the margins
|
|
2617 pie <- pie + theme(plot.margin=unit(c(-1, 0, -1.5, 0), "cm"))
|
|
2618 # Save the pie chart for the HTML page (higher resolution)
|
|
2619 options(bitmapType='cairo') # Use cairo device as isn't possible to install X11 on the server...
|
|
2620 png("$folderFigure/Overall_mutation_distribution/$filename/$filename-OverallMutationDistribution.png", width=700, height=1100, res=300)
|
|
2621 print(pie)
|
|
2622 dev.off()
|
|
2623
|
|
2624
|
|
2625 ##########################################
|
|
2626 ## SBS MUTATION DISTRIBUTION ##
|
|
2627 ##########################################
|
|
2628 distrSBS <- read.delim("$folderFigure/SBS_distribution/$filename/$filename-SBS_distribution.txt")
|
|
2629 distrSBS <- data.frame(distrSBS)
|
|
2630 bar <- ggplot(distrSBS, aes(x=Mutation_Type, y=Percentage, fill=Mutation_Type))
|
|
2631 bar <- bar + geom_bar(stat="identity", width=0.5)
|
|
2632 # Theme classic
|
|
2633 bar <- bar + theme_classic()
|
|
2634 # Remove the axis legend
|
|
2635 bar <- bar + xlab("")
|
|
2636 # Set the color of the bars and Changing the labels in the legend
|
|
2637 bar <- bar + scale_fill_manual(values=c("blue", "black", "red", "gray", "#00CC33", "pink"),
|
|
2638 labels=c("C:G>A:T", "C:G>G:C", "C:G>T:A", "T:A>A:T", "T:A>C:G", "T:A>G:C")
|
|
2639 )
|
|
2640 # Remove the label in x axis
|
|
2641 bar <- bar + theme(axis.text.x = element_blank())
|
|
2642 # Change the name of the y label
|
|
2643 bar <- bar + ylab("Percent")
|
|
2644 # Save the plot for the HTML page (higher resolution)
|
|
2645 options(bitmapType='cairo')
|
|
2646 png("$folderFigure/SBS_distribution/$filename/$filename-SBS_distribution.png", width=1800, height=1500, res=300)
|
|
2647 print(bar);
|
|
2648 dev.off()
|
|
2649 # Save the plot for the report
|
|
2650 bar
|
|
2651 ggsave("$folder_temp/$filename-SBS_distribution-Report.png")
|
|
2652
|
|
2653
|
|
2654 ##########################################
|
|
2655 ## IMPACT ON PROTEIN ##
|
|
2656 ##########################################
|
|
2657 impactProt <- read.table("$folderFigure/Impact_protein_sequence/$filename/$filename-DistributionExoFunc.txt", header=T)
|
|
2658 # Custom palette: black, orange, dark green, yellow, light blue, dark blue, darkslateblue, red, purple, pink, light green, turquoise, gray
|
|
2659 cb_palette <- c("#000000", "#E69F00", "#006600", "#660000", "#F0E442", "#56B4E9", "#3300FF", "#483D8B", "#FF0000", "#9900CC", "#FF66CC", "#00CC00", "#66FFFF", "#C0C0C0")
|
|
2660 pie <- ggplot(impactProt, aes(x=factor(""), fill=AA_Change, weight=Count)) + geom_bar(width=1) + coord_polar(theta="y") + scale_x_discrete("", breaks=NULL)+ scale_y_continuous("", breaks=NULL) + scale_fill_manual(values=cb_palette)
|
|
2661 # Background of the plot entire white
|
|
2662 pie <- pie + theme(panel.grid.major = element_line(colour="white"), panel.grid.minor = element_line(colour="white"), panel.background = element_rect(fill="white"))
|
|
2663 # Legend in two column
|
|
2664 pie <- pie + guides(fill=guide_legend(ncol=2)) + theme(legend.position="bottom")
|
|
2665 # Remove the legend title
|
|
2666 pie <- pie + labs(fill="")
|
|
2667 # Save the plot for the HTML page (higher resolution)
|
|
2668 options(bitmapType='cairo')
|
|
2669 png("$folderFigure/Impact_protein_sequence/$filename/$filename-DistributionExoFunc.png", width=1600, height=1800, res=300)
|
|
2670 print(pie)
|
|
2671 dev.off()
|
|
2672 # Save the plot for the report
|
|
2673 pie
|
|
2674 ggsave("$folder_temp/$filename-DistributionExoFunc-Report.png")
|
|
2675
|
|
2676
|
|
2677 ##########################################
|
|
2678 ## STRAND BIAS ##
|
|
2679 ##########################################
|
|
2680 cb_palette_SB <- c("#0072B2", "#CC0000")
|
|
2681 file_sb <- read.table("$folderFigure/Stranded_Analysis/$filename/$filename-StrandBias.txt", header=T);
|
|
2682 p_sb <- ggplot(file_sb, aes(x=Alteration, y=Count, fill=Strand)) + theme_classic() + geom_bar(stat="identity", position="dodge") + scale_fill_manual(values=cb_palette_SB) + theme(axis.text.x = element_text(angle=60, hjust=1)) + xlab("") + theme(legend.position="bottom")
|
|
2683 # Save the plot for the HTML page (higher resolution)
|
|
2684 options(bitmapType='cairo')
|
|
2685 png("$folderFigure/Stranded_Analysis/$filename/$filename-StrandBias.png", width=1000, height=1200, res=300)
|
|
2686 print(p_sb)
|
|
2687 dev.off()
|
|
2688 # Save the plot for the report
|
|
2689 p_sb
|
|
2690 ggsave("$folder_temp/$filename-StrandBias-Report.png")
|
|
2691
|
|
2692
|
|
2693 ##########################################
|
|
2694 ## HEATMAP SEQUENCE CONTEXT ##
|
|
2695 ## GENOMIC STRAND ##
|
|
2696 ##########################################
|
|
2697 ## COUNT
|
|
2698 heatmap_G <- read.table("$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapCount-Genomic.txt", header=T)
|
|
2699 # Save the plot for the report
|
|
2700 options(bitmapType='cairo')
|
|
2701 png(filename="$folder_temp/$filename-HeatmapCount-Genomic-Report.png", bg="transparent", width=240, height=360)
|
|
2702 # Heatmap with an absolute scale
|
|
2703 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
|
|
2704 dev.off()
|
|
2705 # Save the plot for the HTML page (higher resolution)
|
|
2706 options(bitmapType='cairo')
|
|
2707 png(filename="$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapCount-Genomic.png", width=1100, height=1600, res=300)
|
|
2708 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
|
|
2709 dev.off()
|
|
2710
|
|
2711 ## PERCENT
|
|
2712 heatmap_G <- read.table("$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapPercent-Genomic.txt", header=T)
|
|
2713 # Save the plot for the report
|
|
2714 options(bitmapType='cairo')
|
|
2715 png(filename="$folder_temp/$filename-HeatmapPercent-Genomic-Report.png",bg="transparent", width=240, height=360)
|
|
2716 # Heatmap with an absolute scale
|
|
2717 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
|
|
2718 dev.off()
|
|
2719 # Save the plot for the HTML page (higher resolution)
|
|
2720 options(bitmapType='cairo')
|
|
2721 png(filename="$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapPercent-Genomic.png", width=1100, height=1600, res=300)
|
|
2722 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
|
|
2723 dev.off()`);
|
|
2724 $R->stopR() ;
|
|
2725
|
|
2726 ## Plot the transcriptional strand bias in mutation signature
|
|
2727 `Rscript $pathRScriptTxnSB $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignatureCount.txt $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignatureCount $folder_temp/$filename-StrandedSignatureCount Count 2>&1`;
|
|
2728 `Rscript $pathRScriptTxnSB $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignaturePercent.txt $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignaturePercent $folder_temp/$filename-StrandedSignaturePercent Percent 2>&1`;
|
|
2729 }
|
|
2730 # Write the titles of the different sections of the report
|
|
2731 sub WriteBoderSection
|
|
2732 {
|
|
2733 our ($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext) = @_;
|
|
2734
|
|
2735 our ($format_topLeft, $format_topRight, $format_bottomLeft, $format_bottomRight, $format_top, $format_right, $format_bottom, $format_left);
|
|
2736 Format_section($wb, \$format_topLeft, \$format_topRight, \$format_bottomLeft, \$format_bottomRight, \$format_top, \$format_right, \$format_bottom, \$format_left);
|
|
2737
|
|
2738 TableSBSDistrBySeg();
|
|
2739 TableStrandBiasBySegment();
|
|
2740 CountSBSPerChr();
|
|
2741 ShortTriNtContext(); # 6 mut type
|
|
2742 LongTriNtContext(); # 12 mut type
|
|
2743
|
|
2744 sub TableSBSDistrBySeg
|
|
2745 {
|
|
2746 # Top-Left
|
|
2747 $ws->write($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, "Table 4. SBS distribution by functional region", $format_topLeft); $ws->set_row($rowStart_SBSdistrBySeg, 18); # Set the height of the row to 0.25"
|
|
2748 # Top
|
|
2749 for(my $i=1; $i<=13; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg+$i, $format_top); }
|
|
2750 # Top-Right
|
|
2751 $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg+13, $format_topRight);
|
|
2752 # Right
|
|
2753 $ws->write_blank($rowStart_SBSdistrBySeg+1, $colStart_SBSdistrBySeg+13, $format_right);
|
|
2754 # Bottom-left
|
|
2755 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+5, $colStart_SBSdistrBySeg, $format_bottomLeft);
|
|
2756 # Left
|
|
2757 $ws->write_blank($rowStart_SBSdistrBySeg+1, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg, $format_left);
|
|
2758 }
|
|
2759
|
|
2760 sub TableStrandBiasBySegment
|
|
2761 {
|
|
2762 # Top-Left
|
|
2763 $ws->write($rowStart_SBSdistrBySeg+$nb_func+8, $colStart_SBSdistrBySeg, "Table 5. Strand bias by functional region", $format_topLeft); $ws->set_row($rowStart_SBSdistrBySeg+$nb_func+8, 18); # Set the height of the row to 0.25"
|
|
2764 # Top
|
|
2765 for(my $i=1; $i<=10; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+8, $colStart_SBSdistrBySeg+$i, $format_top); }
|
|
2766 # Top-Right
|
|
2767 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+8, $colStart_SBSdistrBySeg+11, $format_topRight);
|
|
2768 # Right
|
|
2769 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+9, $colStart_SBSdistrBySeg+11, $format_right); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*2)+13, $colStart_SBSdistrBySeg+11, $format_right);
|
|
2770 # Left
|
|
2771 $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+9, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+$nb_func+10, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*2)+13, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*2)+14, $colStart_SBSdistrBySeg, $format_left);
|
|
2772 # Bottom
|
|
2773 $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*3)+16, $colStart_SBSdistrBySeg+4, $format_bottom); $ws->write_blank($rowStart_SBSdistrBySeg+($nb_func*3)+16, $colStart_SBSdistrBySeg+8, $format_bottom);
|
|
2774 }
|
|
2775
|
|
2776 sub CountSBSPerChr
|
|
2777 {
|
|
2778 #### Top-Left
|
|
2779 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, $colStart_SBSdistrBySeg, "Table 6. SBS distribution per chromosome", $format_topLeft); $ws->set_row($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, 18); # Set the height of the row to 0.25"
|
|
2780 #### Top
|
|
2781 for(my $i=1; $i<8; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, $colStart_SBSdistrBySeg+$i, $format_top); }
|
|
2782 #### Top-Right
|
|
2783 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+4, $colStart_SBSdistrBySeg+8, $format_topRight);
|
|
2784 #### Right
|
|
2785 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+5, $colStart_SBSdistrBySeg+8, $format_right); $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+6, $colStart_SBSdistrBySeg+8, $format_right);
|
|
2786
|
|
2787 #### Bottom-Right
|
|
2788 # Human genome = 24 chromosomes
|
|
2789 if($refGenome =~ /hg/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg+8, $format_bottomRight); }
|
|
2790 # Mouse genome = 21 chromosomes
|
|
2791 if($refGenome =~ /mm/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg+8, $format_bottomRight); }
|
|
2792 # Rat genome = 22 chromosomes
|
|
2793 if($refGenome =~ /rn/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg+8, $format_bottomRight); }
|
|
2794
|
|
2795 #### Bottom
|
|
2796 if($refGenome =~ /hg/)
|
|
2797 {
|
|
2798 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg+1, $format_bottom);
|
|
2799 for(my $i=3; $i<=7; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg+$i, $format_bottom); }
|
|
2800 }
|
|
2801 if($refGenome =~ /mm/)
|
|
2802 {
|
|
2803 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg+1, $format_bottom);
|
|
2804 for(my $i=3; $i<=7; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg+$i, $format_bottom); }
|
|
2805 }
|
|
2806 if($refGenome =~ /rn/)
|
|
2807 {
|
|
2808 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg+1, $format_bottom);
|
|
2809 for(my $i=3; $i<=7; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg+$i, $format_bottom); }
|
|
2810 }
|
|
2811
|
|
2812 #### Left
|
|
2813 $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+5, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+6, $colStart_SBSdistrBySeg, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+7, $colStart_SBSdistrBySeg, $format_left);
|
|
2814
|
|
2815 #### Bottom-left
|
|
2816 if($refGenome =~ /hg/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+33, $colStart_SBSdistrBySeg, $format_bottomLeft); }
|
|
2817 if($refGenome =~ /mm/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+30, $colStart_SBSdistrBySeg, $format_bottomLeft); }
|
|
2818 if($refGenome =~ /rn/) { $ws->write_blank($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+31, $colStart_SBSdistrBySeg, $format_bottomLeft); }
|
|
2819 }
|
|
2820
|
|
2821 sub ShortTriNtContext
|
|
2822 {
|
|
2823 my $format_headerSection = $wb->add_format(valign => 'left', bold => 1, font => 'Arial', size => 12);
|
|
2824 $format_headerSection->set_left(2); $format_headerSection->set_left_color('blue');
|
|
2825
|
|
2826 # Top-left
|
|
2827 $ws->write(0, $colStart_matrixSeqContext, "Panel 1. Trinucleotide sequence context of SBS on the genomic sequence", $format_topLeft);
|
|
2828 # Top
|
|
2829 for(my $i=1; $i<=19; $i++) { $ws->write_blank(0, $colStart_matrixSeqContext+$i, $format_top); }
|
|
2830 # Top-right
|
|
2831 $ws->write_blank(0, $colStart_matrixSeqContext+20, $format_topRight);
|
|
2832 # Right
|
|
2833 for(my $i=1; $i<=37; $i++) { $ws->write_blank($i, $colStart_matrixSeqContext+20, $format_right); }
|
|
2834 # Bottom-right
|
|
2835 $ws->write_blank(37, $colStart_matrixSeqContext+20, $format_bottomRight);
|
|
2836 # Bottom
|
|
2837 for(my $i=1; $i<=19; $i++) { $ws->write_blank(38, $colStart_matrixSeqContext+$i, $format_top); }
|
|
2838 # Bottom-left
|
|
2839 $ws->write_blank(37, $colStart_matrixSeqContext, $format_bottomLeft);
|
|
2840 # Left
|
|
2841 $ws->write(1, $colStart_matrixSeqContext, "", $format_left);
|
|
2842 for(my $i=3; $i<=36; $i++) { $ws->write_blank($i, $colStart_matrixSeqContext, $format_left); }
|
|
2843 }
|
|
2844
|
|
2845 sub LongTriNtContext
|
|
2846 {
|
|
2847 # Top-left
|
|
2848 $ws->write($rowStart_SBSdistrBySeg, $colStart_matrixSeqContext, "Panel 2. Stranded analysis of trinucleotide sequence context of SBS", $format_topLeft);
|
|
2849 # Top
|
|
2850 for(my $i=1; $i<=28; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_matrixSeqContext+$i, $format_top); }
|
|
2851 # Top-right
|
|
2852 $ws->write_blank($rowStart_SBSdistrBySeg, $colStart_matrixSeqContext+29, $format_topRight);
|
|
2853 # Right
|
|
2854 for(my $i=1; $i<=42; $i++) { $ws->write_blank($rowStart_SBSdistrBySeg+$i, $colStart_matrixSeqContext+29, $format_right); }
|
|
2855 # Bottom-right
|
|
2856 $ws->write_blank(91, $colStart_matrixSeqContext+29, $format_bottomRight);
|
|
2857 # Bottom
|
|
2858 for(my $i=13; $i<=28; $i++) { $ws->write_blank(92, $colStart_matrixSeqContext+$i, $format_top); }
|
|
2859 # Bottom-left
|
|
2860 $ws->write_blank(91, $colStart_matrixSeqContext, $format_bottomLeft);
|
|
2861 # Left
|
|
2862 $ws->write_blank($rowStart_SBSdistrBySeg+1, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext, $format_left);
|
|
2863 $ws->write_blank($rowStart_SBSdistrBySeg+22, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+23, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext, $format_left); $ws->write_blank($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext, $format_left);
|
|
2864 }
|
|
2865 }
|
|
2866 # Write the header for the six mutation types
|
|
2867 sub WriteHeaderSection
|
|
2868 {
|
|
2869 our ($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext) = @_;
|
|
2870
|
|
2871 our ($format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG, $format_TG2, $format_LeftHeader, $format_RightHeader, $format_LeftHeader2);
|
|
2872 Format_Header($wb, \$format_CA, \$format_CG, \$format_CT, \$format_TA, \$format_TC, \$format_TG, \$format_TG2, \$format_LeftHeader, \$format_RightHeader, \$format_LeftHeader2);
|
|
2873
|
|
2874 our ($format_LeftCA, $format_LeftCG, $format_LeftCT, $format_LeftTA, $format_LeftTC, $format_LeftTG, $format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG);
|
|
2875 Format_HeaderSBSDistrBySegAndFunc($wb, \$format_LeftCA, \$format_LeftCG, \$format_LeftCT, \$format_LeftTA, \$format_LeftTC, \$format_LeftTG, \$format_RightCA, \$format_RightCG, \$format_RightCT, \$format_RightTA, \$format_RightTC, \$format_RightTG);
|
|
2876
|
|
2877 our $format_A11Bold = ""; Format_A11Bold($wb, \$format_A11Bold); # Arial 11 bold and center
|
|
2878 our $format_A11BoldLeft = ""; Format_A11BoldLeft($wb, \$format_A11BoldLeft); # Arial 11 bold and left
|
|
2879
|
|
2880 our ($format_header12CA, $format_header12CG, $format_header12CT, $format_header12TA, $format_header12TC, $format_header12TG);
|
|
2881 Format_Header12MutType($wb, \$format_header12CA, \$format_header12CG, \$format_header12CT, \$format_header12TA, \$format_header12TC, \$format_header12TG);
|
|
2882
|
|
2883 ## Header for SBS distribution by segment
|
|
2884 HeaderMutTypeSBSDistrBySeg();
|
|
2885
|
|
2886 ## Header for strand bias by function
|
|
2887 $ws->set_column($colStart_SBSdistrBySeg+5, $colStart_SBSdistrBySeg+5, 11);
|
|
2888
|
|
2889 my $row = $rowStart_SBSdistrBySeg+$nb_func+10; my $col = $colStart_SBSdistrBySeg;
|
|
2890 $ws->write($row, $col+1, ' ', $format_CA); $ws->write($row, $col+2, "C>A", $format_CA); $ws->write($row, $col+3, ' ', $format_CA);
|
|
2891 $ws->write($row, $col+5, ' ', $format_CG); $ws->write($row, $col+6, "C>G", $format_CG); $ws->write($row, $col+7, ' ', $format_CG);
|
|
2892 $ws->write($row, $col+9, ' ', $format_CT); $ws->write($row, $col+10, "C>T", $format_CT); $ws->write($row, $col+11, ' ', $format_RightCT);
|
|
2893
|
|
2894 $row = $rowStart_SBSdistrBySeg+($nb_func*2)+14;
|
|
2895 $ws->write($row, $col+1, ' ', $format_TA); $ws->write($row, $col+2, "T>A", $format_TA); $ws->write($row, $col+3, ' ', $format_TA);
|
|
2896 $ws->write($row, $col+5, ' ', $format_TC); $ws->write($row, $col+6, "T>C", $format_TC); $ws->write($row, $col+7, ' ', $format_TC);
|
|
2897 $ws->write($row, $col+9, ' ', $format_TG2); $ws->write($row, $col+10, "T>G", $format_TG2); $ws->write($row, $col+11, ' ', $format_RightTG);
|
|
2898
|
|
2899 $ws->set_row($rowStart_SBSdistrBySeg+$nb_func+11, 18); $ws->set_row($rowStart_SBSdistrBySeg+($nb_func*2)+15, 18);
|
|
2900 $ws->set_column($colStart_SBSdistrBySeg+5, $colStart_SBSdistrBySeg+5, 13); $ws->set_column($colStart_SBSdistrBySeg+9, $colStart_SBSdistrBySeg+9, 13);
|
|
2901
|
|
2902 for(my $i=$rowStart_SBSdistrBySeg+$nb_func+10; $i<=$rowStart_SBSdistrBySeg+($nb_func*2)+14; $i+=$nb_func+4)
|
|
2903 {
|
|
2904 $ws->write($i+1, $colStart_SBSdistrBySeg, 'Segment', $format_LeftHeader); $ws -> write($i+1, $colStart_SBSdistrBySeg+1, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+2, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+3, 'Tr', $format_A11Bold);
|
|
2905 $ws -> write($i+1, $colStart_SBSdistrBySeg+5, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+6, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+7, 'Tr', $format_A11Bold);
|
|
2906 $ws -> write($i+1, $colStart_SBSdistrBySeg+9, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+10, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+11, 'Tr', $format_RightHeader);
|
|
2907 }
|
|
2908
|
|
2909
|
|
2910 ## Header for Counts of SBS per chromosome and mutation type
|
|
2911 HeaderCountSBSPerChr();
|
|
2912
|
|
2913 ## Header for the short sequence context
|
|
2914 HeaderShortTriNtContext();
|
|
2915
|
|
2916 ## Header for the 12 mutation types with the sequence context (coding strand)
|
|
2917 HeaderLongTriNtContext();
|
|
2918
|
|
2919 sub HeaderMutTypeSBSDistrBySeg
|
|
2920 {
|
|
2921 $ws->set_row($rowStart_SBSdistrBySeg+2, 18);
|
|
2922 $ws->write($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+2, "C:G>A:T", $format_CA); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+3, $format_CA);
|
|
2923 $ws->write($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+4, "C:G>G:C", $format_CG); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+5, $format_CG);
|
|
2924 $ws->write($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+6, "C:G>T:A", $format_CT); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+7, $format_CT);
|
|
2925 $ws->write($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+8, "T:A>A:T", $format_TA); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+9, $format_TA);
|
|
2926 $ws->write($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+10, "T:A>C:G", $format_TC); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+11, $format_TC);
|
|
2927 $ws->write($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+12, "T:A>G:C", $format_TG); $ws->write_blank($rowStart_SBSdistrBySeg+2, $colStart_SBSdistrBySeg+13, $format_TG);
|
|
2928
|
|
2929 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg, "Segment", $format_LeftHeader); $ws->set_column($colStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, 13); $ws->set_row($rowStart_SBSdistrBySeg+3, 18);
|
|
2930 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+1, "Total SBS", $format_A11Bold); $ws->set_column($colStart_SBSdistrBySeg+1, $colStart_SBSdistrBySeg+1, 11);
|
|
2931 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+2, "%", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+3, "#", $format_A11Bold);
|
|
2932 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+4, "%", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+5, "#", $format_A11Bold);
|
|
2933 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+6, "%", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+7, "#", $format_A11Bold);
|
|
2934 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+8, "%", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+9, "#", $format_A11Bold);
|
|
2935 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+10, "%", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+11, "#", $format_A11Bold);
|
|
2936 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_SBSdistrBySeg+12, "%", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+3, 13, "#", $format_RightHeader);
|
|
2937 }
|
|
2938
|
|
2939 sub HeaderCountSBSPerChr
|
|
2940 {
|
|
2941 $ws->set_column(3,3, 10); $ws->set_column(4,4, 10);
|
|
2942 $ws->set_row($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, 18);
|
|
2943 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+7, $colStart_SBSdistrBySeg+1, "Pearson", $format_A11Bold);
|
|
2944 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg, "Chr", $format_LeftHeader);
|
|
2945 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+1, "Size", $format_A11Bold);
|
|
2946 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+2, "All SBS", $format_A11Bold);
|
|
2947
|
|
2948 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+3, "C:G>A:T", $format_CA);
|
|
2949 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+4, "C:G>G:C", $format_CG);
|
|
2950 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+5, "C:G>T:A", $format_CT);
|
|
2951 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+6, "T:A>A:T", $format_TA);
|
|
2952 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+7, "T:A>C:G", $format_TC);
|
|
2953 $ws->write($rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8, $colStart_SBSdistrBySeg+8, "T:A>G:C", $format_TG);
|
|
2954 }
|
|
2955
|
|
2956 sub HeaderShortTriNtContext
|
|
2957 {
|
|
2958 ### GENOMIC STRAND
|
|
2959 $ws->write(2, $colStart_matrixSeqContext, 'Count matrix', $format_LeftHeader2);
|
|
2960 $ws->write(3, $colStart_matrixSeqContext+4, 'C>A', $format_CA); $ws->write(3, $colStart_matrixSeqContext+5, 'C>G', $format_CG); $ws->write(3, $colStart_matrixSeqContext+6, 'C>T', $format_CT); $ws->write(3, $colStart_matrixSeqContext+7, 'T>A', $format_TA); $ws->write(3, $colStart_matrixSeqContext+8, 'T>C', $format_TC); $ws->write(3, $colStart_matrixSeqContext+9, 'T>G', $format_TG2);
|
|
2961
|
|
2962 $ws->write(2, $colStart_matrixSeqContext+11, 'Frequency matrix', $format_A11BoldLeft);
|
|
2963 $ws->write(3, $colStart_matrixSeqContext+14, 'C>A', $format_CA); $ws->write(3, $colStart_matrixSeqContext+15, 'C>G', $format_CG); $ws->write(3, $colStart_matrixSeqContext+16, 'C>T', $format_CT); $ws->write(3, $colStart_matrixSeqContext+17, 'T>A', $format_TA); $ws->write(3, $colStart_matrixSeqContext+18, 'T>C', $format_TC); $ws->write(3, $colStart_matrixSeqContext+19, 'T>G', $format_TG2);
|
|
2964
|
|
2965 ### sequence context with a bar graph
|
|
2966 $ws->write(25, $colStart_matrixSeqContext+10, "Mutation spectra frequency", $format_A11Bold);
|
|
2967 }
|
|
2968
|
|
2969 sub HeaderLongTriNtContext
|
|
2970 {
|
|
2971 $ws->set_row($rowStart_SBSdistrBySeg+3, 15); $ws->set_row($rowStart_SBSdistrBySeg+4, 15); $ws->set_row($rowStart_SBSdistrBySeg+5, 15);
|
|
2972 $ws->write($rowStart_SBSdistrBySeg+3, $colStart_matrixSeqContext, "Count matrix", $format_LeftHeader2);
|
|
2973 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+1, "C>A", $format_CA); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+2, $format_CA); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+1, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+2, "Tr", $format_A11Bold);
|
|
2974 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+3, "C>G", $format_CG); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+4, $format_CG); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+3, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+4, "Tr", $format_A11Bold);
|
|
2975 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+5, "C>T", $format_CT); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+6, $format_CT); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+5, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+6, "Tr", $format_A11Bold);
|
|
2976 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+7, "T>A", $format_TA); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+8, $format_TA); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+7, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+8, "Tr", $format_A11Bold);
|
|
2977 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+9, "T>C", $format_TC); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+10, $format_TC); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+9, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+10, "Tr", $format_A11Bold);
|
|
2978 $ws->write($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+11, "T>G", $format_TG2); $ws->write_blank($rowStart_SBSdistrBySeg+4, $colStart_matrixSeqContext+12, $format_TG2); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+11, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+5, $colStart_matrixSeqContext+12, "Tr", $format_A11Bold);
|
|
2979
|
|
2980
|
|
2981 $ws->set_row($rowStart_SBSdistrBySeg+24, 15); $ws->set_row($rowStart_SBSdistrBySeg+25, 15); $ws->set_row($rowStart_SBSdistrBySeg+26, 15);
|
|
2982 $ws->write($rowStart_SBSdistrBySeg+24, $colStart_matrixSeqContext, "Frequency matrix", $format_LeftHeader2);
|
|
2983 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+1, "C>A", $format_CA); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+2, $format_CA); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+1, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+2, "Tr", $format_A11Bold);
|
|
2984 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+3, "C>G", $format_CG); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+4, $format_CG); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+3, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+4, "Tr", $format_A11Bold);
|
|
2985 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+5, "C>T", $format_CT); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+6, $format_CT); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+5, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+6, "Tr", $format_A11Bold);
|
|
2986 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+7, "T>A", $format_TA); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+8, $format_TA); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+7, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+8, "Tr", $format_A11Bold);
|
|
2987 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+9, "T>C", $format_TC); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+10, $format_TC); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+9, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+10, "Tr", $format_A11Bold);
|
|
2988 $ws->write($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+11, "T>G", $format_TG2); $ws->write_blank($rowStart_SBSdistrBySeg+25, $colStart_matrixSeqContext+12, $format_TG2); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+11, "NonTr", $format_A11Bold); $ws->write($rowStart_SBSdistrBySeg+26, $colStart_matrixSeqContext+12, "Tr", $format_A11Bold);
|
|
2989 }
|
|
2990 }
|
|
2991 # Create logo for representing the sequence context with n bases
|
|
2992 sub CreateLogo
|
|
2993 {
|
|
2994 my ($refH_file, $folderWebLogo) = @_;
|
|
2995
|
|
2996 my $folderSample = "";
|
|
2997
|
|
2998 foreach my $k_file (sort keys $refH_file)
|
|
2999 {
|
|
3000 $folderSample = "$folderWebLogo/$k_file";
|
|
3001 if(!-e $folderSample) { mkdir($folderSample) or die "Can't create the directory $folderSample\n"; }
|
|
3002
|
|
3003 my $test_lengthSeqContext = 0;
|
|
3004
|
|
3005 foreach my $k_mutation (sort keys $refH_file->{$k_file}{'WebLogo3'})
|
|
3006 {
|
|
3007 open(WEBLOGO, ">", "$folderSample/$k_file-$k_mutation.fa") or die "$!: $folderSample/$k_file-$k_mutation.fa\n";
|
|
3008 foreach (@{$refH_file->{$k_file}{'WebLogo3'}{$k_mutation}})
|
|
3009 {
|
|
3010 print WEBLOGO ">$k_file\n$_\n";
|
|
3011
|
|
3012 if(length($_) < 10) { $test_lengthSeqContext = 0; }
|
|
3013 else { $test_lengthSeqContext = 1; }
|
|
3014 }
|
|
3015 close WEBLOGO;
|
|
3016 }
|
|
3017
|
|
3018 ## Generate the logo
|
|
3019 foreach my $fastaFile (`ls $folderSample/*.fa`)
|
|
3020 {
|
|
3021 chomp($fastaFile);
|
|
3022 my ($filename, $directories, $suffix) = fileparse("$folderSample/$fastaFile", qr/\.[^.]*/);
|
|
3023
|
|
3024 $filename =~ /(.+)\-/;
|
|
3025 my $title = $1;
|
|
3026
|
|
3027 ## Test if there is fasta sequence for the mutation type
|
|
3028 my $nbLigne_temp = `wc -l $fastaFile`;
|
|
3029 my @nbLigne = split(" ", $nbLigne_temp);
|
|
3030
|
|
3031 if($nbLigne[0] == 0) { print "WARNING: No sequence for $filename\n"; next; }
|
|
3032
|
|
3033 # When length sequence context is lower than 10 the image is to small for adding a title
|
|
3034 if($test_lengthSeqContext == 1) { system("weblogo -c classic -F png -U probability --title $title < $fastaFile > $folderSample/$filename-Probability.png"); }
|
|
3035 else { system("weblogo -c classic -F png -U probability < $fastaFile > $folderSample/$filename-Probability.png"); }
|
|
3036 }
|
|
3037 }
|
|
3038 }
|
|
3039
|
|
3040
|
|
3041 # Define the format of the worksheet: Arial font size=10
|
|
3042 sub Format_A10
|
|
3043 {
|
|
3044 my ($wb, $format) = @_;
|
|
3045 $$format = $wb->add_format(font=>'Arial', size=>10); $$format->set_align('center');
|
|
3046 }
|
|
3047 # Define the format of the worksheet: Arial font size=11 bold and center
|
|
3048 sub Format_A11Bold
|
|
3049 {
|
|
3050 my ($wb, $format) = @_;
|
|
3051 $$format = $wb->add_format(font=>'Arial', size=>11, bold=>1); $$format->set_align('center');
|
|
3052 }
|
|
3053 # Define the format of the worksheet: Arial font size=10 italic red and center
|
|
3054 sub Format_A10ItalicRed
|
|
3055 {
|
|
3056 my ($wb, $format) = @_;
|
|
3057 $$format = $wb->add_format(font=>'Arial', size=>10, italic=>1, color => 'red'); $$format->set_align('center');
|
|
3058 }
|
|
3059 # Defile the format of the worksheet: Arialt font size=11 bold and left
|
|
3060 sub Format_A11BoldLeft
|
|
3061 {
|
|
3062 my ($wb, $format) = @_;
|
|
3063 $$format = $wb->add_format(valign =>'left', font=>'Arial', size=>11, bold=>1);
|
|
3064 }
|
|
3065 # Defile the format of the worksheet: Arialt font size=10 bold and left
|
|
3066 sub Format_A10BoldLeft
|
|
3067 {
|
|
3068 my ($wb, $format) = @_;
|
|
3069 $$format = $wb->add_format(valign =>'left', font=>'Arial', size=>10, bold=>1);
|
|
3070 }
|
|
3071 # Define the format of the border of the section (for delimiting the different section of the report)
|
|
3072 sub Format_section
|
|
3073 {
|
|
3074 my ($wb, $format_topLeft, $format_topRight, $format_bottomLeft, $format_bottomRight, $format_top, $format_right, $format_bottom, $format_left) = @_;
|
|
3075
|
|
3076 $$format_topLeft = $wb->add_format(valign => 'left', bold => 1, font => 'Arial', size => 12);
|
|
3077 $$format_topLeft->set_top(2); $$format_topLeft->set_top_color('blue');
|
|
3078 $$format_topLeft->set_left(2); $$format_topLeft->set_left_color('blue');
|
|
3079
|
|
3080 $$format_topRight = $wb->add_format(valign => 'left', bold => 1, font => 'Arial', size => 12);
|
|
3081 $$format_topRight->set_top(2); $$format_topRight->set_top_color('blue');
|
|
3082 $$format_topRight->set_right(2); $$format_topRight->set_right_color('blue');
|
|
3083
|
|
3084 $$format_bottomLeft = $wb->add_format(valign => 'left', bold => 1, font => 'Arial', size => 12);
|
|
3085 $$format_bottomLeft->set_bottom(2); $$format_bottomLeft->set_bottom_color('blue');
|
|
3086 $$format_bottomLeft->set_left(2); $$format_bottomLeft->set_left_color('blue');
|
|
3087
|
|
3088 $$format_bottomRight = $wb->add_format(valign => 'left', bold => 1, font => 'Arial', size => 12);
|
|
3089 $$format_bottomRight->set_bottom(2); $$format_bottomRight->set_bottom_color('blue');
|
|
3090 $$format_bottomRight->set_right(2); $$format_bottomRight->set_right_color('blue');
|
|
3091
|
|
3092 $$format_top = $wb->add_format(); $$format_top->set_top(2); $$format_top->set_top_color('blue');
|
|
3093 $$format_right = $wb->add_format(); $$format_right->set_right(2); $$format_right->set_right_color('blue');
|
|
3094 $$format_bottom = $wb->add_format(); $$format_bottom->set_bottom(2); $$format_bottom->set_bottom_color('blue');
|
|
3095 $$format_left = $wb->add_format(); $$format_left->set_left(2); $$format_left->set_left_color('blue');
|
|
3096 }
|
|
3097 # Define the header
|
|
3098 sub Format_Header
|
|
3099 {
|
|
3100 my ($wb, $format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG, $format_TG2, $format_LeftHeader, $format_RightHeader, $format_LeftHeader2) = @_;
|
|
3101
|
|
3102 my ($blue, $black, $red, $gray, $green, $pink);
|
|
3103 Color($wb, \$blue, \$black, \$red, \$gray, \$green, \$pink);
|
|
3104
|
|
3105 my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
|
|
3106 BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);
|
|
3107
|
|
3108
|
|
3109 $$format_CA = $wb->add_format(bg_color => $blue, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_CA->set_align('center'); $$format_CA->set_center_across();
|
|
3110 $$format_CG = $wb->add_format(bg_color => $black, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_CG->set_align('center'); $$format_CG->set_center_across();
|
|
3111 $$format_CT = $wb->add_format(bg_color => $red, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_CT->set_align('center'); $$format_CT->set_center_across();
|
|
3112 $$format_TA = $wb->add_format(bg_color => $gray, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_TA->set_align('center'); $$format_TA->set_center_across();
|
|
3113 $$format_TC = $wb->add_format(bg_color => $green, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_TC->set_align('center'); $$format_TC->set_center_across();
|
|
3114 $$format_TG = $wb->add_format(bg_color=>$bgColor_pink, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_TG->set_align('center'); $$format_TG->set_center_across();
|
|
3115 $$format_TG->set_right(2); $$format_TG->set_right_color('blue');
|
|
3116
|
|
3117 $$format_TG2 = $wb->add_format(bg_color => $pink, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_TG2->set_align('center'); $$format_TG2->set_center_across();
|
|
3118
|
|
3119 $$format_LeftHeader = $wb->add_format(bold=>1, font=>'Arial', size=>11); $$format_LeftHeader->set_align('center'); $$format_LeftHeader->set_left(2); $$format_LeftHeader->set_left_color('blue');
|
|
3120 $$format_LeftHeader2 = $wb->add_format(bold=>1, font=>'Arial', size=>11); $$format_LeftHeader2->set_left(2); $$format_LeftHeader2->set_left_color('blue');
|
|
3121 $$format_RightHeader = $wb->add_format(bold=>1, font=>'Arial', size=>11); $$format_RightHeader->set_align('center'); $$format_RightHeader->set_right(2); $$format_RightHeader->set_right_color('blue');
|
|
3122 }
|
|
3123 # Define the mutation type header for the Strand bias by segment
|
|
3124 sub Format_HeaderSBSDistrBySegAndFunc
|
|
3125 {
|
|
3126 my ($wb, $format_LeftCA, $format_LeftCG, $format_LeftCT, $format_LeftTA, $format_LeftTC, $format_LeftTG, $format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG) = @_;
|
|
3127
|
|
3128 my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
|
|
3129 BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);
|
|
3130
|
|
3131 $$format_LeftCA = $wb->add_format(bg_color=>$bgColor_blue, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_LeftCA->set_align('center'); $$format_LeftCA->set_left(2); $$format_LeftCA->set_left_color('blue');
|
|
3132 $$format_LeftCG = $wb->add_format(bg_color=>$bgColor_black, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_LeftCG->set_align('center'); $$format_LeftCG->set_left(2); $$format_LeftCG->set_left_color('blue');
|
|
3133 $$format_LeftCT = $wb->add_format(bg_color=>$bgColor_red, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_LeftCT->set_align('center'); $$format_LeftCT->set_left(2); $$format_LeftCT->set_left_color('blue');
|
|
3134 $$format_LeftTA = $wb->add_format(bg_color=>$bgColor_gray, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_LeftTA->set_align('center'); $$format_LeftTA->set_left(2); $$format_LeftTA->set_left_color('blue');
|
|
3135 $$format_LeftTC = $wb->add_format(bg_color=>$bgColor_green, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_LeftTC->set_align('center'); $$format_LeftTC->set_left(2); $$format_LeftTC->set_left_color('blue');
|
|
3136 $$format_LeftTG = $wb->add_format(bg_color=>$bgColor_pink, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_LeftTG->set_align('center'); $$format_LeftTG->set_left(2); $$format_LeftTG->set_left_color('blue');
|
|
3137
|
|
3138
|
|
3139 $$format_RightCA = $wb->add_format(bg_color=>$bgColor_blue, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_RightCA->set_align('center'); $$format_RightCA->set_right(2); $$format_RightCA->set_right_color('blue');
|
|
3140 $$format_RightCG = $wb->add_format(bg_color=>$bgColor_black, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_RightCG->set_align('center'); $$format_RightCG->set_right(2); $$format_RightCG->set_right_color('blue');
|
|
3141 $$format_RightCT = $wb->add_format(bg_color=>$bgColor_red, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_RightCT->set_align('center'); $$format_RightCT->set_right(2); $$format_RightCT->set_right_color('blue');
|
|
3142 $$format_RightTA = $wb->add_format(bg_color=>$bgColor_gray, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_RightTA->set_align('center'); $$format_RightTA->set_right(2); $$format_RightTA->set_right_color('blue');
|
|
3143 $$format_RightTC = $wb->add_format(bg_color=>$bgColor_green, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_RightTC->set_align('center'); $$format_RightTC->set_right(2); $$format_RightTC->set_right_color('blue');
|
|
3144 $$format_RightTG = $wb->add_format(bg_color=>$bgColor_pink, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_RightTG->set_align('center'); $$format_RightTG->set_right(2); $$format_RightTG->set_right_color('blue');
|
|
3145 }
|
|
3146 # Define the mutation type header for the trinucleotide sequence context on the coding strand
|
|
3147 sub Format_Header12MutType
|
|
3148 {
|
|
3149 my ($wb, $format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG) = @_;
|
|
3150
|
|
3151 my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
|
|
3152 BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);
|
|
3153
|
|
3154 $$format_CA = $wb->add_format(bg_color=>$bgColor_blue, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_CA->set_align('center');
|
|
3155 $$format_CG = $wb->add_format(bg_color=>$bgColor_black, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_CG->set_align('center');
|
|
3156 $$format_CT = $wb->add_format(bg_color=>$bgColor_red, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_CT->set_align('center');
|
|
3157 $$format_TA = $wb->add_format(bg_color=>$bgColor_gray, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_TA->set_align('center');
|
|
3158 $$format_TC = $wb->add_format(bg_color=>$bgColor_green, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_TC->set_align('center');
|
|
3159 $$format_TG = $wb->add_format(bg_color=>$bgColor_pink, font=>'Arial', bold=>1, size=>11, color=>'white'); $$format_TG->set_align('center');
|
|
3160 }
|
|
3161 # Define the format for the text that needs a section border
|
|
3162 sub Format_TextSection
|
|
3163 {
|
|
3164 my ($wb, $formatT_left, $formatT_right, $formatT_bottomRight, $formatT_bottomLeft, $formatT_bottom, $formatT_bottomHeader, $formatT_bottomRightHeader, $formatT_bottomHeader2, $formatT_rightHeader) = @_;
|
|
3165
|
|
3166 $$formatT_left = $wb->add_format(valign=>'center', font=>'Arial', size=>10);
|
|
3167 $$formatT_left->set_left(2); $$formatT_left->set_left_color('blue');
|
|
3168
|
|
3169 $$formatT_right = $wb->add_format(valign=>'center', font=>'Arial', size=>10);
|
|
3170 $$formatT_right->set_right(2); $$formatT_right->set_right_color('blue');
|
|
3171
|
|
3172 $$formatT_bottomRight = $wb->add_format(valign=>'center', font=>'Arial', size=>10);
|
|
3173 $$formatT_bottomRight->set_bottom(2); $$formatT_bottomRight->set_bottom_color('blue');
|
|
3174 $$formatT_bottomRight->set_right(2); $$formatT_bottomRight->set_right_color('blue');
|
|
3175
|
|
3176 $$formatT_bottomLeft = $wb->add_format(valign=>'center', font=>'Arial', size=>10);
|
|
3177 $$formatT_bottomLeft->set_bottom(2); $$formatT_bottomLeft->set_bottom_color('blue');
|
|
3178 $$formatT_bottomLeft->set_left(2); $$formatT_bottomLeft->set_left_color('blue');
|
|
3179
|
|
3180 $$formatT_bottom = $wb->add_format(valign=>'center', font=>'Arial', size=>10);
|
|
3181 $$formatT_bottom->set_bottom(2); $$formatT_bottom->set_bottom_color('blue');
|
|
3182
|
|
3183 my $bgColor_totallighGray = $wb->set_custom_color(54, 230, 230, 230);
|
|
3184 $$formatT_bottomHeader = $wb->add_format(bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>11); $$formatT_bottomHeader->set_align('center');
|
|
3185 $$formatT_bottomHeader->set_bottom(2); $$formatT_bottomHeader->set_bottom_color('blue');
|
|
3186
|
|
3187 $$formatT_bottomRightHeader = $wb->add_format(bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>11); $$formatT_bottomRightHeader->set_align('center');
|
|
3188 $$formatT_bottomRightHeader->set_bottom(2); $$formatT_bottomRightHeader->set_bottom_color('blue');
|
|
3189 $$formatT_bottomRightHeader->set_right(2); $$formatT_bottomRightHeader->set_right_color('blue');
|
|
3190
|
|
3191 $$formatT_bottomHeader2 = $wb->add_format(bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>11); $$formatT_bottomHeader2->set_align('center');
|
|
3192
|
|
3193 $$formatT_rightHeader = $wb->add_format(bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>11); $$formatT_rightHeader->set_align('center');
|
|
3194 $$formatT_rightHeader->set_right(2); $$formatT_rightHeader->set_right_color('blue');
|
|
3195 }
|
|
3196 # Define the format for the graphs titles
|
|
3197 sub Format_GraphTitle
|
|
3198 {
|
|
3199 my ($wb, $formatT_graphTitle) = @_;
|
|
3200
|
|
3201 $$formatT_graphTitle = $wb->add_format(font=>'Arial', size=>12, bold=>1);
|
|
3202 }
|
|
3203 # Define the format of the border of the tables
|
|
3204 sub Format_Table
|
|
3205 {
|
|
3206 my ($wb, $table_topleft, $table_topRight, $table_bottomleft, $table_bottomRight, $table_top, $table_right, $table_bottom, $table_bottomItalicRed, $table_left, $table_bottomrightHeader, $table_left2, $table_middleHeader, $table_middleHeader2) = @_;
|
|
3207
|
|
3208 $$table_topleft = $wb->add_format(valign=>'center', bold=>1, font=>'Arial', size=>10); $$table_topleft->set_top(1); $$table_topleft->set_left(1);
|
|
3209 $$table_topRight = $wb->add_format(valign=>'center', bold=>1, font=>'Arial', size=>10); $$table_topRight->set_top(1); $$table_topRight->set_right(1);
|
|
3210 $$table_bottomleft = $wb->add_format(valign=>'center', bold=>1, font=>'Arial', size=>10); $$table_bottomleft->set_bottom(1); $$table_bottomleft->set_left(1);
|
|
3211 $$table_bottomRight = $wb->add_format(valign=>'center', font=>'Arial', size=>10); $$table_bottomRight->set_bottom(1); $$table_bottomRight->set_right(1);
|
|
3212
|
|
3213 $$table_top = $wb->add_format(valign=>'center', bold=>1, font=>'Arial', size=>10); $$table_top->set_top(1);
|
|
3214 $$table_right = $wb->add_format(valign=>'center', font=>'Arial', size=>10); $$table_right->set_right(1);
|
|
3215 $$table_bottom = $wb->add_format(valign=>'center', font=>'Arial', size=>10); $$table_bottom->set_bottom(1);
|
|
3216 $$table_bottomItalicRed = $wb->add_format(valign=>'center', font=>'Arial', size=>10, italic=>1, color => 'red'); $$table_bottomItalicRed->set_bottom(1);
|
|
3217 $$table_left = $wb->add_format(valign=>'center', bold=>1, font=>'Arial', size=>10); $$table_left->set_left(1);
|
|
3218
|
|
3219 my $bgColor_totallighGray = $wb->set_custom_color(54, 230, 230, 230);
|
|
3220 $$table_bottomrightHeader = $wb->add_format(bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>10); $$table_bottomrightHeader->set_bottom(1); $$table_bottomrightHeader->set_right(1);
|
|
3221
|
|
3222 $$table_left2 = $wb->add_format(valign=>'left', font=>'Arial', size=>10); $$table_left2->set_left(1);
|
|
3223
|
|
3224 $$table_middleHeader = $wb->add_format(valign=>'center', bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>10);
|
|
3225 $$table_middleHeader2 = $wb->add_format(valign=>'center', bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>10); $$table_middleHeader2->set_bottom(1);
|
|
3226 }
|
|
3227
|
|
3228 # Define the color
|
|
3229 sub Color
|
|
3230 {
|
|
3231 my ($wb, $blue, $black, $red, $gray, $green, $pink) = @_;
|
|
3232
|
|
3233 $$blue = $wb->set_custom_color(40, 0, 0, 204);# C:G>A:T in blue
|
|
3234 $$black = $wb->set_custom_color(41, 0, 0, 0);# C:G>G:C in black
|
|
3235 $$red = $wb->set_custom_color(42, 255, 0, 0);# C:G>T:A in red
|
|
3236 $$gray = $wb->set_custom_color(43, 205, 205, 205); # T:A>A:T in light gray
|
|
3237 $$green = $wb->set_custom_color(44, 0, 204, 51);# T:A>C:G in green
|
|
3238 $$pink = $wb->set_custom_color(45, 255, 192, 203);# T:A>G:C in pink
|
|
3239 }
|
|
3240 sub BackgroundColor
|
|
3241 {
|
|
3242 my ($wb, $bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink) = @_;
|
|
3243
|
|
3244 $$bgColor_blue = $wb->set_custom_color(48, 0, 0, 204);
|
|
3245 $$bgColor_black = $wb->set_custom_color(49, 0, 0, 0);
|
|
3246 $$bgColor_red = $wb->set_custom_color(50, 255, 0, 0);
|
|
3247 $$bgColor_gray = $wb->set_custom_color(51, 205, 205, 205);
|
|
3248 $$bgColor_green = $wb->set_custom_color(52, 0, 204, 51);
|
|
3249 $$bgColor_pink = $wb->set_custom_color(53, 255, 192, 203);
|
|
3250 }
|
|
3251 }
|
|
3252
|
|
3253
|
|
3254 sub recoverNumCol
|
|
3255 {
|
|
3256 my ($input, $name_of_column) = @_;
|
|
3257
|
|
3258 open(F1,$input) or die "recoverNumCol: $!: $input\n";
|
|
3259 # For having the name of the columns
|
|
3260 my $search_header = <F1>; $search_header =~ s/[\r\n]+$//; my @tab_search_header = split("\t",$search_header);
|
|
3261 close F1;
|
|
3262 # The number of the column
|
|
3263 my $name_of_column_NB = "toto";
|
|
3264 for(my $i=0; $i<=$#tab_search_header; $i++)
|
|
3265 {
|
|
3266 if($tab_search_header[$i] eq $name_of_column) { $name_of_column_NB = $i; last; }
|
|
3267 }
|
|
3268 if($name_of_column_NB eq "toto") { print STDERR "Error recoverNumCol(): the column named $name_of_column doesn't exits in the input file $input!!!!!\n"; exit; }
|
|
3269 else { return $name_of_column_NB; }
|
|
3270 }
|
|
3271
|
|
3272
|
|
3273
|
|
3274
|
|
3275 =head1 NAME
|
|
3276
|
|
3277 mutSpec-Stat
|
|
3278
|
|
3279 =head1 SYNOPSIS
|
|
3280
|
|
3281 mutSpecstat.pl [arguments] <query-file>
|
|
3282
|
|
3283 <query-file> can be a folder with multiple VCF or a single VCF
|
|
3284
|
|
3285 Arguments:
|
|
3286 -h, --help print help message
|
|
3287 -m, --man print complete documentation
|
|
3288 -v, --verbose use verbose output
|
|
3289 --refGenome the reference genome to use (human, mouse or rat genomes)
|
|
3290 -o, --outfile <string> output directory for the result. If none is specify the result will be write in the same directory as the input file
|
|
3291 -temp --pathTemporary <string> the path for saving the temporary files
|
|
3292 --pathSeqRefGenome the path to the fasta reference sequences
|
|
3293 --poolData generate the pool of all the samples (optional)
|
|
3294 --reportSample generate a report for each sample (optional)
|
|
3295
|
|
3296
|
|
3297 Function: automatically run a pipeline and calculate various statistics on mutations
|
|
3298
|
|
3299 Example: mutSpecstat.pl --refGenome hg19 --outfile output_directory --temp path_to_temporary_directory --pathRscript path_to_R_scripts --pathSeqRefGenome path_fasta_ref_seq --poolData --reportSample input
|
|
3300
|
|
3301 Version: 04-2016 (April 2016)
|
|
3302
|
|
3303
|
|
3304 =head1 OPTIONS
|
|
3305
|
|
3306 =over 8
|
|
3307
|
|
3308 =item B<--help>
|
|
3309
|
|
3310 print a brief usage message and detailed explanation of options.
|
|
3311
|
|
3312 =item B<--man>
|
|
3313
|
|
3314 print the complete manual of the program.
|
|
3315
|
|
3316 =item B<--verbose>
|
|
3317
|
|
3318 use verbose output.
|
|
3319
|
|
3320 =item B<--refGenome>
|
|
3321
|
|
3322 the reference genome to use, could be human, mouse or rat genomes.
|
|
3323
|
|
3324 =item B<--outfile>
|
|
3325
|
|
3326 the directory of output file names. If it is nor specify the same directory as the input file is used.
|
|
3327
|
|
3328 =item B<--pathTemporary>
|
|
3329
|
|
3330 the path for saving temporary files generated by the script.
|
|
3331 If any is specify a temporary folder is created in the same directory where the script is running.
|
|
3332 Deleted when the script is finish
|
|
3333
|
|
3334 =item B<--pathSeqRefGenome>
|
|
3335
|
|
3336 The path to the fasta reference sequences
|
|
3337
|
|
3338 =item B<--poolData only for the report>
|
|
3339
|
|
3340 calculate the statistics on the pool of all the data pass in input
|
|
3341
|
|
3342 =item B<--reportSample only for the report>
|
|
3343
|
|
3344 generate a report for each samples
|
|
3345
|
|
3346 =head1 DESCRIPTION
|
|
3347
|
|
3348 mutSpecstat is a perl script for calculated various statistics on mutations
|
|
3349 An Excel report containing the mutation type distribution per functional region, the strand bias and the sequence context on genomic and coding sequence is created.
|
|
3350 The different statistics are illustrated using ggplot2.
|
|
3351
|
|
3352 =cut
|