annotate mutspecStat.pl @ 2:9d363eb081b5 draft

Uploaded
author iarc
date Thu, 28 Apr 2016 03:43:25 -0400
parents 8c682b3a7c5b
children 46a10309dfe2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1 #!/usr/bin/env perl
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3 #-----------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
4 # Author: Maude #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
5 # Script: mutspecStat.pl #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
6 # Last update: 09/04/16 #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
7 #-----------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
8
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
9 use strict;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
10 use warnings;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
11 use Getopt::Long;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
12 use Pod::Usage;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
13 use File::Basename; # my ($filename, $directories, $suffix) = fileparse($file, qr/\.[^.]*/);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
14 use File::Path;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
15 use Statistics::R;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
16 use Spreadsheet::WriteExcel;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
17
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# ---------------------------------------------------------------------------------------------
# Command line parsing
# ---------------------------------------------------------------------------------------------
# Documentation / verbosity switches, all off by default.
our ($verbose, $man, $help) = (0, 0, 0);

# String options. The literal "empty" is the sentinel meaning "not given on the command line";
# CheckFlags() tests against it.
#   $refGenome         : the reference genome to use
#   $output            : the path for saving the result
#   $folder_temp       : the path for saving the temporary files
#   $path_R_Scripts    : the path to the R scripts
#   $path_SeqrefGenome : the path to the fasta reference sequences
our ($refGenome, $output, $folder_temp, $path_R_Scripts, $path_SeqrefGenome) = ("empty", "empty", "empty", "empty", "empty");

# Boolean switches. They default to 2 and GetOptions sets them to 1 when the flag is present.
#   $poolData           : if a folder is passed as input, pool all the data and generate the report on the pool and for each sample
#   $oneReportPerSample : generate one report for each sample
our ($poolData, $oneReportPerSample) = (2, 2);

# Print the usage if there is a syntax error in the options
GetOptions('verbose|v'=>\$verbose, 'help|h'=>\$help, 'man|m'=>\$man, 'refGenome=s'=>\$refGenome, 'outfile|o=s' => \$output, 'pathTemporary|temp=s' => \$folder_temp, 'pathRscript=s' => \$path_R_Scripts, 'pathSeqRefGenome=s' => \$path_SeqrefGenome, 'poolData' => \$poolData, 'reportSample' => \$oneReportPerSample) or pod2usage(2);

# The only positional argument is the input (one file or one folder)
our ($input) = @ARGV;

# Usage explicitly requested
pod2usage(-verbose=>1, -exitval=>1, -output=>\*STDERR) if ($help);
pod2usage(-verbose=>2, -exitval=>1, -output=>\*STDERR) if ($man);
# Exactly one positional argument (the input) is expected: print the usage when there is none or more than one
pod2usage(-verbose=>0, -exitval=>1, -output=>\*STDERR) if(@ARGV != 1);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
31
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
32
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
33
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
34 ######################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
35 # GLOBAL VARIABLES #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
36 ######################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Directory the script was started from (output of the `pwd` command, end of line removed)
chomp(our $pwd = `pwd`);

# Full paths of the two R scripts, both located under <pathRscript>/R
our $pathRScriptTxnSB       = $path_R_Scripts . "/R/transciptionalStrandBias.r";
our $pathRScriptMutSpectrum = $path_R_Scripts . "/R/mutationSpectra_Galaxy.r";

# Folder receiving the report; filled in by CheckFlags()
our $folderMutAnalysis = "";
# Components of the input path (split on "/")
our @pathInput = split("/", $input);

# Hash table with the length of each chromosome; filled in by checkChrDir()
our %chromosomes;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
50
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
51 ######################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
52 # MAIN #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
53 ######################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Step 1: validate the flags and create the output and temporary directories
CheckFlags();

# Step 2: fill %chromosomes with the length of each chromosome
checkChrDir();

# Banner written on the standard output
print "-----------------------------------------------------------------\n",
      "-----------------Report Mutational Analysis----------------------\n",
      "-----------------------------------------------------------------\n";

# Step 3: stop here if the input is not annotated
CheckAnnotationFile($input);

# Step 4: calculate the statistics and generate the report.
# The array holds the names of the chromosome, start, reference and alternate columns.
my @colInfoAV = qw(Chr Start Ref Alt);
ReportMutDist($input, $folderMutAnalysis, $folder_temp, \@colInfoAV, $refGenome);

# Step 5: remove the temporary directory
rmtree($folder_temp);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
74
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
75
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
76 ######################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
77 # FUNCTIONS #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
78 ######################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
79
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Check the presence of the flags and create the output and temp directories.
#
# Takes no argument: works on the file-level globals.
#   Reads : $refGenome, $path_R_Scripts, $output, $input, $pwd, @pathInput
#   Writes: $folderMutAnalysis (always), $folder_temp (only when it was not given)
#
# Prints a message on STDERR and exits with status 1 when a mandatory flag is missing.
# Dies when a directory cannot be created or when the input folder cannot be read.
sub CheckFlags
{
	# Mandatory flags: validate them first so that nothing is created on disk when the call is wrong
	if($refGenome eq "empty") { print STDERR "You forget to specify the name for the reference genome!!!\nPlease specify it with the flag --refGenome\n"; exit 1; }
	if($path_R_Scripts eq "empty") { print STDERR "You forget to specify the path for the R scripts!!!\nPlease specify it with the flag --pathRscript\n"; exit 1; }

	# Directories to create, in order (a parent always comes before its child)
	my @dirsToCreate = ();

	# If no output is specified write the result at the same place as the input file.
	# dirname() returns "." for an input without directory part, so the report lands in the
	# current directory instead of "/Mutational_Analysis" at the root of the filesystem.
	if($output eq "empty")
	{
		$folderMutAnalysis = dirname($input)."/Mutational_Analysis";
	}
	else
	{
		push(@dirsToCreate, $output);
		$folderMutAnalysis = "$output/Mutational_Analysis";
	}
	push(@dirsToCreate, $folderMutAnalysis);

	# If no temp folder is specified write the temporary files in the current path
	if($folder_temp eq "empty") { $folder_temp = "$pwd/TEMP_MutationalAnalysis_$pathInput[$#pathInput]"; }
	push(@dirsToCreate, $folder_temp);

	foreach my $dir (@dirsToCreate)
	{
		if(!-e $dir) { mkdir($dir) or die "$!: $dir\n"; }
	}

	# The input is a folder: check every visible entry (same set as a plain "ls", without going through a shell)
	if(-d $input)
	{
		opendir(my $dh, $input) or die "$!: $input\n";
		my @files = sort grep { !/^\./ } readdir($dh);
		closedir($dh);

		foreach my $file (@files) { CheckLengthFilename("$input/$file"); }
	}
	# The input is one file
	else { CheckLengthFilename($input); }

	return;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Check the length of the file name, must be <= 31 characters for the Excel sheet name.
#
#   $inputFile : path of the file to check
#
# Only the base name without its last extension is measured. Returns nothing when the name
# is valid; prints a message on STDERR and exits with status 1 otherwise.
sub CheckLengthFilename
{
	my ($inputFile) = @_;

	# Names coming from a directory listing may still carry their end of line: remove it
	# so that it is neither counted in the length nor printed in the error message
	$inputFile =~ s/[\r\n]+$//;

	## Verify the name of file, must be <= 31 chars for the sheet name
	my ($filename, $directories, $suffix) = fileparse($inputFile, qr/\.[^.]*/);

	if(length($filename) > 31) { print STDERR "The file: $inputFile must be <= 31 chars\nPlease modify it before running the script\n"; exit 1; }

	return;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
126
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Retrieve chromosomes length.
#
# Takes no argument: reads the globals $path_SeqrefGenome and $refGenome and fills the global
# hash %chromosomes with "chr<name>" => number of bases, for every file of the folder
# <path_SeqrefGenome>/<refGenome>_seq whose name ends with chr<digits>.fa, chrX.fa or chrY.fa
# (X and Y in any case).
#
# When the folder cannot be read a message is printed on STDERR and %chromosomes is left
# untouched. Dies when one of the fasta files cannot be opened.
sub checkChrDir
{
	my $folderSeq = "$path_SeqrefGenome/${refGenome}_seq";

	# List the fasta files without going through a shell
	my $dh;
	if(!opendir($dh, $folderSeq)) { print STDERR "Can't read the directory $folderSeq: $!\n"; return; }
	my @files = grep { !/^\./ && /\.fa$/ } readdir($dh);
	closedir($dh);

	foreach my $file (@files)
	{
		# Keep only the numbered and the sex chromosomes. The match is done on the file name
		# alone, so a "chr" somewhere in the directory path can't leak into the hash key.
		if($file !~ /chr(\d+|x|y)\.fa$/i) { next; }
		my $chrName = "chr".$1;

		open(my $fh, "<", "$folderSeq/$file") or die "$!: $folderSeq/$file\n";
		# The first line is the fasta header
		my $header = <$fh>;
		# Sum the length of each line instead of keeping the whole sequence in memory
		my $length = 0;
		while(my $line = <$fh>) { chomp($line); $length += length($line); }
		close($fh);

		$chromosomes{$chrName} = $length;
	}

	return;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
142
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Check if the file is annotated or not.
#
#   $inputFile : path of one file, or of a folder in which every visible entry is checked
#
# A file is considered annotated when the first line (tab separated header) contains a column
# named exactly "Func.refGene". Returns nothing when every file is annotated; prints a message
# on STDERR and exits with status 1 at the first file that is not.
# Dies when the folder or one of the files cannot be opened.
sub CheckAnnotationFile
{
	my ($inputFile) = @_;

	# Build the list of files to check
	my @filesToCheck = ();
	# A folder is passed in argument
	if(-d $inputFile)
	{
		opendir(my $dh, $inputFile) or die "$!: $inputFile\n";
		@filesToCheck = map { "$inputFile/$_" } sort grep { !/^\./ } readdir($dh);
		closedir($dh);
	}
	# One file is passed in argument
	else { @filesToCheck = ($inputFile); }

	foreach my $file (@filesToCheck)
	{
		# Only the header is needed
		open(my $fh, "<", $file) or die "$!: $file\n";
		my $search_header = <$fh>;
		close($fh);

		# An empty file has no header at all: handle it as a header without any column
		if(!defined $search_header) { $search_header = ""; }
		$search_header =~ s/[\r\n]+$//;

		# Number of header columns named Func.refGene
		my $nbColFunc = grep { $_ eq "Func.refGene" } split("\t", $search_header);

		if($nbColFunc == 0) { print STDERR "Error the input file you specify is not annotated! $file !!!!\nPlease first annotate your file before trying to generate the report on mutation patterns\n"; exit 1; }
	}

	return;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
181
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
182 # Calculate the statistics and generate the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
183 sub ReportMutDist
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
184 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
185 our ($input, $output, $folder_temp, $refTab_colInfo, $refGenome) = @_;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
186
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
187 my @column = @$refTab_colInfo;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
188
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
189 our ($chr_name, $start_name, $ref_name, $alt_name) = split(/,/, join(',', @column)); # Separe each element
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
190
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
191 our $func_name = "Func.refGene";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
192 our $exonicFunc_name = "ExonicFunc.refGene";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
193 our $strand_name = "Strand";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
194 our $context_name = "context";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
195
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
196 my $folderFigure = "$output/Figures";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
197 if(-e $folderFigure) { rmtree($folderFigure); mkdir($folderFigure) or die "Can't create the directory $folderFigure\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
198 else { mkdir($folderFigure) or die "Can't create the directory $folderFigure\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
199 my $folderChi2 = "$folderFigure/Chi2";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
200 if(!-e $folderChi2) { mkdir($folderChi2) or die "Can't create the directory $folderChi2\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
201 my $folderWebLogo = "$folderFigure/WebLogo";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
202 if(!-e $folderWebLogo) { mkdir($folderWebLogo) or die "Can't create the directory $folderWebLogo\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
203 my $folderNMF = "$folderFigure/Input_NMF";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
204 if(!-e $folderNMF) { mkdir($folderNMF) or die "Can't create the directory $folderNMF\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
205
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
206 ################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
207 ### Calculates all the statistics ###
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
208 ################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
209 ############ Recover Annovar annotations (for having the save number of functional regions for each samples)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
210 my @tab_func = recoverAnnovarAnnotation($input, $func_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
211 if(@tab_func == 0) { print STDERR "Error the table for the functional region is empty!!!!! check $input $func_name\n"; exit; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
212
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
213 ############ Calculate the different statistics present in the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
214 my %h_file = ();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
215 CalculateStatistics(\%h_file, \@tab_func);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
216
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
217 ############ Calculate the chi2 for the strand bias
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
218 CalculateChi2(\%h_file, $folderChi2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
219
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
220 ############ Write the different statistics present in the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
221 WriteStatistics(\%h_file, $#tab_func, $folderFigure, $folderChi2, $folderNMF);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
222
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
223 ############ Create logo for studying the 10 flanking bases of the mutation
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
224 CreateLogo(\%h_file, $folderWebLogo);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
225
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
226
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
227 ################### Subroutines for generating the report for the mutational analysis
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Recover the distinct values of one annotation column over the whole input.
#
#   $input         : path of one file, or of a folder in which every visible entry is read
#   $AV_annotation : name of the annotation column (passed to recoverNumCol together with the file)
#
# For each file the first line is skipped as header and every other line is split on tabs.
# The value returned by recoverNumCol is used as the index of the column to read. When the
# column holds several values separated by ";" only the first one is kept.
# Empty lines are ignored for a single file as well as for a folder.
#
# Returns the sorted list of the distinct values.
# Dies when the folder or one of the files cannot be opened.
sub recoverAnnovarAnnotation
{
	my ($input, $AV_annotation) = @_;

	# Build the list of files to read
	my @files = ();
	# The input is a folder
	if(-d $input)
	{
		opendir(my $dh, $input) or die "$!: $input\n";
		@files = map { "$input/$_" } sort grep { !/^\./ } readdir($dh);
		closedir($dh);
	}
	# The input is a file
	else { @files = ($input); }

	# Distinct values found (only the keys are used)
	my %hash = ();

	foreach my $file (@files)
	{
		my $AV_annotation_value = recoverNumCol($file, $AV_annotation);

		open(my $fh, "<", $file) or die "$!: $file\n";
		my $header = <$fh>;
		while(my $line = <$fh>)
		{
			$line =~ s/[\r\n]+$//;
			my @tab = split("\t", $line);

			# Some files can have an empty line at the end and WE DON'T WANT to consider it
			if(! defined $tab[0]) { next; }

			# A line too short to reach the column counts as an empty value
			my $annotation = $tab[$AV_annotation_value];
			if(! defined $annotation) { $annotation = ""; }

			# Some func value are repeated and separated by ";": keep the first one
			my ($funcSegment) = split(";", $annotation);
			if(! defined $funcSegment) { $funcSegment = ""; }

			$hash{$funcSegment} = "";
		}
		close($fh);
	}

	my @tab_AVAnnotation = sort keys %hash;
	return @tab_AVAnnotation;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
289 # Calculate the different statistics present in the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
290 sub CalculateStatistics
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
291 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
292 my ($refH_file, $refT_func) = @_;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
293
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
294 my ($chr_value, $start_value, $ref_value, $alt_value, $func_value, $exonicFunc_value, $strand_value, $contextSeq_value) = ("", "", "", "", "", "", "", "", "", "");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
295
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
296 # If the input is a folder
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
297 if(-d $input)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
298 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
299 my $folderPool = "$folder_temp/Pool";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
300 if(!-e $folderPool) { mkdir($folderPool) or die "Can't create the directory $folderPool\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
301
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
302 # Copy each sample
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
303 foreach my $file (`ls $input`) { chomp($file); system("cp $input/$file $folderPool"); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
304
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
305 # Generate the pool of all the data
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
306 if($poolData == 1)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
307 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
308 my @listFile = `ls $input`;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
309
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
310 # For keeping the header only one time
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
311 chomp($listFile[0]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
312 system("cp $input/$listFile[0] $folderPool/Pool_Data.txt");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
313
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
314 open(OUT, ">>", "$folderPool/Pool_Data.txt") or die "$!: $folderPool/Pool_Data.txt\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
315
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
316 for(my $i=1; $i<=$#listFile; $i++)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
317 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
318 chomp($listFile[$i]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
319 open(F1, "$input/$listFile[$i]") or die "$!: $input/$listFile[$i]\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
320 my $header = <F1>;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
321 while(<F1>) { print OUT $_; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
322 close F1;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
323 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
324 close OUT;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
325 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
326
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
327 foreach my $file (`ls $folderPool`)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
328 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
329 chomp($file);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
330 ############ Recover the number of the columns of interest
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
331 $chr_value = recoverNumCol("$folderPool/$file", $chr_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
332 $start_value = recoverNumCol("$folderPool/$file", $start_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
333 $ref_value = recoverNumCol("$folderPool/$file", $ref_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
334 $alt_value = recoverNumCol("$folderPool/$file", $alt_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
335 $func_value = recoverNumCol("$folderPool/$file", $func_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
336 $exonicFunc_value = recoverNumCol("$folderPool/$file", $exonicFunc_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
337 $strand_value = recoverNumCol("$folderPool/$file", $strand_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
338 $contextSeq_value = recoverNumCol("$folderPool/$file", $context_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
339 ############ Recover the number of the columns of interest
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
340
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
341 ############ Control the annotated file pass in argument
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
342 ## Check if the files have variants
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
343 my $nbLines_originalFile = `wc -l $folderPool/$file`; $nbLines_originalFile =~ /(\d+) /;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
344 if($1==1) { print STDERR "\n\nNo line in the file $folderPool/$file\n\n"; exit; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
345 ## Check if there is variant with strand information. If not the rest of the script generates errors
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
346 my $testFile = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
347 CheckVariantReport("$folderPool/$file", $strand_value, \$testFile);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
348 if($testFile==0) { print STDERR "\n\nNo strand information for the file $folderPool/$file\n\n"; exit; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
349 ############ Control the annotated file pass in argument
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
350
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
351 ############ Calculate the statistics
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
352 File2Hash("$folderPool/$file", $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
353 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
354 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
355 # If the input is a file
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
356 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
357 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
358 ############ Recover the number of the columns of interest
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
359 $chr_value = recoverNumCol($input, $chr_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
360 $start_value = recoverNumCol($input, $start_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
361 $ref_value = recoverNumCol($input, $ref_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
362 $alt_value = recoverNumCol($input, $alt_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
363 $func_value = recoverNumCol($input, $func_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
364 $exonicFunc_value = recoverNumCol($input, $exonicFunc_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
365 $strand_value = recoverNumCol($input, $strand_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
366 $contextSeq_value = recoverNumCol($input, $context_name);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
367 ############ Recover the number of the columns of interest
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
368
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
369 ############ Control the annotated file pass in argument
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
370 ## Check if the files have variants
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
371 my $nbLines_originalFile = `wc -l $input`; $nbLines_originalFile =~ /(\d+) /;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
372 if($1==1) { print STDERR "\n\nNo line in the file $input\n\n"; exit; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
373 ## Check if there is variant with strand information. If not the rest of the script generates errors
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
374 my $testFile = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
375 CheckVariantReport($input, $strand_value, \$testFile);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
376 if($testFile==0) { print STDERR "\n\nNo strand information for the file $input\n\n"; exit; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
377 ############ Control the annotated file pass in argument
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
378
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
379 ############ Calculate the statistics
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
380 File2Hash($input, $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
381 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
382 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
383 # Check whether the file contains at least one variant with strand information ("+" or "-")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Count the variants of a tab-delimited annotated file that carry strand
# information.
#
# Arguments:
#   $file          - path of the tab-delimited file to scan; its first line is
#                    treated as a header and is never counted.
#   $strand_value  - 0-based index of the column holding the strand.
#   $refS_testFile - reference to a scalar counter; it is incremented once for
#                    every data line whose strand column is exactly "+" or "-".
#                    The caller initialises it (callers test it against 0).
#
# Dies with "<system error>: <file>" when the file cannot be opened.
sub CheckVariantReport
{
	my ($file, $strand_value, $refS_testFile) = @_;

	# Three-argument open with a lexical handle: the path is taken literally
	# (no mode characters, pipes or whitespace trimming are interpreted from
	# the file name) and the handle cannot clash with another sub's F1.
	open(my $fh, '<', $file) or die "$!: $file\n";

	# The first line is the column header: read it and ignore it.
	my $header = <$fh>;

	while(my $line = <$fh>)
	{
		# Strip Unix and Windows line endings.
		$line =~ s/[\r\n]+$//;
		my @tab = split("\t", $line);

		# split() drops trailing empty fields, so a short or truncated line
		# may have no strand column at all: such a line has no strand
		# information and must not raise an "uninitialized value" warning.
		my $strand = $tab[$strand_value];
		next unless defined $strand;

		if( ($strand eq "+") || ($strand eq "-") ) { $$refS_testFile++; }
	}
	close $fh;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
399 # Convert the annotated VCF into a hash table
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
400 sub File2Hash
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
401 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
402 my ($inputFile, $func_value, $exonicFunc_value, $chr_value, $ref_value, $alt_value, $strand_value, $contextSeq_value, $refH_file, $refT_func) = @_;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
403 my ($filename, $directories, $suffix) = fileparse($inputFile, qr/\.[^.]*/);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
404
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
405 # Initialisation of the hash
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
406 my @tab_mutation = qw(C:G>A:T C:G>G:C C:G>T:A T:A>A:T T:A>C:G T:A>G:C);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
407 my @tab_aaChange = ("NonTr", "Tr", "TotalMutG");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
408 my @tabExoFunc = ("frameshift insertion", "frameshift deletion", "frameshift block substitution", "frameshift substitution", "stopgain", "stoploss", "nonframeshift insertion", "nonframeshift deletion", "nonframeshift substitution", "nonframeshift block substitution", "nonsynonymous SNV", "synonymous SNV", "unknown", "NA");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
409
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
410 # Total number of SBS on the genomic strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
411 $refH_file->{$filename}{'TotalSBSGenomic'} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
412 # Total number of Indel on the genomic strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
413 $refH_file->{$filename}{'TotalIndelGenomic'} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
414 # Total number of SBS on the coding strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
415 $refH_file->{$filename}{'TotalSBSCoding'} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
416 # Total number of SBS and Indel on the genomic strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
417 $refH_file->{$filename}{'TotalMutGenomic'} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
418
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
419 #####################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
420 # SBS by segment (6 mutation types) #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
421 #####################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
422 foreach my $elt_tabFunc (@$refT_func)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
423 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
424 foreach my $elt_tabMutation (@tab_mutation)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
425 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
426 foreach my $elt_aaChange (@tab_aaChange)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
427 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
428 $refH_file->{$filename}{'6mutType'}{$elt_tabFunc}{$elt_tabMutation}{$elt_aaChange} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
429 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
430 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
431 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
432
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
433 #######################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
434 # Pearson correlation #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
435 #######################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
436 $refH_file->{$filename}{'SBSPerChr'}{'AllMutType'} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
437 # Count of SBS per chromosome foreach mutation types
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
438 foreach my $elt_tabMutation (@tab_mutation)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
439 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
440 foreach my $chromosome (sort keys %chromosomes){ $refH_file->{$filename}{'SBSPerChr'}{$elt_tabMutation}{'CHR'}{$chromosome}{'chr'} = 0;}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
441 $refH_file->{$filename}{'SBSPerChr'}{$elt_tabMutation}{'Pearson'} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
442 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
443
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
444 foreach my $chromosome (sort keys %chromosomes){
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
445 $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'}=0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
446 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
447
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
448 ############################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
449 # Impact of SBS on protein #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
450 ############################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
451 foreach my $elt_exoFunc (@tabExoFunc)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
452 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
453 $refH_file->{$filename}{'ImpactSBS'}{$elt_exoFunc} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
454 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
455
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
456 #####################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
457 # Sequence context (genomic strand) #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
458 #####################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
459 my @tab_mutation2 = qw(C>A C>G C>T T>A T>C T>G);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
460 my @tab_context = qw(A_A A_C A_G A_T C_A C_C C_G C_T G_A G_C G_G G_T T_A T_C T_G T_T);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
461 foreach my $elt_context (@tab_context)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
462 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
463 foreach my $elt_mutation3 (@tab_mutation2)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
464 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
465 $refH_file->{$filename}{'SeqContextG'}{$elt_context}{$elt_mutation3} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
466 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
467 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
468
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
469 ####################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
470 # Sequence context (coding strand) #
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
471 ####################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
472 my @tab_TrNonTr = qw(NonTr Tr);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
473 foreach my $elt_context (@tab_context)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
474 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
475 foreach my $elt_mutation2 (@tab_mutation2)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
476 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
477 foreach my $trNonTr (@tab_TrNonTr)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
478 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
479 $refH_file->{$filename}{'SeqContextC'}{$elt_context}{$elt_mutation2}{$trNonTr} = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
480 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
481 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
482 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
483
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
484 open(F1,$inputFile) or die "$!: $inputFile\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
485 my $header = <F1>;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
486 while(<F1>)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
487 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
488 $_ =~ s/[\r\n]+$//;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
489 my @tab = split("\t", $_);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
490
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
491 # Random chromosome and chromosome M
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
492 if( ($tab[$chr_value] =~ /random/i) || ($tab[$chr_value] =~ /M/i) ) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
493
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
494 ############################################## Extract the base just before and after the mutation ##############################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
495 my $context = "";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
496 my $contextSequence = $tab[$contextSeq_value]; $contextSequence =~ tr/a-z/A-Z/;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
497 my @tempContextSequence = split("", $contextSequence);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
498 my $total_nbBaseContext = $#tempContextSequence;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
499 my $midlle_totalNbBaseContext = $total_nbBaseContext/2; # For having the middle of the sequence
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
500 my $before = $midlle_totalNbBaseContext - 1; my $after = $midlle_totalNbBaseContext + 1;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
501 $context = $tempContextSequence[$before]."_".$tempContextSequence[$after];
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
502 ############################################## Extract the base just before and after the mutation ##############################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
503
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
504
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
505 ############################################################### Impact on protein ###############################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
506 my $exoFunc = "";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
507 # Sometimes the annotation is repeated frameshift deletion;frameshift deletion
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
508 if($tab[$exonicFunc_value] =~ /\;/)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
509 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
510 my @temp = split(";", $tab[$exonicFunc_value]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
511 if($temp[0] eq $temp[1]) { $exoFunc = $temp[0]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
512 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
513 # The annotations have changed after MAJ Annovar 2014Jul22 (stopgain SNV => stopgain)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
514 elsif($tab[$exonicFunc_value] eq "stopgain SNV") { $exoFunc = "stopgain"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
515 elsif($tab[$exonicFunc_value] eq "stoploss SNV") { $exoFunc = "stoploss"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
516 elsif($tab[$exonicFunc_value] eq "nonsynonymous_SNV") { $exoFunc = "nonsynonymous SNV"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
517 elsif($tab[$exonicFunc_value] eq "stopgain_SNV") { $exoFunc = "stopgain SNV"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
518 elsif($tab[$exonicFunc_value] eq "synonymous_SNV") { $exoFunc = "synonymous SNV"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
519 else { $exoFunc = $tab[$exonicFunc_value]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
520
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
521 if(exists $refH_file->{$filename}{'ImpactSBS'}{$exoFunc})
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
522 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
523 # If the sequence context if not recovered correctly don't considered the variants
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
524 if( ($context =~ /N/) || (length($context) != 3) ) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
525
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
526 $refH_file->{$filename}{'ImpactSBS'}{$exoFunc}++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
527 $refH_file->{$filename}{'TotalMutGenomic'}++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
528 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
529 else { print "WARNING: Exonic function not considered: $exoFunc\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
530 ############################################################### Impact on protein ###############################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
531
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
532 ################################################### Only SBS are considered for the statistics ##################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
533 if( ($tab[$ref_value] =~ /^[ACGT]$/i) && ($tab[$alt_value] =~ /^[ACGT]$/i) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
534 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
535 # If the sequence context if not recovered correctly don't considered the variants
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
536 if( ($context =~ /N/) || (length($context) != 3) ) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
537
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
538 # Total number of SBS on the genomic strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
539 $refH_file->{$filename}{'TotalSBSGenomic'}++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
540
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
541 # Total number of SBS on the coding strand with a sequence context
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
542 if( ($tab[$strand_value] eq "+") || ($tab[$strand_value] eq "-") )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
543 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
544 if( ($context ne "NA") && (($context =~ /N/) || (length($context) != 3)) ) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
545 $refH_file->{$filename}{'TotalSBSCoding'}++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
546 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
547 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
548 else { $refH_file->{$filename}{'TotalIndelGenomic'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
549 ################################################### Only SBS are considered for the statistics ##################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
550
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
551 # Number of SBS per chromosome: remove the "chr"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
552 my $chrNameForH=$tab[$chr_value];
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
553 if(exists $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chrNameForH}{'chr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
554
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
555
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
556 ################################################### Some func value are repeated and separated by ";" ##################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
557 my $funcSegment = "";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
558 if($tab[$func_value] =~ /;/) { my @temp = split(";", $tab[$func_value]); $funcSegment = $temp[0]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
559 else { $funcSegment = $tab[$func_value]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
560
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
561
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
562 ############################################################### MUTATION C> #############################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
563 ###################################### C:G>A:T
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
564 if( (($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "A")) || ( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "T") ) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
565 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
566 my $mutation = "C:G>A:T";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
567 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
568
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
569 # Pearson correlation
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
570 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
571
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
572 # Sequence context - 6 mutation types - genomic strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
573 my $mutationSeqContext6mutType = "C>A";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
574 # We want to express the mutation in C>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
575 if( ($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "T") )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
576 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
577 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
578 my $context_reverse = $base5."_".$base3;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
579 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
580 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
581 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
582
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
583 # Strand analysis C>A on NonTr strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
584 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "T"))) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
585 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
586 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
587
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
588 # C>A With the sequence context (C>A strand = +)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
589 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A")) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
590 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
591 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'NonTr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
592 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
593 # C>A With the sequence context (G>T strand = -)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
594 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
595 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
596 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
597 my $context_reverse = $base5."_".$base3;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
598 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'NonTr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
599 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
600 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
601 # Strand analysis C>A on Tr strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
602 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "T"))) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
603 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
604 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
605
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
606 # C>A With the sequence context (C>A strand = -)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
607 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "C")&&($tab[$alt_value] eq "A")) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
608 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
609 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'Tr'}) { { $refH_file->{$filename}{'SeqContextC'}{$context}{'C>A'}{'Tr'}++; } }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
610 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
611 # C>A with the sequence context (G>T strand = +)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
612 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "G")&&($tab[$alt_value] eq "T")) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
613 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
614 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
615 my $context_reverse = $base5."_".$base3;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
616 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'C>A'}{'Tr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
617 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
618 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
619 # WebLogo-3
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
620 if(($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "A"))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
621 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
622 # For the logo all the sequences must have the same length
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
623 if(scalar(@tempContextSequence) == 2) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
624 my ($contextTemp1, $contextTemp2) = ("", "");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
625 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
626 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
627 my $context = $contextTemp1."C".$contextTemp2;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
628 push(@{$refH_file->{$filename}{'WebLogo3'}{'CA'}}, $context);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
629 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
630 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
631 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
632 if(scalar(@tempContextSequence) == 2) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
633 my ($contextTemp1, $contextTemp2) = ("", "");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
634 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
635 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
636 my $context = $contextTemp1."C".$contextTemp2; $context = reverse $context;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
637 push(@{$refH_file->{$filename}{'WebLogo3'}{'CA'}}, $context);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
638 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
639 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
640 ###################################### C:G>G:C
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
if(    (($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "G"))
    || (($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "C")) ) {
    # C:G>G:C substitution: update every per-file counter fed by this variant.
    my $mutation         = "C:G>G:C";
    my $pyrimidineChange = "C>G";   # the same event expressed from the C base

    # Count the total number of mutations
    $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++;

    # Both hashes are guaranteed to exist now that the total has been incremented.
    my $fileStats  = $refH_file->{$filename};
    my $typeCounts = $fileStats->{'6mutType'}{$funcSegment}{$mutation};

    # Inside this block the variant is either C>G or G>C, so the reference base
    # alone tells which orientation was reported.
    my $refIsC = ($tab[$ref_value] eq "C");

    # Flanking bases as read on the opposite strand: complement(after)_complement(before).
    my $oppositeStrandContext = sub {
        my $complBefore = complement($tempContextSequence[$before]);
        my $complAfter  = complement($tempContextSequence[$after]);
        return $complAfter."_".$complBefore;
    };

    # Pearson correlation: only counters that already exist are incremented
    if(exists $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) {
        $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++;
    }

    # Sequence context - 6 mutation types - genomic strand
    # We want to express the mutation in C>, so a G>C variant uses the opposite-strand context.
    my $genomicContext = $refIsC ? $context : $oppositeStrandContext->();
    if(exists $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}) {
        $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}++;
    }

    # Strand analysis:
    #   NonTr = C>G on strand + or G>C on strand -
    #   Tr    = C>G on strand - or G>C on strand +
    # Any other strand value leaves the strand counters untouched.
    my $strandLabel;
    if   ($tab[$strand_value] eq "+") { $strandLabel = $refIsC ? 'NonTr' : 'Tr'; }
    elsif($tab[$strand_value] eq "-") { $strandLabel = $refIsC ? 'Tr' : 'NonTr'; }

    if(defined $strandLabel) {
        if(exists $typeCounts->{$strandLabel}) { $typeCounts->{$strandLabel}++; }

        # C>G with the sequence context (opposite-strand context when the variant is G>C)
        my $strandContext = $refIsC ? $context : $oppositeStrandContext->();
        if(exists $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}) {
            $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}++;
        }
    }

    # WebLogo-3
    # A context sequence of exactly two elements ends the processing of this variant.
    if(scalar(@tempContextSequence) == 2) { next; }

    # Flanks are copied as-is for C>G and complemented base by base for G>C.
    my ($leftFlank, $rightFlank) = ("", "");
    my $pos = 0;
    while($pos < $midlle_totalNbBaseContext) {
        $leftFlank .= $refIsC ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }
    $pos = $midlle_totalNbBaseContext + 1;
    while($pos <= $#tempContextSequence) {
        $rightFlank .= $refIsC ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }

    # The logo is always centred on C; a complemented sequence is also reversed.
    my $logoSequence = $leftFlank."C".$rightFlank;
    if(! $refIsC) { $logoSequence = scalar reverse($logoSequence); }
    push(@{$fileStats->{'WebLogo3'}{'CG'}}, $logoSequence);
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
716 ###################################### C:G>T:A
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
if(    (($tab[$ref_value] eq "C") && ($tab[$alt_value] eq "T"))
    || (($tab[$ref_value] eq "G") && ($tab[$alt_value] eq "A")) ) {
    # C:G>T:A substitution: update every per-file counter fed by this variant.
    my $mutation         = "C:G>T:A";
    my $pyrimidineChange = "C>T";   # the same event expressed from the C base

    # Count the total number of mutations
    $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++;

    # Both hashes are guaranteed to exist now that the total has been incremented.
    my $fileStats  = $refH_file->{$filename};
    my $typeCounts = $fileStats->{'6mutType'}{$funcSegment}{$mutation};

    # Inside this block the variant is either C>T or G>A, so the reference base
    # alone tells which orientation was reported.
    my $refIsC = ($tab[$ref_value] eq "C");

    # Flanking bases as read on the opposite strand: complement(after)_complement(before).
    my $oppositeStrandContext = sub {
        my $complBefore = complement($tempContextSequence[$before]);
        my $complAfter  = complement($tempContextSequence[$after]);
        return $complAfter."_".$complBefore;
    };

    # Pearson correlation: only counters that already exist are incremented
    if(exists $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) {
        $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++;
    }

    # Sequence context - 6 mutation types - genomic strand
    # We want to express the mutation in C>, so a G>A variant uses the opposite-strand context.
    my $genomicContext = $refIsC ? $context : $oppositeStrandContext->();
    if(exists $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}) {
        $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}++;
    }

    # Strand analysis:
    #   NonTr = C>T on strand + or G>A on strand -
    #   Tr    = C>T on strand - or G>A on strand +
    # Any other strand value leaves the strand counters untouched.
    my $strandLabel;
    if   ($tab[$strand_value] eq "+") { $strandLabel = $refIsC ? 'NonTr' : 'Tr'; }
    elsif($tab[$strand_value] eq "-") { $strandLabel = $refIsC ? 'Tr' : 'NonTr'; }

    if(defined $strandLabel) {
        if(exists $typeCounts->{$strandLabel}) { $typeCounts->{$strandLabel}++; }

        # C>T with the sequence context (opposite-strand context when the variant is G>A)
        my $strandContext = $refIsC ? $context : $oppositeStrandContext->();
        if(exists $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}) {
            $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}++;
        }
    }

    # WebLogo-3
    # A context sequence of exactly two elements ends the processing of this variant.
    if(scalar(@tempContextSequence) == 2) { next; }

    # Flanks are copied as-is for C>T and complemented base by base for G>A.
    my ($leftFlank, $rightFlank) = ("", "");
    my $pos = 0;
    while($pos < $midlle_totalNbBaseContext) {
        $leftFlank .= $refIsC ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }
    $pos = $midlle_totalNbBaseContext + 1;
    while($pos <= $#tempContextSequence) {
        $rightFlank .= $refIsC ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }

    # The logo is always centred on C; a complemented sequence is also reversed.
    my $logoSequence = $leftFlank."C".$rightFlank;
    if(! $refIsC) { $logoSequence = scalar reverse($logoSequence); }
    push(@{$fileStats->{'WebLogo3'}{'CT'}}, $logoSequence);
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
792
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
793 ############################################################### MUTATION T> #############################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
794 ###################################### T:A>A:T
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
if(    (($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "A"))
    || (($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "T")) ) {
    # T:A>A:T substitution: update every per-file counter fed by this variant.
    my $mutation         = "T:A>A:T";
    my $pyrimidineChange = "T>A";   # the same event expressed from the T base

    # Count the total number of mutations
    $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++;

    # Both hashes are guaranteed to exist now that the total has been incremented.
    my $fileStats  = $refH_file->{$filename};
    my $typeCounts = $fileStats->{'6mutType'}{$funcSegment}{$mutation};

    # Inside this block the variant is either T>A or A>T, so the reference base
    # alone tells which orientation was reported.
    my $refIsT = ($tab[$ref_value] eq "T");

    # Flanking bases as read on the opposite strand: complement(after)_complement(before).
    my $oppositeStrandContext = sub {
        my $complBefore = complement($tempContextSequence[$before]);
        my $complAfter  = complement($tempContextSequence[$after]);
        return $complAfter."_".$complBefore;
    };

    # Pearson correlation: only counters that already exist are incremented
    if(exists $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) {
        $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++;
    }

    # Sequence context - 6 mutation types - genomic strand
    # We want to express the mutation in T>, so an A>T variant uses the opposite-strand context.
    my $genomicContext = $refIsT ? $context : $oppositeStrandContext->();
    if(exists $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}) {
        $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}++;
    }

    # Strand analysis:
    #   NonTr = T>A on strand + or A>T on strand -
    #   Tr    = T>A on strand - or A>T on strand +
    # Any other strand value leaves the strand counters untouched.
    my $strandLabel;
    if   ($tab[$strand_value] eq "+") { $strandLabel = $refIsT ? 'NonTr' : 'Tr'; }
    elsif($tab[$strand_value] eq "-") { $strandLabel = $refIsT ? 'Tr' : 'NonTr'; }

    if(defined $strandLabel) {
        if(exists $typeCounts->{$strandLabel}) { $typeCounts->{$strandLabel}++; }

        # T>A with the sequence context (opposite-strand context when the variant is A>T)
        my $strandContext = $refIsT ? $context : $oppositeStrandContext->();
        if(exists $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}) {
            $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}++;
        }
    }

    # WebLogo-3
    # A context sequence of exactly two elements ends the processing of this variant.
    if(scalar(@tempContextSequence) == 2) { next; }

    # Flanks are copied as-is for T>A and complemented base by base for A>T.
    my ($leftFlank, $rightFlank) = ("", "");
    my $pos = 0;
    while($pos < $midlle_totalNbBaseContext) {
        $leftFlank .= $refIsT ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }
    $pos = $midlle_totalNbBaseContext + 1;
    while($pos <= $#tempContextSequence) {
        $rightFlank .= $refIsT ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }

    # The logo is always centred on T; a complemented sequence is also reversed.
    my $logoSequence = $leftFlank."T".$rightFlank;
    if(! $refIsT) { $logoSequence = scalar reverse($logoSequence); }
    push(@{$fileStats->{'WebLogo3'}{'TA'}}, $logoSequence);
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
870 ###################################### T:A>C:G
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
if(    (($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "C"))
    || (($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "G")) ) {
    # T:A>C:G substitution: update every per-file counter fed by this variant.
    my $mutation         = "T:A>C:G";
    my $pyrimidineChange = "T>C";   # the same event expressed from the T base

    # Count the total number of mutations
    $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++;

    # Both hashes are guaranteed to exist now that the total has been incremented.
    my $fileStats  = $refH_file->{$filename};
    my $typeCounts = $fileStats->{'6mutType'}{$funcSegment}{$mutation};

    # Inside this block the variant is either T>C or A>G, so the reference base
    # alone tells which orientation was reported.
    my $refIsT = ($tab[$ref_value] eq "T");

    # Flanking bases as read on the opposite strand: complement(after)_complement(before).
    my $oppositeStrandContext = sub {
        my $complBefore = complement($tempContextSequence[$before]);
        my $complAfter  = complement($tempContextSequence[$after]);
        return $complAfter."_".$complBefore;
    };

    # Pearson correlation: only counters that already exist are incremented
    if(exists $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) {
        $fileStats->{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++;
    }

    # Sequence context - 6 mutation types - genomic strand
    # We want to express the mutation in T>, so an A>G variant uses the opposite-strand context.
    # FIX: the previous test looked for A>T, which cannot occur inside this block,
    # so A>G variants were counted under the forward-strand context.
    my $genomicContext = $refIsT ? $context : $oppositeStrandContext->();
    if(exists $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}) {
        $fileStats->{'SeqContextG'}{$genomicContext}{$pyrimidineChange}++;
    }

    # Strand analysis:
    #   NonTr = T>C on strand + or A>G on strand -
    #   Tr    = T>C on strand - or A>G on strand +
    # Any other strand value leaves the strand counters untouched.
    my $strandLabel;
    if   ($tab[$strand_value] eq "+") { $strandLabel = $refIsT ? 'NonTr' : 'Tr'; }
    elsif($tab[$strand_value] eq "-") { $strandLabel = $refIsT ? 'Tr' : 'NonTr'; }

    if(defined $strandLabel) {
        if(exists $typeCounts->{$strandLabel}) { $typeCounts->{$strandLabel}++; }

        # T>C with the sequence context (opposite-strand context when the variant is A>G)
        my $strandContext = $refIsT ? $context : $oppositeStrandContext->();
        if(exists $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}) {
            $fileStats->{'SeqContextC'}{$strandContext}{$pyrimidineChange}{$strandLabel}++;
        }
    }

    # WebLogo-3
    # A context sequence of exactly two elements ends the processing of this variant.
    if(scalar(@tempContextSequence) == 2) { next; }

    # Flanks are copied as-is for T>C and complemented base by base for A>G.
    my ($leftFlank, $rightFlank) = ("", "");
    my $pos = 0;
    while($pos < $midlle_totalNbBaseContext) {
        $leftFlank .= $refIsT ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }
    $pos = $midlle_totalNbBaseContext + 1;
    while($pos <= $#tempContextSequence) {
        $rightFlank .= $refIsT ? $tempContextSequence[$pos] : complement($tempContextSequence[$pos]);
        $pos++;
    }

    # The logo is always centred on T; a complemented sequence is also reversed
    # so that it reads as the reverse complement.
    # FIX: the reversal used to be applied to the un-complemented T>C sequence and
    # omitted for the complemented A>G one, the opposite of the other mutation types.
    my $logoSequence = $leftFlank."T".$rightFlank;
    if(! $refIsT) { $logoSequence = scalar reverse($logoSequence); }
    push(@{$fileStats->{'WebLogo3'}{'TC'}}, $logoSequence);
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
946 ###################################### T:A>G:C
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
947 if( (($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "G")) || ( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "C")) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
948 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
949 my $mutation = "T:A>G:C";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
950 $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'TotalMutG'}++; # Count the total number of mutations
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
951
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
952 # Pearson correlation
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
953 if(exists $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}) { $refH_file->{$filename}{'SBSPerChr'}{$mutation}{'CHR'}{$chrNameForH}{'chr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
954
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
955 # Sequence context - 6 mutation types - genomic strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
956 my $mutationSeqContext6mutType = "T>G";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
957 # We want to express the mutation in T>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
958 if( ($tab[$ref_value] eq "A") && ($tab[$alt_value] eq "T") )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
959 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
960 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
961 my $context_reverse = $base5."_".$base3;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
962 if(exists $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context_reverse}{$mutationSeqContext6mutType}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
963 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
964 elsif(exists $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}) { $refH_file->{$filename}{'SeqContextG'}{$context}{$mutationSeqContext6mutType}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
965
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
966 # Strand analysis T>G on NonTr strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
967 if( (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G"))) || (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "C"))) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
968 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
969 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'NonTr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
970
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
971 # T>G (T>G strand = +)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
972 if( ($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G")) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
973 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
974 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'NonTr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
975 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
976 # T>G (A>C strand = -)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
977 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
978 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
979 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
980 my $context_reverse = $base5."_".$base3;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
981 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'NonTr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'NonTr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
982 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
983 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
984 # Strand analysis T>G on Tr strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
985 if( (($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G"))) || (($tab[$strand_value] eq "+") && (($tab[$ref_value] eq "A")&&($tab[$alt_value] eq "C"))) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
986 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
987 if(exists $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}) { $refH_file->{$filename}{'6mutType'}{$funcSegment}{$mutation}{'Tr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
988
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
989 # T>G (T>G strand = -)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
990 if( ($tab[$strand_value] eq "-") && (($tab[$ref_value] eq "T")&&($tab[$alt_value] eq "G")) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
991 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
992 if(exists $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context}{'T>G'}{'Tr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
993 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
994 # T>G (A>C strand = +)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
995 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
996 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
997 my $base3 = complement($tempContextSequence[$before]); my $base5 = complement($tempContextSequence[$after]);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
998 my $context_reverse = $base5."_".$base3;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
999 if(exists $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'Tr'}) { $refH_file->{$filename}{'SeqContextC'}{$context_reverse}{'T>G'}{'Tr'}++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1000 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1001 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1002 # WebLogo-3
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1003 if(($tab[$ref_value] eq "T") && ($tab[$alt_value] eq "G"))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1004 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1005 if(scalar(@tempContextSequence) == 2) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1006 my ($contextTemp1, $contextTemp2) = ("", "");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1007 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= $tempContextSequence[$i]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1008 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= $tempContextSequence[$i]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1009 my $context = $contextTemp1."T".$contextTemp2; $context = reverse $context;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1010 push(@{$refH_file->{$filename}{'WebLogo3'}{'TG'}}, $context);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1011 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1012 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1013 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1014 if(scalar(@tempContextSequence) == 2) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1015 my ($contextTemp1, $contextTemp2) = ("", "");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1016 for(my $i=0; $i<$midlle_totalNbBaseContext; $i++) { $contextTemp1 .= complement($tempContextSequence[$i]); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1017 for(my $i=$midlle_totalNbBaseContext+1; $i<=$#tempContextSequence; $i++) { $contextTemp2 .= complement($tempContextSequence[$i]); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1018 my $context = $contextTemp1."T".$contextTemp2;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1019 push(@{$refH_file->{$filename}{'WebLogo3'}{'TG'}}, $context);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1020 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1021 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1022 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1023 close F1;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1024 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1025 # Write the different statistics in the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1026 sub WriteStatistics
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1027 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1028 my ($refH_file, $nb_func, $folderFigure, $folderChi2, $folderNMF) = @_;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1029
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1030 # Save the different graphs in specific folder instead of in a general one.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1031 if(!-e "$folderFigure/Overall_mutation_distribution") { mkdir("$folderFigure/Overall_mutation_distribution") or die "Can't create the directory $folderFigure/Overall_mutation_distribution\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1032 if(!-e "$folderFigure/Impact_protein_sequence") { mkdir("$folderFigure/Impact_protein_sequence") or die "Can't create the directory $folderFigure/Impact_protein_sequence\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1033 if(!-e "$folderFigure/SBS_distribution") { mkdir("$folderFigure/SBS_distribution") or die "Can't create the directory $folderFigure/SBS_distribution\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1034 if(!-e "$folderFigure/Stranded_Analysis") { mkdir("$folderFigure/Stranded_Analysis") or die "Can't create the directory $folderFigure/Stranded_Analysis\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1035 if(!-e "$folderFigure/Trinucleotide_Sequence_Context") { mkdir("$folderFigure/Trinucleotide_Sequence_Context") or die "Can't create the directory $folderFigure/Trinucleotide_Sequence_Context\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1036 if(!-e "$folderFigure/Distribution_SBS_Per_Chromosomes") { mkdir("$folderFigure/Distribution_SBS_Per_Chromosomes") or die "Can't create the directory $folderFigure/Distribution_SBS_Per_Chromosomes\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1037
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1038
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1039 # Create a workbook with all the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1040 my $wb = ""; my $ws_sum = ""; my %h_chi2 = ();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1041 my ($ws_inputNMF_count, $ws_inputNMF_percent) = ("", "");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1042 ############### Define the format
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1043 my ($format_A10, $format_A10Boldleft, $format_A10ItalicRed) = ("", "", "");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1044 my ($formatT_left, $formatT_right, $formatT_bottomRight, $formatT_bottomLeft, $formatT_bottom, $formatT_bottomHeader, $formatT_bottomRightHeader, $formatT_bottomHeader2, $formatT_rightHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1045 my ($formatT_graphTitle);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1046 my ($table_topleft, $table_topRight, $table_bottomleft, $table_bottomRight, $table_top, $table_right, $table_bottom, $table_bottomItalicRed, $table_left, $table_bottomrightHeader, $table_left2, $table_middleHeader, $table_middleHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1047
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1048 if($oneReportPerSample == 2)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1049 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1050 $wb = Spreadsheet::WriteExcel->new("$output/Report_Mutation_Spectra.xls");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1051
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1052 ############### Define the format
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1053 Format_A10($wb, \$format_A10); # Text center in Arial 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1054 Format_A10BoldLeft($wb, \$format_A10Boldleft); # Text on the left in Arial 10 bold
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1055 Format_TextSection($wb, \$formatT_left, \$formatT_right, \$formatT_bottomRight, \$formatT_bottomLeft, \$formatT_bottom, \$formatT_bottomHeader, \$formatT_bottomRightHeader, \$formatT_bottomHeader2, \$formatT_rightHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1056 Format_GraphTitle($wb, \$formatT_graphTitle);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1057 Format_Table($wb, \$table_topleft, \$table_topRight, \$table_bottomleft, \$table_bottomRight, \$table_top, \$table_right, \$table_bottom, \$table_bottomItalicRed, \$table_left, \$table_bottomrightHeader, \$table_left2, \$table_middleHeader, \$table_middleHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1058 Format_A10ItalicRed($wb, \$format_A10ItalicRed);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1059
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1060
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1061 ############### Worksheet with a summary of the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1062 $ws_sum = $wb->add_worksheet("Sample_List");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1063 $ws_sum->write(0, 0, "Samples", $format_A10); $ws_sum->write(0, 1, "Total number SBS", $format_A10); $ws_sum->write(0, 2, "Total number of Indel", $format_A10); $ws_sum->write(0, 3, "Total number of mutations", $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1064 $ws_sum->set_column(0,0, 50); $ws_sum->set_column(1,1, 20); $ws_sum->set_column(2,2, 20); $ws_sum->set_column(3,3, 22);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1065
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1066 ############### Save the chi2 values into a hash table
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1067 if(-e "$folderChi2/Output_chi2_strandBias.txt")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1068 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1069 open(F1, "$folderChi2/Output_chi2_strandBias.txt") or die "$!: $folderChi2/Output_chi2_strandBias.txt\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1070 my $header = <F1>; # Strand_Bias($tab[0]) NonTr-Tr($tab[1]) Proportion($tab[2]) P-val-Chi2($tab[3]) FDR($tab[4]) Confidence Interval($tab[5]) Mutation_Type($tab[6]) SampleName($tab[7])
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1071 while(<F1>)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1072 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1073 $_ =~ s/[\r\n]+$//;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1074 my @tab = split("\t", $_);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1075
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1076 $h_chi2{$tab[7]}{$tab[6]}{'p-value'} = $tab[3]; $h_chi2{$tab[7]}{$tab[6]}{'ConfInt'} = $tab[5];
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1077
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1078 # For the pool data the FDR isn't calculated so replace the NA (=Missing values) by "-"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1079 if($tab[7] eq "Pool_Data") { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = "-"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1080 else { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = $tab[4]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1081 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1082 close F1;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1083 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1084 ############### Write the input matrix for NMF for the count and the un-normalized frequency
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1085 $ws_inputNMF_count = $wb->add_worksheet("Input_NMF_Count");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1086 $ws_inputNMF_percent = $wb->add_worksheet("Input_NMF_Percent");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1087 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1088
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1089
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1090 ################################################ Set the Rows and columns of the different part of the report ################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1091 my $row_SumSheet = 1; # First row for the summary sheet of the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1092 my $rowStart_SBSdistrBySeg = 48; my $colStart_SBSdistrBySeg = 0; # For the table SBS distribution by segment
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1093 my $colStart_matrixSeqContext = 19; # Sequence context
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1094 my $col_inputNMF = 0; # Write the names of the samples with at least 33 SBS
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1095
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1096 # For NMF input
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1097 my %h_inputNMF = ();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1098
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1099 ## For each file
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1100 foreach my $k_file (sort keys $refH_file)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1101 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1102 print "File in process: $k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1103 if($k_file ne "Pool_Data") { $col_inputNMF++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1104
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1105 # Create one workbook for each sample
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1106 if($oneReportPerSample == 1)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1107 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1108 $wb = Spreadsheet::WriteExcel->new("$output/Report_Mutation_Spectra-$k_file.xls");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1109
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1110 ############### Define the format
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1111 Format_A10($wb, \$format_A10); # Text center in Arial 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1112 Format_A10BoldLeft($wb, \$format_A10Boldleft); # Text on the left in Arial 10 bold
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1113 Format_TextSection($wb, \$formatT_left, \$formatT_right, \$formatT_bottomRight, \$formatT_bottomLeft, \$formatT_bottom, \$formatT_bottomHeader, \$formatT_bottomRightHeader, \$formatT_bottomHeader2, \$formatT_rightHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1114 Format_GraphTitle($wb, \$formatT_graphTitle);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1115 Format_Table($wb, \$table_topleft, \$table_topRight, \$table_bottomleft, \$table_bottomRight, \$table_top, \$table_right, \$table_bottom, \$table_bottomItalicRed, \$table_left, \$table_bottomrightHeader, \$table_left2, \$table_middleHeader, \$table_middleHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1116 Format_A10ItalicRed($wb, \$format_A10ItalicRed);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1117
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1118
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1119 ############### Worksheet with a summary of the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1120 $ws_sum = $wb->add_worksheet("Sample_List");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1121 $ws_sum->write(0, 0, "Samples", $format_A10); $ws_sum->write(0, 1, "Total number SBS", $format_A10); $ws_sum->write(0, 2, "Total number of Indel", $format_A10); $ws_sum->write(0, 3, "Total number of mutations", $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1122 $ws_sum->set_column(0,0, 50); $ws_sum->set_column(1,1, 20); $ws_sum->set_column(2,2, 20); $ws_sum->set_column(3,3, 22);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1123 # Write in the Samples sheet the name and the total number of SBS
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1124 $ws_sum->write(1, 0, "$k_file", $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1125 $ws_sum->write(1, 1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $format_A10); $ws_sum->write(1, 2, $refH_file->{$k_file}{'TotalIndelGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 3, $refH_file->{$k_file}{'TotalMutGenomic'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1126
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1127 ############### Save the chi2 values into a hash table
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1128 if(-e "$folderChi2/Output_chi2_strandBias.txt")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1129 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1130 open(F1, "$folderChi2/Output_chi2_strandBias.txt") or die "$!: $folderChi2/Output_chi2_strandBias.txt\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1131 my $header = <F1>; # Strand_Bias($tab[0]) NonTr-Tr($tab[1]) Proportion($tab[2]) P-val-Chi2($tab[3]) FDR($tab[4]) Confidence Interval($tab[5]) Mutation_Type($tab[6]) SampleName($tab[7])
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1132 while(<F1>)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1133 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1134 $_ =~ s/[\r\n]+$//;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1135 my @tab = split("\t", $_);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1136
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1137 if($tab[7] eq $k_file)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1138 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1139 $h_chi2{$tab[7]}{$tab[6]}{'p-value'} = $tab[3]; $h_chi2{$tab[7]}{$tab[6]}{'ConfInt'} = $tab[5];
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1140
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1141 # For the pool data the FDR isn't calculated so replace the NA (=Missing values) by "-"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1142 if($tab[7] eq "Pool_Data") { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = "-"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1143 else { $h_chi2{$tab[7]}{$tab[6]}{'FDR'} = $tab[4]; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1144 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1145 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1146 close F1;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1147 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1148
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1149 ############### Write the input matrix for NMF
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1150 if($k_file ne "Pool_Data")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1151 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1152 # For NMF don't consider the pool of the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1153 $ws_inputNMF_count = $wb->add_worksheet("Input_NMF_Count");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1154 $ws_inputNMF_percent = $wb->add_worksheet("Input_NMF_Percent");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1155 # Write in the input NMF sheet the name of the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1156 $ws_inputNMF_count->write(0, 1, $k_file);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1157 $ws_inputNMF_percent->write(0, 1, $k_file);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1158 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1159 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1160 # One workbook with all the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1161 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1162 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1163 # Write in the Samples sheet the name and the total number of SBS
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1164 $ws_sum->write($row_SumSheet, 0, $k_file, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1165 $ws_sum->write($row_SumSheet, 1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 2, $refH_file->{$k_file}{'TotalIndelGenomic'}, $format_A10); $ws_sum->write($row_SumSheet, 3, $refH_file->{$k_file}{'TotalMutGenomic'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1166
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1167 # For NMF don't consider the pool of the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1168 if($k_file ne "Pool_Data")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1169 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1170 # Write in the input NMF sheet the name of the samples
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1171 $ws_inputNMF_count->write(0, $col_inputNMF, $k_file);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1172 $ws_inputNMF_percent->write(0, $col_inputNMF, $k_file);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1173 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1174 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1175
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1176 # Count of SBS per chromosome
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1177 PearsonCoefficient($refH_file, $k_file);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1178
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1179 # Add a worksheet to the workbook
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1180 my $ws = $wb->add_worksheet($k_file);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1181
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1182 # Write the titles of the different sections of the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1183 WriteBoderSection($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1184
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1185 # Write the mutation types (6 types)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1186 WriteHeaderSection($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1187
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1188
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1189 # Save the figures of each sample in a different folder
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1190 if(!-e "$folderFigure/Overall_mutation_distribution/$k_file") { mkdir("$folderFigure/Overall_mutation_distribution/$k_file") or die "Can't create the directory $folderFigure/Overall_mutation_distribution/$k_file\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1191 if(!-e "$folderFigure/Impact_protein_sequence/$k_file") { mkdir("$folderFigure/Impact_protein_sequence/$k_file") or die "Can't create the directory $folderFigure/Impact_protein_sequence/$k_file\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1192 if(!-e "$folderFigure/SBS_distribution/$k_file") { mkdir("$folderFigure/SBS_distribution/$k_file") or die "Can't create the directory $folderFigure/SBS_distribution\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1193 if(!-e "$folderFigure/Stranded_Analysis/$k_file") { mkdir("$folderFigure/Stranded_Analysis/$k_file") or die "Can't create the directory $folderFigure/Stranded_Analysis/$k_file\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1194 if(!-e "$folderFigure/Trinucleotide_Sequence_Context/$k_file") { mkdir("$folderFigure/Trinucleotide_Sequence_Context/$k_file") or die "Can't create the directory $folderFigure/Trinucleotide_Sequence_Context/$k_file\n"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1195
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1196
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1197
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1198 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1199 ################################################################# Write the statistics ###################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1200 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1201 my ($ca_genomique, $cg_genomique, $ct_genomique, $ta_genomique, $tc_genomique, $tg_genomique) = (0,0,0,0,0,0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1202 my ($ca_NonTr, $ca_Tr, $cg_NonTr, $cg_Tr, $ct_NonTr, $ct_Tr, $ta_NonTr, $ta_Tr, $tc_NonTr, $tc_Tr, $tg_NonTr, $tg_Tr) = (0,0,0,0,0,0, 0,0,0,0,0,0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1203
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1204 my $row_SBSdistrBySeg = $rowStart_SBSdistrBySeg+4;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1205 my $row_SBSDistrBySegAndFunc_CA = $rowStart_SBSdistrBySeg+$nb_func+12;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1206 my $row_SBSDistrBySegAndFunc_CG = $rowStart_SBSdistrBySeg+($nb_func*2)+16; my $rowEndCG_SBSDistrBySegAndFunc_CG = $row_SBSDistrBySegAndFunc_CG+$nb_func;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1207 my $row_SBSDistrBySegAndFunc_CT = $rowStart_SBSdistrBySeg+($nb_func*3)+20;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1208
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1209 ## 6 mutation types by segment
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# For every functional region ($k_func) write:
#   - one row of the "SBS distribution by segment" table (count and percentage
#     of each of the 6 mutation types on the genomic strand),
#   - one row in each strand-bias table (ratio NonTr/Tr, NonTr count, Tr count):
#     the C:G>* types go to the row tracked by $row_SBSDistrBySegAndFunc_CA,
#     the T:A>* types to the row tracked by $row_SBSDistrBySegAndFunc_CG.
# The per-type running totals ($xx_genomique, $xx_NonTr, $xx_Tr) declared by
# the enclosing code are updated along the way.
# The bare block only limits the scope of the helper variables below.
{
	my $refH_6mutType = $refH_file->{$k_file}{'6mutType'};

	# Where each mutation type goes:
	#   sbTable  : strand-bias table receiving the type ('CA' or 'CG' row counter)
	#   sbCol    : column offset of the ratio (NonTr is sbCol+1, Tr is sbCol+2)
	#   countCol : column offset of the genomic count in the distribution table
	#              (the percentage is written in countCol-1)
	#   genomic / nonTr / tr : references to the running totals to update
	my %layoutOf =
	(
		'C:G>A:T' => { sbTable => 'CA', sbCol => 1, countCol => 3,  genomic => \$ca_genomique, nonTr => \$ca_NonTr, tr => \$ca_Tr },
		'C:G>G:C' => { sbTable => 'CA', sbCol => 5, countCol => 5,  genomic => \$cg_genomique, nonTr => \$cg_NonTr, tr => \$cg_Tr },
		'C:G>T:A' => { sbTable => 'CA', sbCol => 9, countCol => 7,  genomic => \$ct_genomique, nonTr => \$ct_NonTr, tr => \$ct_Tr },
		'T:A>A:T' => { sbTable => 'CG', sbCol => 1, countCol => 9,  genomic => \$ta_genomique, nonTr => \$ta_NonTr, tr => \$ta_Tr },
		'T:A>C:G' => { sbTable => 'CG', sbCol => 5, countCol => 11, genomic => \$tc_genomique, nonTr => \$tc_NonTr, tr => \$tc_Tr },
		'T:A>G:C' => { sbTable => 'CG', sbCol => 9, countCol => 13, genomic => \$tg_genomique, nonTr => \$tg_NonTr, tr => \$tg_Tr },
	);
	# Column order used when writing the percentages
	my @mutTypeOrder = ('C:G>A:T', 'C:G>G:C', 'C:G>T:A', 'T:A>A:T', 'T:A>C:G', 'T:A>G:C');

	# Explicit %{...} dereference: "keys $hashref" is rejected by Perl >= 5.24
	foreach my $k_func (sort keys %{$refH_6mutType})
	{
		my $refH_func          = $refH_6mutType->{$k_func};
		my $totalSBS_bySegment = 0;

		# The row flagged as the end of the second strand-bias table gets bottom borders
		my $isLastRowCG = ($row_SBSDistrBySegAndFunc_CG == $rowEndCG_SBSDistrBySegAndFunc_CG) ? 1 : 0;

		# Write the functional region for the section SBS distribution by segment
		$ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg, $k_func, $formatT_left);

		# Write the functional region in both strand-bias tables
		$ws->write($row_SBSDistrBySegAndFunc_CA, $colStart_SBSdistrBySeg, $k_func, $formatT_left);
		$ws->write($row_SBSDistrBySegAndFunc_CG, $colStart_SBSdistrBySeg, $k_func, $isLastRowCG ? $formatT_bottomLeft : $formatT_left);

		foreach my $k_mutation (sort keys %{$refH_func})
		{
			my $refH_mut = $refH_func->{$k_mutation};
			my $layout   = $layoutOf{$k_mutation};

			if(defined $layout)
			{
				my $onTableCG = ($layout->{'sbTable'} eq 'CG') ? 1 : 0;
				my $rowSB     = $onTableCG ? $row_SBSDistrBySegAndFunc_CG : $row_SBSDistrBySegAndFunc_CA;
				my $colSB     = $colStart_SBSdistrBySeg + $layout->{'sbCol'};

				# Cell formats: bottom border on the closing row of the T:A>* table,
				# right border on the last column of each table
				my $withBottom = ($onTableCG && $isLastRowCG) ? 1 : 0;
				my $formatCell = $withBottom ? $formatT_bottom : $format_A10;
				my $formatTr   = $formatCell;
				if($layout->{'sbCol'} == 9) { $formatTr = $withBottom ? $formatT_bottomRight : $formatT_right; }

				# Ratio NonTr/Tr for this mutation type (0 when one of the strands has no mutation)
				my $ratioSB = 0;
				if( ($refH_mut->{'NonTr'} != 0) && ($refH_mut->{'Tr'} != 0) ) { $ratioSB = $refH_mut->{'NonTr'} / $refH_mut->{'Tr'}; }
				$ratioSB = sprintf("%.2f", $ratioSB);
				$ws->write($rowSB, $colSB, $ratioSB, $formatCell);

				# Write the count of SBS in the NonTr and Tr strand
				$ws->write($rowSB, $colSB+1, $refH_mut->{'NonTr'}, $formatCell);
				$ws->write($rowSB, $colSB+2, $refH_mut->{'Tr'}, $formatTr);

				# Calculate the total number of SBS per mut type (genomic strand)
				${$layout->{'genomic'}} += $refH_mut->{'TotalMutG'};
				# Calculate the total number of SBS by NonTr / Tr strand
				${$layout->{'nonTr'}} += $refH_mut->{'NonTr'};
				${$layout->{'tr'}}    += $refH_mut->{'Tr'};

				# Write the count by functional region (last column closes the table on the right)
				my $formatCount = ($layout->{'countCol'} == 13) ? $formatT_right : $format_A10;
				$ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg + $layout->{'countCol'}, $refH_mut->{'TotalMutG'}, $formatCount);
			}

			# Calculate the total number of SBS on the genomic strand for each mutation types by functional region
			$totalSBS_bySegment += $refH_mut->{'TotalMutG'};
		} # End $k_mutation

		$row_SBSDistrBySegAndFunc_CA++; $row_SBSDistrBySegAndFunc_CG++; $row_SBSDistrBySegAndFunc_CT++;

		# Write the percent by functional region.
		# Each percentage is guarded by the count of its OWN mutation type: a non
		# null count also guarantees that $totalSBS_bySegment is not null.
		foreach my $mutType (@mutTypeOrder)
		{
			my $countG  = $refH_func->{$mutType}{'TotalMutG'} // 0;
			my $percent = 0;
			if($countG != 0) { $percent = sprintf("%.2f", ($countG / $totalSBS_bySegment) * 100); }
			$ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg + $layoutOf{$mutType}{'countCol'} - 1, $percent, $format_A10);
		}

		# Write the count of SBS by segment
		$ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+1, $totalSBS_bySegment, $format_A10);

		$row_SBSdistrBySeg++;
	} # End $k_func
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1433
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Write the total number of SBS on the genomic strand
$ws->write($row_SBSdistrBySeg, $colStart_SBSdistrBySeg+1, $refH_file->{$k_file}{'TotalSBSGenomic'}, $formatT_bottomHeader);

# Write the total and the percentage of SBS for each mutation types and save it to a text file
# (one line per mutation type: Mutation_Type, Count, Percentage, Sample).
# The percentages stay declared in the enclosing scope, as in the original code.
my ($percent_ca, $percent_cg, $percent_ct, $percent_ta, $percent_tc, $percent_tg) = (0, 0, 0, 0, 0, 0);
{
	my $distrSBS_file = "$folderFigure/SBS_distribution/$k_file/$k_file-SBS_distribution.txt";

	# Lexical filehandle instead of a global bareword one
	open(my $fh_distrSBS, ">", $distrSBS_file) or die "$!: $distrSBS_file\n";
	print {$fh_distrSBS} "Mutation_Type\tCount\tPercentage\tSample\n";

	# Mutation type, total on the genomic strand, variable receiving the percentage
	my @totalByType =
	(
		[ 'C:G>A:T', $ca_genomique, \$percent_ca ],
		[ 'C:G>G:C', $cg_genomique, \$percent_cg ],
		[ 'C:G>T:A', $ct_genomique, \$percent_ct ],
		[ 'T:A>A:T', $ta_genomique, \$percent_ta ],
		[ 'T:A>C:G', $tc_genomique, \$percent_tc ],
		[ 'T:A>G:C', $tg_genomique, \$percent_tg ],
	);

	# Each type uses two columns (percentage then count), starting at offset +2
	my $colPercent = $colStart_SBSdistrBySeg + 2;
	foreach my $refA_total (@totalByType)
	{
		my ($mutType, $countG, $refS_percent) = @{$refA_total};

		# A null count gives 0 and avoids dividing when there is no SBS at all
		if($countG != 0) { ${$refS_percent} = sprintf("%.2f", ($countG / $refH_file->{$k_file}{'TotalSBSGenomic'}) * 100); }
		my $percent = ${$refS_percent};

		# The last count closes the table on the right
		my $formatCount = ($mutType eq 'T:A>G:C') ? $formatT_bottomRightHeader : $formatT_bottomHeader;

		$ws->write($row_SBSdistrBySeg, $colPercent, $percent, $formatT_bottom);
		print {$fh_distrSBS} "$mutType\t$countG\t$percent\t$k_file\n";
		$ws->write($row_SBSdistrBySeg, $colPercent+1, $countG, $formatCount);

		$colPercent += 2;
	}

	# Buffered write errors only show up at close time: do not ignore them
	close($fh_distrSBS) or die "$!: $distrSBS_file\n";
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1471
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1472 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1473 ################################################################### Write Strand BIAS #####################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1474 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Write the SB for each mutation type (table 3): the title and the header row
# are written twice (title rows 28 and 39, header rows 29 and 40).
# The bare block only limits the scope of the helper variables.
{
	my $titleTable3 = "Table 3. Significance of the strand biases";

	# Column index, label and format of each header cell
	my @headerTable3 =
	(
		[ 11, "Mutation Type", $table_topleft  ],
		[ 12, "Non-Tr/Tr",     $table_top      ],
		[ 13, "Non-Tr",        $table_top      ],
		[ 14, "Tr",            $table_top      ],
		[ 15, "P-value",       $table_top      ],
		[ 16, "FDR q value",   $table_top      ],
		[ 17, "95% CI",        $table_topRight ],
	);
	my $writeHeaderTable3 = sub
	{
		my ($rowHeader) = @_;
		foreach my $refA_cell (@headerTable3)
		{
			my ($col, $label, $format) = @{$refA_cell};
			$ws->write($rowHeader, $col, $label, $format);
		}
		return;
	};

	$ws->write(28, 11, $titleTable3, $format_A10Boldleft);
	# Width of the columns "Mutation Type", "FDR q value" and "95% CI"
	foreach my $refA_width ([11, 13], [16, 15], [17, 10])
	{
		$ws->set_column($refA_width->[0], $refA_width->[0], $refA_width->[1]);
	}
	$writeHeaderTable3->(29);

	$ws->write(39, 11, $titleTable3, $format_A10Boldleft);
	$writeHeaderTable3->(40);
}

# For ggplot2: count of each alteration by strand
open(SB, ">", "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandBias.txt") or die "$!: $folderFigure/Stranded_Analysis/$k_file/$k_file-StrandBias.txt\n";
print SB "Alteration\tStrand\tCount\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1486
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1487
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1488 #-----------------------------------------------------------------------------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1489 my ($ratio_ca, $ratio_gt, $percent_ca_NonTr, $percent_ca_Tr) = (0, 0, 0, 0, 0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1490 if( ($ca_NonTr==0) || ($ca_Tr==0) ) { $ratio_ca = 0; $ratio_gt = 0; $percent_ca_NonTr = 0; $percent_ca_Tr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1491 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1492 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1493 $ratio_ca = $ca_NonTr/$ca_Tr; $ratio_ca = sprintf("%.2f", $ratio_ca);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1494 $ratio_gt = $ca_Tr/$ca_NonTr; $ratio_gt = sprintf("%.2f", $ratio_gt);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1495 $percent_ca_NonTr = ($ca_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ca_Tr = ($ca_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1496 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1497 print SB "C>A\tNonTranscribed\t$ca_NonTr\n", "C>A\tTranscribed\t$ca_Tr\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1498 # C>A
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1499 $ws->write(30, 11, "C>A", $table_left); $ws->write(30, 12, $ratio_ca, $table_middleHeader); $ws->write(30, 13, $ca_NonTr, $format_A10); $ws->write(30, 14, $ca_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1500 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1501 if(($ca_NonTr+$ca_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1502 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1503 if($h_chi2{$k_file}{'C>A'}{'p-value'} eq "NA") { $ws->write_string(30, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1504 else { $ws->write_string(30, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1505 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1506 else { $ws->write_string(30, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1507 $ws->write(30, 16, $h_chi2{$k_file}{'C>A'}{'FDR'}, $format_A10); $ws->write(30, 17, $h_chi2{$k_file}{'C>A'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1508 # G>T
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1509 $ws->write(41, 11, "G>T", $table_left); $ws->write(41, 12, $ratio_gt, $table_middleHeader); $ws->write(41, 13, $ca_Tr, $format_A10); $ws->write(41, 14, $ca_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1510 if(($ca_NonTr+$ca_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1511 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1512 if($h_chi2{$k_file}{'C>A'}{'p-value'} eq "NA") { $ws->write_string(41, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1513 else { $ws->write_string(41, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1514 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1515 else { $ws->write_string(41, 15, $h_chi2{$k_file}{'C>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1516 $ws->write(41, 16, $h_chi2{$k_file}{'C>A'}{'FDR'}, $format_A10); $ws->write(41, 17, $h_chi2{$k_file}{'C>A'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1517
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1518 #-----------------------------------------------------------------------------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1519 my ($ratio_cg, $ratio_gc, $percent_cg_NonTr, $percent_cg_Tr) = (0, 0, 0, 0, 0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1520 if( ($cg_NonTr==0) || ($cg_Tr==0) ) { $ratio_cg = 0; $ratio_gc = 0; $percent_cg_NonTr = 0; $percent_cg_Tr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1521 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1522 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1523 $ratio_cg = $cg_NonTr/$cg_Tr; $ratio_cg = sprintf("%.2f", $ratio_cg);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1524 $ratio_gc = $cg_Tr/$cg_NonTr; $ratio_gc = sprintf("%.2f", $ratio_gc);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1525 $percent_cg_NonTr = ($cg_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_cg_Tr = ($cg_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1526 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1527 print SB "C>G\tNonTranscribed\t$cg_NonTr\n", "C>G\tTranscribed\t$cg_Tr\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1528 # C>G
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1529 $ws->write(31, 11, "C>G", $table_left); $ws->write(31, 12, $ratio_cg, $table_middleHeader); $ws->write(31, 13, $cg_NonTr, $format_A10); $ws->write(31, 14, $cg_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1530 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1531 if(($cg_NonTr+$cg_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1532 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1533 if($h_chi2{$k_file}{'C>G'}{'p-value'} eq "NA") { $ws->write_string(31, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1534 else { $ws->write_string(31, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1535 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1536 else { $ws->write_string(31, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1537 $ws->write(31, 16, $h_chi2{$k_file}{'C>G'}{'FDR'}, $format_A10); $ws->write(31, 17, $h_chi2{$k_file}{'C>G'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1538 # G>C
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1539 $ws->write(42, 11, "G>C", $table_left); $ws->write(42, 12, $ratio_gc, $table_middleHeader); $ws->write(42, 13, $cg_Tr, $format_A10); $ws->write(42, 14, $cg_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1540 if(($cg_NonTr+$cg_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1541 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1542 if($h_chi2{$k_file}{'C>G'}{'p-value'} eq "NA") { $ws->write_string(42, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1543 else { $ws->write_string(42, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1544 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1545 else { $ws->write_string(42, 15, $h_chi2{$k_file}{'C>G'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1546 $ws->write(42, 16, $h_chi2{$k_file}{'C>G'}{'FDR'}, $format_A10); $ws->write(42, 17, $h_chi2{$k_file}{'C>G'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1547
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1548 #-----------------------------------------------------------------------------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1549 my ($ratio_ct, $ratio_ga, $percent_ct_NonTr, $percent_ct_Tr) = (0, 0, 0, 0, 0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1550 if( ($ct_NonTr==0) || ($ct_Tr==0) ) { $ratio_ct = 0; $ratio_ga = 0; $percent_ct_NonTr = 0; $percent_ct_Tr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1551 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1552 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1553 $ratio_ct = $ct_NonTr/$ct_Tr; $ratio_ct = sprintf("%.2f", $ratio_ct);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1554 $ratio_ga = $ct_Tr/$ct_NonTr; $ratio_ga = sprintf("%.2f", $ratio_ga);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1555 $percent_ct_NonTr = ($ct_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ct_Tr = ($ct_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1556 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1557 print SB "C>T\tNonTranscribed\t$ct_NonTr\n", "C>T\tTranscribed\t$ct_Tr\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1558 # C>T
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1559 $ws->write(32, 11, "C>T", $table_left); $ws->write(32, 12, $ratio_ct, $table_middleHeader); $ws->write(32, 13, $ct_NonTr, $format_A10); $ws->write(32, 14, $ct_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1560 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1561 if(($ct_NonTr+$ct_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1562 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1563 if($h_chi2{$k_file}{'C>T'}{'p-value'} eq "NA") { $ws->write_string(32, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1564 else { $ws->write_string(32, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1565 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1566 else { $ws->write_string(32, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1567 $ws->write(32, 16, $h_chi2{$k_file}{'C>T'}{'FDR'}, $format_A10); $ws->write(32, 17, $h_chi2{$k_file}{'C>T'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1568 # G>A
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1569 $ws->write(43, 11, "G>A", $table_left); $ws->write(43, 12, $ratio_ga, $table_middleHeader); $ws->write(43, 13, $ct_Tr, $format_A10); $ws->write(43, 14, $ct_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1570 if(($ct_NonTr+$ct_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1571 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1572 if($h_chi2{$k_file}{'C>T'}{'p-value'} eq "NA") { $ws->write_string(43, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1573 else { $ws->write_string(43, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1574 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1575 else { $ws->write_string(43, 15, $h_chi2{$k_file}{'C>T'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1576 $ws->write(43, 16, $h_chi2{$k_file}{'C>T'}{'FDR'}, $format_A10); $ws->write(43, 17, $h_chi2{$k_file}{'C>T'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1577
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1578 #-----------------------------------------------------------------------------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1579 my ($ratio_ta, $ratio_at, $percent_ta_NonTr, $percent_ta_Tr) = (0, 0, 0, 0, 0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1580 if( ($ta_NonTr==0) || ($ta_Tr==0) ) { $ratio_ta = 0; $ratio_at = 0; $percent_ta_NonTr = 0; $percent_ta_Tr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1581 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1582 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1583 $ratio_ta = $ta_NonTr/$ta_Tr; $ratio_ta = sprintf("%.2f", $ratio_ta);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1584 $ratio_at = $ta_Tr/$ta_NonTr; $ratio_at = sprintf("%.2f", $ratio_at);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1585 $percent_ta_NonTr = ($ta_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_ta_Tr = ($ta_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1586 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1587 print SB "T>A\tNonTranscribed\t$ta_NonTr\n", "T>A\tTranscribed\t$ta_Tr\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1588 # T>A
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1589 $ws->write(33, 11, "T>A", $table_left); $ws->write(33, 12, $ratio_ta, $table_middleHeader); $ws->write(33, 13, $ta_NonTr, $format_A10); $ws->write(33, 14, $ta_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1590 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1591 if(($ta_NonTr+$ta_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1592 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1593 if($h_chi2{$k_file}{'T>A'}{'p-value'} eq "NA") { $ws->write_string(33, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1594 else { $ws->write_string(33, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1595 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1596 else { $ws->write_string(33, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1597 $ws->write(33, 16, $h_chi2{$k_file}{'T>A'}{'FDR'}, $format_A10); $ws->write(33, 17, $h_chi2{$k_file}{'T>A'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1598 # A>T
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1599 $ws->write(44, 11, "A>T", $table_left); $ws->write(44, 12, $ratio_at, $table_middleHeader); $ws->write(44, 13, $ta_Tr, $format_A10); $ws->write(44, 14, $ta_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1600 if(($ta_NonTr+$ta_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1601 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1602 if($h_chi2{$k_file}{'T>A'}{'p-value'} eq "NA") { $ws->write_string(44, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1603 else { $ws->write_string(44, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1604 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1605 else { $ws->write_string(44, 15, $h_chi2{$k_file}{'T>A'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1606 $ws->write(44, 16, $h_chi2{$k_file}{'T>A'}{'FDR'}, $format_A10); $ws->write(44, 17, $h_chi2{$k_file}{'T>A'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1607
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1608 #-----------------------------------------------------------------------------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1609 my ($ratio_tc, $ratio_ag, $percent_tc_NonTr, $percent_tc_Tr) = (0, 0, 0, 0, 0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1610 if( ($tc_NonTr==0) || ($tc_Tr==0) ) { $ratio_tc = 0; $ratio_ag = 0; $percent_tc_NonTr = 0; $percent_tc_Tr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1611 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1612 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1613 $ratio_tc = $tc_NonTr/$tc_Tr; $ratio_tc = sprintf("%.2f", $ratio_tc);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1614 $ratio_ag = $tc_Tr/$tc_NonTr; $ratio_ag = sprintf("%.2f", $ratio_ag);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1615 $percent_tc_NonTr = ($tc_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_tc_Tr = ($tc_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1616 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1617 print SB "T>C\tNonTranscribed\t$tc_NonTr\n", "T>C\tTranscribed\t$tc_Tr\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1618 # T>C
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1619 $ws->write(34, 11, "T>C", $table_left); $ws->write(34, 12, $ratio_tc, $table_middleHeader); $ws->write(34, 13, $tc_NonTr, $format_A10); $ws->write(34, 14, $tc_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1620 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1621 if(($tc_NonTr+$tc_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1622 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1623 if($h_chi2{$k_file}{'T>C'}{'p-value'} eq "NA") { $ws->write_string(34, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1624 else { $ws->write_string(34, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1625 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1626 else { $ws->write_string(34, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1627 $ws->write(34, 16, $h_chi2{$k_file}{'T>C'}{'FDR'}, $format_A10); $ws->write(34, 17, $h_chi2{$k_file}{'T>C'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1628 # A>G
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1629 $ws->write(45, 11, "A>G", $table_left); $ws->write(45, 12, $ratio_ag, $table_middleHeader); $ws->write(45, 13, $tc_Tr, $format_A10); $ws->write(45, 14, $tc_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1630 if(($tc_NonTr+$tc_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1631 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1632 if($h_chi2{$k_file}{'T>C'}{'p-value'} eq "NA") { $ws->write_string(45, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1633 else { $ws->write_string(45, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10ItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1634 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1635 else { $ws->write_string(45, 15, $h_chi2{$k_file}{'T>C'}{'p-value'}, $format_A10); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1636 $ws->write(45, 16, $h_chi2{$k_file}{'T>C'}{'FDR'}, $format_A10); $ws->write(45, 17, $h_chi2{$k_file}{'T>C'}{'ConfInt'}, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1637
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1638 #-----------------------------------------------------------------------------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1639 my ($ratio_tg, $ratio_ac, $percent_tg_NonTr, $percent_tg_Tr) = (0, 0, 0, 0, 0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1640 if( ($tg_NonTr==0) || ($tg_Tr==0) ) { $ratio_tg = 0; $ratio_ac = 0; $percent_tg_NonTr = 0; $percent_tg_Tr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1641 else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1642 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1643 $ratio_tg = $tg_NonTr/$tg_Tr; $ratio_tg = sprintf("%.2f", $ratio_tg);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1644 $ratio_ac = $tg_Tr/$tg_NonTr; $ratio_ac = sprintf("%.2f", $ratio_ac);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1645 $percent_tg_NonTr = ($tg_NonTr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100; $percent_tg_Tr = ($tg_Tr/$refH_file->{$k_file}{'TotalSBSGenomic'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1646 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1647 print SB "T>G\tNonTranscribed\t$tg_NonTr\n", "T>G\tTranscribed\t$tg_Tr\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1648 # T>G
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1649 $ws->write(35, 11, "T>G", $table_bottomleft); $ws->write(35, 12, $ratio_tg, $table_middleHeader2); $ws->write(35, 13, $tg_NonTr, $table_bottom); $ws->write(35, 14, $tg_Tr, $table_bottom);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1650 # Write in italic and red (= warning message) when the count of NonTr + Tr is lower than 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1651 if(($tg_NonTr+$tg_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1652 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1653 if($h_chi2{$k_file}{'T>G'}{'p-value'} eq "NA") { $ws->write_string(35, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1654 else { $ws->write_string(35, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottomItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1655 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1656 else { $ws->write_string(35, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1657 $ws->write(35, 16, $h_chi2{$k_file}{'T>G'}{'FDR'}, $table_bottom); $ws->write(35, 17, $h_chi2{$k_file}{'T>G'}{'ConfInt'}, $table_bottomRight);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1658 # A>C
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1659 $ws->write(46, 11, "A>C", $table_bottomleft); $ws->write(46, 12, $ratio_ac, $table_middleHeader2); $ws->write(46, 13, $tg_Tr, $table_bottom); $ws->write(46, 14, $tg_NonTr, $table_bottom);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1660 if(($tg_NonTr+$tg_Tr)< 10)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1661 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1662 if($h_chi2{$k_file}{'T>G'}{'p-value'} eq "NA") { $ws->write_string(46, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1663 else { $ws->write_string(46, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottomItalicRed); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1664 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1665 else { $ws->write_string(46, 15, $h_chi2{$k_file}{'T>G'}{'p-value'}, $table_bottom); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1666 $ws->write(46, 16, $h_chi2{$k_file}{'T>G'}{'FDR'}, $table_bottom); $ws->write(46, 17, $h_chi2{$k_file}{'T>G'}{'ConfInt'}, $table_bottomRight);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1667
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1668 ### Write a warning message when NonTr+Tr < 10
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1669 my $format_italic_red = $wb->add_format(font=>'Arial', size=>10, italic=>1, color => 'red');
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1670
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1671 if( (($ca_NonTr+$ca_Tr)< 10) || (($cg_NonTr+$cg_Tr)< 10) || (($ct_NonTr+$ct_Tr)< 10) || (($ta_NonTr+$ta_Tr)< 10) || (($tc_NonTr+$tc_Tr)< 10) || (($tg_NonTr+$tg_Tr)< 10) )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1672 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1673 $ws->write(36, 11, "Warning message: chi-squared approximation may be incorrect because the number of SBS", $format_italic_red);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1674 $ws->write(37, 11, "on Non-transcribed and transcribed strand is lower than 10", $format_italic_red);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1675 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1676
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1677 close SB;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1678
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1679
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
###########################################################################################################################################################
################################################################### Write SBS Per Chr #####################################################################
###########################################################################################################################################################
# Write the distribution of the SBS per chromosome of the current sample ($k_file):
#   - in the worksheet: the Pearson coefficients, one row per chromosome (name, size, total number
#     of SBS and count per mutation type) and the total number of genomic SBS under the last row;
#   - in the file <sample>-DistributionSNVS_per_chromosome.txt (tab-delimited, for the HTML report).

my $row_SBSPerChr = $row_SBSDistrBySegAndFunc_CG + 8; # Line 158

# Number of chromosomes already written (offset of the current row from $row_SBSPerChr)
my $line=0;

{
	# Mutation types, in the order of the columns of the table
	my @mutTypes_perChr = ("C:G>A:T", "C:G>G:C", "C:G>T:A", "T:A>A:T", "T:A>C:G", "T:A>G:C");
	my $sbsPerChr       = $refH_file->{$k_file}{'SBSPerChr'};
	my $row_Pearson     = $row_SBSDistrBySegAndFunc_CG+6;

	# For the HTML report
	my $sbsPerChr_file = "$folderFigure/Distribution_SBS_Per_Chromosomes/$k_file-DistributionSNVS_per_chromosome.txt";
	open(my $fh_sbsPerChr, ">", $sbsPerChr_file) or die "$!: $sbsPerChr_file\n";
	print {$fh_sbsPerChr} join("\t", "", "Pearson", $sbsPerChr->{'AllMutType'}, map { $sbsPerChr->{$_}{'Pearson'} } @mutTypes_perChr), "\n";
	print {$fh_sbsPerChr} join("\t", "Chr", "Size", "All SBS", @mutTypes_perChr), "\n";

	# Write the Pearson coefficient (the last column of the table uses the $formatT_right format)
	foreach my $j (0 .. $#mutTypes_perChr)
	{
		my $f_cell = ($j == $#mutTypes_perChr) ? $formatT_right : $format_A10;
		$ws->write($row_Pearson, $colStart_SBSdistrBySeg+3+$j, $sbsPerChr->{$mutTypes_perChr[$j]}{'Pearson'}, $f_cell);
	}

	# Write the chromosome number and their sizes / Write the total of SBS per chromosome
	foreach my $chromosome (sort keys %chromosomes)
	{
		my $row          = $row_SBSPerChr+($line);
		my $totalPerChr  = $sbsPerChr->{'TotalPerChr'}{$chromosome}{'chr'};
		my @countPerType = map { $sbsPerChr->{$_}{'CHR'}{$chromosome}{'chr'} } @mutTypes_perChr;

		$ws->write($row, $colStart_SBSdistrBySeg, $chromosome, $formatT_left);
		$ws->write($row, $colStart_SBSdistrBySeg+1, $chromosomes{$chromosome}, $format_A10);
		$ws->write($row, $colStart_SBSdistrBySeg+2, $totalPerChr, $format_A10);

		# Write the count per mutation
		foreach my $j (0 .. $#mutTypes_perChr)
		{
			my $f_cell = ($j == $#mutTypes_perChr) ? $formatT_right : $format_A10;
			$ws->write($row, $colStart_SBSdistrBySeg+3+$j, $countPerType[$j], $f_cell);
		}

		# For the HTML report
		print {$fh_sbsPerChr} join("\t", $chromosome, $chromosomes{$chromosome}, $totalPerChr, @countPerType), "\n";
		$line++;
	}

	# Write the Pearson coefficient for the total number of SBS
	$ws->write($row_Pearson, $colStart_SBSdistrBySeg+2, $sbsPerChr->{'AllMutType'}, $format_A10);
	# Total number of genomic SBS, on the row following the last chromosome
	$ws->write($row_SBSPerChr+scalar(keys %chromosomes), $colStart_SBSdistrBySeg+2, $refH_file->{$k_file}{'TotalSBSGenomic'}, $formatT_bottomHeader);

	print {$fh_sbsPerChr} "\t\t$refH_file->{$k_file}{'TotalSBSGenomic'}\n";
	# Check the close: an error on the buffered writes is only reported at this point
	close($fh_sbsPerChr) or die "$!: $sbsPerChr_file\n";
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1727
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1728
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1729
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1730 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1731 ####################################################################### Impact on protein #################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1732 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Table 2 of the worksheet: one row per functional-impact category found in
# $refH_file->{$k_file}{'ImpactSBS'}, with its share of all mutations
# ('TotalMutGenomic') and its raw count. The same numbers are mirrored into
# two tab-separated files under $folderFigure:
#   - <sample>-DistributionExoFunc.txt         (one row per impact category)
#   - <sample>-OverallMutationDistribution.txt (deletion / insertion / SBS)
$ws->write(29, 6, "Table 2. Frequency and counts of functional impact", $format_A10Boldleft);
$ws->set_column(6, 6, 13);
$ws->set_column(10, 10, 15);
$ws->write(30, 6, "RefSeq gene", $table_topleft);
$ws->write(30, 7, "", $table_top);
$ws->write(30, 8, "Percent", $table_top);
$ws->write(30, 9, "Count", $table_topRight);

# First worksheet row used for the per-category lines
my $lImpactSBS = 31;

my $file_impactSBS = "$folderFigure/Impact_protein_sequence/$k_file/$k_file-DistributionExoFunc.txt";
open(IMPACTSBS, ">", $file_impactSBS) or die "$!: $file_impactSBS\n";
# NOTE(review): the header announces Count before Percent, but every row below
# is written as label, percent, count. Left untouched because the consumer of
# this file is not visible from here - confirm which order it expects.
print IMPACTSBS "AA_Change\tCount\tPercent\n";

# Pie chart with the distribution of SBS vs Indel
my $file_sbsIndel = "$folderFigure/Overall_mutation_distribution/$k_file/$k_file-OverallMutationDistribution.txt";
open(SBSINDEL, ">", $file_sbsIndel) or die "$!: $file_sbsIndel\n";
print SBSINDEL "Variant_type\tCount\tPercent\n";
my ($deletion, $insertion) = (0, 0);

# Share of all mutations, formatted with two decimals. A sample without any
# mutation yields 0.00 instead of dying on a division by zero.
my $totalMut_impact   = $refH_file->{$k_file}{'TotalMutGenomic'};
my $percentOfTotalMut = sub {
	my ($n) = @_;
	return sprintf("%.2f", $totalMut_impact ? ($n / $totalMut_impact) * 100 : 0);
};

# "keys" needs an explicit dereference: passing a hash reference directly was
# an experimental feature that has been removed from Perl (fatal since 5.24).
foreach my $k_exoFunc (sort keys %{ $refH_file->{$k_file}{'ImpactSBS'} })
{
	my $count   = $refH_file->{$k_file}{'ImpactSBS'}{$k_exoFunc};
	my $percent = $percentOfTotalMut->($count);

	# Label written to the text file: "NA" is spelled out, spaces become underscores
	my $label;
	if($k_exoFunc eq "NA") { $label = "Not_Applicable"; }
	else                   { ($label = $k_exoFunc) =~ s/ /_/g; }
	print IMPACTSBS "$label\t$percent\t$count\n";

	$ws->write($lImpactSBS, 6, $k_exoFunc, $table_left2);
	$ws->write($lImpactSBS, 8, $percent, $format_A10);
	$ws->write($lImpactSBS, 9, $count, $table_right);
	$lImpactSBS++;

	# Pie chart with the distribution of SBS vs Indel: a category naming a
	# deletion is counted as deletion first, otherwise as insertion if it names one
	if($k_exoFunc =~ /deletion/i)     { $deletion  += $count; }
	elsif($k_exoFunc =~ /insertion/i) { $insertion += $count; }
}
# Buffered write errors only surface when the handle is closed
close(IMPACTSBS) or die "$!: $file_impactSBS\n";

# Bottom line of the table: total number of mutations
$ws->write($lImpactSBS, 9, $refH_file->{$k_file}{'TotalMutGenomic'}, $table_bottomrightHeader);
$ws->write($lImpactSBS, 6, "", $table_bottomleft);
$ws->write($lImpactSBS, 7, "", $table_bottom);
$ws->write($lImpactSBS, 8, "", $table_bottom);

# Pie chart with the distribution of SBS vs Indel
my $percentSBSIndel = $percentOfTotalMut->($deletion);
print SBSINDEL "Deletion\t$deletion\t$percentSBSIndel\n";
$percentSBSIndel = $percentOfTotalMut->($insertion);
print SBSINDEL "Insertion\t$insertion\t$percentSBSIndel\n";
$percentSBSIndel = $percentOfTotalMut->($refH_file->{$k_file}{TotalSBSGenomic});
print SBSINDEL "SBS\t$refH_file->{$k_file}{TotalSBSGenomic}\t$percentSBSIndel\n";
close(SBSINDEL) or die "$!: $file_sbsIndel\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1773
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1774 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1775 ######################################################## SEQUENCE CONTEXT ON GENOMIC STRAND ###############################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1776 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Trinucleotide sequence context on the genomic strand (6 mutation types).
# For every 3-character context without "N" found in
# $refH_file->{$k_file}{'SeqContextG'} this section:
#   - writes the count and the percentage of each mutation type to the worksheet,
#   - writes the same values to the heatmap files (count and percent) and to the
#     mutation-spectra file (alteration / context / percent),
#   - collects counts and percentages in %h_inputNMF (except for "Pool_Data"),
#   - accumulates per-type totals, then checks them against 'TotalSBSGenomic'.
my $row_SeqContext6 = 4;
# Count the total of mutations for 6 mutation types on genomic strand
my ($c_ca6_g, $c_cg6_g, $c_ct6_g, $c_ta6_g, $c_tc6_g, $c_tg6_g) = (0,0,0, 0,0,0);
my ($p_ca6_g, $p_cg6_g, $p_ct6_g, $p_ta6_g, $p_tc6_g, $p_tg6_g) = (0,0,0, 0,0,0);
my $maxValue = 0; # For the heatmap

# For checking if the total number of SBS is correct
my $total_SBS_genomic = 0;

# The six mutation types in worksheet column order. For each type: the column
# offset of its count (the percentage sits 10 columns further right) and
# references to the running totals declared above, so those variables keep
# their names and values for the code that follows.
my @sbsTypes6_g = ("C>A", "C>G", "C>T", "T>A", "T>C", "T>G");
my %slotFor6_g  = (
	"C>A" => { 'offset' => 4, 'count' => \$c_ca6_g, 'percent' => \$p_ca6_g },
	"C>G" => { 'offset' => 5, 'count' => \$c_cg6_g, 'percent' => \$p_cg6_g },
	"C>T" => { 'offset' => 6, 'count' => \$c_ct6_g, 'percent' => \$p_ct6_g },
	"T>A" => { 'offset' => 7, 'count' => \$c_ta6_g, 'percent' => \$p_ta6_g },
	"T>C" => { 'offset' => 8, 'count' => \$c_tc6_g, 'percent' => \$p_tc6_g },
	"T>G" => { 'offset' => 9, 'count' => \$c_tg6_g, 'percent' => \$p_tg6_g },
);

my $file_heatmapCount_g   = "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapCount-Genomic.txt";
my $file_heatmapPercent_g = "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-HeatmapPercent-Genomic.txt";
my $file_barplotNMF_g     = "$folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-MutationSpectraPercent-Genomic.txt";

open(HEATMAPCGENOMIC, ">", $file_heatmapCount_g) or die "$!: $file_heatmapCount_g\n";
print HEATMAPCGENOMIC "\tC>A\tC>G\tC>T\tT>A\tT>C\tT>G\n";
open(HEATMAPPGENOMIC, ">", $file_heatmapPercent_g) or die "$!: $file_heatmapPercent_g\n";
print HEATMAPPGENOMIC "\tC>A\tC>G\tC>T\tT>A\tT>C\tT>G\n";

## Bar plot NMF like
open(BARPLOTNMFLIKE, ">", $file_barplotNMF_g) or die "$!: $file_barplotNMF_g\n";
print BARPLOTNMFLIKE "alteration\tcontext\tvalue\n";

# "keys" needs an explicit dereference: passing a hash reference directly was
# an experimental feature that has been removed from Perl (fatal since 5.24).
foreach my $k_context (sort keys %{ $refH_file->{$k_file}{'SeqContextG'} })
{
	# Only plain trinucleotide contexts are reported
	if( ($k_context =~ /N/) || (length($k_context) != 3) ) { next; }

	# Write the context: 6 mut type on genomic strand (count table and percent table)
	$ws->write($row_SeqContext6, $colStart_matrixSeqContext+3,  $k_context, $format_A10);
	$ws->write($row_SeqContext6, $colStart_matrixSeqContext+13, $k_context, $format_A10);

	my $countFor_g = $refH_file->{$k_file}{'SeqContextG'}{$k_context};

	foreach my $k_mutation (sort keys %{ $countFor_g })
	{
		my $count = $countFor_g->{$k_mutation};

		# For checking the total number of SBS
		$total_SBS_genomic += $count;

		# Percentage of all genomic SBS, two decimals; a zero count stays a plain 0
		my $percent = 0;
		if($count != 0)
		{
			$percent = sprintf("%.2f", ($count / $refH_file->{$k_file}{'TotalSBSGenomic'}) * 100);
		}

		# For representing the sequence context with a bar plot (NMF like style)
		print BARPLOTNMFLIKE $k_mutation,"\t", $k_context,"\t", $percent,"\n";

		# Anything other than the six known mutation types only contributes to
		# the total and to the bar plot file
		my $slot = $slotFor6_g{$k_mutation};
		next unless defined $slot;

		# A heatmap line starts with the context (emitted together with the
		# first type, C>A) and is terminated by the last type (T>G)
		my $rowLabel = ($k_mutation eq "C>A") ? "$k_context\t" : "";
		my $cellEnd  = ($k_mutation eq "T>G") ? "\n" : "\t";

		### COUNT
		$ws->write($row_SeqContext6, $colStart_matrixSeqContext + $slot->{'offset'}, $count, $format_A10);
		print HEATMAPCGENOMIC "$rowLabel$count$cellEnd";

		### PERCENTAGE
		$ws->write($row_SeqContext6, $colStart_matrixSeqContext + $slot->{'offset'} + 10, $percent, $format_A10);
		print HEATMAPPGENOMIC "$rowLabel$percent$cellEnd";

		# For NMF input (the pooled pseudo-sample is left out)
		if($k_file ne "Pool_Data")
		{
			push(@{$h_inputNMF{'Count'}{$k_context}{$k_mutation}}, $count);
			push(@{$h_inputNMF{'Percent'}{$k_context}{$k_mutation}}, $percent);
		}

		# For the heatmap
		if($percent >= $maxValue) { $maxValue = $percent; }

		# For the total amount per mutation types
		${ $slot->{'count'} }   += $count;
		${ $slot->{'percent'} } += $percent;
	}
	$row_SeqContext6++;
}
# Buffered write errors only surface when the handle is closed
close(HEATMAPCGENOMIC) or die "$!: $file_heatmapCount_g\n";
close(HEATMAPPGENOMIC) or die "$!: $file_heatmapPercent_g\n";
close(BARPLOTNMFLIKE)  or die "$!: $file_barplotNMF_g\n";


# Write the total number of SBS per mutation type: COUNT
foreach my $type (@sbsTypes6_g)
{
	$ws->write($row_SeqContext6, $colStart_matrixSeqContext + $slotFor6_g{$type}{'offset'}, ${ $slotFor6_g{$type}{'count'} }, $formatT_bottomHeader2);
}
if($total_SBS_genomic != $refH_file->{$k_file}{'TotalSBSGenomic'})
{
	print STDERR "Error in the calculation of the total number of SBS on the genomic strand!!!!\nFrom hash table $refH_file->{$k_file}{'TotalSBSGenomic'}\tVS\t$total_SBS_genomic\n";
	exit 1; # a failed consistency check must not report success to the caller
}

# Write the total number of SBS per mutation type: PERCENT
foreach my $type (@sbsTypes6_g)
{
	$ws->write($row_SeqContext6, $colStart_matrixSeqContext + $slotFor6_g{$type}{'offset'} + 10, ${ $slotFor6_g{$type}{'percent'} }, $formatT_bottomHeader2);
}
# The per-cell percentages are already rounded, so the sum is compared after
# rounding to the nearest integer
my $totalPercent_genomic = $p_ca6_g + $p_cg6_g + $p_ct6_g + $p_ta6_g + $p_tc6_g + $p_tg6_g;
$totalPercent_genomic = sprintf("%.0f", $totalPercent_genomic);
if($totalPercent_genomic != 100)
{
	print STDERR "Error in the calculation of the total percentages on the genomic strand!!!\nThe total is equal to=\t$totalPercent_genomic\n";
	exit 1; # a failed consistency check must not report success to the caller
}


#----------------------------------------------------------------------------------------------------------------------------------------------------------------#
# For the input matrix for NMF
if($k_file ne "Pool_Data") { push(@{$h_inputNMF{'Sample'}}, $k_file); }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1980
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1981
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1982 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1983 ######################################################## SEQUENCE CONTEXT ON CODING STRAND ###############################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1984 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1985 my $row_SeqContext12 = $rowStart_SBSdistrBySeg+6; my $row_SeqContext12Percent = $rowStart_SBSdistrBySeg+27;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1986 # Reset the total count and percent calculated for the strand bias
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1987 ($ca_NonTr, $ca_Tr, $cg_NonTr, $cg_Tr, $ct_NonTr, $ct_Tr, $ta_NonTr, $ta_Tr, $tc_NonTr, $tc_Tr, $tg_NonTr, $tg_Tr) = (0,0,0, 0,0,0, 0,0,0, 0,0,0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1988 ($percent_ca_NonTr, $percent_ca_Tr, $percent_cg_NonTr, $percent_cg_Tr, $percent_ct_NonTr, $percent_ct_Tr, $percent_ta_NonTr, $percent_ta_Tr, $percent_tc_NonTr, $percent_tc_Tr, $percent_tg_NonTr, $percent_tg_Tr) = (0,0,0, 0,0,0, 0,0,0, 0,0,0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1989
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1990 # For checking if the total number of SBS is correct
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1991 my $total_SBS_coding = 0;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1992
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1993 open(COUNT, ">", "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignatureCount.txt") or die "$!: $folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignatureCount.txt\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1994 print COUNT "MutationTypeContext\tStrand\tValue\tSample\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1995 open(PERCENT, ">", "$folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignaturePercent.txt") or die "$!: $folderFigure/Stranded_Analysis/$k_file/$k_file-StrandedSignaturePercent.txt\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1996 print PERCENT "MutationTypeContext\tStrand\tValue\tSample\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1997
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1998 foreach my $k_context (sort keys $refH_file->{$k_file}{'SeqContextC'})
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1999 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2000 if( ($k_context =~ /N/) || (length($k_context) != 3) ) { next; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2001
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2002 # Write the context: 12 mut type on coding strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2003 $ws->write($row_SeqContext12 , $colStart_matrixSeqContext, $k_context, $formatT_left); $ws->write($row_SeqContext12Percent , $colStart_matrixSeqContext, $k_context, $formatT_left);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2004
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2005 foreach my $k_mutation (sort keys $refH_file->{$k_file}{'SeqContextC'}{$k_context})
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2006 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2007 # Percent: 12 mut type on coding strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2008 my ($percent_NonTr, $percent_Tr) = (0, 0);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2009 if($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'} == 0) { $percent_NonTr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2010 else { $percent_NonTr = ( $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'} / $refH_file->{$k_file}{'TotalSBSCoding'} ) * 100 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2011 if($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'} == 0) { $percent_Tr = 0; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2012 else { $percent_Tr = ( $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'} / $refH_file->{$k_file}{'TotalSBSCoding'} ) * 100 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2013
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2014
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2015 # Calculate the total number for each mutation type
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2016 if($k_mutation eq "C>A")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2017 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2018 $ca_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2019 $ca_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2020
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2021 # COUNT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2022 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+1, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2023 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2024 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+2, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2025 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2026
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2027 ## PERCENT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2028 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2029 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_ca_NonTr += $percent_NonTr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2030 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2031 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_ca_Tr += $percent_Tr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2032 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2033 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2034
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2035 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+1, $percent_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2036 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+2, $percent_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2037 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2038 if($k_mutation eq "C>G")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2039 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2040 $cg_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2041 $cg_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2042
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2043 # COUNT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2044 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+3, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2045 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2046 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+4, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2047 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2048
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2049 ## PERCENT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2050 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2051 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_cg_NonTr += $percent_NonTr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2052 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2053 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_cg_Tr += $percent_Tr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2054 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2055 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2056
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2057 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+3, $percent_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2058 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+4, $percent_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2059 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2060 if($k_mutation eq "C>T")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2061 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2062 $ct_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2063 $ct_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2064
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2065 # COUNT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2066 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+5, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2067 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2068 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+6, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2069 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2070
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2071 ## PERCENT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2072 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2073 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_ct_NonTr += $percent_NonTr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2074 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2075 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_ct_Tr += $percent_Tr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2076 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2077 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2078
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2079 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+5, $percent_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2080 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+6, $percent_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2081 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2082 if($k_mutation eq "T>A")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2083 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2084 $ta_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2085 $ta_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2086
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2087 # COUNT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2088 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+7, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2089 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2090 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+8, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2091 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2092
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2093 ## PERCENT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2094 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2095 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_ta_NonTr += $percent_NonTr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2096 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2097 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_ta_Tr += $percent_Tr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2098 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2099 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2100
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2101 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+7, $percent_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2102 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+8, $percent_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2103 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2104 if($k_mutation eq "T>C")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2105 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2106 $tc_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2107 $tc_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2108
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2109 # COUNT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2110 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+9, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2111 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2112 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+10, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2113 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2114
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2115 ## PERCENT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2116 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2117 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_tc_NonTr += $percent_NonTr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2118 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2119 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_tc_Tr += $percent_Tr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2120 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2121 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2122
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2123 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+9, $percent_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2124 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+10, $percent_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2125 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2126 if($k_mutation eq "T>G")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2127 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2128 $tg_NonTr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2129 $tg_Tr += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2130
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2131 # COUNT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2132 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+11, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2133 print COUNT "$k_mutation:$k_context\tNonTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2134 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+12, $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2135 print COUNT "$k_mutation:$k_context\tTranscribed\t$refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2136
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2137 ## PERCENT : 12 mutation type (stranded bar graph)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2138 my $percent_NonTr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2139 $percent_NonTr = sprintf("%.2f", $percent_NonTr); $percent_tg_NonTr += $percent_NonTr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2140 my $percent_Tr = ($refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'}/$refH_file->{$k_file}{'TotalSBSCoding'})*100;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2141 $percent_Tr = sprintf("%.2f", $percent_Tr); $percent_tg_Tr += $percent_Tr;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2142 print PERCENT "$k_mutation:$k_context\tNonTranscribed\t$percent_NonTr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2143 print PERCENT "$k_mutation:$k_context\tTranscribed\t$percent_Tr\t$k_file\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2144
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2145 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+11, $percent_NonTr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2146 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+12, $percent_Tr, $format_A10);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2147 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2148
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2149 # For checking if the total number of SBS is correct
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2150 $total_SBS_coding += $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'NonTr'} + $refH_file->{$k_file}{'SeqContextC'}{$k_context}{$k_mutation}{'Tr'};
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2151 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2152 $row_SeqContext12++; $row_SeqContext12Percent++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2153 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2154 close COUNT; close PERCENT;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2155
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2156 ## Write the total of each mutation type: 12 mut type on coding strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2157 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+1, $ca_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+2, $ca_Tr, $formatT_bottomHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2158 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+3, $cg_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+4, $cg_Tr, $formatT_bottomHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2159 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+5, $ct_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+6, $ct_Tr, $formatT_bottomHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2160 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+7, $ta_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+8, $ta_Tr, $formatT_bottomHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2161 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+9, $tc_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+10, $tc_Tr, $formatT_bottomHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2162 $ws->write($row_SeqContext12, $colStart_matrixSeqContext+11, $tg_NonTr, $formatT_bottomHeader2); $ws->write($row_SeqContext12, $colStart_matrixSeqContext+12, $tg_Tr, $formatT_bottomHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2163 # Write the total percentages of each mutation type: 12 mut type on coding strand
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2164 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+1, $percent_ca_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+2, $percent_ca_Tr, $formatT_bottomHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2165 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+3, $percent_cg_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+4, $percent_cg_Tr, $formatT_bottomHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2166 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+5, $percent_ct_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+6, $percent_ct_Tr, $formatT_bottomHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2167 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+7, $percent_ta_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+8, $percent_ta_Tr, $formatT_bottomHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2168 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+9, $percent_tc_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+10, $percent_tc_Tr, $formatT_bottomHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2169 $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+11, $percent_tg_NonTr, $formatT_bottomHeader); $ws->write($row_SeqContext12Percent, $colStart_matrixSeqContext+12, $percent_tg_Tr, $formatT_bottomHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2170
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2171 if($total_SBS_coding == $refH_file->{$k_file}{'TotalSBSCoding'}) { $ws->write($row_SeqContext12, $colStart_matrixSeqContext+13, $refH_file->{$k_file}{'TotalSBSCoding'}, $formatT_bottomHeader2) }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2172 else { print STDERR "Error in the calculation of the total number of SBS on the coding strand!!!!\nFrom hash table $refH_file->{$k_file}{'TotalSBSCoding'}\tVS\t$total_SBS_coding\n"; exit; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2173
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2174
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2175 my $totalP_SBS_coding = $percent_ca_NonTr + $percent_ca_Tr + $percent_cg_NonTr + $percent_cg_Tr + $percent_ct_NonTr + $percent_ct_Tr + $percent_ta_NonTr + $percent_ta_Tr + $percent_tc_NonTr + $percent_tc_Tr + $percent_tg_NonTr + $percent_tg_Tr; $totalP_SBS_coding = sprintf("%.0f", $totalP_SBS_coding);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2176 if($totalP_SBS_coding != 100) { print STDERR "The percentages for the trinucleotide sequence context on the coding strand for 12 mutation types is not equal to 100!!!\n$totalP_SBS_coding\n"; exit; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2177
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2178
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2179 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2180 ################################################################### GRAPHS & TABLES #######################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2181 ###########################################################################################################################################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2182 Create_Graph($folderFigure, $k_file, $maxValue);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2183
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2184 ## Distribution of SBS into the Excel report (Figure 1 + Table 1)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2185 $ws->write(0, 0, "Graph 1. SBS distribution", $formatT_graphTitle); $ws->set_row(0, 18);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2186 $ws->insert_image(1, 0, "$folder_temp/$k_file-SBS_distribution-Report.png", 0, 0, .2, .2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2187 $ws->write(29, 0, "Table 1. Frequency and counts of all SBS", $format_A10Boldleft);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2188 $ws->write(30, 0, "Mutation type", $table_topleft); $ws->write(30, 1, "Percentage", $table_top); $ws->write(30, 2, "Count", $table_topRight);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2189 $ws->write(31, 0, "C:G>A:T", $table_left); $ws->write(31, 1, $percent_ca, $format_A10); $ws->write(31, 2, $ca_genomique, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2190 $ws->write(32, 0, "C:G>G:C", $table_left); $ws->write(32, 1, $percent_cg, $format_A10); $ws->write(32, 2, $cg_genomique, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2191 $ws->write(33, 0, "C:G>T:A", $table_left); $ws->write(33, 1, $percent_ct, $format_A10); $ws->write(33, 2, $ct_genomique, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2192 $ws->write(34, 0, "T:A>A:T", $table_left); $ws->write(34, 1, $percent_ta, $format_A10); $ws->write(34, 2, $ta_genomique, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2193 $ws->write(35, 0, "T:A>C:G", $table_left); $ws->write(35, 1, $percent_tc, $format_A10); $ws->write(35, 2, $tc_genomique, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2194 $ws->write(36, 0, "T:A>G:C", $table_left); $ws->write(36, 1, $percent_tg, $format_A10); $ws->write(36, 2, $tg_genomique, $table_right);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2195 $ws->write(37, 0, "", $table_bottomleft); $ws->write(37, 1, "", $table_bottom); $ws->write(37, 2, $refH_file->{$k_file}{'TotalSBSGenomic'}, $table_bottomrightHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2196
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2197 ## Impact of the SBS on the protein
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2198 $ws->write(0, 6, "Graph 2. Impact on protein sequence", $formatT_graphTitle);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2199 $ws->insert_image(1, 6, "$folder_temp/$k_file-DistributionExoFunc-Report.png", 0, 0, .2, .2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2200
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2201 ## Strand Bias
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2202 $ws->write(0, 11, "Graph 3. Stranded distribution of SBS", $formatT_graphTitle);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2203 $ws->insert_image(1, 11, "$folder_temp/$k_file-StrandBias-Report.png", 0, 0, .2, .2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2204
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2205 ## Stranded signature (Scale the inserted image: width x 0.7, height x 0.8)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2206 $ws->insert_image($rowStart_SBSdistrBySeg+3, $colStart_matrixSeqContext+15, "$folder_temp/$k_file-StrandedSignatureCount-Report.png", 0, 0, .16, .16);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2207 $ws->insert_image($rowStart_SBSdistrBySeg+24, $colStart_matrixSeqContext+15, "$folder_temp/$k_file-StrandedSignaturePercent-Report.png", 0, 0, .16, .16);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2208
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2209
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2210 # Heatmap for the sequence context on the genomic strand (6 mutation types)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2211 $ws->insert_image(4, $colStart_matrixSeqContext, "$folder_temp/$k_file-HeatmapCount-Genomic-Report.png");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2212 $ws->insert_image(4, $colStart_matrixSeqContext+10, "$folder_temp/$k_file-HeatmapPercent-Genomic-Report.png");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2213
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2214
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2215 ## Bar plot for representing the sequence context (NMF like style)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2216 `Rscript $pathRScriptMutSpectrum $folderFigure/Trinucleotide_Sequence_Context/$k_file/$k_file-MutationSpectraPercent-Genomic.txt $k_file $folderFigure/Trinucleotide_Sequence_Context/$k_file $folder_temp $c_ca6_g $c_cg6_g $c_ct6_g $c_ta6_g $c_tc6_g $c_tg6_g 2>&1`;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2217
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2218 # Bar plot for the sequence context on the genomic strand (6 mutation types)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2219 $ws->insert_image(27, $colStart_matrixSeqContext+3, "$folder_temp/$k_file-MutationSpectraPercent-Genomic-Report.png");
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2220
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2221 # Next sample
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2222 $row_SumSheet++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2223 } # End $k_file
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2224
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2225 #----------------------------------------------------------------------------------------------------------------------------------------------------------------#
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2226 # Write the input matrix for NMF
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2227 open(OUTINPUTNMFC, ">", "$folderNMF/Input_NMF_Count.txt") or die "$!: $folderNMF/Input_NMF_Count.txt\n"; # with the count
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2228 open(OUTINPUTNMFP, ">", "$folderNMF/Input_NMF_Frequency.txt") or die "$!: $folderNMF/Input_NMF_Frequency.txt\n"; # With the frequency un-normalized
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2229
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2230 foreach my $k_sample (@{$h_inputNMF{'Sample'}}) { print OUTINPUTNMFC "\t$k_sample"; print OUTINPUTNMFP "\t$k_sample"; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2231 print OUTINPUTNMFC "\n"; print OUTINPUTNMFP "\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2232
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2233 my $row_inputNMF = 1;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2234 foreach my $k_context (sort keys $h_inputNMF{'Count'})
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2235 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2236 $k_context =~ /(\w)_(\w)/; my ($base5, $base3) = ($1, $2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2237 foreach my $k_mutation (sort keys $h_inputNMF{'Count'}{$k_context})
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2238 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2239 my ($col_inputNMF_Count, $col_inputNMF_Percent) = (1, 1);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2240 my $contextNMF = $base5."[$k_mutation]".$base3;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2241 $ws_inputNMF_count->write($row_inputNMF, 0, $contextNMF); $ws_inputNMF_percent->write($row_inputNMF, 0, $contextNMF);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2242 print OUTINPUTNMFC $contextNMF,"\t"; print OUTINPUTNMFP $contextNMF,"\t";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2243
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2244 foreach (@{$h_inputNMF{'Count'}{$k_context}{$k_mutation}}) { print OUTINPUTNMFC "$_\t"; } print OUTINPUTNMFC "\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2245 foreach (@{$h_inputNMF{'Percent'}{$k_context}{$k_mutation}}) { print OUTINPUTNMFP "$_\t"; } print OUTINPUTNMFP "\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2246
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2247 foreach (@{$h_inputNMF{'Count'}{$k_context}{$k_mutation}})
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2248 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2249 # print "\t$k_context\t$k_mutation\t";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2250 # print "\t$row_inputNMF\t$col_inputNMF_Count\t$_\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2251 $ws_inputNMF_count->write($row_inputNMF, $col_inputNMF_Count, $_); $col_inputNMF_Count++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2252 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2253 foreach (@{$h_inputNMF{'Percent'}{$k_context}{$k_mutation}}) { $ws_inputNMF_percent->write($row_inputNMF, $col_inputNMF_Percent, $_); $col_inputNMF_Percent++; }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2254 $row_inputNMF++;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2255 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2256 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2257 close OUTINPUTNMFP; close OUTINPUTNMFC;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2258
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2259
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2260 # Close the workbook
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2261 $wb->close();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2262 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Calculate the chi2 for the strand bias
#
# For every sample and every one of the 6 mutation types, sum the counts found
# on the non-transcribed (NonTr) and transcribed (Tr) strands over all the
# functional regions, write them into "Input_chi2_strandBias.txt" and let R
# test (prop.test, p=0.5) whether the two strands are equally mutated.
# The R result is written into "Output_chi2_strandBias.txt".
#
# Arguments:
#   $refH_file  : hash reference read as
#                 {sample}{'6mutType'}{functional region}{mutation type}{'NonTr'|'Tr'}
#   $folderChi2 : folder where the input and the output files are written
#
# Side effects:
#   - prints a message on STDERR and exits when $refH_file has no key
#   - dies when the input file can not be written or when R can not be started
sub CalculateChi2
{
    my ($refH_file, $folderChi2) = @_;

    # No value for the chi2
    # NOTE(review): a bare "exit" ends the script with the status 0 although the message is written on STDERR - kept as is, confirm it is intended
    # "keys" is applied on an explicit dereference: "keys \$hashref" was an experimental feature removed in Perl 5.24
    if(scalar(keys %{$refH_file}) == 0) { print STDERR "No value for calculating the chi2 for the strand bias\n"; exit; }

    # Label written in the chi2 input file for each of the 6 mutation types (any other key is ignored)
    my %h_labelChi2 = (
        "C:G>A:T" => 'C>A',
        "C:G>G:C" => 'C>G',
        "C:G>T:A" => 'C>T',
        "T:A>A:T" => 'T>A',
        "T:A>C:G" => 'T>C',
        "T:A>G:C" => 'T>G',
    );

    # Strand bias for one mutation type for all the samples
    my %h_tempchi2 = ();

    foreach my $k_file (sort keys %{ $refH_file })
    {
        foreach my $k_func (sort keys %{ $refH_file->{$k_file}{'6mutType'} })
        {
            foreach my $k_mutation (sort keys %{ $refH_file->{$k_file}{'6mutType'}{$k_func} })
            {
                my $labelChi2 = $h_labelChi2{$k_mutation};
                next unless defined $labelChi2;

                # Sum the counts of all the functional regions
                $h_tempchi2{$labelChi2}{$k_file}{'NonTr'} += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'NonTr'};
                $h_tempchi2{$labelChi2}{$k_file}{'Tr'}    += $refH_file->{$k_file}{'6mutType'}{$k_func}{$k_mutation}{'Tr'};
            }
        }
    }

    # Create the input file for the chi2 (one line per sample and per mutation type)
    my $inputChi2 = "$folderChi2/Input_chi2_strandBias.txt";
    open(my $fh_chi2, ">", $inputChi2) or die "$!: $folderChi2/Input_chi2_strandBias.txt\n";
    print {$fh_chi2} "SampleName\tNonTr\tTr\tAlteration\n";

    foreach my $k_mutation (sort keys %h_tempchi2)
    {
        foreach my $k_file (sort keys %{ $h_tempchi2{$k_mutation} })
        {
            print {$fh_chi2} "$k_file\t$h_tempchi2{$k_mutation}{$k_file}{'NonTr'}\t$h_tempchi2{$k_mutation}{$k_file}{'Tr'}\t$k_mutation\n";
        }
    }
    # The errors of a buffered write are only reported when the file is closed
    close($fh_chi2) or die "$!: $folderChi2/Input_chi2_strandBias.txt\n";


    # Open the connection with R
    my $R = Statistics::R->new() or die "Impossible to create a communication bridge with R\n";

    $R->send(qq`## Load the data. There is one column with the mutation type and the sample name but it's just for knowing what is corresponding to each line. The two columns with the number of variant per strand would be sufficient.
strBias<-read.delim("$folderChi2/Input_chi2_strandBias.txt", dec=".");`);

    # In the R script below confInt, proportion and sampleSize hold one value per line of the input file (index i)
    # whereas the p-values are stored per group (index k for the samples and j for Pool_Data).
    # When there is no mutation on both strands the three per-line vectors are therefore filled with NA at the index i
    # for the samples AND for Pool_Data: using k here would overwrite the value of another line as soon as Pool_Data is present.
    $R->send(q`# Chi2
pValChi2 <- c() # First I create an empty vector and then I apply a for on the data load
pValChi2_round <- c() # Empty vector with the rounded p-values
confInt <- c() # Empty vector for the confident interval
proportion <- c() # Empty vector for the proportion of NonTr compared to the (NonTr+Tr)
sampleSize <- c() # Empty vector for the count of samples in NonTr and Tr
# For Pool_Data save the p-values in a different vector for not having them for the FDR
pValChi2_PoolData <- c()
pValChi2_PoolData_Round <- c()

j = 1 # Timer for pValChi2_PoolData vector
k = 1 # Timer for pValChi2

for(i in 1:nrow(strBias))
{
  if(! sum(strBias[i,2:3]) == 0)
  {
    # For Pool_Data
    if(strBias[i,1] == "Pool_Data")
    {
      pValChi2_PoolData[j] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
      j <- j+1
    }
    # For the other sample(s)
    else
    {
      # Calculate the p-value
      pValChi2[k] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value
      k <- k+1
    }

    # Calculate the confidence interval
    temp <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$conf.int
    confInt[i] <- paste0("[", round(temp[1],2), "-", round(temp[2],2), "]") # Same as paste(sep="")

    # Save the proportion
    proportion[i] <- strBias[i,2] / sum(strBias[i,2:3])

    # Save the sample size (count on NonTr and Tr)
    sampleSize[i] <- paste(strBias[i,2], strBias[i,3], sep="-")
  } else
  {
    # Not enough effective for the test
    confInt[i] <- NA
    proportion[i] <- NA
    sampleSize[i] <- NA

    if(strBias[i,1] == "Pool_Data")
    {
      pValChi2_PoolData[j] <- NA
      pValChi2_PoolData_Round[j] <- NA
      j <- j+1
    }
    else
    {
      pValChi2[k] <- NA
      pValChi2_round[k] <- NA
      k <- k+1
    }
  }
}
# Adjust with FDR
FDR<-p.adjust(pValChi2, method="BH")

# Rount the p-value
for(i in 1:nrow(strBias))
{
  if( (! is.na(pValChi2[i])) && (pValChi2[i] < 0.0001) )
  {
    pValChi2_round[i] <- format(pValChi2[i], scientific=T, digits=3)
  } else if(! is.na(pValChi2[i]))
  {
    pValChi2_round[i] <- as.character(round(pValChi2[i], 3))
  }
}

# The option for the pool is specified
if(!is.null(pValChi2_PoolData))
{
  # Round the p-value for Pool_Data
  for(i in 1:6)
  {
    if( (! is.na(pValChi2_PoolData[i])) && (pValChi2_PoolData[i] < 0.0001) )
    {
      pValChi2_PoolData_Round[i] <- format(pValChi2_PoolData[i], scientific=T, digits=3)
    } else if(! is.na(pValChi2_PoolData[i]))
    {
      pValChi2_PoolData_Round[i] <- as.character(round(pValChi2_PoolData[i], 3))
    }
  }
}


# I create a dataframe for add what I want
outputChi2 <- data.frame(round(strBias[,2]/strBias[,3], digits=2), sampleSize, round(proportion, 3), confInt)
outputChi2$Mut.type <- strBias$Alteration
outputChi2$SampleName <- strBias$SampleName
colnames(outputChi2)[1:6]<-c("Strand_Bias", "NonTr-Tr", "Proportion", "Confidence Interval", "Mutation_Type", "SampleName")

# Transform the data frame into a matrix for adding the p-value for the samples and Pool_Data
matrix <- as.matrix(outputChi2)
tempColPValFDR <- matrix(, nrow=length(sampleSize), ncol = 2) # Create an empty matrix with 2 columns for adding the p-value and the FDR
matrix <- cbind(matrix, tempColPValFDR)
j = 1 # Timer for all the sample
k = 1 # Timer for Pool_Data
for(i in 1:nrow(matrix))
{
  if(matrix[i,6] == "Pool_Data")
  {
    matrix[i,7] <- pValChi2_PoolData_Round[k]
    matrix[i,8] <- "NA" # No FDR for Pool_Data
    k = k+1
  }
  else
  {
    matrix[i,7] <- pValChi2_round[j]
    matrix[i,8] <- round(FDR[j], 3)
    j = j+1
  }
}
# Reorder the columns
matrix <- cbind(matrix[,1:3], matrix[,7], matrix[,8], matrix[,4:6])
colnames(matrix)[4] <- "P-val-Chi2"
colnames(matrix)[5] <- "FDR"`);

    $R->send(qq`# Export the file
# dec=".": Set the separator for the decimal by "."
write.table(matrix,file="$folderChi2/Output_chi2_strandBias.txt",quote = FALSE,sep="\t",row.names = FALSE,dec=".");`);

    # Stop the connection with R
    $R->stop();
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Pearson correlation
#
# For one sample, calculate the Pearson correlation between the size of the
# chromosomes and the number of SBS they carry:
#   - one coefficient per mutation type, saved in
#     $refH_file->{$filename}{'SBSPerChr'}{<mutation type>}{'Pearson'}
#   - one coefficient for all the mutation types, saved in
#     $refH_file->{$filename}{'SBSPerChr'}{'AllMutType'}
# The coefficient is set to 0 when the count is equal to zero on all the chromosomes.
#
# Arguments:
#   $refH_file : hash reference containing {sample}{'SBSPerChr'}
#   $filename  : name of the sample (key of $refH_file)
#
# Uses %chromosomes (chromosome name => size) which is declared outside of this function.
# Returns the coefficient calculated for all the mutation types.
sub PearsonCoefficient
{
    # NOTE(review): "our" writes the arguments into package variables. "my" would be the
    # usual choice but the rest of the script may read them, so the declaration is kept.
    our ($refH_file, $filename) = @_;

    #### Calculate the Pearson coefficient

    # The chromosomes are read in a fixed order (the correlation itself doesn't depend on the order)
    my @chromosomeNames = sort keys %chromosomes;

    # One coefficient per mutation type
    # "keys" is applied on explicit dereferences: "keys \$hashref" was an experimental feature removed in Perl 5.24
    foreach my $k_mutation (sort keys %{ $refH_file->{$filename}{'SBSPerChr'} })
    {
        # These keys are not mutation types
        next if( ($k_mutation eq "AllMutType") || ($k_mutation eq "TotalPerChr") || ($k_mutation eq "ChrSize") );

        my $testZero    = 0; # The correlation function doesn't works if all the variables are equal to zero
        my $correlation = 0;

        # generate an anonymous 2D array where $x->[1] is the first row (index 0 is not used)
        # $x->[1][1] is the value in row 1 column 1 and $x->[1][2] is the value of row 1 column 2
        # once you build the entire array, pass it to the correlation subroutine
        my $x = [];
        my $i = 1;
        foreach my $chromosome (@chromosomeNames)
        {
            $x->[$i][1] = $chromosomes{$chromosome}; # First column contains the chromosome size
            $x->[$i][2] = $refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'CHR'}{$chromosome}{'chr'}; # Second column contains the count of SBS
            if($refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'CHR'}{$chromosome}{'chr'}==0) { $testZero++; }
            $i++;
        }

        if( $testZero == scalar(keys %{ $refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'CHR'} }) ) { $correlation = 0; }
        # Pass the 2D array to the correlation subroutine
        else { $correlation = correlation($x); }

        $refH_file->{$filename}{'SBSPerChr'}{$k_mutation}{'Pearson'} = $correlation; # Pearson per mutation type
    }

    # generate an anonymous 2D array for all mutation type
    my $testZero    = 0;
    my $correlation = 0;
    my $x = [];
    my $i = 1;
    foreach my $chromosome (@chromosomeNames)
    {
        $x->[$i][1] = $chromosomes{$chromosome};
        $x->[$i][2] = $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'};
        # Count the chromosomes without SBS (this counter was never incremented before, making the test below useless)
        if($refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'}{$chromosome}{'chr'}==0) { $testZero++; }
        $i++;
    }

    if( $testZero == scalar(keys %{ $refH_file->{$filename}{'SBSPerChr'}{'TotalPerChr'} }) ) { $correlation = 0; }
    # Pass the 2D array to the correlation subroutine
    else { $correlation = correlation($x); }

    $refH_file->{$filename}{'SBSPerChr'}{'AllMutType'} = $correlation;

    return $correlation;
}

# Pearson correlation between the column 1 and the column 2 of a 2D array
#
# $x is a reference to an array of rows: the rows start at the index 1 (index 0 is
# not used) and each row holds the first variable at the index 1 and the second
# variable at the index 2.
# Returns the coefficient formatted with 2 decimals (0.00 when it can't be calculated).
sub correlation
{
    my ($x) = @_;

    my ($mean_x, $mean_y) = mean($x);
    # Each sum of squares receives the mean of the column(s) it is working on
    my $ssxx = ss($x, $mean_x, $mean_x, 1, 1);
    my $ssyy = ss($x, $mean_y, $mean_y, 2, 2);
    my $ssxy = ss($x, $mean_x, $mean_y, 1, 2);

    return sprintf("%.2f", correl($ssxx, $ssyy, $ssxy));
}

# Mean of the column 1 and of the column 2 of the 2D array (rows 1 to the last one)
# Returns (mean column 1, mean column 2) or (0, 0) if there is no row.
sub mean
{
    my ($x) = @_;

    # The rows start at the index 1: all of them are used
    # (starting at the index 2 was ignoring the first chromosome)
    my $num = scalar(@{$x}) - 1;
    if($num < 1) { return (0, 0); } # Avoid a division by zero

    my ($sum_x, $sum_y) = (0, 0);
    for(my $i = 1; $i < scalar(@{$x}); ++$i)
    {
        $sum_x += $x->[$i][1];
        $sum_y += $x->[$i][2];
    }

    return ($sum_x / $num, $sum_y / $num);
}

### ss = sum of squared (deviations to the mean)
# Sum over the rows (starting at the index 1) of
# (column $one - $mean_x) * (column $two - $mean_y)
sub ss
{
    my ($x, $mean_x, $mean_y, $one, $two) = @_;

    my $sum = 0;
    for(my $i = 1; $i < scalar(@{$x}); ++$i)
    {
        $sum += ($x->[$i][$one] - $mean_x) * ($x->[$i][$two] - $mean_y);
    }
    return $sum;
}

# Correlation coefficient from the 3 sums of squares
# Returns 0 when one of the variables is constant (ssxx or ssyy equal to zero).
sub correl
{
    my ($ssxx, $ssyy, $ssxy) = @_;

    # The sign of the coefficient is the sign of the covariance
    my $sign = 0;
    if(abs($ssxy) != 0) { $sign = $ssxy / abs($ssxy); }

    my $correl = 0;
    if( ($ssxx != 0) && ($ssyy != 0) ) { $correl = $sign * sqrt($ssxy * $ssxy / ($ssxx * $ssyy)); }

    return $correl;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Complement bases (for the sequence context)
#
# Takes one base as first argument and returns its complementary base:
# A <-> T and C <-> G (upper case only).
# Any other value returns the false value (empty string).
sub complement
{
    my ($base) = @_;

    my %h_complement = ("A" => "T", "C" => "G", "G" => "C", "T" => "A");

    if(exists $h_complement{$base}) { return $h_complement{$base}; }

    # Not a base: "!1" is the false value Perl gives for a comparison that failed
    return !1;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2585 # Create and write some graphics
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2586 sub Create_Graph
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2587 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2588 our ($folderFigure, $filename, $maxValue) = @_;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2589
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2590 # Open the connection with R
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2591 my $R = Statistics::R->new() or die "Impossible to create a communication bridge with R\n";
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2592 $R->startR() ;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2593 # Load the Library
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2594 $R->send(q`library(ggplot2)`);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2595 $R->send(q`library(gplots)`);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2596 $R->send(q`library(gtable)`);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2597
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2598
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2599 $R->send(qq`##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2600 ## OVERALL MUTATION DISTRIBUTION ##
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2601 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2602 distrMut <- read.table("$folderFigure/Overall_mutation_distribution/$filename/$filename-OverallMutationDistribution.txt", header=T)`);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2603 $R->send(q`# Add the count of each category in the legend
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2604 distrMut$Legend[[1]] <- paste0(distrMut$Variant_type[[1]], " (", distrMut$Count[[1]], ")")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2605 distrMut$Legend[[2]] <- paste0(distrMut$Variant_type[[2]], " (", distrMut$Count[[2]], ")")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2606 distrMut$Legend[[3]] <- paste0(distrMut$Variant_type[[3]], " (", distrMut$Count[[3]], ")")`);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2607
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2608 $R->send(qq`# Base plot
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2609 pie <- ggplot(distrMut, aes(x=factor(""), fill=Legend, weight=Count)) + geom_bar(width=1) + coord_polar(theta="y") + scale_x_discrete("", breaks=NULL) + scale_y_continuous("", breaks=NULL) + labs(fill="")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2610 # Background of the plot entire white
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2611 pie <- pie + theme(panel.grid.major = element_line(colour="white"), panel.grid.minor = element_line(colour="white"), panel.background = element_rect(fill="white"))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2612 # Legend on right in 3 rows
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2613 pie <- pie + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2614 # Change the color and the title of the legend
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2615 pie <- pie + scale_fill_brewer("Variant type", palette="Set1")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2616 # Remove all the margins
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2617 pie <- pie + theme(plot.margin=unit(c(-1, 0, -1.5, 0), "cm"))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2618 # Save the pie chart for the HTML page (higher resolution)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2619 options(bitmapType='cairo') # Use cairo device as isn't possible to install X11 on the server...
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2620 png("$folderFigure/Overall_mutation_distribution/$filename/$filename-OverallMutationDistribution.png", width=700, height=1100, res=300)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2621 print(pie)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2622 dev.off()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2623
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2624
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2625 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2626 ## SBS MUTATION DISTRIBUTION ##
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2627 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2628 distrSBS <- read.delim("$folderFigure/SBS_distribution/$filename/$filename-SBS_distribution.txt")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2629 distrSBS <- data.frame(distrSBS)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2630 bar <- ggplot(distrSBS, aes(x=Mutation_Type, y=Percentage, fill=Mutation_Type))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2631 bar <- bar + geom_bar(stat="identity", width=0.5)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2632 # Theme classic
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2633 bar <- bar + theme_classic()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2634 # Remove the axis legend
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2635 bar <- bar + xlab("")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2636 # Set the color of the bars and Changing the labels in the legend
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2637 bar <- bar + scale_fill_manual(values=c("blue", "black", "red", "gray", "#00CC33", "pink"),
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2638 labels=c("C:G>A:T", "C:G>G:C", "C:G>T:A", "T:A>A:T", "T:A>C:G", "T:A>G:C")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2639 )
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2640 # Remove the label in x axis
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2641 bar <- bar + theme(axis.text.x = element_blank())
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2642 # Change the name of the y label
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2643 bar <- bar + ylab("Percent")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2644 # Save the plot for the HTML page (higher resolution)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2645 options(bitmapType='cairo')
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2646 png("$folderFigure/SBS_distribution/$filename/$filename-SBS_distribution.png", width=1800, height=1500, res=300)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2647 print(bar);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2648 dev.off()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2649 # Save the plot for the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2650 bar
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2651 ggsave("$folder_temp/$filename-SBS_distribution-Report.png")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2652
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2653
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2654 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2655 ## IMPACT ON PROTEIN ##
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2656 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2657 impactProt <- read.table("$folderFigure/Impact_protein_sequence/$filename/$filename-DistributionExoFunc.txt", header=T)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2658 # Custom palette: black, orange, dark green, yellow, light blue, dark blue, darkslateblue, red, purple, pink, light green, turquoise, gray
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2659 cb_palette <- c("#000000", "#E69F00", "#006600", "#660000", "#F0E442", "#56B4E9", "#3300FF", "#483D8B", "#FF0000", "#9900CC", "#FF66CC", "#00CC00", "#66FFFF", "#C0C0C0")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2660 pie <- ggplot(impactProt, aes(x=factor(""), fill=AA_Change, weight=Count)) + geom_bar(width=1) + coord_polar(theta="y") + scale_x_discrete("", breaks=NULL)+ scale_y_continuous("", breaks=NULL) + scale_fill_manual(values=cb_palette)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2661 # Background of the plot entire white
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2662 pie <- pie + theme(panel.grid.major = element_line(colour="white"), panel.grid.minor = element_line(colour="white"), panel.background = element_rect(fill="white"))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2663 # Legend in two column
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2664 pie <- pie + guides(fill=guide_legend(ncol=2)) + theme(legend.position="bottom")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2665 # Remove the legend title
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2666 pie <- pie + labs(fill="")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2667 # Save the plot for the HTML page (higher resolution)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2668 options(bitmapType='cairo')
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2669 png("$folderFigure/Impact_protein_sequence/$filename/$filename-DistributionExoFunc.png", width=1600, height=1800, res=300)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2670 print(pie)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2671 dev.off()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2672 # Save the plot for the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2673 pie
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2674 ggsave("$folder_temp/$filename-DistributionExoFunc-Report.png")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2675
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2676
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2677 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2678 ## STRAND BIAS ##
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2679 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2680 cb_palette_SB <- c("#0072B2", "#CC0000")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2681 file_sb <- read.table("$folderFigure/Stranded_Analysis/$filename/$filename-StrandBias.txt", header=T);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2682 p_sb <- ggplot(file_sb, aes(x=Alteration, y=Count, fill=Strand)) + theme_classic() + geom_bar(stat="identity", position="dodge") + scale_fill_manual(values=cb_palette_SB) + theme(axis.text.x = element_text(angle=60, hjust=1)) + xlab("") + theme(legend.position="bottom")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2683 # Save the plot for the HTML page (higher resolution)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2684 options(bitmapType='cairo')
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2685 png("$folderFigure/Stranded_Analysis/$filename/$filename-StrandBias.png", width=1000, height=1200, res=300)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2686 print(p_sb)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2687 dev.off()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2688 # Save the plot for the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2689 p_sb
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2690 ggsave("$folder_temp/$filename-StrandBias-Report.png")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2691
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2692
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2693 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2694 ## HEATMAP SEQUENCE CONTEXT ##
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2695 ## GENOMIC STRAND ##
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2696 ##########################################
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2697 ## COUNT
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2698 heatmap_G <- read.table("$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapCount-Genomic.txt", header=T)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2699 # Save the plot for the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2700 options(bitmapType='cairo')
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2701 png(filename="$folder_temp/$filename-HeatmapCount-Genomic-Report.png", bg="transparent", width=240, height=360)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2702 # Heatmap with an absolute scale
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2703 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2704 dev.off()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2705 # Save the plot for the HTML page (higher resolution)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2706 options(bitmapType='cairo')
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2707 png(filename="$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapCount-Genomic.png", width=1100, height=1600, res=300)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2708 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2709 dev.off()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2710
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2711 ## PERCENT
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2712 heatmap_G <- read.table("$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapPercent-Genomic.txt", header=T)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2713 # Save the plot for the report
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2714 options(bitmapType='cairo')
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2715 png(filename="$folder_temp/$filename-HeatmapPercent-Genomic-Report.png",bg="transparent", width=240, height=360)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2716 # Heatmap with an absolute scale
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2717 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2718 dev.off()
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2719 # Save the plot for the HTML page (higher resolution)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2720 options(bitmapType='cairo')
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2721 png(filename="$folderFigure/Trinucleotide_Sequence_Context/$filename/$filename-HeatmapPercent-Genomic.png", width=1100, height=1600, res=300)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2722 heatmap.2(as.matrix(heatmap_G),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_G)),labCol=colnames(as.matrix(heatmap_G)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2723 dev.off()`);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2724 $R->stopR() ;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2725
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2726 ## Plot the transcriptional strand bias in mutation signature
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2727 `Rscript $pathRScriptTxnSB $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignatureCount.txt $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignatureCount $folder_temp/$filename-StrandedSignatureCount Count 2>&1`;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2728 `Rscript $pathRScriptTxnSB $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignaturePercent.txt $folderFigure/Stranded_Analysis/$filename/$filename-StrandedSignaturePercent $folder_temp/$filename-StrandedSignaturePercent Percent 2>&1`;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2729 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Write the titles of the different sections of the report and draw the
# border cells that frame each section of the worksheet.
#
# Positional arguments:
#   $wb                         workbook object; handed to Format_section() so it can create the border formats
#   $ws                         worksheet object receiving every write()/write_blank()/set_row() call
#   $rowStart_SBSdistrBySeg     row of the "Table 4" title; Tables 5 and 6 and Panel 2 are placed relative to it
#   $colStart_SBSdistrBySeg     left-most column of Tables 4, 5 and 6
#   $nb_func                    added to row offsets (presumably the number of functional regions - confirm with the caller)
#   $colStart_matrixSeqContext  left-most column of Panels 1 and 2
#
# Also reads the file-level $refGenome to decide where Table 6 ends.
#
# The arguments and the border formats are stored in package variables ("our")
# because the named subs nested below cannot reliably share "my" variables of
# the enclosing sub. The nested subs are ordinary package-level subs; they are
# only called from here, after the package variables have been populated.
sub WriteBoderSection
{
    our ($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext) = @_;

    # Border formats, filled in by reference by Format_section()
    our ($format_topLeft, $format_topRight, $format_bottomLeft, $format_bottomRight, $format_top, $format_right, $format_bottom, $format_left);
    Format_section($wb, \$format_topLeft, \$format_topRight, \$format_bottomLeft, \$format_bottomRight, \$format_top, \$format_right, \$format_bottom, \$format_left);

    TableSBSDistrBySeg();
    TableStrandBiasBySegment();
    CountSBSPerChr();
    ShortTriNtContext(); # 6 mut type
    LongTriNtContext();  # 12 mut type

    # Border of "Table 4. SBS distribution by functional region"
    sub TableSBSDistrBySeg
    {
        my $top  = $rowStart_SBSdistrBySeg;
        my $left = $colStart_SBSdistrBySeg;

        # Top-Left
        $ws->write($top, $left, "Table 4. SBS distribution by functional region", $format_topLeft);
        $ws->set_row($top, 18); # Set the height of the row to 0.25"
        # Top (column +13 is written again just below with the corner format)
        for my $i (1 .. 13) { $ws->write_blank($top, $left+$i, $format_top); }
        # Top-Right
        $ws->write_blank($top, $left+13, $format_topRight);
        # Right
        $ws->write_blank($top+1, $left+13, $format_right);
        # Bottom-left
        $ws->write_blank($top+$nb_func+5, $left, $format_bottomLeft);
        # Left
        $ws->write_blank($top+1, $left, $format_left);
        $ws->write_blank($top+2, $left, $format_left);
    }

    # Border of "Table 5. Strand bias by functional region"
    sub TableStrandBiasBySegment
    {
        my $left      = $colStart_SBSdistrBySeg;
        my $top       = $rowStart_SBSdistrBySeg+$nb_func+8;       # title row of the table
        my $secondRow = $rowStart_SBSdistrBySeg+($nb_func*2)+13;  # second group of border rows
        my $bottomRow = $rowStart_SBSdistrBySeg+($nb_func*3)+16;

        # Top-Left
        $ws->write($top, $left, "Table 5. Strand bias by functional region", $format_topLeft);
        $ws->set_row($top, 18); # Set the height of the row to 0.25"
        # Top
        for my $i (1 .. 10) { $ws->write_blank($top, $left+$i, $format_top); }
        # Top-Right
        $ws->write_blank($top, $left+11, $format_topRight);
        # Right
        $ws->write_blank($top+1, $left+11, $format_right);
        $ws->write_blank($secondRow, $left+11, $format_right);
        # Left
        $ws->write_blank($top+1, $left, $format_left);
        $ws->write_blank($top+2, $left, $format_left);
        $ws->write_blank($secondRow, $left, $format_left);
        $ws->write_blank($secondRow+1, $left, $format_left);
        # Bottom
        $ws->write_blank($bottomRow, $left+4, $format_bottom);
        $ws->write_blank($bottomRow, $left+8, $format_bottom);
    }

    # Border of "Table 6. SBS distribution per chromosome"
    sub CountSBSPerChr
    {
        my $left = $colStart_SBSdistrBySeg;
        # Every row of this table is expressed as an offset from this base row
        my $base = $rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2);
        my $top  = $base+4;

        # The last row of the table depends on the reference genome:
        #   Human genome = 24 chromosomes
        #   Mouse genome = 21 chromosomes
        #   Rat genome   = 22 chromosomes
        # Every pattern that matches $refGenome contributes a bottom row, in this order.
        my @bottomRows = map  { $base + $_->[1] }
                         grep { $refGenome =~ $_->[0] }
                         ( [qr/hg/, 33], [qr/mm/, 30], [qr/rn/, 31] );

        #### Top-Left
        $ws->write($top, $left, "Table 6. SBS distribution per chromosome", $format_topLeft);
        $ws->set_row($top, 18); # Set the height of the row to 0.25"
        #### Top
        for my $i (1 .. 7) { $ws->write_blank($top, $left+$i, $format_top); }
        #### Top-Right
        $ws->write_blank($top, $left+8, $format_topRight);
        #### Right
        $ws->write_blank($base+5, $left+8, $format_right);
        $ws->write_blank($base+6, $left+8, $format_right);

        #### Bottom-Right
        for my $row (@bottomRows) { $ws->write_blank($row, $left+8, $format_bottomRight); }

        #### Bottom (column +2 is deliberately left untouched, as in the table layout)
        for my $row (@bottomRows)
        {
            $ws->write_blank($row, $left+1, $format_bottom);
            for my $i (3 .. 7) { $ws->write_blank($row, $left+$i, $format_bottom); }
        }

        #### Left
        for my $offset (5 .. 7) { $ws->write_blank($base+$offset, $left, $format_left); }

        #### Bottom-left
        for my $row (@bottomRows) { $ws->write_blank($row, $left, $format_bottomLeft); }
    }

    # Border of "Panel 1" (6 mutation types); this panel always starts on row 0
    sub ShortTriNtContext
    {
        my $left = $colStart_matrixSeqContext;

        # Top-left
        $ws->write(0, $left, "Panel 1. Trinucleotide sequence context of SBS on the genomic sequence", $format_topLeft);
        # Top
        for my $i (1 .. 19) { $ws->write_blank(0, $left+$i, $format_top); }
        # Top-right
        $ws->write_blank(0, $left+20, $format_topRight);
        # Right (row 37 is written again just below with the corner format)
        for my $i (1 .. 37) { $ws->write_blank($i, $left+20, $format_right); }
        # Bottom-right
        $ws->write_blank(37, $left+20, $format_bottomRight);
        # Bottom: the line is drawn as the top border of row 38
        for my $i (1 .. 19) { $ws->write_blank(38, $left+$i, $format_top); }
        # Bottom-left
        $ws->write_blank(37, $left, $format_bottomLeft);
        # Left (row 2 is not touched here)
        $ws->write(1, $left, "", $format_left);
        for my $i (3 .. 36) { $ws->write_blank($i, $left, $format_left); }
    }

    # Border of "Panel 2" (12 mutation types)
    sub LongTriNtContext
    {
        my $top  = $rowStart_SBSdistrBySeg;
        my $left = $colStart_matrixSeqContext;

        # Top-left
        $ws->write($top, $left, "Panel 2. Stranded analysis of trinucleotide sequence context of SBS", $format_topLeft);
        # Top
        for my $i (1 .. 28) { $ws->write_blank($top, $left+$i, $format_top); }
        # Top-right
        $ws->write_blank($top, $left+29, $format_topRight);
        # Right
        for my $i (1 .. 42) { $ws->write_blank($top+$i, $left+29, $format_right); }
        # NOTE(review): rows 91 and 92 below are absolute, whereas the rest of the
        # panel is relative to $rowStart_SBSdistrBySeg - confirm the caller always
        # passes a start row that makes these line up.
        # Bottom-right
        $ws->write_blank(91, $left+29, $format_bottomRight);
        # Bottom: the line is drawn as the top border of row 92
        for my $i (13 .. 28) { $ws->write_blank(92, $left+$i, $format_top); }
        # Bottom-left
        $ws->write_blank(91, $left, $format_bottomLeft);
        # Left
        for my $offset (1, 2, 4, 5, 22, 23, 25, 26) { $ws->write_blank($top+$offset, $left, $format_left); }
    }
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2866 # Write the header for the six mutation types
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2867 sub WriteHeaderSection
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2868 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2869 our ($wb, $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg, $nb_func, $colStart_matrixSeqContext) = @_;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2870
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2871 our ($format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG, $format_TG2, $format_LeftHeader, $format_RightHeader, $format_LeftHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2872 Format_Header($wb, \$format_CA, \$format_CG, \$format_CT, \$format_TA, \$format_TC, \$format_TG, \$format_TG2, \$format_LeftHeader, \$format_RightHeader, \$format_LeftHeader2);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2873
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2874 our ($format_LeftCA, $format_LeftCG, $format_LeftCT, $format_LeftTA, $format_LeftTC, $format_LeftTG, $format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2875 Format_HeaderSBSDistrBySegAndFunc($wb, \$format_LeftCA, \$format_LeftCG, \$format_LeftCT, \$format_LeftTA, \$format_LeftTC, \$format_LeftTG, \$format_RightCA, \$format_RightCG, \$format_RightCT, \$format_RightTA, \$format_RightTC, \$format_RightTG);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2876
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2877 our $format_A11Bold = ""; Format_A11Bold($wb, \$format_A11Bold); # Arial 11 bold and center
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2878 our $format_A11BoldLeft = ""; Format_A11BoldLeft($wb, \$format_A11BoldLeft); # Arial 11 bold and left
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2879
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2880 our ($format_header12CA, $format_header12CG, $format_header12CT, $format_header12TA, $format_header12TC, $format_header12TG);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2881 Format_Header12MutType($wb, \$format_header12CA, \$format_header12CG, \$format_header12CT, \$format_header12TA, \$format_header12TC, \$format_header12TG);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2882
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2883 ## Header for SBS distribution by segment
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2884 HeaderMutTypeSBSDistrBySeg();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2885
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2886 ## Header for strand bias by function
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2887 $ws->set_column($colStart_SBSdistrBySeg+5, $colStart_SBSdistrBySeg+5, 11);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2888
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2889 my $row = $rowStart_SBSdistrBySeg+$nb_func+10; my $col = $colStart_SBSdistrBySeg;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2890 $ws->write($row, $col+1, ' ', $format_CA); $ws->write($row, $col+2, "C>A", $format_CA); $ws->write($row, $col+3, ' ', $format_CA);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2891 $ws->write($row, $col+5, ' ', $format_CG); $ws->write($row, $col+6, "C>G", $format_CG); $ws->write($row, $col+7, ' ', $format_CG);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2892 $ws->write($row, $col+9, ' ', $format_CT); $ws->write($row, $col+10, "C>T", $format_CT); $ws->write($row, $col+11, ' ', $format_RightCT);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2893
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2894 $row = $rowStart_SBSdistrBySeg+($nb_func*2)+14;
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2895 $ws->write($row, $col+1, ' ', $format_TA); $ws->write($row, $col+2, "T>A", $format_TA); $ws->write($row, $col+3, ' ', $format_TA);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2896 $ws->write($row, $col+5, ' ', $format_TC); $ws->write($row, $col+6, "T>C", $format_TC); $ws->write($row, $col+7, ' ', $format_TC);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2897 $ws->write($row, $col+9, ' ', $format_TG2); $ws->write($row, $col+10, "T>G", $format_TG2); $ws->write($row, $col+11, ' ', $format_RightTG);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2898
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2899 $ws->set_row($rowStart_SBSdistrBySeg+$nb_func+11, 18); $ws->set_row($rowStart_SBSdistrBySeg+($nb_func*2)+15, 18);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2900 $ws->set_column($colStart_SBSdistrBySeg+5, $colStart_SBSdistrBySeg+5, 13); $ws->set_column($colStart_SBSdistrBySeg+9, $colStart_SBSdistrBySeg+9, 13);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2901
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2902 for(my $i=$rowStart_SBSdistrBySeg+$nb_func+10; $i<=$rowStart_SBSdistrBySeg+($nb_func*2)+14; $i+=$nb_func+4)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2903 {
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2904 $ws->write($i+1, $colStart_SBSdistrBySeg, 'Segment', $format_LeftHeader); $ws -> write($i+1, $colStart_SBSdistrBySeg+1, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+2, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+3, 'Tr', $format_A11Bold);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2905 $ws -> write($i+1, $colStart_SBSdistrBySeg+5, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+6, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+7, 'Tr', $format_A11Bold);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2906 $ws -> write($i+1, $colStart_SBSdistrBySeg+9, 'Non-Tr/Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+10, 'Non-Tr', $format_A11Bold); $ws -> write($i+1, $colStart_SBSdistrBySeg+11, 'Tr', $format_RightHeader);
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2907 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2908
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2909
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2910 ## Header for Counts of SBS per chromosome and mutation type
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2911 HeaderCountSBSPerChr();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2912
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2913 ## Header for the short sequence context
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2914 HeaderShortTriNtContext();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2915
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2916 ## Header for the 12 mutation types with the sequence context (coding strand)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2917 HeaderLongTriNtContext();
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2918
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Write the two header rows of the "SBS distribution by segment" table.
#
# Takes no argument: the worksheet ($ws), the origin of the table
# ($rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg) and the cell formats are
# the package variables declared with "our" by the enclosing routine.
#
#   row origin+2 : one coloured label per mutation type, followed by a blank
#                  cell carrying the same format (a "%" / "#" column pair)
#   row origin+3 : "Segment", "Total SBS", then "%" and "#" under every type
sub HeaderMutTypeSBSDistrBySeg
{
	my $rowMutType = $rowStart_SBSdistrBySeg + 2;
	my $rowCaption = $rowStart_SBSdistrBySeg + 3;
	my $firstCol   = $colStart_SBSdistrBySeg;

	my @mutTypes = (
		["C:G>A:T", $format_CA],
		["C:G>G:C", $format_CG],
		["C:G>T:A", $format_CT],
		["T:A>A:T", $format_TA],
		["T:A>C:G", $format_TC],
		["T:A>G:C", $format_TG],
	);

	## Mutation type labels
	$ws->set_row($rowMutType, 18);
	foreach my $i (0 .. $#mutTypes)
	{
		my ($label, $format) = @{$mutTypes[$i]};
		my $col = $firstCol + 2 + (2 * $i);

		$ws->write($rowMutType, $col, $label, $format);
		$ws->write_blank($rowMutType, $col+1, $format);
	}

	## Captions of the first two columns
	$ws->write($rowCaption, $firstCol, "Segment", $format_LeftHeader);
	$ws->set_column($firstCol, $firstCol, 13);
	$ws->set_row($rowCaption, 18);
	$ws->write($rowCaption, $firstCol+1, "Total SBS", $format_A11Bold);
	$ws->set_column($firstCol+1, $firstCol+1, 11);

	## "%" and "#" captions under every mutation type
	foreach my $i (0 .. $#mutTypes)
	{
		my $col = $firstCol + 2 + (2 * $i);

		# The very last cell closes the table on the right. Its column is
		# relative to the table origin like every other cell (it used to be
		# the absolute column 13, only correct for a table starting at 0).
		my $formatCount = ($i == $#mutTypes) ? $format_RightHeader : $format_A11Bold;

		$ws->write($rowCaption, $col,   "%", $format_A11Bold);
		$ws->write($rowCaption, $col+1, "#", $formatCount);
	}
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2938
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Write the header of the "counts of SBS per chromosome and mutation type" table.
#
# Takes no argument: $ws, $rowStart_SBSdistrBySeg, $colStart_SBSdistrBySeg,
# $nb_func and the formats are the package variables declared with "our" by
# the enclosing routine.
#
# The caption row holds "Chr", "Size", "All SBS" and the six mutation types;
# a "Pearson" label is written one row above, over the "Size" column.
sub HeaderCountSBSPerChr
{
	my $rowCaption = $rowStart_SBSdistrBySeg+8+$nb_func+(($nb_func+4)*2)+8;
	my $firstCol   = $colStart_SBSdistrBySeg;

	# NOTE(review): these two widths use absolute column indexes, unlike the
	# rest of the table which is relative to $colStart_SBSdistrBySeg.
	foreach my $absoluteCol (3, 4)
	{
		$ws->set_column($absoluteCol, $absoluteCol, 10);
	}
	$ws->set_row($rowCaption, 18);

	$ws->write($rowCaption-1, $firstCol+1, "Pearson", $format_A11Bold);

	my @captions = (
		["Chr",     $format_LeftHeader],
		["Size",    $format_A11Bold],
		["All SBS", $format_A11Bold],
		["C:G>A:T", $format_CA],
		["C:G>G:C", $format_CG],
		["C:G>T:A", $format_CT],
		["T:A>A:T", $format_TA],
		["T:A>C:G", $format_TC],
		["T:A>G:C", $format_TG],
	);
	foreach my $offset (0 .. $#captions)
	{
		my ($text, $format) = @{$captions[$offset]};
		$ws->write($rowCaption, $firstCol+$offset, $text, $format);
	}
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2955
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Write the headers of the short sequence context section: a count matrix and
# a frequency matrix, each with one coloured column label per mutation type,
# plus the title placed above the bar graph.
#
# Takes no argument: $ws, $colStart_matrixSeqContext and the formats are the
# package variables declared with "our" by the enclosing routine.
# All rows are absolute (2, 3 and 25); only the columns follow the origin.
sub HeaderShortTriNtContext
{
	my $firstCol = $colStart_matrixSeqContext;

	my @mutTypes = (
		['C>A', $format_CA],
		['C>G', $format_CG],
		['C>T', $format_CT],
		['T>A', $format_TA],
		['T>C', $format_TC],
		['T>G', $format_TG2],
	);

	### GENOMIC STRAND
	# title, column offset of the title, format of the title, column offset of the first mutation type
	my @matrices = (
		['Count matrix',      0, $format_LeftHeader2,  4],
		['Frequency matrix', 11, $format_A11BoldLeft, 14],
	);
	foreach my $matrix (@matrices)
	{
		my ($title, $titleOffset, $titleFormat, $typeOffset) = @{$matrix};

		$ws->write(2, $firstCol+$titleOffset, $title, $titleFormat);
		foreach my $i (0 .. $#mutTypes)
		{
			my ($label, $format) = @{$mutTypes[$i]};
			$ws->write(3, $firstCol+$typeOffset+$i, $label, $format);
		}
	}

	### sequence context with a bar graph
	$ws->write(25, $firstCol+10, "Mutation spectra frequency", $format_A11Bold);
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2968
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Write the headers of the two matrices of the sequence context on the coding
# strand: "Count matrix" (starting 3 rows below the origin) and
# "Frequency matrix" (starting 24 rows below the origin).
#
# Takes no argument: $ws, $rowStart_SBSdistrBySeg, $colStart_matrixSeqContext
# and the formats are the package variables declared with "our" by the
# enclosing routine.
#
# Each matrix header uses three rows of height 15:
#   row     : title of the matrix
#   row + 1 : one coloured label per mutation type, followed by a blank cell
#             carrying the same format
#   row + 2 : "NonTr" and "Tr" under every mutation type
sub HeaderLongTriNtContext
{
	my $firstCol = $colStart_matrixSeqContext;

	my @mutTypes = (
		["C>A", $format_CA],
		["C>G", $format_CG],
		["C>T", $format_CT],
		["T>A", $format_TA],
		["T>C", $format_TC],
		["T>G", $format_TG2],
	);

	# title of the matrix and offset of its first header row from the origin
	my @matrices = (["Count matrix", 3], ["Frequency matrix", 24]);

	foreach my $matrix (@matrices)
	{
		my ($title, $rowOffset) = @{$matrix};
		my $rowTitle   = $rowStart_SBSdistrBySeg + $rowOffset;
		my $rowMutType = $rowTitle + 1;
		my $rowStrand  = $rowTitle + 2;

		foreach my $row ($rowTitle, $rowMutType, $rowStrand)
		{
			$ws->set_row($row, 15);
		}
		$ws->write($rowTitle, $firstCol, $title, $format_LeftHeader2);

		foreach my $i (0 .. $#mutTypes)
		{
			my ($label, $format) = @{$mutTypes[$i]};
			my $colNonTr = $firstCol + 1 + (2 * $i);
			my $colTr    = $colNonTr + 1;

			$ws->write($rowMutType, $colNonTr, $label, $format);
			$ws->write_blank($rowMutType, $colTr, $format);
			$ws->write($rowStrand, $colNonTr, "NonTr", $format_A11Bold);
			$ws->write($rowStrand, $colTr, "Tr", $format_A11Bold);
		}
	}
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2990 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Create logo for representing the sequence context with n bases
#
# For every sample of the hash, the sequences stored under the 'WebLogo3' key
# are written, one fasta file per mutation type, to
#   <folderWebLogo>/<sample>/<sample>-<mutation>.fa
# then weblogo is run on every non-empty fasta file of the sample folder to
# produce <name>-Probability.png next to it.
#
# Arguments:
#   $refH_file     : reference to the hash {sample}{'WebLogo3'}{mutation} = [sequences]
#   $folderWebLogo : folder in which one sub-folder per sample is created
#
# Dies when a folder or a fasta file can't be created, written or read.
# A failing weblogo run only prints a warning.
sub CreateLogo
{
	my ($refH_file, $folderWebLogo) = @_;

	# "keys" is applied to explicitly dereferenced hashes: "keys $hashref"
	# is a compilation error since Perl 5.24.
	foreach my $k_file (sort keys %{$refH_file})
	{
		my $folderSample = "$folderWebLogo/$k_file";
		if(!-e $folderSample) { mkdir($folderSample) or die "Can't create the directory $folderSample\n"; }

		# Set to 1 when the last sequence written is at least 10 bases long
		my $test_lengthSeqContext = 0;

		foreach my $k_mutation (sort keys %{$refH_file->{$k_file}{'WebLogo3'}})
		{
			my $fasta = "$folderSample/$k_file-$k_mutation.fa";

			open(my $fh_fasta, ">", $fasta) or die "$!: $fasta\n";
			foreach my $sequence (@{$refH_file->{$k_file}{'WebLogo3'}{$k_mutation}})
			{
				print {$fh_fasta} ">$k_file\n$sequence\n";

				$test_lengthSeqContext = (length($sequence) < 10) ? 0 : 1;
			}
			# Buffered write errors only show up when closing the file
			close($fh_fasta) or die "$!: $fasta\n";
		}

		## Generate the logo
		# The folder is read directly instead of parsing the output of "ls",
		# so that sample names are never interpreted by a shell.
		opendir(my $dh_sample, $folderSample) or die "$!: $folderSample\n";
		my @fastaFiles = map { "$folderSample/$_" } sort grep { !/^\./ && /\.fa$/ } readdir($dh_sample);
		closedir($dh_sample);

		foreach my $fastaFile (@fastaFiles)
		{
			# $fastaFile already contains the path of the sample folder
			my ($filename) = fileparse($fastaFile, qr/\.[^.]*/);

			# Title of the logo: everything before the last dash of the file name
			my ($title) = $filename =~ /(.+)\-/;
			$title = $filename if(!defined $title);

			## Test if there is fasta sequence for the mutation type
			open(my $fh_count, "<", $fastaFile) or die "$!: $fastaFile\n";
			my $nbLigne = 0;
			$nbLigne++ while(<$fh_count>);
			close($fh_count);

			if($nbLigne == 0) { print "WARNING: No sequence for $filename\n"; next; }

			# When length sequence context is lower than 10 the image is too small for adding a title
			my @weblogo = ("weblogo", "-c", "classic", "-F", "png", "-U", "probability");
			if($test_lengthSeqContext == 1) { push(@weblogo, "--title", $title); }
			push(@weblogo, "--fin", $fastaFile, "--fout", "$folderSample/$filename-Probability.png");

			# List form of system: no shell is involved, the arguments are passed as is
			if(system(@weblogo) != 0) { print "WARNING: weblogo failed for $filename\n"; }
		}
	}
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3039
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3040
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format of the worksheet: Arial font size=10, centered
#   $workbook  : workbook object on which add_format() is called
#   $refFormat : reference to the scalar receiving the new format
sub Format_A10
{
	my ($workbook, $refFormat) = @_;

	my $newFormat = $workbook->add_format(font=>'Arial', size=>10);
	${$refFormat} = $newFormat;

	return $newFormat->set_align('center');
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format of the worksheet: Arial font size=11, bold and centered
#   $workbook  : workbook object on which add_format() is called
#   $refFormat : reference to the scalar receiving the new format
sub Format_A11Bold
{
	my ($workbook, $refFormat) = @_;

	my $newFormat = $workbook->add_format(font=>'Arial', size=>11, bold=>1);
	${$refFormat} = $newFormat;

	return $newFormat->set_align('center');
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format of the worksheet: Arial font size=10, italic, red and centered
#   $workbook  : workbook object on which add_format() is called
#   $refFormat : reference to the scalar receiving the new format
sub Format_A10ItalicRed
{
	my ($workbook, $refFormat) = @_;

	my $newFormat = $workbook->add_format(font=>'Arial', size=>10, italic=>1, color => 'red');
	${$refFormat} = $newFormat;

	return $newFormat->set_align('center');
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format of the worksheet: Arial font size=11, bold and left
#   $workbook  : workbook object on which add_format() is called
#   $refFormat : reference to the scalar receiving the new format
# The 'left' alignment is requested through the "valign" property.
sub Format_A11BoldLeft
{
	my ($workbook, $refFormat) = @_;

	my $newFormat = $workbook->add_format(valign =>'left', font=>'Arial', size=>11, bold=>1);

	return ${$refFormat} = $newFormat;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format of the worksheet: Arial font size=10, bold and left
#   $workbook  : workbook object on which add_format() is called
#   $refFormat : reference to the scalar receiving the new format
# The 'left' alignment is requested through the "valign" property.
sub Format_A10BoldLeft
{
	my ($workbook, $refFormat) = @_;

	my $newFormat = $workbook->add_format(valign =>'left', font=>'Arial', size=>10, bold=>1);

	return ${$refFormat} = $newFormat;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the formats drawing the border of a section (for delimiting the
# different sections of the report).
#
#   $workbook : workbook object on which add_format() is called
#   then eight references to the scalars receiving the new formats, in this
#   order: top-left, top-right, bottom-left and bottom-right corners, then
#   top, right, bottom and left edges.
#
# Corner formats are Arial 12 bold with a blue border (style 2) on their two
# outer sides; edge formats only carry the blue border of their own side.
sub Format_section
{
	my ($workbook, $refTopLeft, $refTopRight, $refBottomLeft, $refBottomRight, $refTop, $refRight, $refBottom, $refLeft) = @_;

	# Set a blue border of style 2 on every given side of a format and
	# return what the last set_<side>_color call returned.
	my $drawBorder = sub
	{
		my ($format, @sides) = @_;
		my $lastResult;

		foreach my $side (@sides)
		{
			my ($setStyle, $setColor) = ("set_$side", "set_${side}_color");
			$format->$setStyle(2);
			$lastResult = $format->$setColor('blue');
		}
		return $lastResult;
	};

	## Corners of the section
	my @corners = (
		[$refTopLeft,     'top',    'left'],
		[$refTopRight,    'top',    'right'],
		[$refBottomLeft,  'bottom', 'left'],
		[$refBottomRight, 'bottom', 'right'],
	);
	foreach my $corner (@corners)
	{
		my ($refFormat, @sides) = @{$corner};

		${$refFormat} = $workbook->add_format(valign => 'left', bold => 1, font => 'Arial', size => 12);
		$drawBorder->(${$refFormat}, @sides);
	}

	## Edges of the section
	foreach my $edge ([$refTop, 'top'], [$refRight, 'right'], [$refBottom, 'bottom'])
	{
		my ($refFormat, $side) = @{$edge};

		${$refFormat} = $workbook->add_format();
		$drawBorder->(${$refFormat}, $side);
	}

	${$refLeft} = $workbook->add_format();
	return $drawBorder->(${$refLeft}, 'left');
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the formats of the headers.
#
#   $workbook : workbook object on which add_format() is called; it is also
#               handed to Color() and BackgroundColor()
#   then ten references to the scalars receiving the new formats, in this
#   order: C>A, C>G, C>T, T>A, T>C, T>G (with a blue right border),
#   T>G (without border), left header, right header, left header without
#   alignment.
#
# The mutation type formats are Arial 11 bold, white on a coloured
# background, centered across the selection. The three text headers are
# Arial 11 bold with a blue border (style 2) on the left or on the right.
sub Format_Header
{
	my ($workbook, $refCA, $refCG, $refCT, $refTA, $refTC, $refTG, $refTG2, $refLeftHeader, $refRightHeader, $refLeftHeader2) = @_;

	my ($blue, $black, $red, $gray, $green, $pink);
	Color($workbook, \$blue, \$black, \$red, \$gray, \$green, \$pink);

	my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
	BackgroundColor($workbook, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);

	# Build the format of one mutation type from its background colour
	my $newMutTypeFormat = sub
	{
		my ($background) = @_;

		my $format = $workbook->add_format(bg_color => $background, font=>'Arial', bold=>1, size=>11, color=>'white');
		$format->set_align('center');
		$format->set_center_across();
		return $format;
	};

	# Build the format shared by the three text headers
	my $newTextFormat = sub
	{
		return $workbook->add_format(bold=>1, font=>'Arial', size=>11);
	};

	## Mutation types
	${$refCA} = $newMutTypeFormat->($blue);
	${$refCG} = $newMutTypeFormat->($black);
	${$refCT} = $newMutTypeFormat->($red);
	${$refTA} = $newMutTypeFormat->($gray);
	${$refTC} = $newMutTypeFormat->($green);

	# NOTE(review): this is the only format using a colour coming from
	# BackgroundColor(); all the others use the ones from Color() - confirm
	# this is intended.
	${$refTG} = $newMutTypeFormat->($bgColor_pink);
	${$refTG}->set_right(2);
	${$refTG}->set_right_color('blue');

	${$refTG2} = $newMutTypeFormat->($pink);

	## Text headers
	${$refLeftHeader} = $newTextFormat->();
	${$refLeftHeader}->set_align('center');
	${$refLeftHeader}->set_left(2);
	${$refLeftHeader}->set_left_color('blue');

	${$refLeftHeader2} = $newTextFormat->();
	${$refLeftHeader2}->set_left(2);
	${$refLeftHeader2}->set_left_color('blue');

	${$refRightHeader} = $newTextFormat->();
	${$refRightHeader}->set_align('center');
	${$refRightHeader}->set_right(2);
	${$refRightHeader}->set_right_color('blue');
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the mutation type header for the Strand bias by segment
#
# Creates twelve header formats (Arial, bold, size 11, white text, centred):
# one per mutation type (CA, CG, CT, TA, TC, TG) with a blue left border of
# style 2, then one per mutation type with a blue right border of style 2.
# The background colours come from BackgroundColor().
#
# Arguments:
#   $wb                 - workbook object providing add_format()
#   $format_Left*,
#   $format_Right*      - scalar references that receive the created formats
sub Format_HeaderSBSDistrBySegAndFunc
{
    my ($wb, $format_LeftCA, $format_LeftCG, $format_LeftCT, $format_LeftTA, $format_LeftTC, $format_LeftTG, $format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG) = @_;

    my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
    BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);

    # Background colours in mutation type order: CA, CG, CT, TA, TC, TG
    my @bgColors = ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);

    # Output references grouped by the side carrying the border, same order as @bgColors
    my %slotsOf = (
        left  => [$format_LeftCA,  $format_LeftCG,  $format_LeftCT,  $format_LeftTA,  $format_LeftTC,  $format_LeftTG],
        right => [$format_RightCA, $format_RightCG, $format_RightCT, $format_RightTA, $format_RightTC, $format_RightTG],
    );

    # All the left-bordered headers are created first, then the right-bordered ones
    foreach my $side ('left', 'right')
    {
        my $setBorder      = "set_$side";
        my $setBorderColor = "set_${side}_color";

        foreach my $i (0 .. $#bgColors)
        {
            my $slot = $slotsOf{$side}[$i];

            ${$slot} = $wb->add_format(bg_color=>$bgColors[$i], font=>'Arial', bold=>1, size=>11, color=>'white');
            ${$slot}->set_align('center');
            ${$slot}->$setBorder(2);
            ${$slot}->$setBorderColor('blue');
        }
    }
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the mutation type header for the trinucleotide sequence context on the coding strand
#
# Creates one centred header format (Arial, bold, size 11, white text) per
# mutation type, each with the background colour supplied by BackgroundColor().
#
# Arguments:
#   $wb          - workbook object providing add_format()
#   $format_CA, $format_CG, $format_CT,
#   $format_TA, $format_TC, $format_TG
#                - scalar references that receive the created formats
sub Format_Header12MutType
{
    my ($wb, $format_CA, $format_CG, $format_CT, $format_TA, $format_TC, $format_TG) = @_;

    my ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);
    BackgroundColor($wb, \$bgColor_blue, \$bgColor_black, \$bgColor_red, \$bgColor_gray, \$bgColor_green, \$bgColor_pink);

    # Each output reference paired with its background colour, in creation order
    my @headers = (
        [$format_CA, $bgColor_blue],
        [$format_CG, $bgColor_black],
        [$format_CT, $bgColor_red],
        [$format_TA, $bgColor_gray],
        [$format_TC, $bgColor_green],
        [$format_TG, $bgColor_pink],
    );

    foreach my $header (@headers)
    {
        my ($slot, $bgColor) = @{$header};

        ${$slot} = $wb->add_format(bg_color=>$bgColor, font=>'Arial', bold=>1, size=>11, color=>'white');
        ${$slot}->set_align('center');
    }
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format for the text that needs a section border
#
# Creates five text formats (Arial, size 10) and four header formats (Arial,
# bold, size 11, centred, light gray background registered as custom colour 54).
# The blue borders of style 2 drawn on each format are listed in the tables below.
#
# Arguments:
#   $wb          - workbook object providing add_format() and set_custom_color()
#   $formatT_*   - scalar references that receive the created formats
#
# NOTE(review): the text formats are created with valign=>'center'; the
# Spreadsheet::WriteExcel documentation lists 'vcenter' as the vertical
# alignment value. The value is kept exactly as written - confirm the intent.
sub Format_TextSection
{
    my ($wb, $formatT_left, $formatT_right, $formatT_bottomRight, $formatT_bottomLeft, $formatT_bottom, $formatT_bottomHeader, $formatT_bottomRightHeader, $formatT_bottomHeader2, $formatT_rightHeader) = @_;

    # Draw a blue border of style 2 on every requested side, in the order given
    my $drawBlueBorders = sub
    {
        my ($format, @sides) = @_;

        foreach my $side (@sides)
        {
            my $setStyle = "set_$side";
            my $setColor = "set_${side}_color";
            $format->$setStyle(2);
            $format->$setColor('blue');
        }
    };

    # Text cells: output reference followed by the sides carrying a border
    my @textProperties = (valign=>'center', font=>'Arial', size=>10);
    my @textCells = (
        [$formatT_left,        'left'],
        [$formatT_right,       'right'],
        [$formatT_bottomRight, 'bottom', 'right'],
        [$formatT_bottomLeft,  'bottom', 'left'],
        [$formatT_bottom,      'bottom'],
    );
    foreach my $cell (@textCells)
    {
        my ($slot, @sides) = @{$cell};

        ${$slot} = $wb->add_format(@textProperties);
        $drawBlueBorders->(${$slot}, @sides);
    }

    # Header cells share a light gray background; $formatT_bottomHeader2 has no border
    my $bgColor_totallighGray = $wb->set_custom_color(54, 230, 230, 230);
    my @headerProperties = (bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>11);
    my @headerCells = (
        [$formatT_bottomHeader,      'bottom'],
        [$formatT_bottomRightHeader, 'bottom', 'right'],
        [$formatT_bottomHeader2],
        [$formatT_rightHeader,       'right'],
    );
    foreach my $cell (@headerCells)
    {
        my ($slot, @sides) = @{$cell};

        ${$slot} = $wb->add_format(@headerProperties);
        ${$slot}->set_align('center');
        $drawBlueBorders->(${$slot}, @sides);
    }
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format for the graphs titles
#
# Arguments:
#   $wb                 - workbook object providing add_format()
#   $formatT_graphTitle - scalar reference that receives the title format
#                         (Arial, size 12, bold)
sub Format_GraphTitle
{
    my $wb                 = shift;
    my $formatT_graphTitle = shift;

    my @titleProperties = (font=>'Arial', size=>12, bold=>1);

    ${$formatT_graphTitle} = $wb->add_format(@titleProperties);
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the format of the border of the tables
#
# Creates thirteen cell formats (Arial, size 10) whose borders use style 1.
# Three of them use a light gray background registered as custom colour 54.
#
# Arguments:
#   $wb        - workbook object providing add_format() and set_custom_color()
#   $table_*   - scalar references that receive the created formats
sub Format_Table
{
    my ($wb, $table_topleft, $table_topRight, $table_bottomleft, $table_bottomRight, $table_top, $table_right, $table_bottom, $table_bottomItalicRed, $table_left, $table_bottomrightHeader, $table_left2, $table_middleHeader, $table_middleHeader2) = @_;

    # Create a format from a property list, store it through $slot and draw
    # a border of style 1 on every listed side, in the order given
    my $defineCell = sub
    {
        my ($slot, $properties, @sides) = @_;

        ${$slot} = $wb->add_format(@{$properties});
        foreach my $side (@sides)
        {
            my $setBorder = "set_$side";
            ${$slot}->$setBorder(1);
        }
    };

    my @boldText  = (valign=>'center', bold=>1, font=>'Arial', size=>10);
    my @plainText = (valign=>'center', font=>'Arial', size=>10);

    # Corners of the table
    $defineCell->($table_topleft,     \@boldText,  'top',    'left');
    $defineCell->($table_topRight,    \@boldText,  'top',    'right');
    $defineCell->($table_bottomleft,  \@boldText,  'bottom', 'left');
    $defineCell->($table_bottomRight, \@plainText, 'bottom', 'right');

    # Edges of the table
    $defineCell->($table_top,             \@boldText,  'top');
    $defineCell->($table_right,           \@plainText, 'right');
    $defineCell->($table_bottom,          \@plainText, 'bottom');
    $defineCell->($table_bottomItalicRed, [@plainText, italic=>1, color => 'red'], 'bottom');
    $defineCell->($table_left,            \@boldText,  'left');

    # Headers on a light gray background
    my $bgColor_totallighGray = $wb->set_custom_color(54, 230, 230, 230);
    my @grayHeader = (bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>10);

    $defineCell->($table_bottomrightHeader, \@grayHeader, 'bottom', 'right');

    $defineCell->($table_left2, [valign=>'left', font=>'Arial', size=>10], 'left');

    $defineCell->($table_middleHeader,  [valign=>'center', @grayHeader]);
    $defineCell->($table_middleHeader2, [valign=>'center', @grayHeader], 'bottom');
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3227
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the color
#
# Registers the six mutation type colours in the workbook at colour indices
# 40 to 45 and stores what set_custom_color() returns through the references.
#
# Arguments:
#   $wb                                        - workbook object providing set_custom_color()
#   $blue, $black, $red, $gray, $green, $pink  - scalar references that receive the colours
sub Color
{
    my ($wb, $blue, $black, $red, $gray, $green, $pink) = @_;

    # Output reference, colour index, then the red/green/blue components
    my @palette = (
        [$blue,  40,   0,   0, 204],    # C:G>A:T in blue
        [$black, 41,   0,   0,   0],    # C:G>G:C in black
        [$red,   42, 255,   0,   0],    # C:G>T:A in red
        [$gray,  43, 205, 205, 205],    # T:A>A:T in light gray
        [$green, 44,   0, 204,  51],    # T:A>C:G in green
        [$pink,  45, 255, 192, 203],    # T:A>G:C in pink
    );

    foreach my $entry (@palette)
    {
        my ($slot, $index, @rgb) = @{$entry};
        ${$slot} = $wb->set_custom_color($index, @rgb);
    }
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Define the background colors
#
# Registers the same six red/green/blue values as Color(), at colour indices
# 48 to 53, and stores what set_custom_color() returns through the references.
#
# Arguments:
#   $wb          - workbook object providing set_custom_color()
#   $bgColor_*   - scalar references that receive the colours, in the order
#                  blue, black, red, gray, green, pink
sub BackgroundColor
{
    my ($wb, $bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink) = @_;

    my @slots = ($bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink);

    # Red/green/blue components, in the same order as @slots
    my @rgbOf = (
        [  0,   0, 204],    # blue
        [  0,   0,   0],    # black
        [255,   0,   0],    # red
        [205, 205, 205],    # gray
        [  0, 204,  51],    # green
        [255, 192, 203],    # pink
    );

    # The colours occupy consecutive indices starting at 48
    my $firstIndex = 48;
    foreach my $position (0 .. $#slots)
    {
        ${ $slots[$position] } = $wb->set_custom_color($firstIndex + $position, @{ $rgbOf[$position] });
    }
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3251 }
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3252
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3253
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
# Return the zero-based index of a named column in a tab-delimited file.
#
# Only the first line of the file (the header) is read. Trailing CR/LF
# characters are removed before the line is split on tabulations.
#
# Arguments:
#   $input          - path of the file to inspect
#   $name_of_column - exact (case-sensitive) name of the column to look for
#
# Returns the index of the first header field equal to $name_of_column.
# Dies if the file cannot be opened. If the column is absent from the header
# (including when the file is empty), prints an error on STDERR and exits
# with a non-zero status so that the failure is visible to the calling process.
sub recoverNumCol
{
    my ($input, $name_of_column) = @_;

    # Three-argument open on a lexical handle: the file name is taken literally
    # (no mode or whitespace interpretation) and the handle cannot clash with
    # another handle of the same name elsewhere in the script.
    open(my $fh, '<', $input) or die "recoverNumCol: $!: $input\n";
    # For having the name of the columns
    my $search_header = <$fh>;
    close $fh;

    # An empty file has no header line: treat it as a header without any column
    $search_header = "" unless defined $search_header;
    $search_header =~ s/[\r\n]+$//;
    my @tab_search_header = split("\t", $search_header);

    # The number of the column
    for my $i (0 .. $#tab_search_header)
    {
        return $i if $tab_search_header[$i] eq $name_of_column;
    }

    print STDERR "Error recoverNumCol(): the column named $name_of_column doesn't exits in the input file $input!!!!!\n";
    exit 1;
}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3271
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3272
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3273
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3274
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3275 =head1 NAME
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3276
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3277 mutSpec-Stat
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3278
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3279 =head1 SYNOPSIS
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3280
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3281 mutSpecstat.pl [arguments] <query-file>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3282
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3283 <query-file> can be a folder with multiple VCF or a single VCF
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3284
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3285 Arguments:
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3286 -h, --help print help message
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3287 -m, --man print complete documentation
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3288 -v, --verbose use verbose output
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3289 --refGenome the reference genome to use (human, mouse or rat genomes)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3290 -o, --outfile <string> output directory for the result. If none is specified, the result is written in the same directory as the input file
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3291 -temp --pathTemporary <string> the path for saving the temporary files
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3292 --pathSeqRefGenome the path to the fasta reference sequences
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3293 --poolData generate the pool of all the samples (optional)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3294 --reportSample generate a report for each sample (optional)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3295
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3296
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3297 Function: automatically run a pipeline and calculate various statistics on mutations
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3298
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3299 Example: mutSpecstat.pl --refGenome hg19 --outfile output_directory --temp path_to_temporary_directory --pathRscript path_to_R_scripts --pathSeqRefGenome path_fasta_ref_seq --poolData --reportSample input
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3300
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3301 Version: 04-2016 (April 2016)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3302
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3303
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3304 =head1 OPTIONS
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3305
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3306 =over 8
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3307
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3308 =item B<--help>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3309
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3310 print a brief usage message and detailed explanation of options.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3311
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3312 =item B<--man>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3313
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3314 print the complete manual of the program.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3315
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3316 =item B<--verbose>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3317
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3318 use verbose output.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3319
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3320 =item B<--refGenome>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3321
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3322 the reference genome to use, could be human, mouse or rat genomes.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3323
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3324 =item B<--outfile>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3325
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3326 the directory of output file names. If it is not specified, the same directory as the input file is used.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3327
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3328 =item B<--pathTemporary>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3329
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3330 the path for saving temporary files generated by the script.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3331 If none is specified, a temporary folder is created in the same directory where the script is running.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3332 It is deleted when the script finishes.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3333
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3334 =item B<--pathSeqRefGenome>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3335
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3336 The path to the fasta reference sequences
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3337
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3338 =item B<--poolData only for the report>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3339
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3340 calculate the statistics on the pool of all the data passed as input
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3341
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3342 =item B<--reportSample only for the report>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3343
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3344 generate a report for each sample
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3345
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3346 =head1 DESCRIPTION
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3347
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3348 mutSpecstat is a Perl script for calculating various statistics on mutations.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3349 An Excel report containing the mutation type distribution per functional region, the strand bias and the sequence context on genomic and coding sequence is created.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3350 The different statistics are illustrated using ggplot2.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3351
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3352 =cut