annotate bismark_methylation_extractor @ 4:243e8f9fb75b draft

Uploaded
author bgruening
date Mon, 09 Feb 2015 18:24:41 -0500
parents 91f07ff056ca
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/perl
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2 use warnings;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3 use strict;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4 $|++;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
5 use Getopt::Long;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
6 use Cwd;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
7 use Carp;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
8 use FindBin qw($Bin);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
9 use lib "$Bin/../lib";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
10
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
11
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
12 ## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
13
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
14 ## This program is free software: you can redistribute it and/or modify
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
15 ## it under the terms of the GNU General Public License as published by
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
16 ## the Free Software Foundation, either version 3 of the License, or
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
17 ## (at your option) any later version.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
18
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
19 ## This program is distributed in the hope that it will be useful,
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
20 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
21 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
22 ## GNU General Public License for more details.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
23
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
24 ## You should have received a copy of the GNU General Public License
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
25 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
26
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
# ----------------------------------------------------------------------------
# File-scope state shared between the main processing loop and the subroutines.
# The declarations are kept in their original order: process_commandline() is
# defined elsewhere in this file and may rely on the variables declared ahead
# of its call (TODO confirm against its body).
# ----------------------------------------------------------------------------

my @filenames;                 # input files
my %counting;                  # methylation call counters, re-initialised for every input file
my $parent_dir = getcwd();     # working directory at start-up

my %fhs;                       # output filehandles, emptied and closed once per input file

my $version = 'v0.10.1';

# All run options, in the order process_commandline() returns them.
my (
    $ignore,
    $genomic_fasta,
    $single,
    $paired,
    $full,
    $report,
    $no_overlap,
    $merge_non_CpG,
    $vanilla,
    $output_dir,
    $no_header,
    $bedGraph,
    $remove,
    $coverage_threshold,
    $counts,
    $cytosine_report,
    $genome_folder,
    $zero,
    $CpG_only,
    $CX_context,
    $split_by_chromosome,
    $sort_size,
    $samtools_path,
    $gzip,
    $ignore_r2,
    $mbias_only,
    $gazillion,
    $ample_mem,
) = process_commandline();


### only needed for bedGraph output
my @sorting_files;             # if files are to be written to bedGraph format, these are the methylation extractor output files
my @methylcalls = qw(0 0 0);   # [0] = methylated, [1] = unmethylated, [2] = total
my @bedfiles;

### only needed for genome-wide cytosine methylation report
my %chromosomes;

# M-bias data; both hashes are emptied for every input file before
# produce_mbias_plots() is called (presumably Read 1 / Read 2 -- verify).
my %mbias_1;
my %mbias_2;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
47
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
##############################################################################################
### Summarising Run Parameters
##############################################################################################
# Everything in this section only reports the chosen options on STDERR, with
# two exceptions: $CpG_only is switched on whenever --CX_context was not
# requested, and $genome_folder is normalised / given its default value.

### METHYLATION EXTRACTOR

warn "Summarising Bismark methylation extractor parameters:\n";
warn '=' x 63, "\n";

# Input format: single-end or paired-end, vanilla or SAM (default)
if ($single) {
    warn $vanilla
        ? "Bismark single-end vanilla format specified\n"
        : "Bismark single-end SAM format specified (default)\n";
}
elsif ($paired) {
    warn $vanilla
        ? "Bismark paired-end vanilla format specified\n"
        : "Bismark paired-end SAM format specified (default)\n";
}

# Leading positions of the methylation call string(s) that are skipped.
# Anything that is not single-end is treated as paired-end here.
if ($ignore) {
    warn $single
        ? "First $ignore bp will be disregarded when processing the methylation call string\n"
        : "First $ignore bp will be disregarded when processing the methylation call string of Read 1\n";
}
if ( !$single and $ignore_r2 ) {
    warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\n";
}


warn "Strand-specific outputs will be skipped. Separate output files for cytosines in CpG, CHG and CHH context will be generated\n"
    if $full;
warn "Merge CHG and CHH context to non-CpG context specified\n"
    if $merge_non_CpG;

### output directory
if ( $output_dir ne '' ) {
    warn "Output path specified as: $output_dir\n";
}
else {
    warn "Output will be written to the current directory ('$parent_dir')\n";
}


sleep(1);    # short pause between the sections of the summary

### BEDGRAPH

if ($bedGraph) {
    warn "\n\nSummarising bedGraph parameters:\n";
    warn '=' x 63, "\n";

    warn $counts
        ? "Generating additional output in bedGraph and coverage format\nbedGraph format:\t<Chromosome> <Start Position> <End Position> <Methylation Percentage>\ncoverage format:\t<Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>\n\n"
        : "Generating additional sorted output in bedGraph format (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage>)\n";

    warn "Using a cutoff of $coverage_threshold read(s) to report cytosine positions\n";

    if ( !$CX_context ) {    # default
        $CpG_only = 1;
        warn "Reporting and sorting cytosine methylation information in CpG context only (default)\n";
    }
    else {
        warn "Reporting and sorting methylation information for all cytosine context (sorting may take a long time, you have been warned ...)\n";
    }

    warn "White spaces in read ID names will be removed prior to sorting\n"
        if $remove;

    # Sorting strategy: in-memory arrays take precedence over UNIX sort settings
    if ($ample_mem) {
        warn "Sorting chromosomal postions for the bedGraph step using arrays instead of using UNIX sort\n";
    }
    elsif ( defined $sort_size ) {
        warn "The bedGraph UNIX sort command will use the following memory setting:\t'$sort_size'. Temporary directory used for sorting is the output directory\n";
    }
    else {
        warn "Setting a default memory usage for the bedGraph UNIX sort command to 2GB\n";
    }



    sleep(1);

    # The genome-wide cytosine report is only summarised (and later only
    # generated) when bedGraph output was requested as well.
    if ($cytosine_report) {
        warn "\n\nSummarising genome-wide cytosine methylation report parameters:\n";
        warn '=' x 63, "\n";
        warn "Generating comprehensive genome-wide cytosine report\n(output format: <Chromosome> <Position> <Strand> <count methylated> <count non-methylated> <C-context> <trinucleotide context> )\n";


        if ( !$CX_context ) {    # default
            $CpG_only = 1;
            warn "Reporting cytosine methylation in CpG context only (default)\n";
        }
        else {
            warn "Reporting methylation for all cytosine contexts. Be aware that this will generate enormous files\n";
        }

        warn "Splitting the cytosine report output up into individual files for each chromosome\n"
            if $split_by_chromosome;

        ### Zero-based coordinates
        warn $zero
            ? "Using zero-based genomic coordinates (user-defined)\n"
            : "Using 1-based genomic coordinates (default)\n";    # default, 1-based coords

        ### GENOME folder
        if ($genome_folder) {
            # make sure the folder name ends in a slash
            $genome_folder =~ s{$}{/} unless $genome_folder =~ m{/$};
            warn "Genome folder was specified as $genome_folder\n";
        }
        else {
            $genome_folder = '/data/public/Genomes/Mouse/NCBIM37/';
            warn "Using the default genome folder /data/public/Genomes/Mouse/NCBIM37/\n";
        }
        sleep(1);
    }
}

warn "\n";
sleep(5);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
190
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
191 ######################################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
192 ### PROCESSING FILES
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
193 ######################################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
194
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
# Main loop: every input file is processed independently.
#   1. reset the per-file state (counters, filehandles, file lists, M-bias data)
#   2. run the methylation extraction (process_Bismark_results_file)
#   3. close all output filehandles, write the M-bias data, drop empty outputs
#   4. optionally run bismark2bedGraph, and after that coverage2cytosine
#
# The two helper programs are started with the list form of system(), so no
# shell is involved and file or directory names containing spaces or shell
# metacharacters are passed through unchanged. Their exit status is checked:
# a failure is reported on STDERR instead of being followed by a "Finished ..."
# message, and a failed bedGraph step skips the cytosine report that depends
# on its coverage output.
foreach my $filename (@filenames){
    # resetting counters and filehandles
    %fhs = ();
    %counting =(
        total_meCHG_count            => 0,
        total_meCHH_count            => 0,
        total_meCpG_count            => 0,
        total_unmethylated_CHG_count => 0,
        total_unmethylated_CHH_count => 0,
        total_unmethylated_CpG_count => 0,
        sequences_count              => 0,
    );

    @sorting_files = ();
    @bedfiles      = ();

    %mbias_1 = ();
    %mbias_2 = ();

    ### performing a quick check to see if a paired-end SAM file has been sorted by positions which does interfere with the logic used by the extractor
    if ($paired and not $vanilla){
        test_positional_sorting($filename);
    }

    process_Bismark_results_file($filename);

    ### Closing all filehandles so that the Bismark methylation extractor output doesn't get truncated due to buffering issues
    foreach my $fh (keys %fhs) {
        if ($fh =~ /^[1230]$/) {
            # the keys 0-3 hold a nested hash of context => filehandle
            foreach my $context (keys %{$fhs{$fh}}) {
                close $fhs{$fh}->{$context} or die $!;
            }
        }
        else{
            close $fhs{$fh} or die $!;
        }
    }

    ### printing out all M-Bias data
    produce_mbias_plots ($filename);

    delete_unused_files();

    next unless $bedGraph;

    my $out = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
    $out =~ s/gz$//;
    $out =~ s/sam$//;
    $out =~ s/bam$//;
    $out =~ s/txt$//;
    $out =~ s/$/bedGraph/;

    my $bedGraph_output = $out;
    my @args;

    push @args, '--remove'       if $remove;
    push @args, '--CX_context'   if $CX_context;
    push @args, '--no_header'    if $no_header;
    push @args, '--gazillion'    if $gazillion;
    push @args, '--ample_memory' if $ample_mem;

    # if ($counts){
    #   push @args, "--counts";
    # }

    # The parameter summary treats $sort_size as optional, so the option is
    # only forwarded when a value exists; otherwise the next option would end
    # up being consumed as the buffer size.
    push @args, '--buffer_size', $sort_size if defined $sort_size;
    push @args, '--cutoff', $coverage_threshold;
    push @args, '--output', $bedGraph_output;
    push @args, '--dir',    $output_dir;

    ### adding all files to be sorted to @args
    push @args, @sorting_files;

    # print join "\t",@args,"\n";

    if (system ("$Bin/bismark2bedGraph", @args) != 0){
        my $reason = $? == -1  ? "could not be started: $!"
                   : $? & 127  ? 'was terminated by signal ' . ($? & 127)
                   :             'exited with status ' . ($? >> 8);
        warn "bismark2bedGraph $reason while processing '$filename'; skipping the remaining bedGraph/cytosine report steps for this file\n\n";
        next;
    }

    warn "Finished BedGraph conversion ...\n\n";
    sleep(3);

    # open (OUT,'>',$output_dir.$bedGraph_output) or die "Problems with the bedGraph output filename detected: file path: '$output_dir'\tfile name: '$bedGraph_output' $!";
    # warn "Writing bedGraph to file: $bedGraph_output\n";
    # process_bedGraph_output();
    # close OUT or die $!;

    ### genome-wide cytosine methylation report requires bedGraph processing anyway
    next unless $cytosine_report;

    @args = (); # resetting @args
    my $cytosine_out = $out;
    $cytosine_out =~ s/bedGraph$//;

    if ($CX_context){
        $cytosine_out =~ s/$/CX_report.txt/;
    }
    else{
        $cytosine_out =~ s/$/CpG_report.txt/;
    }

    push @args, '--output',     $cytosine_out;
    push @args, '--dir',        $output_dir;
    push @args, '--genome',     $genome_folder;
    push @args, '--parent_dir', $parent_dir;

    push @args, '--zero'                if $zero;
    push @args, '--CX_context'          if $CX_context;
    push @args, '--split_by_chromosome' if $split_by_chromosome;

    my $coverage_output = $bedGraph_output;
    $coverage_output =~ s/bedGraph$/bismark.cov/;

    push @args, $output_dir . $coverage_output; # this will be the infile

    if (system ("$Bin/coverage2cytosine", @args) != 0){
        my $reason = $? == -1  ? "could not be started: $!"
                   : $? & 127  ? 'was terminated by signal ' . ($? & 127)
                   :             'exited with status ' . ($? >> 8);
        warn "\n\ncoverage2cytosine $reason while processing '$filename'; the genome-wide cytosine report is missing or incomplete\n\n";
        next;
    }
    # generate_genome_wide_cytosine_report($bedGraph_output,$cytosine_out);
    warn "\n\nFinished generating genome-wide cytosine report\n\n";
}
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
335
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
# delete_unused_files()
#
# Walks over the file-scope list @sorting_files (the methylation extractor
# output files of the current input file). A file counts as "used" as soon as
# it contains one line that does not start with 'Bismark' (header lines start
# with that word). Unused files are deleted from disk and removed from
# @sorting_files, so that only files holding data are handed on afterwards.
#
# Takes no arguments; progress is reported on STDERR. Dies if one of the
# files cannot be opened for reading.
#
# Files are read through lexical filehandles using 3-argument open; gzipped
# files are read via a list-form pipe from 'zcat', so that the file name is
# never interpreted by a shell.
sub delete_unused_files{

    warn "Deleting unused files ...\n\n"; sleep(1);

    my $index = 0;

    # $index only advances when a file is kept: splice() moves the next
    # candidate into the current slot whenever a file is dropped.
    while ($index <= $#sorting_files){
        my $file = $sorting_files[$index];

        my $in;
        if ($file =~ /gz$/){
            open ($in, '-|', 'zcat', $file) or die "Failed to read from methylation extractor output file $file: $!\n";
        }
        else{
            open ($in, '<', $file) or die "Failed to read from methylation extractor output file $file: $!\n";
        }

        my $used = 0;

        while (my $line = <$in>){
            next if ($line =~ /^Bismark/);
            $used = 1; # first non-header line is enough, no need to read any further
            last;
        }

        {
            # Reading stops early, so a zcat pipe is usually closed while
            # zcat is still writing; close() then reports a failure that is
            # expected here. The result is ignored on purpose, and $! / $?
            # are kept local so the status does not leak to later code.
            local ($!, $?);
            close $in;
        }

        if ($used){
            warn "$file contains data ->\tkept\n";
            ++$index;
        }
        else{

            if (unlink $file){
                warn "$file was empty ->\tdeleted\n";
            }
            else{
                warn "$file was empty, however deletion was unsuccessful: $!\n"
            }

            ### we also need to remove the element from @sorting_files
            splice @sorting_files, $index, 1;
        }
    }
    warn "\n\n";
}
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
381
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
### Writes the M-bias table for one input file and, if the GD::Graph modules can be loaded,
### draws the corresponding M-bias plot(s) as PNG files.
###
### Argument:
###   $filename - name of the input file, optionally with a path; only the last path component
###               is used to derive the names of the output files.
###
### Variables read from outside this subroutine:
###   $output_dir       - prepended verbatim to every output file name
###   $paired / $single - select the paired-end or single-end report layout
###   %mbias_1, %mbias_2 - call counts, accessed as $mbias_N{context}->{position}->{meth|un}
###
### Side effects:
###   - creates <name>M-bias.txt, and (only if GD::Graph::lines and GD::Graph::colour load)
###     <name>M-bias_R1.png plus, for paired-end data, <name>M-bias_R2.png
###   - missing 'meth'/'un' counters for positions 1..max are set to 0 in %mbias_1/%mbias_2
###   - dies if an output file cannot be opened or if GD::Graph reports an error
sub produce_mbias_plots{

    my $filename = shift;

    my $mbias = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified

    # Each pattern is anchored at the end of the name and only removes the letters themselves,
    # so a preceding dot is kept (e.g. 'sample.bam' becomes 'sample.').
    $mbias =~ s/gz$//;
    $mbias =~ s/sam$//;
    $mbias =~ s/bam$//;
    $mbias =~ s/txt$//;

    my $mbias_graph_1 = $output_dir . $mbias . 'M-bias_R1.png';
    my $mbias_graph_2 = $output_dir . $mbias . 'M-bias_R2.png';

    $mbias .= 'M-bias.txt';

    open (MBIAS,'>',"$output_dir$mbias") or die "Failed to open file for the M-bias data: $!\n\n";

    # determining maximum read length (= highest position for which any call was recorded)
    my $max_length_1 = 0;
    my $max_length_2 = 0;

    foreach my $context (keys %mbias_1){
        foreach my $pos (keys %{$mbias_1{$context}}){
            $max_length_1 = $pos unless ($max_length_1 >= $pos);
        }
    }
    if ($paired){
        foreach my $context (keys %mbias_2){
            foreach my $pos (keys %{$mbias_2{$context}}){
                $max_length_2 = $pos unless ($max_length_2 >= $pos);
            }
        }
    }

    if ($single){
        warn "Determining maximum read length for M-Bias plot\n";
        warn "Maximum read length of Read 1: $max_length_1\n\n";
    }
    else{
        warn "Determining maximum read lengths for M-Bias plots\n";
        warn "Maximum read length of Read 1: $max_length_1\n";
        warn "Maximum read length of Read 2: $max_length_2\n\n";
    }

    # Plot data in the layout GD::Graph expects: element 0 holds the x values (positions),
    # elements 1-3 the % methylation for CpG/CHG/CHH and elements 4-6 the matching coverage.
    # This order has to agree with 'use_axis' and the legend set further down.
    my @mbias_read1;
    my @mbias_read2;
    my %series_index = (CpG => 1, CHG => 2, CHH => 3);

    # Check whether the module GD::Graph::lines is installed
    my $gd_graph_installed = 0;
    eval{
        require GD::Graph::lines;
        GD::Graph::lines->import();
    };

    unless($@) { # $@ is set if something went wrong in the last eval{ require ... }
        $gd_graph_installed = 1;

        # Check whether the module GD::Graph::colour is installed
        eval{
            require GD::Graph::colour;
            GD::Graph::colour->import(qw(:colours :lists :files :convert));
        };

        if ($@) {
            warn "Perl module GD::Graph::colour not found, skipping drawing M-bias plots (only writing out M-bias plot table)\n";
            sleep(2);
            $gd_graph_installed = 0;
        }
    }
    else{
        warn "Perl module GD::Graph::lines is not installed, skipping drawing M-bias plots (only writing out M-bias plot table)\n";
        sleep(2);
    }

    my $graph_title = $paired ? 'M-bias (Read 1)' : 'M-bias';
    my $graph1;
    my $graph2;

    if ($gd_graph_installed){
        $graph1 = GD::Graph::lines->new(800,600);
        if ($paired){
            $graph2 = GD::Graph::lines->new(800,600);
        }

        # The custom colours are shared by both plots, so they are registered only once.
        add_colour(nice_blue   => [31,120,180]);
        add_colour(nice_orange => [255,127,0]);
        add_colour(nice_green  => [51,160,44]);
        add_colour(pale_blue   => [153,206,227]);
        add_colour(pale_orange => [253,204,138]);
        add_colour(pale_green  => [191,230,207]);
    }

    ### Read 1 (or the only read for single-end data): table first, then the plot

    foreach my $context (qw(CpG CHG CHH)){
        @{$mbias_read1[0]} = (); # the x values are the same for every context, so they are rebuilt each time

        if ($paired){
            print MBIAS "$context context (R1)\n================\n";
        }
        else{
            print MBIAS "$context context\n===========\n";
        }
        print MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";

        foreach my $pos (1..$max_length_1){

            # positions without any call are reported with counts of 0
            unless (defined $mbias_1{$context}->{$pos}->{meth}){
                $mbias_1{$context}->{$pos}->{meth} = 0;
            }
            unless (defined $mbias_1{$context}->{$pos}->{un}){
                $mbias_1{$context}->{$pos}->{un} = 0;
            }

            my $coverage = $mbias_1{$context}->{$pos}->{un} + $mbias_1{$context}->{$pos}->{meth};

            # the percentage stays an empty string if there were no calls at this position
            my $percent = '';
            if ($coverage > 0){
                $percent = sprintf("%.2f",$mbias_1{$context}->{$pos}->{meth} * 100/ $coverage);
            }

            print MBIAS "$pos\t$mbias_1{$context}->{$pos}->{meth}\t$mbias_1{$context}->{$pos}->{un}\t$percent\t$coverage\n";

            push @{$mbias_read1[0]},$pos;
            push @{$mbias_read1[$series_index{$context}]},$percent;
            push @{$mbias_read1[$series_index{$context} + 3]},$coverage;
        }
        print MBIAS "\n";
    }

    if ($gd_graph_installed){

        $graph1->set(
            x_label              => 'position (bp)',
            y1_label             => '% methylation',
            y2_label             => '# methylation calls',
            title                => $graph_title,
            line_width           => 2,
            x_max_value          => $max_length_1,
            x_min_value          => 0,
            y_tick_number        => 10,
            y_label_skip         => 2,
            y1_max_value         => 100,
            y1_min_value         => 0,
            y2_min_value         => 0,
            x_label_skip         => 5,
            x_label_position     => 0.5,
            x_tick_offset        => -1,
            bgclr                => 'white',
            transparent          => 0,
            two_axes             => 1,
            use_axis             => [1,1,1,2,2,2], # % methylation on the left axis, call counts on the right one
            legend_placement     => 'RC',
            legend_spacing       => 6,
            legend_marker_width  => 24,
            legend_marker_height => 18,
            dclrs                => [ qw(nice_blue nice_orange nice_green pale_blue pale_orange pale_green)],
        ) or die $graph1->error;

        $graph1->set_legend('CpG methylation','CHG methylation','CHH methylation','CpG total calls','CHG total calls','CHH total calls');

        my $gd1 = $graph1->plot(\@mbias_read1) or die $graph1->error;

        open (MBIAS_G1,'>',$mbias_graph_1) or die "Failed to write to file for M-bias plot 1: $!\n\n";
        binmode MBIAS_G1;
        print MBIAS_G1 $gd1->png;
        # buffered write errors only surface when the handle is closed
        close (MBIAS_G1) or warn "Failed to close file for M-bias plot 1: $!\n";
    }

    ### Read 2 (paired-end data only)

    if ($paired){

        foreach my $context (qw(CpG CHG CHH)){
            @{$mbias_read2[0]} = ();

            print MBIAS "$context context (R2)\n================\n";
            print MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";

            foreach my $pos (1..$max_length_2){

                unless (defined $mbias_2{$context}->{$pos}->{meth}){
                    $mbias_2{$context}->{$pos}->{meth} = 0;
                }
                unless (defined $mbias_2{$context}->{$pos}->{un}){
                    $mbias_2{$context}->{$pos}->{un} = 0;
                }

                my $coverage = $mbias_2{$context}->{$pos}->{un} + $mbias_2{$context}->{$pos}->{meth};

                my $percent = '';
                if ($coverage > 0){
                    $percent = sprintf("%.2f",$mbias_2{$context}->{$pos}->{meth} * 100/ $coverage);
                }

                print MBIAS "$pos\t$mbias_2{$context}->{$pos}->{meth}\t$mbias_2{$context}->{$pos}->{un}\t$percent\t$coverage\n";

                push @{$mbias_read2[0]},$pos;
                push @{$mbias_read2[$series_index{$context}]},$percent;
                push @{$mbias_read2[$series_index{$context} + 3]},$coverage;
            }
            print MBIAS "\n";
        }

        if ($gd_graph_installed){

            $graph2->set(
                x_label              => 'position (bp)',
                y1_label             => '% methylation',
                y2_label             => '# calls',
                title                => 'M-bias (Read 2)',
                line_width           => 2,
                x_max_value          => $max_length_2, # was $max_length_1, but this plot shows Read 2 positions
                x_min_value          => 0,
                y_tick_number        => 10,
                y_label_skip         => 2,
                y1_max_value         => 100,
                y1_min_value         => 0,
                y2_min_value         => 0,
                x_label_skip         => 5,
                x_label_position     => 0.5,
                x_tick_offset        => -1,
                bgclr                => 'white',
                transparent          => 0,
                two_axes             => 1,
                use_axis             => [1,1,1,2,2,2],
                legend_placement     => 'RC',
                legend_spacing       => 6,
                legend_marker_width  => 24,
                legend_marker_height => 18,
                dclrs                => [ qw(nice_blue nice_orange nice_green pale_blue pale_orange pale_green)],
            ) or die $graph2->error;

            $graph2->set_legend('CpG methylation','CHG methylation','CHH methylation','CpG total calls','CHG total calls','CHH total calls');
            my $gd2 = $graph2->plot(\@mbias_read2) or die $graph2->error;

            open (MBIAS_G2,'>',$mbias_graph_2) or die "Failed to write to file for M-bias plot 2: $!\n\n";
            binmode MBIAS_G2;
            print MBIAS_G2 $gd2->png;
            close (MBIAS_G2) or warn "Failed to close file for M-bias plot 2: $!\n";
        }
    }

    # The table is complete at this point; closing it flushes the data and reports write errors.
    close (MBIAS) or warn "Failed to close file for the M-bias data: $!\n";
}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
654
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
655 sub process_commandline{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
656 my $help;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
657 my $single_end;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
658 my $paired_end;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
659 my $ignore;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
660 my $ignore_r2;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
661 my $genomic_fasta;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
662 my $full;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
663 my $report;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
664 my $extractor_version;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
665 my $no_overlap;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
666 my $merge_non_CpG;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
667 my $vanilla;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
668 my $output_dir;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
669 my $no_header;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
670 my $bedGraph;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
671 my $coverage_threshold = 1; # Minimum number of reads covering before calling methylation status
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
672 my $remove;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
673 my $counts;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
674 my $cytosine_report;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
675 my $genome_folder;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
676 my $zero;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
677 my $CpG_only;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
678 my $CX_context;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
679 my $split_by_chromosome;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
680 my $sort_size;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
681 my $samtools_path;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
682 my $gzip;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
683 my $mbias_only;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
684 my $gazillion;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
685 my $ample_mem;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
686
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
687 my $command_line = GetOptions ('help|man' => \$help,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
688 'p|paired-end' => \$paired_end,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
689 's|single-end' => \$single_end,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
690 'fasta' => \$genomic_fasta,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
691 'ignore=i' => \$ignore,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
692 'ignore_r2=i' => \$ignore_r2,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
693 'comprehensive' => \$full,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
694 'report' => \$report,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
695 'version' => \$extractor_version,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
696 'no_overlap' => \$no_overlap,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
697 'merge_non_CpG' => \$merge_non_CpG,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
698 'vanilla' => \$vanilla,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
699 'o|output=s' => \$output_dir,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
700 'no_header' => \$no_header,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
701 'bedGraph' => \$bedGraph,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
702 "cutoff=i" => \$coverage_threshold,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
703 "remove_spaces" => \$remove,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
704 "counts" => \$counts,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
705 "cytosine_report" => \$cytosine_report,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
706 'g|genome_folder=s' => \$genome_folder,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
707 "zero_based" => \$zero,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
708 "CX|CX_context" => \$CX_context,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
709 "split_by_chromosome" => \$split_by_chromosome,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
710 "buffer_size=s" => \$sort_size,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
711 'samtools_path=s' => \$samtools_path,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
712 "gzip" => \$gzip,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
713 "mbias_only" => \$mbias_only,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
714 "gazillion|scaffolds" => \$gazillion,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
715 "ample_memory" => \$ample_mem,
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
716 );
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
717
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
718 ### EXIT ON ERROR if there were errors with any of the supplied options
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
719 unless ($command_line){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
720 die "Please respecify command line options\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
721 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
722
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
723 ### HELPFILE
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
724 if ($help){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
725 print_helpfile();
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
726 exit;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
727 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
728
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
729 if ($extractor_version){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
730 print << "VERSION";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
731
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
732
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
733 Bismark Methylation Extractor
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
734
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
735 Bismark Extractor Version: $version
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
736 Copyright 2010-13 Felix Krueger, Babraham Bioinformatics
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
737 www.bioinformatics.babraham.ac.uk/projects/bismark/
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
738
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
739
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
740 VERSION
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
741 exit;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
742 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
743
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
744
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
745 ### no files provided
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
746 unless (@ARGV){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
747 die "You need to provide one or more Bismark files to create an individual C methylation output. Please respecify!\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
748 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
749 @filenames = @ARGV;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
750
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
751 warn "\n *** Bismark methylation extractor version $version ***\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
752
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
753 ### M-BIAS ONLY
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
754 if ($mbias_only){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
755 if ($bedGraph){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
756 die "Option '--mbias_only' skips all sorts of methylation extraction, including the bedGraph generation. Please respecify!\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
757 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
758 if ($cytosine_report){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
759 die "Option '--mbias_only' skips all sorts of methylation extraction, including the genome-wide cytosine methylation report generation. Please respecify!\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
760 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
761 if ($merge_non_CpG){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
762 warn "Option '--mbias_only' skips all sorts of methylation extraction, thus '--merge' won't have any effect\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
763 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
764 if ($full){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
765 warn "Option '--mbias_only' skips all sorts of methylation extraction, thus '--comprehensive' won't have any effect\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
766 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
767 sleep(3);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
768 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
769
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
770 ### PRINT A REPORT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
771 unless ($report){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
772 $report = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
773 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
774
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
775 ### OUTPUT DIR PATH
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
776 if ($output_dir){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
777 unless ($output_dir =~ /\/$/){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
778 $output_dir =~ s/$/\//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
779 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
780 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
781 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
782 $output_dir = '';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
783 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
784
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
785 ### NO HEADER
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
786 unless ($no_header){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
787 $no_header = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
788 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
789
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
790 ### OLD (VANILLA) OUTPUT FORMAT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
791 unless ($vanilla){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
792 $vanilla = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
793 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
794
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
795 if ($single_end){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
796 $paired_end = 0; ### SINGLE END ALIGNMENTS
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
797 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
798 elsif ($paired_end){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
799 $single_end = 0; ### PAIRED-END ALIGNMENTS
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
800 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
801 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
802
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
803 ### we will try to determine whether the input file was a single-end or paired-end sequencing run from the SAM header
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
804
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
805 if ($vanilla){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
806 die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format with '-s' or '-p'\n\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
807 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
808 else{ # SAM/BAM format
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
809
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
810 my $file = $filenames[0];
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
811 warn "Trying to determine the type of mapping from the SAM header line of file $file\n"; sleep(1);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
812
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
813 ### if the user did not specify whether the alignment file was single-end or paired-end we are trying to get this information from the @PG header line in the SAM/BAM file
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
814 if ($file =~ /\.gz$/){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
815 open (DETERMINE,"zcat $file |") or die "Unable to read from gzipped file $file: $!\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
816 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
817 elsif ($file =~ /\.bam$/ || `file -b $file` =~ /^gzip/){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
818 open (DETERMINE,"samtools view -h $file |") or die "Unable to read from BAM file $file: $!\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
819 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
820 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
821 open (DETERMINE,$file) or die "Unable to read from $file: $!\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
822 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
823
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
824 while (<DETERMINE>){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
825 last unless (/^\@/);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
826 if ($_ =~ /^\@PG/){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
827 # warn "found the \@PG line:\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
828 # warn "$_";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
829
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
830 if ($_ =~ /-1/ and $_ =~ /-2/){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
831 warn "Treating file(s) as paired-end data (as extracted from \@PG line)\n\n"; sleep(1);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
832 $paired_end = 1;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
833 $single_end = 0;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
834 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
835 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
836 warn "Treating file(s) as single-end data (as extracted from \@PG line)\n\n"; sleep(1);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
837 $paired_end = 0;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
838 $single_end = 1;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
839 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
840 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
841 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
842
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
843 close DETERMINE or warn $!;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
844
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
845 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
846 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
847
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
848 ### IGNORING <INT> bases at the start of the read when processing the methylation call string
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
849 unless ($ignore){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
850 $ignore = 0;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
851 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
852
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
853 if (defined $ignore_r2){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
854 die "You can only specify --ignore_r2 for paired-end result files\n" unless ($paired_end);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
855 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
856 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
857 $ignore_r2 = 0;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
858 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
859
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
860
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
861 ### NO OVERLAP
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
862 if ($no_overlap){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
863 die "The option '--no_overlap' can only be specified for paired-end input!\n" unless ($paired_end);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
864 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
865 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
866 $no_overlap = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
867 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
868
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
869 ### COMPREHENSIVE OUTPUT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
870 unless ($full){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
871 $full = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
872 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
873
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
874 ### MERGE NON-CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
875 unless ($merge_non_CpG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
876 $merge_non_CpG = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
877 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
878
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
879 ### remove white spaces in read ID (needed for sorting using the sort command
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
880 unless ($remove){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
881 $remove = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
882 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
883
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
884 ### COVERAGE THRESHOLD FOR bedGraph OUTPUT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
885 if (defined $coverage_threshold){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
886 unless ($coverage_threshold > 0){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
887 die "Please select a coverage greater than 0 (positive integers only)\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
888 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
889 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
890 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
891 $coverage_threshold = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
892 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
893
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
894 ### SORT buffer size
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
895 if (defined $sort_size){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
896 unless ($sort_size =~ /^\d+\%$/ or $sort_size =~ /^\d+(K|M|G|T)$/){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
897 die "Please select a buffer size as percentage (e.g. --buffer_size 20%) or a number to be multiplied with K, M, G, T etc. (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
898 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
899 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
900 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
901 $sort_size = '2G';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
902 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
903
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
904 if ($zero){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
905 die "Option '--zero' is only available if '--cytosine_report' is specified as well. Please respecify\n" unless ($cytosine_report);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
906 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
907
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
908 if ($CX_context){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
909 die "Option '--CX_context' is only available if '--cytosine_report' or '--bedGraph' is specified as well. Please respecify\n" unless ($cytosine_report or $bedGraph);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
910 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
911 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
912 $CX_context = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
913 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
914
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
915 unless ($counts){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
916 $counts = 1; # counts will always be set
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
917 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
918
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
919 if ($cytosine_report){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
920
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
921 ### GENOME folder
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
922 if ($genome_folder){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
923 unless ($genome_folder =~/\/$/){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
924 $genome_folder =~ s/$/\//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
925 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
926 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
927 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
928 die "Please specify a genome folder to proceed (full path only)\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
929 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
930
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
931 unless ($bedGraph){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
932 warn "Setting the option '--bedGraph' since this is required for the genome-wide cytosine report\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
933 $bedGraph = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
934 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
935 unless ($counts){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
936 # warn "Setting the option '--counts' since this is required for the genome-wide cytosine report\n";
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
937 $counts = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
938 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
939 warn "\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
940 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
941
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
942 ### PATH TO SAMTOOLS
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
943 if (defined $samtools_path){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
944 # if Samtools was specified as full command
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
945 if ($samtools_path =~ /samtools$/){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
946 if (-e $samtools_path){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
947 # Samtools executable found
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
948 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
949 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
950 die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
951 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
952 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
953 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
954 unless ($samtools_path =~ /\/$/){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
955 $samtools_path =~ s/$/\//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
956 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
957 $samtools_path .= 'samtools';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
958 if (-e $samtools_path){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
959 # Samtools executable found
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
960 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
961 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
962 die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
963 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
964 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
965 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
966 # Check whether Samtools is in the PATH if no path was supplied by the user
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
967 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
968 if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if Samtools is in the PATH
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
969 $samtools_path = `which samtools`;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
970 chomp $samtools_path;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
971 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
972 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
973
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
974 unless (defined $samtools_path){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
975 $samtools_path = '';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
976 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
977
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
978
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
979 if ($gazillion){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
980 if ($ample_mem){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
981 die "You can't currently select '--ample_mem' together with '--gazillion'. Make your pick!\n\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
982 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
983 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
984
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
985 return ($ignore,$genomic_fasta,$single_end,$paired_end,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_only,$gazillion,$ample_mem);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
986 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
987
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
988
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
### Sanity check for paired-end SAM/BAM input: dies if the file appears to have
### been sorted by chromosomal position, because this function expects Read 1
### and Read 2 of a pair to follow each other directly.
###
### Argument: the name of a Bismark result file (plain SAM, gzipped SAM or BAM).
### Returns:  nothing meaningful; the sub either dies or prints '...passed!'.
### Reads the file-level variable $samtools_path to decode BAM input.
###
### Two tests are carried out:
###  (1) header lines: a coordinate sort order announced in the header is fatal.
###      The sort order is recorded in the SO: tag of the @HD line; the
###      historic test for a (non-standard) '@SO' record is kept as well.
###  (2) the first 100000 alignment lines are read in pairs and both read IDs
###      have to be identical (optionally after removing trailing /1 and /2).
sub test_positional_sorting{

    my $filename = shift;

    print "\nNow testing Bismark result file $filename for positional sorting (which would be bad...)\t";
    sleep(1);

    ### Working out how the file has to be read. External commands are started
    ### in list form (no shell involved) so that file names containing spaces
    ### or shell metacharacters are passed on unchanged.
    my $reader;
    if ($filename =~ /\.gz$/) {
        $reader = 'zcat';
    }
    elsif ($filename =~ /bam$/) {
        $reader = 'samtools';
    }
    else {
        ### no telling file extension: asking 'file' whether the content is
        ### gzip compressed, in which case it is handed to Samtools
        my $description = '';
        if (open (my $probe,'-|','file','-b',$filename)) {
            my $first_line = <$probe>;
            $description = $first_line if (defined $first_line);
            close $probe; # exit status of 'file' is of no interest here
        }
        $reader = ($description =~ /^gzip/) ? 'samtools' : 'plain';
    }

    my $test_fh;
    if ($reader eq 'zcat') {
        open ($test_fh,'-|','zcat',$filename) or die "Can't open gzipped file $filename: $!\n";
    }
    elsif ($reader eq 'samtools') {
        if ($samtools_path){
            open ($test_fh,'-|',$samtools_path,'view','-h',$filename) or die "Can't open BAM file $filename: $!\n";
        }
        else{
            die "Sorry couldn't find an installation of Samtools. Either specifiy an alternative path using the option '--samtools_path /your/path/', or use a SAM file instead\n\n";
        }
    }
    else {
        open ($test_fh,'<',$filename) or die "Can't open file $filename: $!\n";
    }

    my $count = 0;

    ### lexical line variables are used so that the caller's $_ is left alone
    while (my $line_1 = <$test_fh>) {
        if ($line_1 =~ /^\@/) { # testing header lines if they indicate positional sorting
            if ($line_1 =~ /^\@SO/ or $line_1 =~ /^\@HD\t.*\bSO:coordinate\b/) {
                die "SAM/BAM header line '$line_1' indicates that the Bismark aligment file has been sorted by chromosomal positions which is is incompatible with correct methylation extraction. Please use an unsorted file instead\n\n";
            }
            next;
        }
        $count++;

        last if ($count > 100000); # else we test the first 100000 sequences if they start with the same read ID

        my ($id_1) = (split (/\t/,$line_1));

        ### reading the next line which should be read 2
        my $line_2 = <$test_fh>;
        last unless (defined $line_2); # reached the end of the file, nothing left to compare
        my ($id_2) = (split (/\t/,$line_2));
        last unless (defined $id_2 and length $id_2); # a read ID of '0' is still a valid ID
        ++$count;

        if ($id_1 eq $id_2){
            ### ids are the same
            next;
        }
        else{ ### in previous versions of Bismark we appended /1 and /2 to the read IDs for easier eyeballing which read is which. These tags need to be removed first
            my $id_1_trunc = $id_1;
            $id_1_trunc =~ s/\/1$//;
            my $id_2_trunc = $id_2;
            $id_2_trunc =~ s/\/2$//;

            unless ($id_1_trunc eq $id_2_trunc){
                die "The IDs of Read 1 ($id_1) and Read 2 ($id_2) are not the same. This might be a result of sorting the paired-end SAM/BAM files by chromosomal position which is not compatible with correct methylation extraction. Please use an unsorted file instead\n\n";
            }
        }
    }

    ### The return value of close is not checked on purpose: we may have stopped
    ### reading before the end of the pipe, and checking close used to fail
    ### ("somehow fails on our cluster"). The handle is still released here.
    close $test_fh;

    ### If it hasn't died so far then it seems the file is in the correct Bismark format (read 1 and read 2 of a pair directly following each other)
    warn "...passed!\n";
    sleep(1);

}
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1053
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1054
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1055 sub process_Bismark_results_file{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1056 my $filename = shift;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1057
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1058 warn "\nNow reading in Bismark result file $filename\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1059
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1060 if ($filename =~ /\.gz$/) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1061 open (IN,"zcat $filename |") or die "Can't open gzipped file $filename: $!\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1062 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1063 elsif ($filename =~ /bam$/ || `file -b $filename` =~ /^gzip/) {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1064 if ($samtools_path){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1065 open (IN,"$samtools_path view -h $filename |") or die "Can't open BAM file $filename: $!\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1066 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1067 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1068 die "Sorry couldn't find an installation of Samtools. Either specifiy an alternative path using the option '--samtools_path /your/path/', or use a SAM file instead\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1069 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1070 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1071 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1072 open (IN,$filename) or die "Can't open file $filename: $!\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1073 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1074
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1075 ### Vanilla and SAM output need to read different numbers of header lines
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1076 if ($vanilla) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1077 my $bismark_version = <IN>; ## discarding the Bismark version info
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1078 chomp $bismark_version;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1079 $bismark_version =~ s/\r//; # replaces \r line feed
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1080 $bismark_version =~ s/Bismark version: //;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1081 if ($bismark_version =~ /^\@/) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1082 warn "Detected \@ as the first character of the version information. Is it possible that the file is in SAM format?\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1083 sleep (2);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1084 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1085
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1086 unless ($version eq $bismark_version){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1087 die "The methylation extractor and Bismark itself need to be of the same version!\n\nVersions used:\nmethylation extractor: '$version'\nBismark: '$bismark_version'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1088 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1089 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1090 # If the read is in SAM format (default) it can either start with @ header lines or start with alignments directly.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1091 # We are reading from it further down
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1092 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1093
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1094 my $output_filename = (split (/\//,$filename))[-1];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1095
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1096 ### OPENING OUTPUT-FILEHANDLES
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1097 if ($report) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1098 my $report_filename = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1099 $report_filename =~ s/\.sam$//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1100 $report_filename =~ s/\.txt$//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1101 $report_filename =~ s/$/_splitting_report.txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1102 $report_filename = $output_dir . $report_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1103 open (REPORT,'>',$report_filename) or die "Failed to write to file $report_filename $!\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1104 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1105
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1106 if ($report) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1107 print REPORT "$output_filename\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1108 print REPORT "Parameters used to extract methylation information:\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1109 if ($paired) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1110 if ($vanilla) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1111 print REPORT "Bismark result file: paired-end (vanilla Bismark format)\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1112 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1113 print REPORT "Bismark result file: paired-end (SAM format)\n"; # default
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1114 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1115 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1116
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1117 if ($single) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1118 if ($vanilla) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1119 print REPORT "Bismark result file: single-end (vanilla Bismark format)\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1120 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1121 print REPORT "Bismark result file: single-end (SAM format)\n"; # default
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1122 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1123 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1124 if ($single){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1125 if ($ignore) {
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1126 print REPORT "Ignoring first $ignore bp\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1127 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1128 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1129 else{ # paired-end
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1130 if ($ignore) {
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1131 print REPORT "Ignoring first $ignore bp of Read 1\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1132 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1133 if ($ignore_r2){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1134 print REPORT "Ignoring first $ignore_r2 bp of Read 2\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1135 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1136 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1137
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1138 if ($full) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1139 print REPORT "Output specified: comprehensive\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1140 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1141 print REPORT "Output specified: strand-specific (default)\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1142 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1143
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1144 if ($no_overlap) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1145 print REPORT "No overlapping methylation calls specified\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1146 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1147 if ($genomic_fasta) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1148 print REPORT "Genomic equivalent sequences will be printed out in FastA format\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1149 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1150 if ($merge_non_CpG) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1151 print REPORT "Methylation in CHG and CHH context will be merged into \"non-CpG context\" output\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1152 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1153
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1154 print REPORT "\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1155 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1156
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1157 ##### open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1158
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1159 ### CpG-context and non-CpG context. THIS SECTION IS OPTIONAL
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1160 ### if --comprehensive AND --merge_non_CpG was specified we are only writing out one CpG-context and one Any-Other-context result file
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1161 if ($full and $merge_non_CpG) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1162 my $cpg_output = my $other_c_output = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1163 ### C in CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1164 $cpg_output =~ s/^/CpG_context_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1165 $cpg_output =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1166 $cpg_output =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1167 $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1168 $cpg_output = $output_dir . $cpg_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1169
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1170 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1171 $cpg_output .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1172 open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1173 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1174 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1175 open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1176 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1177
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1178 warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1179 push @sorting_files,$cpg_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1180
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1181 unless ($no_header) {
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1182 print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1183 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1184
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1185 ### C in any other context than CpG
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1186 $other_c_output =~ s/^/Non_CpG_context_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1187 $other_c_output =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1188 $other_c_output =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1189 $other_c_output =~ s/$/.txt/ unless ($other_c_output =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1190 $other_c_output = $output_dir . $other_c_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1191
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1192 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1193 $other_c_output .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1194 open ($fhs{other_context},"| gzip -c - > $other_c_output") or die "Failed to write to $other_c_output $! \n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1195 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1196 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1197 open ($fhs{other_context},'>',$other_c_output) or die "Failed to write to $other_c_output $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1198 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1199
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1200 warn "Writing result file containing methylation information for C in any other context to $other_c_output\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1201 push @sorting_files,$other_c_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1202
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1203
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1204 unless ($no_header) {
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1205 print {$fhs{other_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1206 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1207 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1208
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1209 ### if only --merge_non_CpG was specified we will write out 8 different output files, depending on where the (first) unique best alignment has been found
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1210 elsif ($merge_non_CpG) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1211
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1212 my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1213
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1214 ### For cytosines in CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1215 $cpg_ot =~ s/^/CpG_OT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1216 $cpg_ot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1217 $cpg_ot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1218 $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1219 $cpg_ot = $output_dir . $cpg_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1220
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1221 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1222 $cpg_ot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1223 open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1224 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1225 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1226 open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1227 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1228
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1229 warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1230 push @sorting_files,$cpg_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1231
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1232 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1233 print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1234 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1235
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1236 $cpg_ctot =~ s/^/CpG_CTOT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1237 $cpg_ctot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1238 $cpg_ctot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1239 $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1240 $cpg_ctot = $output_dir . $cpg_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1241
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1242 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1243 $cpg_ctot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1244 open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1245 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1246 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1247 open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1248 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1249
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1250 warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1251 push @sorting_files,$cpg_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1252
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1253 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1254 print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1255 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1256
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1257 $cpg_ctob =~ s/^/CpG_CTOB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1258 $cpg_ctob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1259 $cpg_ctob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1260 $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1261 $cpg_ctob = $output_dir . $cpg_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1262
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1263 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1264 $cpg_ctob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1265 open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1266 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1267 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1268 open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1269 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1270
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1271 warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1272 push @sorting_files,$cpg_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1273
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1274 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1275 print {$fhs{2}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1276 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1277
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1278 $cpg_ob =~ s/^/CpG_OB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1279 $cpg_ob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1280 $cpg_ob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1281 $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1282 $cpg_ob = $output_dir . $cpg_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1283
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1284 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1285 $cpg_ob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1286 open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1287 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1288 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1289 open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1290 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1291
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1292 warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1293 push @sorting_files,$cpg_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1294
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1295 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1296 print {$fhs{3}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1297 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1298
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1299 ### For cytosines in Non-CpG (CC, CT or CA) context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1300 my $other_c_ot = my $other_c_ctot = my $other_c_ctob = my $other_c_ob = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1301
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1302 $other_c_ot =~ s/^/Non_CpG_OT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1303 $other_c_ot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1304 $other_c_ot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1305 $other_c_ot =~ s/$/.txt/ unless ($other_c_ot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1306 $other_c_ot = $output_dir . $other_c_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1307
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1308 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1309 $other_c_ot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1310 open ($fhs{0}->{other_c},"| gzip -c - > $other_c_ot") or die "Failed to write to $other_c_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1311 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1312 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1313 open ($fhs{0}->{other_c},'>',$other_c_ot) or die "Failed to write to $other_c_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1314 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1315
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1316 warn "Writing result file containing methylation information for C in any other context from the original top strand to $other_c_ot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1317 push @sorting_files,$other_c_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1318
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1319 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1320 print {$fhs{0}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1321 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1322
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1323 $other_c_ctot =~ s/^/Non_CpG_CTOT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1324 $other_c_ctot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1325 $other_c_ctot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1326 $other_c_ctot =~ s/$/.txt/ unless ($other_c_ctot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1327 $other_c_ctot = $output_dir . $other_c_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1328
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1329 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1330 $other_c_ctot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1331 open ($fhs{1}->{other_c},"| gzip -c - > $other_c_ctot") or die "Failed to write to $other_c_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1332 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1333 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1334 open ($fhs{1}->{other_c},'>',$other_c_ctot) or die "Failed to write to $other_c_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1335 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1336
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1337 warn "Writing result file containing methylation information for C in any other context from the complementary to original top strand to $other_c_ctot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1338 push @sorting_files,$other_c_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1339
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1340 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1341 print {$fhs{1}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1342 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1343
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1344 $other_c_ctob =~ s/^/Non_CpG_CTOB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1345 $other_c_ctob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1346 $other_c_ctob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1347 $other_c_ctob =~ s/$/.txt/ unless ($other_c_ctob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1348 $other_c_ctob = $output_dir . $other_c_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1349
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1350 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1351 $other_c_ctob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1352 open ($fhs{2}->{other_c},"| gzip -c - > $other_c_ctob") or die "Failed to write to $other_c_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1353 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1354 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1355 open ($fhs{2}->{other_c},'>',$other_c_ctob) or die "Failed to write to $other_c_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1356 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1357
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1358 warn "Writing result file containing methylation information for C in any other context from the complementary to original bottom strand to $other_c_ctob\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1359 push @sorting_files,$other_c_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1360
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1361 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1362 print {$fhs{2}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1363 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1364
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1365 $other_c_ob =~ s/^/Non_CpG_OB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1366 $other_c_ob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1367 $other_c_ob =~ s/bam$/txt/; # BAM input gets a txt extension too, matching the other per-strand output names
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1368 $other_c_ob =~ s/$/.txt/ unless ($other_c_ob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1369 $other_c_ob = $output_dir . $other_c_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1370
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1371 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1372 $other_c_ob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1373 open ($fhs{3}->{other_c},"| gzip -c - > $other_c_ob") or die "Failed to write to $other_c_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1374 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1375 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1376 open ($fhs{3}->{other_c},'>',$other_c_ob) or die "Failed to write to $other_c_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1377 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1378
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1379 warn "Writing result file containing methylation information for C in any other context from the original bottom strand to $other_c_ob\n\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1380 push @sorting_files,$other_c_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1381
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1382 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1383 print {$fhs{3}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1384 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1385 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1386 ### THIS SECTION IS THE DEFAULT (CpG, CHG and CHH context)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1387
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1388 ### if --comprehensive was specified we are only writing one file per context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1389 elsif ($full) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1390 my $cpg_output = my $chg_output = my $chh_output = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1391 ### C in CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1392 $cpg_output =~ s/^/CpG_context_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1393 $cpg_output =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1394 $cpg_output =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1395 $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1396 $cpg_output = $output_dir . $cpg_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1397
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1398 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1399 $cpg_output .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1400 open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1401 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1402 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1403 open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1404 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1405
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1406 warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1407 push @sorting_files,$cpg_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1408
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1409 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1410 print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1411 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1412
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1413 ### C in CHG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1414 $chg_output =~ s/^/CHG_context_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1415 $chg_output =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1416 $chg_output =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1417 $chg_output =~ s/$/.txt/ unless ($chg_output =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1418 $chg_output = $output_dir . $chg_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1419
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1420 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1421 $chg_output .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1422 open ($fhs{CHG_context},"| gzip -c - > $chg_output") or die "Failed to write to $chg_output $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1423 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1424 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1425 open ($fhs{CHG_context},'>',$chg_output) or die "Failed to write to $chg_output $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1426 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1427
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1428 warn "Writing result file containing methylation information for C in CHG context to $chg_output\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1429 push @sorting_files,$chg_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1430
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1431 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1432 print {$fhs{CHG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1433 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1434
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1435 ### C in CHH context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1436 $chh_output =~ s/^/CHH_context_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1437 $chh_output =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1438 $chh_output =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1439 $chh_output =~ s/$/.txt/ unless ($chh_output =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1440 $chh_output = $output_dir . $chh_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1441
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1442 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1443 $chh_output .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1444 open ($fhs{CHH_context},"| gzip -c - > $chh_output") or die "Failed to write to $chh_output $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1445 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1446 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1447 open ($fhs{CHH_context},'>',$chh_output) or die "Failed to write to $chh_output $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1448 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1449
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1450 warn "Writing result file containing methylation information for C in CHH context to $chh_output\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1451 push @sorting_files, $chh_output;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1452
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1453 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1454 print {$fhs{CHH_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1455 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1456 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1457 ### else we will write out 12 different output files, depending on where the (first) unique best alignment was found
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1458 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1459 my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1460
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1461 ### For cytosines in CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1462 $cpg_ot =~ s/^/CpG_OT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1463 $cpg_ot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1464 $cpg_ot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1465 $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1466 $cpg_ot = $output_dir . $cpg_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1467
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1468 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1469 $cpg_ot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1470 open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1471 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1472 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1473 open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1474 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1475
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1476 warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1477 push @sorting_files,$cpg_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1478
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1479 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1480 print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1481 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1482
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1483 $cpg_ctot =~ s/^/CpG_CTOT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1484 $cpg_ctot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1485 $cpg_ctot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1486 $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1487 $cpg_ctot = $output_dir . $cpg_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1488
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1489 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1490 $cpg_ctot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1491 open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1492 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1493 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1494 open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1495 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1496
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1497 warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1498 push @sorting_files,$cpg_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1499
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1500 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1501 print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1502 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1503
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1504 $cpg_ctob =~ s/^/CpG_CTOB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1505 $cpg_ctob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1506 $cpg_ctob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1507 $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1508 $cpg_ctob = $output_dir . $cpg_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1509
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1510 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1511 $cpg_ctob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1512 open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1513 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1514 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1515 open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1516 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1517
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1518 warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1519 push @sorting_files,$cpg_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1520
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1521 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1522 print {$fhs{2}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1523 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1524
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1525 $cpg_ob =~ s/^/CpG_OB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1526 $cpg_ob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1527 $cpg_ob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1528 $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1529 $cpg_ob = $output_dir . $cpg_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1530
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1531 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1532 $cpg_ob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1533 open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1534 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1535 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1536 open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1537 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1538
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1539 warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1540 push @sorting_files,$cpg_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1541
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1542 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1543 print {$fhs{3}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1544 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1545
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1546 ### For cytosines in CHG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1547 my $chg_ot = my $chg_ctot = my $chg_ctob = my $chg_ob = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1548
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1549 $chg_ot =~ s/^/CHG_OT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1550 $chg_ot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1551 $chg_ot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1552 $chg_ot =~ s/$/.txt/ unless ($chg_ot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1553 $chg_ot = $output_dir . $chg_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1554
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1555 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1556 $chg_ot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1557 open ($fhs{0}->{CHG},"| gzip -c - > $chg_ot") or die "Failed to write to $chg_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1558 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1559 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1560 open ($fhs{0}->{CHG},'>',$chg_ot) or die "Failed to write to $chg_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1561 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1562
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1563 warn "Writing result file containing methylation information for C in CHG context from the original top strand to $chg_ot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1564 push @sorting_files,$chg_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1565
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1566 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1567 print {$fhs{0}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1568 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1569
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1570 $chg_ctot =~ s/^/CHG_CTOT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1571 $chg_ctot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1572 $chg_ctot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1573 $chg_ctot =~ s/$/.txt/ unless ($chg_ctot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1574 $chg_ctot = $output_dir . $chg_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1575
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1576 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1577 $chg_ctot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1578 open ($fhs{1}->{CHG},"| gzip -c - > $chg_ctot") or die "Failed to write to $chg_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1579 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1580 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1581 open ($fhs{1}->{CHG},'>',$chg_ctot) or die "Failed to write to $chg_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1582 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1583
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1584 warn "Writing result file containing methylation information for C in CHG context from the complementary to original top strand to $chg_ctot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1585 push @sorting_files,$chg_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1586
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1587 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1588 print {$fhs{1}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1589 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1590
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1591 $chg_ctob =~ s/^/CHG_CTOB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1592 $chg_ctob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1593 $chg_ctob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1594 $chg_ctob =~ s/$/.txt/ unless ($chg_ctob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1595 $chg_ctob = $output_dir . $chg_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1596
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1597 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1598 $chg_ctob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1599 open ($fhs{2}->{CHG},"| gzip -c - > $chg_ctob") or die "Failed to write to $chg_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1600 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1601 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1602 open ($fhs{2}->{CHG},'>',$chg_ctob) or die "Failed to write to $chg_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1603 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1604
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1605 warn "Writing result file containing methylation information for C in CHG context from the complementary to original bottom strand to $chg_ctob\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1606 push @sorting_files,$chg_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1607
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1608 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1609 print {$fhs{2}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1610 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1611
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1612 $chg_ob =~ s/^/CHG_OB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1613 $chg_ob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1614 $chg_ob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1615 $chg_ob =~ s/$/.txt/ unless ($chg_ob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1616 $chg_ob = $output_dir . $chg_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1617
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1618 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1619 $chg_ob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1620 open ($fhs{3}->{CHG},"| gzip -c - > $chg_ob") or die "Failed to write to $chg_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1621 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1622 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1623 open ($fhs{3}->{CHG},'>',$chg_ob) or die "Failed to write to $chg_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1624 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1625
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1626 warn "Writing result file containing methylation information for C in CHG context from the original bottom strand to $chg_ob\n\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1627 push @sorting_files,$chg_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1628
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1629 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1630 print {$fhs{3}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1631 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1632
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1633 ### For cytosines in CHH context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1634 my $chh_ot = my $chh_ctot = my $chh_ctob = my $chh_ob = $output_filename;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1635
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1636 $chh_ot =~ s/^/CHH_OT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1637 $chh_ot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1638 $chh_ot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1639 $chh_ot =~ s/$/.txt/ unless ($chh_ot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1640 $chh_ot = $output_dir . $chh_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1641
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1642 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1643 $chh_ot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1644 open ($fhs{0}->{CHH},"| gzip -c - > $chh_ot") or die "Failed to write to $chh_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1645 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1646 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1647 open ($fhs{0}->{CHH},'>',$chh_ot) or die "Failed to write to $chh_ot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1648 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1649
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1650 warn "Writing result file containing methylation information for C in CHH context from the original top strand to $chh_ot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1651 push @sorting_files,$chh_ot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1652
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1653 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1654 print {$fhs{0}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1655 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1656
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1657 $chh_ctot =~ s/^/CHH_CTOT_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1658 $chh_ctot =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1659 $chh_ctot =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1660 $chh_ctot =~ s/$/.txt/ unless ($chh_ctot =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1661 $chh_ctot = $output_dir . $chh_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1662
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1663 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1664 $chh_ctot .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1665 open ($fhs{1}->{CHH},"| gzip -c - > $chh_ctot") or die "Failed to write to $chh_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1666 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1667 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1668 open ($fhs{1}->{CHH},'>',$chh_ctot) or die "Failed to write to $chh_ctot $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1669 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1670
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1671 warn "Writing result file containing methylation information for C in CHH context from the complementary to original top strand to $chh_ctot\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1672 push @sorting_files,$chh_ctot;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1673
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1674 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1675 print {$fhs{1}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1676 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1677
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1678 $chh_ctob =~ s/^/CHH_CTOB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1679 $chh_ctob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1680 $chh_ctob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1681 $chh_ctob =~ s/$/.txt/ unless ($chh_ctob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1682 $chh_ctob = $output_dir . $chh_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1683
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1684 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1685 $chh_ctob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1686 open ($fhs{2}->{CHH},"| gzip -c - > $chh_ctob") or die "Failed to write to $chh_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1687 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1688 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1689 open ($fhs{2}->{CHH},'>',$chh_ctob) or die "Failed to write to $chh_ctob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1690 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1691
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1692 warn "Writing result file containing methylation information for C in CHH context from the complementary to original bottom strand to $chh_ctob\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1693 push @sorting_files,$chh_ctob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1694
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1695 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1696 print {$fhs{2}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1697 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1698
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1699 $chh_ob =~ s/^/CHH_OB_/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1700 $chh_ob =~ s/sam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1701 $chh_ob =~ s/bam$/txt/;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1702 $chh_ob =~ s/$/.txt/ unless ($chh_ob =~ /\.txt$/);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1703 $chh_ob = $output_dir . $chh_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1704
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1705 if ($gzip){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1706 $chh_ob .= '.gz';
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1707 open ($fhs{3}->{CHH},"| gzip -c - > $chh_ob") or die "Failed to write to $chh_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1708 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1709 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1710 open ($fhs{3}->{CHH},'>',$chh_ob) or die "Failed to write to $chh_ob $!\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1711 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1712
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1713 warn "Writing result file containing methylation information for C in CHH context from the original bottom strand to $chh_ob\n\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1714 push @sorting_files,$chh_ob;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1715
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1716 unless($no_header){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1717 print {$fhs{3}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1718 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1719 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1720
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1721 my $methylation_call_strings_processed = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1722 my $line_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1723
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1724 ### proceeding differently now for single-end or paired-end Bismark files
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1725
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1726 ### PROCESSING SINGLE-END RESULT FILES
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1727 if ($single) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1728
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1729 ### also proceeding differently now for SAM format or vanilla Bismark format files
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1730 if ($vanilla) { # old vanilla Bismark output format
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1731 while (<IN>) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1732 ++$line_count;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1733 warn "Processed lines: $line_count\n" if ($line_count%500000==0);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1734
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1735 ### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1736 my ($id,$strand,$chrom,$start,$seq,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,6,7,8,9];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1737
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1738 ### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the first or
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1739 ### last position
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1740 chomp $genome_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1741
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1742 my $index;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1743 if ($meth_call) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1744
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1745 if ($read_conversion eq 'CT' and $genome_conversion eq 'CT') { ## original top strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1746 $index = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1747 } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT') { ## complementary to original top strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1748 $index = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1749 } elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA') { ## original bottom strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1750 $index = 3;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1751 } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA') { ## complementary to original bottom strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1752 $index = 2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1753 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1754 die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1755 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1756
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1757 ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1758 if ($ignore) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1759 $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1760
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1761 ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1762 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1763 $start += $ignore;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1764 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1765 $start += length($meth_call)-1; ## $meth_call is already shortened!
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1766 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1767 die "Alignment did not have proper strand information: $strand\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1768 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1769 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1770 ### printing out the methylation state of every C in the read
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1771 print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1772
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1773 ++$methylation_call_strings_processed; # 1 per single-end result
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1774 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1775 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1776 } else { # processing single-end SAM format (default)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1777 while (<IN>) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1778 ### SAM format can either start with header lines (starting with @) or start with alignments directly
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1779 if (/^\@/) { # skipping header lines (starting with @)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1780 warn "skipping SAM header line:\t$_";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1781 next;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1782 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1783
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1784 ++$line_count;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1785 warn "Processed lines: $line_count\n" if ($line_count%500000==0);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1786
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1787 # example read in SAM format
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1788 # 1_R1/1 67 5 103172224 255 40M = 103172417 233 AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XX:Z:4T1T24TT7 XM:Z:....h.h........................hh....... XR:Z:CT XG:Z:CT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1789 ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1790
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1791 # < 0.7.6 my ($id,$chrom,$start,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1792 # < 0.7.6 $meth_call =~ s/^XM:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1793 # < 0.7.6 $read_conversion =~ s/^XR:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1794 # < 0.7.6 $genome_conversion =~ s/^XG:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1795
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1796 my ($id,$chrom,$start,$cigar) = (split("\t"))[0,2,3,5];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1797
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1798 ### detecting the SAM optional tags XM, XR and XG by name (rather than by fixed column position) in case the SAM entry was shuffled by CRAM or Goby compression/decompression
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1799 my $meth_call; ### Thanks to Zachary Zeno for this solution
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1800 my $read_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1801 my $genome_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1802
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1803 while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1804 my $tag = $1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1805 my $value = $2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1806
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1807 if ($tag eq "XM") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1808 $meth_call = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1809 $meth_call =~ s/\r//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1810 } elsif ($tag eq "XR") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1811 $read_conversion = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1812 $read_conversion =~ s/\r//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1813 } elsif ($tag eq "XG") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1814 $genome_conversion = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1815 $genome_conversion =~ s/\r//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1816 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1817 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1818
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1819 my $strand;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1820 chomp $genome_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1821 # print "$meth_call\n$read_conversion\n$genome_conversion\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1822
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1823 my $index;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1824 if ($meth_call) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1825 if ($read_conversion eq 'CT' and $genome_conversion eq 'CT') { ## original top strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1826 $index = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1827 $strand = '+';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1828 } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT') { ## complementary to original top strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1829 $index = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1830 $strand = '-';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1831 } elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA') { ## complementary to original bottom strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1832 $index = 2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1833 $strand = '+';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1834 } elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA') { ## original bottom strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1835 $index = 3;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1836 $strand = '-';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1837 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1838 die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1839 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1840
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1841 ### If the read is in SAM format we need to reverse the methylation call if the read has been reverse-complemented for the output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1842 if ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1843 $meth_call = reverse $meth_call;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1844 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1845
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1846 ### Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1847 if ($ignore) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1848 # print "\n\n$meth_call\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1849 $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1850 # print "$meth_call\n";
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1851
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1852 ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1853
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1854 my @len = split (/\D+/,$cigar); # storing the length per operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1855 my @ops = split (/\d+/,$cigar); # storing the operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1856 shift @ops; # remove the empty first element
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1857 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1858
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1859 my @comp_cigar; # building an array with all CIGAR operations
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1860 foreach my $index (0..$#len) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1861 foreach (1..$len[$index]) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1862 # print "$ops[$index]";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1863 push @comp_cigar, $ops[$index];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1864 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1865 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1866 # print "original CIGAR: $cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1867 # print "original CIGAR: @comp_cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1868
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1869 ### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1870 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1871
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1872 my $D_count = 0; # counting the deletions (D) encountered while removing the ignored bases; together with the insertions (I) counted in $I_count they shift the genomic start position
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1873 my $I_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1874
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1875 for (1..$ignore) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1876 my $op = shift @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations from the start
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1877 # print "$_ deleted $op\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1878
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1879 while ($op eq 'D') { # repeating this for deletions (D)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1880 $D_count++;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1881 $op = shift @comp_cigar;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1882 # print "$_ deleted $op\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1883 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1884 if ($op eq 'I') { # adjusting the genomic position for insertions (I)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1885 $I_count++;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1886 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1887 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1888 $start += $ignore + $D_count - $I_count;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1889 # print "start $start\t ignore: $ignore\t D count: $D_count I_count: $I_count\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1890 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1891
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1892 for (1..$ignore) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1893 my $op = pop @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1894 while ($op eq 'D') { # repeating this for deletions (D)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1895 $op = pop @comp_cigar;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1896 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1897 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1898
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1899 ### For reverse strand alignments we need to determine the number of matching bases (M) or deletions (D) in the read from the CIGAR
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1900 ### string to be able to work out the starting position of the read which is on the 3' end of the sequence
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1901 my $MD_count = 0; # counting all operations that affect the genomic position, i.e. M and D. Insertions do not affect the start position
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1902 foreach (@comp_cigar) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1903 ++$MD_count if ($_ eq 'M' or $_ eq 'D');
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1904 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1905 $start += $MD_count - 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1906 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1907
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1908 ### reconstituting shortened CIGAR string
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1909 my $new_cigar;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1910 my $count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1911 my $last_op;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1912 # print "ignore adjusted: @comp_cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1913 foreach my $op (@comp_cigar) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1914 unless (defined $last_op){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1915 $last_op = $op;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1916 ++$count;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1917 next;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1918 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1919 if ($last_op eq $op) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1920 ++$count;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1921 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1922 $new_cigar .= "$count$last_op";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1923 $last_op = $op;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1924 $count = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1925 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1926 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1927 $new_cigar .= "$count$last_op"; # appending the last operation and count
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1928 $cigar = $new_cigar;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1929 # print "ignore adjusted scalar: $cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1930 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1931 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1932 ### printing out the methylation state of every C in the read
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1933 print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index,$cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1934
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1935 ++$methylation_call_strings_processed; # 1 per single-end result
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1936 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1937 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1938 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1939
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1940 ### PROCESSING PAIRED-END RESULT FILES
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1941 elsif ($paired) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1942
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1943 ### proceeding differently now for SAM format or vanilla Bismark format files
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1944 if ($vanilla) { # old vanilla Bismark paired-end output format
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1945 while (<IN>) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1946 ++$line_count;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1947 warn "processed line: $line_count\n" if ($line_count%500000==0);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1948
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1949 ### $seq here is the chromosomal sequence (to use for the repeat analysis for example)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1950 my ($id,$strand,$chrom,$start_read_1,$end_read_2,$seq_1,$meth_call_1,$seq_2,$meth_call_2,$first_read_conversion,$genome_conversion) = (split("\t"))[0,1,2,3,4,6,7,9,10,11,12,13];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1951
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1952 my $index;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1953 chomp $genome_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1954
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1955 if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1956 $index = 0; ## this is OT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1957 } elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1958 $index = 2; ## this is CTOB!!!
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1959 } elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1960 $index = 1; ## this is CTOT!!!
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1961 } elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1962 $index = 3; ## this is OB
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1963 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1964 die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1965 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1966
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1967 if ($meth_call_1 and $meth_call_2) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1968 ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1969
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1970 if ($ignore) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1971 $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1972
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1973 ### we also need to adjust the start and end positions of the alignments accordingly if '--ignore' was specified
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1974 $start_read_1 += $ignore;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1975 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1976 if ($ignore_r2) {
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1977 $meth_call_2 = substr($meth_call_2,$ignore_r2,length($meth_call_2)-$ignore_r2);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1978
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1979 ### we also need to adjust the start and end positions of the alignments accordingly if '--ignore_r2' was specified
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1980 $end_read_2 -= $ignore_r2;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1981 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1982
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1983 my $end_read_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1984 my $start_read_2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1985
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1986 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1987
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1988 $end_read_1 = $start_read_1+length($meth_call_1)-1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1989 $start_read_2 = $end_read_2-length($meth_call_2)+1;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1990
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1991 ## we first pass the first read which is in + orientation on the forward strand
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1992 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id,'+',$index,0,0,undef,1); # the last two values are CIGAR string and read identity
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1993
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1994 # we next pass the second read which is in - orientation on the reverse strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1995 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1996 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$end_read_2,$id,'-',$index,$no_overlap,$end_read_1,undef,2);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1997 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
1998 else {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
1999
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2000 $end_read_1 = $start_read_1+length($meth_call_2)-1; # read 1 is the second reported read!
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2001 $start_read_2 = $end_read_2-length($meth_call_1)+1; # read 2 is the first reported read!
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2002
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2003 ## we first pass the first read which is in - orientation on the reverse strand
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2004 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$end_read_2,$id,'-',$index,0,0,undef,1);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2005
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2006 # we next pass the second read which is in + orientation on the forward strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2007 ### if --no_overlap was specified we also pass the end of read 2. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2008 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_1,$id,'+',$index,$no_overlap,$start_read_2,undef,2);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2009 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2010
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2011 $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2012 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2013 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2014 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2015 else { # Bismark paired-end SAM output format (default)
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2016 while (<IN>) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2017 ### SAM format can either start with header lines (starting with @) or start with alignments directly
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2018 if (/^\@/) { # skipping header lines (starting with @)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2019 warn "skipping SAM header line:\t$_";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2020 next;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2021 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2022
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2023 ++$line_count;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2024 warn "Processed lines: $line_count\n" if ($line_count%500000==0);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2025
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2026 # example paired-end reads in SAM format (2 consecutive lines)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2027 # 1_R1/1 67 5 103172224 255 40M = 103172417 233 AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:4 XX:Z:4T1T24TT7 XM:Z:....h.h........................hh....... XR:Z:CT XG:Z:CT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2028 # 1_R1/2 131 5 103172417 255 40M = 103172224 -233 TATTTTTTTTTAGAGTATTTTTTAATGGTTATTAGATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:6 XX:Z:T5T1T9T9T7T3 XM:Z:h.....h.h.........h.........h.......h... XR:Z:GA XG:Z:CT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2029
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2030 # < version 0.7.6 my ($id_1,$chrom,$start_read_1,$meth_call_1,$first_read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2031
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2032 my ($id_1,$chrom,$start_read_1,$cigar_1) = (split("\t"))[0,2,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2033 my $meth_call_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2034 my $first_read_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2035 my $genome_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2036
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2037 while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2038 my $tag = $1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2039 my $value = $2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2040
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2041 if ($tag eq "XM") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2042 $meth_call_1 = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2043 $meth_call_1 =~ s/\r//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2044 } elsif ($tag eq "XR") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2045 $first_read_conversion = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2046 $first_read_conversion =~ s/\r//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2047 } elsif ($tag eq "XG") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2048 $genome_conversion = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2049 $genome_conversion =~ s/\r//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2050 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2051 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2052
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2053 $_ = <IN>; # reading in the paired read
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2054
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2055 # < version 0.7.6 my ($id_2,$start_read_2,$meth_call_2,$second_read_conversion) = (split("\t"))[0,3,13,14];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2056 # < version 0.7.6 $meth_call_1 =~ s/^XM:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2057 # < version 0.7.6 $meth_call_2 =~ s/^XM:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2058 # < version 0.7.6 $first_read_conversion =~ s/^XR:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2059 # < version 0.7.6 $second_read_conversion =~ s/^XR:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2060
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2061 my ($id_2,$start_read_2,$cigar_2) = (split("\t"))[0,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2062
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2063 my $meth_call_2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2064 my $second_read_conversion;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2065
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2066 while ( /(XM|XR):Z:([^\t]+)/g ) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2067 my $tag = $1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2068 my $value = $2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2069
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2070 if ($tag eq "XM") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2071 $meth_call_2 = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2072 $meth_call_2 =~ s/\r//;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2073 } elsif ($tag eq "XR") {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2074 $second_read_conversion = $value;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
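2075 $second_read_conversion = s/\r//; # FIXME(review): '=' here is almost certainly meant to be '=~' (as in the XM branch above); as written s/\r// is applied to $_ and its return value overwrites $second_read_conversion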
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2076 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2077 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2078
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2079 # < version 0.7.6 $genome_conversion =~ s/^XG:Z://;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2080 chomp $genome_conversion; # in case it captured a new line character
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2081
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2082 # print join ("\t",$meth_call_1,$meth_call_2,$first_read_conversion,$second_read_conversion,$genome_conversion),"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2083
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2084 my $index;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2085 my $strand;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2086
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2087 if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2088 $index = 0; ## this is OT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2089 $strand = '+';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2090 } elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2091 $index = 1; ## this is CTOT
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2092 $strand = '-';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2093 } elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2094 $index = 2; ## this is CTOB
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2095 $strand = '+';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2096 } elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2097 $index = 3; ## this is OB
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2098 $strand = '-';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2099 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2100 die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2101 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2102
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2103 ### reversing the methylation call of the read that was reverse-complemented
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2104 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2105 $meth_call_2 = reverse $meth_call_2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2106 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2107 $meth_call_1 = reverse $meth_call_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2108 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2109
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2110 if ($meth_call_1 and $meth_call_2) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2111
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2112 my $end_read_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2113
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2114 ### READ 1
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2115 my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2116 my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2117 shift @ops_1; # remove the empty first element
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2118
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2119 die "CIGAR string contained a non-matching number of lengths and operations: $cigar_1\n".join(" ",@len_1)."\n".join(" ",@ops_1)."\n" unless (scalar @len_1 == scalar @ops_1);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2120
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2121 my @comp_cigar_1; # building an array with all CIGAR operations
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2122 foreach my $index (0..$#len_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2123 foreach (1..$len_1[$index]) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2124 # print "$ops_1[$index]";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2125 push @comp_cigar_1, $ops_1[$index];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2126 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2127 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2128 # print "original CIGAR read 1: $cigar_1\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2129 # print "original CIGAR read 1: @comp_cigar_1\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2130
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2131 ### READ 2
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2132 my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2133 my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2134 shift @ops_2; # remove the empty first element
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2135 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2136 my @comp_cigar_2; # building an array with all CIGAR operations for read 2
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2137 foreach my $index (0..$#len_2) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2138 foreach (1..$len_2[$index]) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2139 # print "$ops_2[$index]";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2140 push @comp_cigar_2, $ops_2[$index];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2141 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2142 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2143 # print "original CIGAR read 2: $cigar_2\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2144 # print "original CIGAR read 2: @comp_cigar_2\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2145
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2146
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2147
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2148 if ($ignore) {
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2149 ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>' for read 1
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2150 ### the methylation calls have already been reversed where necessary
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2151 $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2152
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2153 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2154
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2155 ### if the (read 1) strand information is '+', read 1 needs to be trimmed from the start
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2156 my $D_count_1 = 0; # counting all deletions that affect the ignored genomic position for read 1, i.e. Deletions and insertions
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2157 my $I_count_1 = 0;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2158
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2159 for (1..$ignore) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2160 my $op = shift @comp_cigar_1; # adjusting composite CIGAR string of read 1 by removing $ignore operations from the start
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2161 # print "$_ deleted $op\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2162
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2163 while ($op eq 'D') { # repeating this for deletions (D)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2164 $D_count_1++;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2165 $op = shift @comp_cigar_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2166 # print "$_ deleted $op\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2167 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2168 if ($op eq 'I') { # adjusting the genomic position for insertions (I)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2169 $I_count_1++;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2170 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2171 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2172
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2173 $start_read_1 += $ignore + $D_count_1 - $I_count_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2174 # print "start read 1 $start_read_1\t ignore: $ignore\t D count 1: $D_count_1\tI_count 1: $I_count_1\n";
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2175
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2176 # the start position of reads mapping to the reverse strand is being adjusted further below
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2177 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2178 elsif ($strand eq '-') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2179
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2180 ### if the (read 1) strand information is '-', read 1 needs to be trimmed from the back
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2181 for (1..$ignore) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2182 my $op = pop @comp_cigar_1; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2183 while ($op eq 'D') { # repeating this for deletions (D)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2184 $op = pop @comp_cigar_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2185 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2186 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2187 # the start position of reads mapping to the reverse strand is being adjusted further below
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2188
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2189 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2190 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2191
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2192 if ($ignore_r2) {
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2193 ### Clipping off the first <int> number of bases from the methylation call string as specified with '--ignore_r2 <int>' for read 2
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2194 ### the methylation calls have already been reversed where necessary
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2195 $meth_call_2 = substr($meth_call_2,$ignore_r2,length($meth_call_2)-$ignore_r2);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2196
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2197 ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2198
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2199 if ($strand eq '+') {
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2200
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2201 ### if the (read 1) strand information is '+', read 2 needs to be trimmed from the back
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2202
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2203 for (1..$ignore_r2) {
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2204 my $op = pop @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2205 while ($op eq 'D') { # repeating this for deletions (D)
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2206 $op = pop @comp_cigar_2;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2207 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2208 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2209 # the start position of reads mapping to the reverse strand is being adjusted further below
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2210 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2211 elsif ($strand eq '-') {
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2212
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2213 ### if the (read 1) strand information is '-', read 2 needs to be trimmed from the start
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2214 my $D_count_2 = 0; # counting all deletions that affect the ignored genomic position for read 2, i.e. Deletions and insertions
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2215 my $I_count_2 = 0;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2216
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2217 for (1..$ignore_r2) {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2218 my $op = shift @comp_cigar_2; # adjusting composite CIGAR string of read 2 by removing $ignore operations from the start
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2219 # print "$_ deleted $op\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2220
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2221 while ($op eq 'D') { # repeating this for deletions (D)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2222 $D_count_2++;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2223 $op = shift @comp_cigar_2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2224 # print "$_ deleted $op\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2225 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2226 if ($op eq 'I') { # adjusting the genomic position for insertions (I)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2227 $I_count_2++;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2228 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2229 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2230
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2231 $start_read_2 += $ignore_r2 + $D_count_2 - $I_count_2;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2232 # print "start read 2 $start_read_2\t ignore R2: $ignore_r2\t D count 2: $D_count_2\tI_count 2: $I_count_2\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2233 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2234 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2235
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2236 if ($ignore){
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2237 ### reconstituting shortened CIGAR string 1
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2238 my $new_cigar_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2239 my $count_1 = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2240 my $last_op_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2241 # print "ignore adjusted CIGAR 1: @comp_cigar_1\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2242 foreach my $op (@comp_cigar_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2243 unless (defined $last_op_1){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2244 $last_op_1 = $op;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2245 ++$count_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2246 next;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2247 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2248 if ($last_op_1 eq $op) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2249 ++$count_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2250 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2251 $new_cigar_1 .= "$count_1$last_op_1";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2252 $last_op_1 = $op;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2253 $count_1 = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2254 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2255 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2256 $new_cigar_1 .= "$count_1$last_op_1"; # appending the last operation and count
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2257 $cigar_1 = $new_cigar_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2258 # print "ignore adjusted CIGAR 1 scalar: $cigar_1\n";
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2259 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2260
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2261 if ($ignore_r2){
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2262
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2263 ### reconstituting shortened CIGAR string 2
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2264 my $new_cigar_2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2265 my $count_2 = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2266 my $last_op_2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2267 # print "ignore adjusted CIGAR 2: @comp_cigar_2\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2268 foreach my $op (@comp_cigar_2) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2269 unless (defined $last_op_2){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2270 $last_op_2 = $op;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2271 ++$count_2;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2272 next;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2273 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2274 if ($last_op_2 eq $op) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2275 ++$count_2;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2276 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2277 else {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2278 $new_cigar_2 .= "$count_2$last_op_2";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2279 $last_op_2 = $op;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2280 $count_2 = 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2281 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2282 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2283 $new_cigar_2 .= "$count_2$last_op_2"; # appending the last operation and count
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2284 $cigar_2 = $new_cigar_2;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2285 # print "ignore_r2 adjusted CIGAR 2 scalar: $cigar_2\n";
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2286 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2287
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2288 ### Adjusting CIGAR string and starting position of reads in reverse orientation which we will pass to the extraction subroutine later on
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2289
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2290 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2291 ### adjusting the start position for all reads mapping to the reverse strand, in this case read 2
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2292 @comp_cigar_2 = reverse@comp_cigar_2; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2293 # print "reverse: @comp_cigar_2\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2294
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2295 my $MD_count_1 = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2296 foreach (@comp_cigar_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2297 ++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2298 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2299
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2300 my $MD_count_2 = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2301 foreach (@comp_cigar_2) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2302 ++$MD_count_2 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2303 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2304
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2305 $end_read_1 = $start_read_1 + $MD_count_1 - 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2306 $start_read_2 += $MD_count_2 - 1; ## Passing on the start position on the reverse strand
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2307 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2308 else {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2309 ### adjusting the start position for all reads mapping to the reverse strand, in this case read 1
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2310
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2311 @comp_cigar_1 = reverse@comp_cigar_1; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2312 # print "reverse: @comp_cigar_1\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2313
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2314 my $MD_count_1 = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2315 foreach (@comp_cigar_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2316 ++$MD_count_1 if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2317 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2318
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2319 $end_read_1 = $start_read_1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2320 $start_read_1 += $MD_count_1 - 1; ### Passing on the start position on the reverse strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2321 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2322
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2323 if ($strand eq '+') {
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2324 ## we first pass the first read which is in + orientation on the forward strand; the last value is the read identity
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2325 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0,0,$cigar_1,1);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2326
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2327 # we next pass the second read which is in - orientation on the reverse strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2328 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2329 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$no_overlap,$end_read_1,$cigar_2,2);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2330 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2331 ## we first pass the first read which is in - orientation on the reverse strand
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2332 print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0,0,$cigar_1,1);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2333
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2334 # we next pass the second read which is in + orientation on the forward strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2335 ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2336 print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$no_overlap,$end_read_1,$cigar_2,2);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2337 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2338
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2339 $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2340 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2341 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2342 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2343 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2344 die "Single-end or paired-end reads not specified properly\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2345 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2346
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2347 warn "\n\nProcessed $line_count lines from $filename in total\n";
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2348 warn "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2349 if ($report) {
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2350 print REPORT "\n\nProcessed $line_count lines from $filename in total\n";
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2351 print REPORT "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2352 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2353 print_splitting_report ();
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2354 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2355
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2356
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2357
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2358 sub print_splitting_report{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2359
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2360 ### Calculating methylation percentages if applicable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2361
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2362 my $percent_meCpG;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2363 if (($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}) > 0){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2364 $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2365 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2366
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2367 my $percent_meCHG;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2368 if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2369 $percent_meCHG = sprintf("%.1f",100*$counting{total_meCHG_count}/($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}));
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2370 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2371
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2372 my $percent_meCHH;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2373 if (($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}) > 0){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2374 $percent_meCHH = sprintf("%.1f",100*$counting{total_meCHH_count}/($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}));
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2375 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2376
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2377 my $percent_non_CpG_methylation;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2378 if ($merge_non_CpG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2379 if ( ($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2380 $percent_non_CpG_methylation = sprintf("%.1f",100* ( $counting{total_meCHH_count}+$counting{total_meCHG_count} ) / ( $counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count} ) );
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2381 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2382 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2383
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2384 if ($report){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2385 ### detailed information about Cs analysed
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2386 print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2387
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2388 my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2389 print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2390
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2391 print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2392 print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2393 print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2394
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2395 print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2396 print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2397 print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2398
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2399 ### calculating methylated CpG percentage if applicable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2400 if ($percent_meCpG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2401 print REPORT "C methylated in CpG context:\t${percent_meCpG}%\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2402 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2403 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2404 print REPORT "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2405 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2406
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2407 ### 2-Context Output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2408 if ($merge_non_CpG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2409 if ($percent_non_CpG_methylation){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2410 print REPORT "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2411 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2412 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2413 print REPORT "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2414 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2415 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2416
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2417 ### 3-Context Output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2418 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2419 ### calculating methylated CHG percentage if applicable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2420 if ($percent_meCHG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2421 print REPORT "C methylated in CHG context:\t${percent_meCHG}%\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2422 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2423 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2424 print REPORT "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2425 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2426
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2427 ### calculating methylated CHH percentage if applicable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2428 if ($percent_meCHH){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2429 print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2430 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2431 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2432 print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2433 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2434 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2435 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2436
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2437 ### detailed information about Cs analysed for on-screen report
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2438 print "Final Cytosine Methylation Report\n",'='x33,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2439
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2440 my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2441 print "Total number of C's analysed:\t$total_number_of_C\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2442
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2443 print "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2444 print "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2445 print "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2446
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2447 print "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2448 print "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2449 print "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2450
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2451 ### printing methylated CpG percentage if applicable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2452 if ($percent_meCpG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2453 print "C methylated in CpG context:\t${percent_meCpG}%\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2454 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2455 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2456 print "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2457 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2458
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2459 ### 2-Context Output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2460 if ($merge_non_CpG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2461 if ($percent_non_CpG_methylation){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2462 print "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2463 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2464 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2465 print "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2466 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2467 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2468
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2469 ### 3-Context Output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2470 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2471 ### printing methylated CHG percentage if applicable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2472 if ($percent_meCHG){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2473 print "C methylated in CHG context:\t${percent_meCHG}%\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2474 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2475 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2476 print "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2477 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2478
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2479 ### printing methylated CHH percentage if applicable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2480 if ($percent_meCHH){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2481 print "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2482 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2483 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2484 print "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2485 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2486 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2487 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2488
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2489
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2490
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2491 sub print_individual_C_methylation_states_paired_end_files{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2492
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2493 my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$no_overlap,$end_read_1,$cigar,$read_identity) = @_;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2494
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2495 ### we will use the read identity for the M-bias plot to discriminate read 1 and read 2
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2496 die "Read identity was neither 1 nor 2: $read_identity\n\n" unless ($read_identity == 1 or $read_identity == 2);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2497
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2498 my @methylation_calls = split(//,$meth_call);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2499
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2500 #################################################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2501 ### . for bases not involving cytosines ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2502 ### X for methylated C in CHG context (was protected) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2503 ### x for not methylated C in CHG context (was converted) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2504 ### H for methylated C in CHH context (was protected) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2505 ### h for not methylated C in CHH context (was converted) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2506 ### Z for methylated C in CpG context (was protected) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2507 ### z for not methylated C in CpG context (was converted) ###
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2508 ### U for methylated C in Unknown context (was protected) ###
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2509 ### u for not methylated C in Unknown context (was converted) ###
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2510 #################################################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2511
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2512 my $methyl_CHG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2513 my $methyl_CHH_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2514 my $methyl_CpG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2515 my $unmethylated_CHG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2516 my $unmethylated_CHH_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2517 my $unmethylated_CpG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2518
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2519 my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2520 my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2521 my @comp_cigar;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2522
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2523 ### Checking whether the CIGAR string is a linear genomic match or whether it requires indel processing
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2524 if ($cigar =~ /^\d+M$/){
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2525 # this check speeds up the extraction process by up to 60%!!!
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2526 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2527 else{ # parsing CIGAR string
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2528 my @len;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2529 my @ops;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2530 @len = split (/\D+/,$cigar); # storing the length per operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2531 @ops = split (/\d+/,$cigar); # storing the operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2532 shift @ops; # remove the empty first element
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2533
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2534 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2535
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2536 foreach my $index (0..$#len){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2537 foreach (1..$len[$index]){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2538 # print "$ops[$index]";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2539 push @comp_cigar, $ops[$index];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2540 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2541 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2542 # warn "\nDetected CIGAR string: $cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2543 # warn "Length of methylation call: ",length $meth_call,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2544 # warn "number of operations: ",scalar @ops,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2545 # warn "number of length digits: ",scalar @len,"\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2546 # print @comp_cigar,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2547 # print "$meth_call\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2548 # sleep (1);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2549 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2550
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2551 if ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2552
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2553 ### the CIGAR string needs to be reversed, the methylation call has already been reversed above
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2554 if (@comp_cigar){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2555 @comp_cigar = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2556 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2557 # print "reverse CIGAR string: @comp_cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2558
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2559 ### the start position of paired-end files has already been corrected, see above
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2560 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2561
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2562 ### THIS IS AN OPTIONAL 2-CONTEXT (CpG and non-CpG) SECTION IF --merge_non_CpG was specified
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2563
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2564 if ($merge_non_CpG) {
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2565 if ($no_overlap) { # this has to be read 2...
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2566
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2567 ### single-file CpG and non-CpG context output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2568 if ($full) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2569 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2570 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2571
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2572 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2573 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2574 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2575 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2576 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2577 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2578
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2579 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2580 if ($start+$index+$pos_offset >= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2581 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2582 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2583
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2584 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2585 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2586 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2587 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2588 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2589 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2590 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2591 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2592 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2593 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2594 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2595 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2596 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2597 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2598 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2599 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2600 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2601 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2602 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2603 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2604 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2605 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2606 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2607 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2608 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2609 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2610 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2611 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2612 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2613 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2614 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2615 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2616 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2617 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2618 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2619 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2620 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2621 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2622 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2623 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2624 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2625 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2626 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2627 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2628 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2629 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2630 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2631 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2632 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2633 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2634 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2635 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2636 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2637 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2638 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2639 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2640 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2641 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2642 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2643 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2644 elsif ($methylation_calls[$index] eq '.'){}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2645 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2646 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2647 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2648 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2649 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2650 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2651 elsif ($strand eq '-') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2652 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2653
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2654 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2655 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2656 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2657 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2658 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2659 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2660
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2661 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2662 if ($start-$index+$pos_offset <= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2663 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2664 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2665
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2666 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2667 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2668 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2669 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2670 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2671 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2672 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2673 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2674 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2675 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2676 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2677 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2678 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2679 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2680 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2681 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2682 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2683 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2684 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2685 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2686 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2687 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2688 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2689 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2690 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2691 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2692 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2693 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2694 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2695 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2696 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2697 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2698 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2699 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2700 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2701 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2702 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2703 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2704 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2705 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2706 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2707 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2708 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2709 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2710 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2711 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2712 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2713 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2714 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2715 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2716 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2717 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2718 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2719 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2720 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2721 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2722 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2723 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2724 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2725 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2726 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2727 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2728 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2729 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2730 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2731 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2732 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2733 die "The read orientation was neither + nor -: '$strand'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2734 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2735 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2736
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2737 ### strand-specific methylation output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2738 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2739 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2740 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2741
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2742 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2743 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2744 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2745 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2746 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2747 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2748
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2749 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2750 if ($start+$index+$pos_offset >= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2751 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2752 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2753
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2754 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2755 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2756 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2757 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2758 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2759 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2760 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2761 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2762 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2763 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2764 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2765 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2766 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2767 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2768 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2769 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2770 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2771 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2772 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2773 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2774 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2775 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2776 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2777 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2778 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2779 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2780 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2781 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2782 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2783 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2784 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2785 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2786 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2787 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2788 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2789 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2790 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2791 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2792 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2793 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2794 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2795 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2796 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2797 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2798 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2799 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2800 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2801 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2802 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2803 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2804 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2805 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2806 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2807 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2808 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2809 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2810 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2811 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2812 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2813 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2814 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2815 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2816 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2817 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2818 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2819 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2820 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2821 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2822
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2823 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2824 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2825 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2826 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2827 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2828 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2829
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2830 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2831 if ($start-$index+$pos_offset <= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2832 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2833 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2834
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2835 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2836 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2837 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2838 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2839 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2840 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2841 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2842 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2843 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2844 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2845 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2846 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2847 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2848 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2849 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2850 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2851 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2852 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2853 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2854 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2855 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2856 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2857 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2858 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2859 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2860 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2861 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2862 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2863 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2864 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2865 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2866 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2867 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2868 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2869 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2870 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2871 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2872 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2873 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2874 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2875 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2876 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2877 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2878 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2879 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2880 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2881 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2882 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2883 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2884 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2885 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2886 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2887 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2888 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2889 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2890 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2891 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2892 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2893 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2894 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2895 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2896 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2897 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2898 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2899 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2900 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2901 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2902 die "The strand orientation was neither + nor -: '$strand'\n"; # reached only when $strand is neither '+' nor '-'; the trailing newline stops Perl appending " at <script> line <N>." to the message
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2903 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2904 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2905 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2906
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2907 ### this is the default paired-end procedure allowing overlaps and using every single C position
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2908 ### Still within the 2-CONTEXT ONLY optional section
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2909 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2910 ### single-file CpG and non-CpG context output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2911 if ($full) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2912 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2913 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2914
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2915 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2916 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2917 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2918 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2919 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2920 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2921
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2922 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2923 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2924 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2925 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2926 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2927 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2928 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2929 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2930 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2931 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2932 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2933 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2934 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2935 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2936 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2937 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2938 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2939 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2940 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2941 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2942 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2943 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2944 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2945 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2946 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2947 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2948 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2949 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2950 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2951 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2952 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2953 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2954 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2955 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2956 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2957 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2958 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2959 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2960 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2961 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2962 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2963 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2964 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2965 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2966 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2967 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2968 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2969 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2970 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2971 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2972 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2973 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2974 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2975 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2976 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2977 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2978 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2979 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2980 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2981 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2982 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2983 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2984 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
2985 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2986 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2987 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2988 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2989 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2990
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2991 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2992 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2993 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2994 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2995 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2996 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2997
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2998 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
2999 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3000 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3001 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3002 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3003 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3004 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3005 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3006 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3007 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3008 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3009 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3010 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3011 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3012 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3013 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3014 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3015 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3016 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3017 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3018 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3019 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3020 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3021 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3022 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3023 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3024 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3025 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3026 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3027 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3028 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3029 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3030 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3031 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3032 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3033 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3034 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3035 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3036 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3037 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3038 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3039 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3040 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3041 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3042 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3043 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3044 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3045 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3046 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3047 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3048 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3049 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3050 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3051 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3052 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3053 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3054 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3055 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3056 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3057 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3058 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3059 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3060 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3061 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3062 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3063 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3064 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3065 die "The strand orientation was neither + nor -: '$strand'\n"; # reached only when $strand is neither '+' nor '-'
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3066 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3067 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3068
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3069 ### strand-specific methylation output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3070 ### still within the 2-CONTEXT optional section
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3071 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3072 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3073 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3074
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3075 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3076 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3077 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3078 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3079 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3080 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3081
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3082 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3083 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3084 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3085 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3086 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3087 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3088 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3089 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3090 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3091 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3092 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3093 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3094 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3095 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3096 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3097 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3098 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3099 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3100 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3101 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3102 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3103 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3104 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3105 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3106 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3107 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3108 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3109 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3110 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3111 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3112 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3113 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3114 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3115 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3116 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3117 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3118 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3119 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3120 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3121 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3122 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3123 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3124 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3125 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3126 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3127 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3128 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3129 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3130 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3131 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3132 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3133 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3134 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3135 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3136 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3137 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3138 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3139 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3140 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3141 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3142 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3143 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3144 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3145 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3146 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3147 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3148 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3149 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3150
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3151 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3152 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3153 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3154 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3155 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3156 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3157
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3158 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3159 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3160 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3161 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3162 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3163 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3164 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3165 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3166 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3167 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3168 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3169 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3170 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3171 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3172 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3173 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3174 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3175 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3176 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3177 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3178 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3179 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3180 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3181 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3182 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3183 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3184 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3185 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3186 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3187 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3188 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3189 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3190 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3191 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3192 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3193 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3194 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3195 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3196 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3197 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3198 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3199 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3200 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3201 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3202 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3203 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3204 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3205 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3206 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3207 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3208 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3209 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3210 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3211 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3212 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3213 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3214 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3215 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3216 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3217 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3218 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3219 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3220 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3221 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3222 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3223 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3224 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3225 die "The strand orientation as neither + nor -: '$strand'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3226 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3227 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3228 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3229 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3230
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3231 ############################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3232 ### THIS IS THE DEFAULT 3-CONTEXT OUTPUT ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3233 ############################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3234
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3235 elsif ($no_overlap) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3236 ### single-file CpG, CHG and CHH context output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3237 if ($full) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3238 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3239 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3240
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3241 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3242 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3243 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3244 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3245 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3246 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3247
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3248 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3249 if ($start+$index+$pos_offset >= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3250 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3251 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3252
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3253 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3254 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3255 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3256 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3257 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3258 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3259 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3260 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3261 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3262 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3263 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3264 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3265 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3266 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3267 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3268 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3269 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3270 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3271 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3272 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3273 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3274 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3275 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3276 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3277 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3278 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3279 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3280 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3281 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3282 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3283 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3284 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3285 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3286 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3287 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3288 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3289 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3290 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3291 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3292 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3293 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3294 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3295 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3296 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3297 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3298 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3299 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3300 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3301 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3302 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3303 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3304 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3305 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3306 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3307 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3308 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3309 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3310 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3311 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3312 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3313 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3314 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3315 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3316 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3317 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3318 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3319 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3320 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3321
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3322 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3323 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3324 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3325 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3326 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3327 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3328
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3329 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3330 if ($start-$index+$pos_offset <= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3331 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3332 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3333
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3334 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3335 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3336 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3337 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3338 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3339 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3340 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3341 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3342 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3343 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3344 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3345 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3346 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3347 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3348 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3349 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3350 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3351 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3352 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3353 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3354 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3355 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3356 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3357 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3358 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3359 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3360 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3361 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3362 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3363 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3364 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3365 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3366 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3367 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3368 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3369 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3370 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3371 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3372 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3373 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3374 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3375 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3376 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3377 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3378 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3379 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3380 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3381 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3382 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3383 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3384 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3385 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3386 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3387 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3388 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3389 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3390 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3391 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3392 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3393 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3394 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3395 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3396 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3397 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3398 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3399 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3400 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3401 die "The strand orientation as neither + nor -: '$strand'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3402 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3403 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3404
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3405 ### strand-specific methylation output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3406 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3407 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3408 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3409
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3410 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3411 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3412 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3413 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3414 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3415 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3416
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3417 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3418 if ($start+$index+$pos_offset >= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3419 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3420 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3421
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3422 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3423 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3424 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3425 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3426 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3427 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3428 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3429 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3430 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3431 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3432 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3433 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3434 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3435 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3436 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3437 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3438 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3439 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3440 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3441 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3442 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3443 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3444 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3445 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3446 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3447 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3448 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3449 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3450 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3451 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3452 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3453 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3454 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3455 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3456 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3457 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3458 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3459 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3460 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3461 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3462 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3463 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3464 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3465 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3466 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3467 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3468 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3469 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3470 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3471 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3472 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3473 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3474 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3475 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3476 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3477 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3478 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3479 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3480 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3481 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3482 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3483 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3484 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3485 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3486 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3487 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3488 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3489 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3490
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3491 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3492 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3493 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3494 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3495 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3496 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3497
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3498 ### Returning as soon as the methylation calls start overlapping
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3499 if ($start-$index+$pos_offset <= $end_read_1) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3500 return;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3501 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3502
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3503 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3504 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3505 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3506 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3507 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3508 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3509 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3510 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3511 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3512 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3513 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3514 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3515 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3516 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3517 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3518 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3519 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3520 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3521 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3522 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3523 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3524 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3525 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3526 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3527 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3528 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3529 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3530 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3531 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3532 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3533 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3534 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3535 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3536 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3537 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3538 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3539 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3540 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3541 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3542 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3543 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3544 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3545 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3546 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3547 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3548 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3549 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3550 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3551 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3552 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3553 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3554 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3555 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3556 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3557 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3558 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3559 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3560 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3561 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3562 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3563 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3564 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3565 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3566 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3567 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3568 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3569 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3570 die "The strand orientation as neither + nor -: '$strand'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3571 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3572 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3573 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3574
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3575 ### this is the default paired-end procedure allowing overlaps and using every single C position
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3576 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3577 ### single-file CpG, CHG and CHH context output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3578 if ($full) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3579 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3580 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3581
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3582 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3583 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3584 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3585 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3586 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3587 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3588
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3589 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3590 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3591 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3592 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3593 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3594 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3595 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3596 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3597 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3598 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3599 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3600 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3601 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3602 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3603 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3604 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3605 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3606 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3607 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3608 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3609 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3610 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3611 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3612 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3613 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3614 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3615 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3616 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3617 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3618 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3619 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3620 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3621 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3622 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3623 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3624 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3625 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3626 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3627 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3628 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3629 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3630 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3631 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3632 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3633 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3634 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3635 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3636 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3637 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3638 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3639 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3640 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3641 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3642 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3643 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3644 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3645 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3646 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3647 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3648 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3649 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3650 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3651 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3652 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3653 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3654 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3655 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3656 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3657
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3658 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3659 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3660 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3661 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3662 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3663 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3664
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3665 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3666 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3667 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3668 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3669 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3670 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3671 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3672 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3673 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3674 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3675 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3676 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3677 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3678 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3679 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3680 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3681 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3682 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3683 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3684 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3685 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3686 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3687 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3688 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3689 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3690 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3691 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3692 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3693 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3694 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3695 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3696 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3697 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3698 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3699 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3700 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3701 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3702 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3703 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3704 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3705 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3706 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3707 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3708 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3709 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3710 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3711 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3712 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3713 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3714 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3715 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3716 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3717 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3718 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3719 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3720 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3721 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3722 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3723 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3724 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3725 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3726 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3727 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3728 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3729 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3730 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3731 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3732 die "The strand orientation as neither + nor -: '$strand'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3733 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3734 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3735
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3736 ### strand-specific methylation output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3737 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3738 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3739 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3740
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3741 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3742 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3743 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3744 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3745 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3746 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3747
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3748 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3749 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3750 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3751 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3752 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3753 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3754 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3755 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3756 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3757 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3758 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3759 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3760 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3761 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3762 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3763 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3764 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3765 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3766 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3767 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3768 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3769 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3770 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3771 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3772 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3773 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3774 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3775 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3776 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3777 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3778 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3779 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3780 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3781 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3782 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3783 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3784 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3785 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3786 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3787 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3788 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3789 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3790 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3791 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3792 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3793 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3794 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3795 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3796 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3797 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3798 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3799 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3800 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3801 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3802 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3803 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3804 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3805 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3806 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3807 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3808 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3809 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3810 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3811 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3812 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3813 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3814 } elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3815 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3816
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3817 if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3818 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3819 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3820 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3821 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3822 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3823
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3824 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3825 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3826 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3827 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3828 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3829 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3830 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3831 $mbias_2{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3832 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3833 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3834 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3835 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3836 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3837 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3838 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3839 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3840 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3841 $mbias_2{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3842 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3843 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3844 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3845 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3846 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3847 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3848 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3849 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3850 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3851 $mbias_2{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3852 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3853 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3854 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3855 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3856 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3857 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3858 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3859 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3860 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3861 $mbias_2{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3862 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3863 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3864 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3865 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3866 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3867 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3868 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3869 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3870 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3871 $mbias_2{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3872 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3873 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3874 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3875 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3876 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3877 if ($read_identity == 1){
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3878 $mbias_1{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3879 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3880 else{
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3881 $mbias_2{CHH}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3882 }
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3883 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3884 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
3885 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3886 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3887 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3888 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3889 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3890 } else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3891 die "The strand orientation as neither + nor -: '$strand'\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3892 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3893 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3894 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3895 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3896
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
### Determines by how much the composite CIGAR index and the genomic position need to be shifted
### for the methylation call at read position $index (only relevant for reads containing InDels).
###
### Arguments:
###   $index        - 0-based position within the methylation call string
###   $cigar_offset - offset already accumulated between the methylation call index and the composite CIGAR index
###   $pos_offset   - genomic position offset accumulated so far (accepted for interface compatibility, not used here)
###   $strand       - '+' or '-'
###   $comp_cigar   - reference to the composite CIGAR array holding one operation ('M', 'I' or 'D') per element
###
### Returns the list ($new_cigar_offset,$new_pos_offset): the ADDITIONAL adjustments for this position,
### which the calling code adds on to its running $cigar_offset and $pos_offset.
###
### Dies if an operation other than 'M', 'I' or 'D' is encountered, or if the composite CIGAR
### has no operation left for the requested position.
sub check_cigar_string {
  my ($index,$cigar_offset,$pos_offset,$strand,$comp_cigar) = @_;
  my ($new_cigar_offset,$new_pos_offset) = (0,0);

  # Both strands are handled by the same logic, only the sign of the position adjustment differs:
  #   + strand: deletion => +1 bp, insertion => -1 bp
  #   - strand: deletion => -1 bp, insertion => +1 bp
  my $deletion_step;
  if ($strand eq '+') {
    $deletion_step = 1;
  }
  elsif ($strand eq '-') {
    $deletion_step = -1;
  }
  else {
    # any other strand value has never been adjusted by this subroutine; keep returning (0,0)
    return ($new_cigar_offset,$new_pos_offset);
  }

  my $cigar_index  = $index + $cigar_offset; # element of the composite CIGAR belonging to this read position
  my $cigar_length = scalar @$comp_cigar;

  # Reading past the end of the composite CIGAR used to compare an undefined value against 'M', 'I' and 'D'
  # and then die claiming that the CIGAR string contained the undefined operation ''. Fail with a clear message instead.
  if ($cigar_index >= $cigar_length) {
    die "The CIGAR string ran out of operations at methylation call index $index (composite CIGAR index: $cigar_index, composite CIGAR length: $cigar_length)\n";
  }

  my $operation = $comp_cigar->[$cigar_index];

  if ($operation eq 'M') { # sequence position matches the genomic position
    # position needs no adjustment
  }
  elsif ($operation eq 'I') { # insertion in the read sequence
    $new_pos_offset -= $deletion_step; # inserted bases do not exist in the genome
  }
  elsif ($operation eq 'D') { # deletion in the read sequence
    $new_cigar_offset += 1;              # the composite cigar string does no longer match the methylation call index
    $new_pos_offset   += $deletion_step; # deleted bases are present in the genome but not in the read

    # looping through the CIGAR string until we hit another M or I (or reach its end)
    while ( ($cigar_index + $new_cigar_offset) < $cigar_length ) {
      $operation = $comp_cigar->[$cigar_index + $new_cigar_offset];

      if ($operation eq 'M') { # sequence position matches the genomic position again
        last;
      }
      elsif ($operation eq 'I') { # an insertion directly following the deletion(s)
        $new_pos_offset -= $deletion_step;
        last;
      }
      elsif ($operation eq 'D') { # yet another deleted base
        $new_cigar_offset += 1;
        $new_pos_offset   += $deletion_step;
      }
      else {
        die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '$operation'\n";
      }
    }
  }
  else {
    die "The CIGAR string contained undefined operations in addition to 'M', 'I' and 'D': '$operation'\n";
  }

  return ($new_cigar_offset,$new_pos_offset);
}
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3988
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3989 sub print_individual_C_methylation_states_single_end{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3990
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3991 my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$cigar) = @_;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3992 my @methylation_calls = split(//,$meth_call);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3993
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3994 #################################################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3995 ### . for bases not involving cytosines ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3996 ### X for methylated C in CHG context (was protected) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3997 ### x for not methylated C in CHG context (was converted) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3998 ### H for methylated C in CHH context (was protected) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
3999 ### h for not methylated C in CHH context (was converted) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4000 ### Z for methylated C in CpG context (was protected) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4001 ### z for not methylated C in CpG context (was converted) ###
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4002 #################################################################
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4003
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4004 my $methyl_CHG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4005 my $methyl_CHH_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4006 my $methyl_CpG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4007 my $unmethylated_CHG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4008 my $unmethylated_CHH_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4009 my $unmethylated_CpG_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4010
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4011 my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4012 my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4013
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4014 my @comp_cigar;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4015
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4016 if ($cigar){ # parsing CIGAR string
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4017
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4018 ### Checking whether the CIGAR string is a linear genomic match or whether if requires indel processing
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4019 if ($cigar =~ /^\d+M$/){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4020 # warn "See!? I told you so! $cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4021 # sleep(1);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4022 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4023 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4024
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4025 my @len;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4026 my @ops;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4027
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4028 @len = split (/\D+/,$cigar); # storing the length per operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4029 @ops = split (/\d+/,$cigar); # storing the operation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4030 shift @ops; # remove the empty first element
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4031 # die "CIGAR string contained a non-matching number of lengths and operations: id: $id\nmeth call: $meth_call\nCIGAR: $cigar\n".join(" ",@len)."\n".join(" ",@ops)."\n" unless (scalar @len == scalar @ops);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4032 die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4033
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4034 foreach my $index (0..$#len){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4035 foreach (1..$len[$index]){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4036 # print "$ops[$index]";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4037 push @comp_cigar, $ops[$index];
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4038 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4039 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4040 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4041 # warn "\nDetected CIGAR string: $cigar\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4042 # warn "Length of methylation call: ",length $meth_call,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4043 # warn "number of operations: ",scalar @ops,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4044 # warn "number of length digits: ",scalar @len,"\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4045 # print @comp_cigar,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4046 # print "$meth_call\n\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4047 # sleep (1);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4048 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4049
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4050 ### adjusting the start position for all reads mapping to the reverse strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4051 if ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4052
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4053 if (@comp_cigar){ # only needed for SAM reads with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4054 @comp_cigar = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4055 # print @comp_cigar,"\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4056 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4057
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4058 unless ($ignore){ ### if --ignore was specified the start position has already been corrected
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4059
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4060 if ($cigar){ ### SAM format
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4061 if ($cigar =~ /^(\d+)M$/){ # linear match
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4062 $start += $1 - 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4063 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4064 else{ # InDel read
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4065 my $MD_count = 0;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4066 foreach (@comp_cigar){
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4067 ++$MD_count if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4068 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4069 $start += $MD_count - 1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4070 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4071 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4072 else{ ### vanilla format
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4073 $start += length($meth_call)-1;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4074 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4075 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4076 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4077
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4078 ### THIS IS THE CpG and Non-CpG SECTION (OPTIONAL)
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4079
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4080 ### single-file CpG and other-context output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4081 if ($full and $merge_non_CpG) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4082 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4083 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4084
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4085 if ($cigar and @comp_cigar){ # only needed for SAM alignments with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4086 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4087 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4088 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4089 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4090 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4091
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4092 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4093 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4094 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4095 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4096 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4097 $mbias_1{CHG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4098 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4099 elsif ($methylation_calls[$index] eq 'x') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4100 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4101 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4102 $mbias_1{CHG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4103 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4104 elsif ($methylation_calls[$index] eq 'Z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4105 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4106 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4107 $mbias_1{CpG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4108 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4109 elsif ($methylation_calls[$index] eq 'z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4110 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4111 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4112 $mbias_1{CpG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4113 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4114 elsif ($methylation_calls[$index] eq 'H') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4115 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4116 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4117 $mbias_1{CHH}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4118 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4119 elsif ($methylation_calls[$index] eq 'h') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4120 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4121 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4122 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4123 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4124 elsif ($methylation_calls[$index] eq '.') {}
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4125 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4126 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4127 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4128 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4129 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4130 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4131 elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4132
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4133 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4134 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4135 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4136
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4137 if ($cigar and @comp_cigar){ # only needed for SAM entries with InDels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4138 # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4139 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4140 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4141 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4142 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4143
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4144 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4145 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4146 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4147 $mbias_1{CHG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4148 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4149 elsif ($methylation_calls[$index] eq 'x') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4150 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4151 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4152 $mbias_1{CHG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4153 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4154 elsif ($methylation_calls[$index] eq 'Z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4155 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4156 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4157 $mbias_1{CpG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4158 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4159 elsif ($methylation_calls[$index] eq 'z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4160 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4161 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4162 $mbias_1{CpG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4163 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4164 elsif ($methylation_calls[$index] eq 'H') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4165 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4166 print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4167 $mbias_1{CHH}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4168 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4169 elsif ($methylation_calls[$index] eq 'h') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4170 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4171 print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4172 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4173 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4174 elsif ($methylation_calls[$index] eq '.'){}
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4175 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4176 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4177 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4178 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4179 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4180 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4181 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4182 die "The strand information was neither + nor -: $strand\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4183 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4184 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4185
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4186 ### strand-specific methylation output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4187 elsif ($merge_non_CpG) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4188 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4189 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4190 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4191 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4192
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4193 if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4194 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4195 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4196 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4197 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4198
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4199 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4200 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4201 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4202 $mbias_1{CHG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4203 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4204 elsif ($methylation_calls[$index] eq 'x') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4205 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4206 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4207 $mbias_1{CHG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4208 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4209 elsif ($methylation_calls[$index] eq 'Z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4210 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4211 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4212 $mbias_1{CpG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4213 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4214 elsif ($methylation_calls[$index] eq 'z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4215 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4216 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4217 $mbias_1{CpG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4218 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4219 elsif ($methylation_calls[$index] eq 'H') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4220 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4221 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4222 $mbias_1{CHH}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4223 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4224 elsif ($methylation_calls[$index] eq 'h') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4225 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4226 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4227 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4228 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4229 elsif ($methylation_calls[$index] eq '.') {}
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4230 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4231 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4232 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4233 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4234 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4235 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4236 elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4237
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4238 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4239 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4240 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4241
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4242 if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4243 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4244 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4245 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4246 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4247
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4248 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4249 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4250 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4251 $mbias_1{CHG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4252 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4253 elsif ($methylation_calls[$index] eq 'x') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4254 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4255 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4256 $mbias_1{CHG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4257 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4258 elsif ($methylation_calls[$index] eq 'Z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4259 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4260 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4261 $mbias_1{CpG}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4262 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4263 elsif ($methylation_calls[$index] eq 'z') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4264 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4265 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4266 $mbias_1{CpG}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4267 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4268 elsif ($methylation_calls[$index] eq 'H') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4269 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4270 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4271 $mbias_1{CHH}->{$index+1}->{meth}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4272 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4273 elsif ($methylation_calls[$index] eq 'h') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4274 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4275 print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4276 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4277 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4278 elsif ($methylation_calls[$index] eq '.') {}
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4279 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4280 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4281 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4282 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4283 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4284 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4285 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4286 die "The strand information was neither + nor -: $strand\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4287 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4288 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4289
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4290 ### THIS IS THE 3-CONTEXT (CpG, CHG and CHH) DEFAULT SECTION
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4291
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4292 elsif ($full) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4293 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4294 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4295 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4296 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4297
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4298 if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4299 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4300 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4301 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4302 }
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4303
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4304 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4305 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4306 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4307 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4308 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4309 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4310 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4311 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4312 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4313 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4314 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4315 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4316 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4317 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4318 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4319 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4320 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4321 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4322 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4323 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4324 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4325 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4326 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4327 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4328 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4329 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4330 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4331 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4332 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4333 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4334 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4335 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4336 else{
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4337 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4338 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4339 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4340 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4341 elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4342
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4343 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4344 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4345 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4346
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4347 if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4348 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4349 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4350 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4351 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4352
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4353 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4354 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4355 print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4356 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4357 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4358 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4359 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4360 print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4361 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4362 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4363 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4364 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4365 print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4366 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4367 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4368 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4369 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4370 print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4371 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4372 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4373 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4374 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4375 print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4376 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4377 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4378 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4379 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4380 print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4381 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4382 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4383 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4384 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4385 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4386 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4387 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4388 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4389 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4390 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4391 die "The read had a strand orientation which was neither + nor -: $strand\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4392 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4393 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4394
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4395 ### strand-specific methylation output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4396 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4397 if ($strand eq '+') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4398 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4399 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4400 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4401
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4402 if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4403 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4404 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4405 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4406 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4407
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4408 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4409 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4410 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4411 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4412 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4413 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4414 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4415 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4416 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4417 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4418 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4419 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4420 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4421 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4422 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4423 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4424 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4425 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4426 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4427 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4428 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4429 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4430 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4431 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4432 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4433 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4434 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4435 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4436 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4437 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4438 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4439 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4440 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4441 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4442 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4443 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4444 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4445 elsif ($strand eq '-') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4446
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4447 for my $index (0..$#methylation_calls) {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4448 ### methylated Cs (any context) will receive a forward (+) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4449 ### not methylated Cs (any context) will receive a reverse (-) orientation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4450
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4451 if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4452 my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4453 $cigar_offset += $cigar_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4454 $pos_offset += $pos_mod;
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4455 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4456
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4457 if ($methylation_calls[$index] eq 'X') {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4458 $counting{total_meCHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4459 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4460 $mbias_1{CHG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4461 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4462 elsif ($methylation_calls[$index] eq 'x') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4463 $counting{total_unmethylated_CHG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4464 print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4465 $mbias_1{CHG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4466 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4467 elsif ($methylation_calls[$index] eq 'Z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4468 $counting{total_meCpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4469 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4470 $mbias_1{CpG}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4471 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4472 elsif ($methylation_calls[$index] eq 'z') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4473 $counting{total_unmethylated_CpG_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4474 print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4475 $mbias_1{CpG}->{$index+1}->{un}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4476 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4477 elsif ($methylation_calls[$index] eq 'H') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4478 $counting{total_meCHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4479 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4480 $mbias_1{CHH}->{$index+1}->{meth}++;
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4481 }
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4482 elsif ($methylation_calls[$index] eq 'h') {
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4483 $counting{total_unmethylated_CHH_count}++;
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4484 print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4485 $mbias_1{CHH}->{$index+1}->{un}++;
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4486 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4487 elsif ($methylation_calls[$index] eq '.') {}
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4488 elsif (lc$methylation_calls[$index] eq 'u'){}
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4489 else{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4490 die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4491 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4492 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4493 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4494 else {
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4495 die "The strand information was neither + nor -: $strand\n";
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4496 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4497 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4498 }
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4499
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4500
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4501
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4502 sub print_helpfile{
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4503
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4504 print << 'HOW_TO';
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4505
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4506
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4507 DESCRIPTION
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4508
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4509 The following is a brief description of all options to control the Bismark
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4510 methylation extractor. The script reads in a bisulfite read alignment results file
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4511 produced by the Bismark bisulfite mapper and extracts the methylation information
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4512 for individual cytosines. This information is found in the methylation call field
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4513 which can contain the following characters:
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4514
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4515 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4516 ~~~ X for methylated C in CHG context ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4517 ~~~ x for not methylated C in CHG context ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4518 ~~~ H for methylated C in CHH context ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4519 ~~~ h for not methylated C in CHH context ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4520 ~~~ Z for methylated C in CpG context ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4521 ~~~ z for not methylated C in CpG context ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4522 ~~~ U for methylated C in Unknown context (CN or CHN) ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4523 ~~~ u for not methylated C in Unknown context (CN or CHN) ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4524 ~~~ . for any bases not involving cytosines ~~~
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4525 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4526
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4527 The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4528 context (this distinction is actually already made in Bismark itself). As the methylation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4529 information for every C analysed can produce files which easily have tens or even hundreds of
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4530 millions of lines, file sizes can become very large and more difficult to handle. The C
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4531 methylation info additionally splits cytosine methylation calls up into one of the four possible
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4532 strands a given bisulfite read aligned against:
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4533
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4534 OT original top strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4535 CTOT complementary to original top strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4536
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4537 OB original bottom strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4538 CTOB complementary to original bottom strand
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4539
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4540 Thus, by default twelve individual output files are being generated per input file (unless
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4541 --comprehensive is specified, see below). The output files can be imported into a genome
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4542 viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4543 unless the bisulfite reads were generated preserving directionality it doesn't make any
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4544 sense to look at the data in a strand-specific manner). Strand-specific output files can
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4545 optionally be skipped, in which case only three output files for CpG, CHG or CHH context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4546 will be generated. For both the strand-specific and comprehensive outputs there is also
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4547 the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4548
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4549
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4550 The output files are in the following format (tab delimited):
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4551
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4552 <sequence_id> <strand> <chromosome> <position> <methylation call>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4553
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4554
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4555 USAGE: methylation_extractor [options] <filenames>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4556
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4557
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4558 ARGUMENTS:
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4559 ==========
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4560
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4561 <filenames> A space-separated list of Bismark result files in SAM format from
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4562 which methylation information is extracted for every cytosine in
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4563 the reads. For alignment files in the older custom Bismark output
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4564 see option '--vanilla'.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4565
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4566 OPTIONS:
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4567
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4568 -s/--single-end Input file(s) are Bismark result file(s) generated from single-end
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4569 read data. Specifying either --single-end or --paired-end is
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4570 mandatory.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4571
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4572 -p/--paired-end Input file(s) are Bismark result file(s) generated from paired-end
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4573 read data. Specifying either --paired-end or --single-end is
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4574 mandatory.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4575
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4576 --vanilla The Bismark result input file(s) are in the old custom Bismark format
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4577 (up to version 0.5.x) and not in SAM format which is the default as
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4578 of Bismark version 0.6.x or higher. Default: OFF.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4579
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4580 --no_overlap For paired-end reads it is theoretically possible that read_1 and
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4581 read_2 overlap. This option avoids scoring overlapping methylation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4582 calls twice (only methylation calls of read 1 are used in the process
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4583 since read 1 has historically higher quality basecalls than read 2).
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4584 Whilst this option removes a bias towards more methylation calls
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4585 in the center of sequenced fragments it may de facto remove a sizable
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4586 proportion of the data. This option is highly recommended for paired-end
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4587 data.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4588
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4589 --ignore <int> Ignore the first <int> bp from the 5' end of Read 1 when processing the
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4590 methylation call string. This can remove e.g. a restriction enzyme site
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4591 at the start of each read or any other source of bias (e.g. PBAT-Seq data).
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4592
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4593 --ignore_r2 <int> Ignore the first <int> bp from the 5' end of Read 2 of paired-end sequencing
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4594 results only. Since the first couple of bases in Read 2 of BS-Seq experiments
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4595 show a severe bias towards non-methylation as a result of end-repairing
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4596 sonicated fragments with unmethylated cytosines (see M-bias plot), it is
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4597 recommended that the first couple of bp of Read 2 are removed before
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4598 starting downstream analysis. Please see the section on M-bias plots in the
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4599 Bismark User Guide for more details.
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4600
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4601 --comprehensive Specifying this option will merge all four possible strand-specific
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4602 methylation info into context-dependent output files. The default
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4603
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4604 contexts are:
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4605 - CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4606 - CHG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4607 - CHH context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4608
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4609 --merge_non_CpG This will produce two output files (in --comprehensive mode) or eight
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4610 strand-specific output files (default) for Cs in
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4611 - CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4612 - non-CpG context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4613
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4614 --report Prints out a short methylation summary as well as the parameters used to run
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4615 this script.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4616
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4617 --no_header Suppresses the Bismark version header line in all output files for more convenient
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4618 batch processing.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4619
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4620 -o/--output DIR Allows specification of a different output directory (absolute or relative
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4621 path). If not specified explicitly, the output will be written to the current directory.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4622
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4623 --samtools_path The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4624 explicitly if Samtools is in the PATH already.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4625
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4626 --gzip The methylation extractor files (CpG_OT_..., CpG_OB_... etc) will be written out in
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4627 a GZIP compressed form to save disk space. This option does not work on bedGraph and
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4628 genome-wide cytosine reports as they are 'tiny' anyway.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4629
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4630 --version Displays version information.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4631
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4632 -h/--help Displays this help file and exits.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4633
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4634 --mbias_only The methylation extractor will read the entire file but only output the M-bias table and plots as
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4635 well as a report (optional) and then quit. Default: OFF.
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4636
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4637
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4638
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4639 bedGraph specific options:
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4640 ==========================
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4641
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4642 --bedGraph After finishing the methylation extraction, the methylation output is written into a
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4643 sorted bedGraph file that reports the position of a given cytosine and its methylation
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4644 state (in %, see details below). The methylation extractor output is temporarily split up into
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4645 temporary files, one per chromosome (written into the current directory or folder
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4646 specified with -o/--output); these temp files are then used for sorting and deleted
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4647 afterwards. By default, only cytosines in CpG context will be sorted. The option
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4648 '--CX_context' may be used to report all cytosines irrespective of sequence context
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4649 (this will take MUCH longer!). The default folder for temporary files during the sorting
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4650 process is the output directory. The bedGraph conversion step is performed by the external
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4651 module 'bismark2bedGraph'; this script needs to reside in the same folder as the
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4652 bismark_methylation_extractor itself.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4653
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4654
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4655 --cutoff [threshold] The minimum number of times a methylation state has to be seen for that nucleotide
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4656 before its methylation percentage is reported. Default: 1.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4657
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4658 --remove_spaces Replaces whitespaces in the sequence ID field with underscores to allow sorting.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4659
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4660
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4661 --CX/--CX_context The sorted bedGraph output file contains information on every single cytosine that was covered
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4662 in the experiment irrespective of its sequence context. This applies to both forward and
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4663 reverse strands. Please be aware that this option may generate large temporary and output files
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4664 and may take a long time to sort (up to many hours). Default: OFF.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4665 (i.e. Default = CpG context only).
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4666
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4667 --buffer_size <string> This allows you to specify the main memory sort buffer when sorting the methylation information.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4668 Either specify a percentage of physical memory by appending % (e.g. --buffer_size 50%) or
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4669 a multiple of 1024 bytes, e.g. 'K' multiplies by 1024, 'M' by 1048576 and so on for 'T' etc.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4670 (e.g. --buffer_size 20G). For more information on sort, type 'info sort' on the command line.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4671 Defaults to 2G.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4672
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4673 --scaffolds/--gazillion Users working with unfinished genomes sporting tens or even hundreds of thousands of
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4674 scaffolds/contigs/chromosomes frequently encountered errors with pre-sorting reads to
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4675 individual chromosome files. These errors were caused by the operating system's limit
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4676 on the number of filehandles that can be written to at any one time (typically 1024; to
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4677 find out this limit on Linux, type: ulimit -a).
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4678 To bypass the limitation of open filehandles, the option --scaffolds does not pre-sort
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4679 methylation calls into individual chromosome files. Instead, all input files are
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4680 temporarily merged into a single file (unless there is only a single file), and this
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4681 file will then be sorted by both chromosome AND position using the Unix sort command.
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4682 Please be aware that this option might take a looooong time to complete, depending on
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4683 the size of the input files, and the memory you allocate to this process (see --buffer_size).
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4684 Nevertheless, it seems to be working.
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4685
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4686 --ample_memory Using this option will not sort chromosomal positions using the UNIX 'sort' command, but will
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4687 instead use two arrays to sort methylated and unmethylated calls. This may result in a faster
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4688 sorting process of very large files, but this comes at the cost of a larger memory footprint
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4689 (two arrays of the length of the largest human chromosome 1 (~250M bp) consume around 16GB
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4690 of RAM). Due to overheads in creating and looping through these arrays it seems that it will
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4691 actually be *slower* for small files (few million alignments), and we are currently testing at
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4692 which point it is advisable to use this option. Note that --ample_memory is not compatible
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4693 with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4694
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4695
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4696
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4697 Genome-wide cytosine methylation report specific options:
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4698 =========================================================
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4699
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4700 --cytosine_report After the conversion to bedGraph has completed, the option '--cytosine_report' produces a
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4701 genome-wide methylation report for all cytosines in the genome. By default, the output uses 1-based
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4702 chromosome coordinates (zero-based coords are optional) and reports CpG context only (all
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4703 cytosine context is optional). The output considers all Cs on both forward and reverse strands and
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4704 reports their position, strand, trinucleotide context and methylation state (counts are 0 if not
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4705 covered). The cytosine report conversion step is performed by the external module
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4706 'bedGraph2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4707 itself.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4708
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4709 --CX/--CX_context The output file contains information on every single cytosine in the genome irrespective of
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4710 its context. This applies to both forward and reverse strands. Please be aware that this will
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4711 generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4712 Default: OFF (i.e. Default = CpG context only).
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4713
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4714 --zero_based Uses zero-based coordinates (as used in e.g. BED files) instead of 1-based coordinates. Default: OFF.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4715
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4716 --genome_folder <path> Enter the genome folder you wish to use to extract sequences from (full path only). Accepted
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4717 formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4718
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4719 --split_by_chromosome Writes the output into individual files for each chromosome instead of a single output file. Files
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4720 will be named to include the input filename and the chromosome number.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4721
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4722
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4723
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4724 OUTPUT:
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4725
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4726 The bismark_methylation_extractor output is in the form:
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4727 ========================================================
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4728 <seq-ID> <methylation state*> <chromosome> <start position (= end position)> <methylation call>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4729
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4730 * Methylated cytosines receive a '+' orientation,
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4731 * Unmethylated cytosines receive a '-' orientation.
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4732
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4733
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4734
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4735 The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4736 =========================================================================================================
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4737
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4738 track type=bedGraph (header line)
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4739
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4740 <chromosome> <start position> <end position> <methylation percentage>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4741
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4742
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4743
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4744 The coverage output looks like this (tab-delimited, 1-based genomic coords):
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4745 ============================================================================
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4746
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4747 <chromosome> <start position> <end position> <methylation percentage> <count methylated> <count non-methylated>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4748
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4749
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4750
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4751 The genome-wide cytosine methylation output file is tab-delimited in the following format:
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4752 ==========================================================================================
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4753 <chromosome> <position> <strand> <count methylated> <count non-methylated> <C-context> <trinucleotide context>
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4754
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4755
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4756
3
91f07ff056ca Uploaded
bgruening
parents: 0
diff changeset
4757 This script was last modified on 25 November 2013.
0
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4758
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4759 HOW_TO
62c6da72dd4a Uploaded
bgruening
parents:
diff changeset
4760 }