annotate utilities/deprecated/FindNucleotideContextOnReference.pl @ 10:7d10b55965c9 draft default tip

planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author thondeboer
date Wed, 16 May 2018 17:02:51 -0400
parents 6e75a84e9338
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
1 #!/usr/bin/perl
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
2
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
3 use strict;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
4
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
5
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
6 if ($#ARGV < 1) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
7 print "parameter mismatch\nTo run type this command:\nperl $0 fastahack reference input_pos_file output_file\n\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
8
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
9 print " first argument = full path to fastahack\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
10 print " second argument = full path to reference genome\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
11 print " third argument = input file with arbitrary number of columns, but 1st col=chromosome name and 2nd col=position\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
12 print " fourth argument = output file with three columns: chromosome name, position of the center nucleotide, and the thre-nucleotide context for that position\n\n\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
13 exit 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
14 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
15
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
16
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
17 my $Fastahack=$ARGV[0];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
18 my $Reference=$ARGV[1];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
19 open(InputPositions, '<', $ARGV[2]) || die("Could not open file!");
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
20 open(OutputTrinucleotideContext, '>', $ARGV[3]) || die("Could not open file!");
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
21
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
22
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
23
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
24
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
25 ################ read in one coordinate at a time and execute fastahack on it
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
26
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
27 # reading the header
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
28 my $head = <InputPositions>;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
29 $head =~ s/\n|\r//;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
30 print OutputTrinucleotideContext "$head\tContext\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
31
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
32 # creating trinucleotide context data hash, insertion and deletion counts
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
33 my %trinucleotide_context_data;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
34 my %context_tally_across_mutated_to;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
35 my %insertion_hash;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
36 my %deletion_hash;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
37 my $insertion_total;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
38 my $deletion_total;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
39
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
40
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
41 # reading the positional information
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
42 my $line_count = 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
43 while (<InputPositions>) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
44 $_ =~ s/\n|\r//;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
45 #print "$_\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
46 my @line = split('\t', $_);
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
47
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
48 # getting the chromosome and coordinate fields from input file
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
49 # fastahack will need to the chromosome and coordinate to read the information from the reference
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
50 my $chromosome = $line[0];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
51 my $coordinate = $line[1];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
52
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
53 # get coordinates of first and last character in the context
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
54 my $start_region = $coordinate - 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
55 my $end_region = $coordinate + 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
56
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
57 # if the coordinate is the very first letter on the chromosome, then do not read before that position
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
58 # the context becomes 2 letter code, as opposed to a trinucleotide
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
59 if ( $start_region == 0 ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
60 $start_region = 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
61 $end_region = 2;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
62 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
63
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
64 #print "$Fastahack -r $chromosome:$start_region..$end_region $Reference\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
65 my $context = `$Fastahack -r $chromosome:$start_region..$end_region $Reference`;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
66
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
67 # capitalize context letters
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
68 $context = uc($context);
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
69
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
70 # split germline column into germline allele and mutated_to allele
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
71 # my @germline = split ('/', $line[6]);
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
72
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
73 # if germline allele does not equal reference allele, print "start_region germline allele end_region"
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
74 # specifically, replace the middle letter of the context with the germline allele
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
75 #print "$germline[0], $germline[1]\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
76 # if ($germline[0] ne $germline[1]) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
77 # print "germline/reference mismatch, line number $line_count\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
78 # if ($coordinate != 1) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
79 # substr($context,1,1)= $germline[1];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
80 # }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
81 # else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
82 # substr($context,0,1)= $germline[1];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
83 # }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
84 # }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
85
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
86 print OutputTrinucleotideContext "$_\t$context";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
87
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
88
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
89
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
90 ###############################
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
91 # new section: forming the data structure
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
92 ###############################
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
93
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
94 # to create N_N contexts for data structure, context_code is defined as the trinucleotide context with a blank middle allele
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
95 my $context_code=$context;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
96 $context_code =~ s/\n|\r//;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
97 substr($context_code,1,1) = "_";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
98
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
99 # create variables for mutated_from and mutated_to nucleotides
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
100 my $mutated_from = $line[9];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
101 my $mutated_to = $line[10];
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
102
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
103 # define length of insertions and deletions
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
104 # if ($mutated_from eq "-") {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
105 my $insertion_length = length( $mutated_to );
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
106 # }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
107
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
108 # if ($mutated_to eq "-") {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
109 my $deletion_length = length( $mutated_from );
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
110 # }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
111
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
112 # context_codes are totalled
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
113 $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to} = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to} + 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
114 $context_tally_across_mutated_to{$context_code}{$mutated_from} = $context_tally_across_mutated_to{$context_code}{$mutated_from} + 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
115
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
116 # insertion and deletion lengths are totalled
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
117 if ($mutated_from eq "-") {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
118 $insertion_hash{$insertion_length} = $insertion_hash{$insertion_length} + 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
119 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
120 if ($mutated_to eq "-") {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
121 $deletion_hash{$deletion_length} = $deletion_hash{$deletion_length} + 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
122 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
123
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
124 # total insertions and deletions
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
125 if ($mutated_from eq "-") {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
126 $insertion_total = $insertion_total + 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
127 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
128 if ($mutated_to eq "-") {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
129 $deletion_total = $deletion_total + 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
130 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
131
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
132
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
133
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
134 # to keep track of progress
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
135 unless ($line_count%10000) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
136 print "processed $line_count lines\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
137 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
138 $line_count++;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
139 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
140 # end working through the input file
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
141
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
142 # print total number of mutations
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
143 my $mutation_total = $line_count - 1;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
144 print "Number of Mutations -- $mutation_total\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
145
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
146 # define the output file name for insertions, deletions, and overall likelihoods. Open files for writing
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
147 my $insertion_file_name = "Leukemia_OPEN_insLength.prob";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
148 open(my $insertion_prob_handle, '>', $insertion_file_name) || die("Could not open file!");
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
149
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
150 my $deletion_file_name = "Leukemia_OPEN_delLength.prob";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
151 open(my $deletion_prob_handle, '>', $deletion_file_name) || die("Could not open file!");
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
152
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
153 my $overall_file_name = "Leukemia_OPEN_overall.prob";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
154 open(my $overall_prob_handle, '>', $overall_file_name) || die("Could not open file!");
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
155
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
156 # print overall likelihood file headers
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
157 print $overall_prob_handle "mutation_type\tprobability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
158
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
159 # print insertions and deletion probabilities out of all mutations
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
160 my $insertion_prob_all = $insertion_total / $mutation_total;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
161 my $deletion_prob_all = $deletion_total / $mutation_total;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
162 print $overall_prob_handle "insertion\t$insertion_prob_all\ndeletion\t$deletion_prob_all\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
163 # print $overall_prob_handle "Deletion Probability -- $deletion_prob_all\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
164
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
165 # print InDel totals
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
166 print "Insertions $insertion_total\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
167 print "Deletions $deletion_total\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
168
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
169 # print insertion and deletion headers
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
170 print $insertion_prob_handle "insertion_length\tprobability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
171 print $deletion_prob_handle "deletion_length\tprobability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
172
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
173 # calculate InDel length totals and probability out of total number of insertions/deletions. Print probabilities to file.
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
174 foreach my $insertion_length (sort(keys %insertion_hash)) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
175 my $insertion_probability;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
176 $insertion_probability = $insertion_hash{$insertion_length}/$insertion_total;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
177 print $insertion_prob_handle "$insertion_length\t$insertion_probability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
178 print "Insertion, $insertion_length, total , $insertion_hash{$insertion_length}\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
179 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
180 foreach my $deletion_length (sort(keys %deletion_hash)) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
181 my $deletion_probability;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
182 $deletion_probability = $deletion_hash{$deletion_length}/$deletion_total;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
183 print $deletion_prob_handle "$deletion_length\t$deletion_probability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
184 print "Deletion, $deletion_length, total, $deletion_hash{$deletion_length}\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
185 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
186
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
187
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
188 # define nucleotide array
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
189 my @nucleotides = ("A", "C", "G", "T");
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
190
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
191 foreach my $nt1 (@nucleotides) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
192 foreach my $nt3 (@nucleotides) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
193
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
194 # define the output file name and open it for writing
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
195 my $trinucleotide_SNP_probability_file_name = "Leukemia_OPEN_".$nt1."-".$nt3.".trinuc";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
196 open(my $trinuc_prob_handle, '>', $trinucleotide_SNP_probability_file_name) || die("Could not open file!");
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
197
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
198
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
199 # print trinucleotide contexts and corresponding totals for every mutated_to nucleotide
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
200 my $context_code=$nt1."_".$nt3;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
201
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
202 #foreach my $mutated_from_nucl_key (keys %{ $trinucleotide_context_data{$context_code} }) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
203 foreach my $mutated_from (@nucleotides) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
204 # define the "mutated_to" keys in trinuc context hash
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
205 # my $mutated_to_nucl_key;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
206
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
207 # the sum is only across mutated_to, and will be redefined for each mutated_from
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
208 my $context_sum_across_mutated_to = 0;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
209 my $context_sum_across_indel = 0;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
210
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
211 # print "\nRaw counts for mutated_from $mutated_from \n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
212
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
213
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
214 # foreach $mutated_to_nucl_key (keys %{ $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key} }) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
215 foreach my $mutated_to (@nucleotides) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
216 my $mutated_from_length = length( $mutated_from );
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
217 my $mutated_to_length = length( $mutated_to );
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
218 if ( $mutated_from_length == 1 ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
219 if ( $mutated_from ne "-" ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
220 if ( $mutated_to_length == 1 ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
221 if ( $mutated_to ne "-" ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
222 # print "$context_code, $mutated_from_nucl_key, $mutated_to_nucl_key -- $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key}{$mutated_to_nucl_key}\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
223 $context_sum_across_mutated_to = $context_sum_across_mutated_to + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to};
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
224 }# end if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
225 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
226 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to};
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
227 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
228 }# end if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
229 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
230 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to};
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
231 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
232 }# end if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
233 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
234 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to};
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
235 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
236 }# end if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
237 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
238 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to};
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
239 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
240 # print "$context_code, $mutated_from, $mutated_to-- $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
241 }# end of loop over mutated_to
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
242
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
243 # print "\nProbabilities for mutated_from $mutated_from:\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
244
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
245
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
246 foreach my $mutated_to (@nucleotides) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
247 #foreach $mutated_to_nucl_key (keys %{ $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key} }) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
248 my $mutated_from_length = length( $mutated_from);
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
249 my $mutated_to_length = length( $mutated_to);
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
250 if ( $mutated_from_length == 1 ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
251 if ( $mutated_from ne "-" ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
252 if ( $mutated_to_length == 1 ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
253 if ( $mutated_to ne "-" ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
254 my $SNP_probability;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
255 if ( $context_sum_across_mutated_to == 0 ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
256 $SNP_probability = 0;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
257 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
258 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
259 $SNP_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_mutated_to;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
260 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
261 if ( $mutated_to eq "T" ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
262 print $trinuc_prob_handle "$SNP_probability";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
263 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
264 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
265 # print "$context_code, $mutated_from, $mutated_to, context_sum_across_mutated_to=$context_sum_across_mutated_to -- $SNP_probability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
266 print $trinuc_prob_handle "$SNP_probability\t";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
267 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
268 }# end of if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
269 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
270 my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
271 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
272 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
273 }# end of if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
274 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
275 # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
276 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
277 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
278 }# end of if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
279 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
280 # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
281 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
282 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
283 }# end of if statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
284 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
285 my $indel_probability;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
286 if ( $context_sum_across_indel = 0 ) {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
287 $indel_probability = 0;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
288 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
289 else {
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
290 # $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
291 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
292 }
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
293 }# end else statement
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
294 }# end of loop over mutated_to
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
295 print $trinuc_prob_handle "\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
296
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
297 }# end of loop over mutated_from
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
298
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
299 # print "\n\n";
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
300
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
301
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
302 }# end loop over nt3
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
303 }# end loop over nt1
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
304
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
305
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
306
6e75a84e9338 planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff changeset
307