Mercurial > repos > thondeboer > neat_genreads
annotate utilities/deprecated/FindNucleotideContextOnReference.healthy.pl @ 10:7d10b55965c9 draft default tip
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author | thondeboer |
---|---|
date | Wed, 16 May 2018 17:02:51 -0400 |
parents | 6e75a84e9338 |
children |
rev | line source |
---|---|
0
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
1 #!/usr/bin/perl |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
2 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
3 use strict; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
4 use Math::Round; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
5 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
6 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
7 if ($#ARGV < 1) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
8 print "parameter mismatch\nTo run type this command:\nperl $0 fastahack reference input_pos_file output_file human_gff_file\n\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
9 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
10 print " first argument = full path to fastahack\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
11 print " second argument = full path to reference genome\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
12 print " third argument = input file with arbitrary number of columns, but 1st col=chromosome name and 2nd col=position\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
13 print " fourth argument = output file with three columns: chromosome name, position of the center nucleotide, and the thre-nucleotide context for that position\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
14 print " fifth argument = full path to human gff file\n\n\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
15 exit 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
16 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
17 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
18 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
19 my $Fastahack=$ARGV[0]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
20 my $Reference=$ARGV[1]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
21 open(InputPositions, '<', $ARGV[2]) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
22 open(OutputTrinucleotideContext, '>', $ARGV[3]) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
23 open(HumanGFF, '<', $ARGV[4]) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
24 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
25 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
26 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
27 ################ read in one coordinate at a time and execute fastahack on it |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
28 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
29 # reading the header |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
30 my $head = <InputPositions>; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
31 $head =~ s/\n|\r//; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
32 print OutputTrinucleotideContext "$head\tContext\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
33 my $gffHead = <HumanGFF>; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
34 chomp $gffHead; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
35 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
36 # creating trinucleotide context data hash, insertion and deletion counts |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
37 my %trinucleotide_context_data; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
38 my %context_tally_across_mutated_to; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
39 my %gff_hash; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
40 my $gffMatch; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
41 my %location; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
42 # my %genotype_hash; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
43 my %insertion_hash; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
44 my %deletion_hash; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
45 my $insertion_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
46 my $deletion_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
47 my $zygotes_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
48 my %annotation_hash; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
49 my $annotation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
50 my %exonic_consequence_hash; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
51 my $intronic; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
52 my $exonic; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
53 my $intergenic; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
54 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
55 # reading the positional information |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
56 my $line_count = 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
57 while (<InputPositions>) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
58 $_ =~ s/\n|\r//; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
59 #print "$_\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
60 my @line = split('\t', $_); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
61 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
62 # getting the chromosome and coordinate fields from input file |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
63 # fastahack will need to the chromosome and coordinate to read the information from the reference |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
64 my $chromosome = $line[0]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
65 my $coordinate = $line[1]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
66 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
67 # get coordinates of first and last character in the context |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
68 my $start_region = $coordinate - 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
69 my $end_region = $coordinate + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
70 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
71 # if the coordinate is the very first letter on the chromosome, then do not read before that position |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
72 # the context becomes 2 letter code, as opposed to a trinucleotide |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
73 if ( $start_region == 0 ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
74 $start_region = 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
75 $end_region = 2; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
76 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
77 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
78 #print "$Fastahack -r $chromosome:$start_region..$end_region $Reference\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
79 my $context = `$Fastahack -r $chromosome:$start_region..$end_region $Reference`; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
80 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
81 # capitalize context letters |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
82 $context = uc($context); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
83 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
84 #### IF USING CONTROLLED DATA, split germline column into germline allele and mutated_to allele |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
85 # my @germline = split ('/', $line[6]); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
86 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
87 # if germline allele does not equal reference allele, print "start_region germline allele end_region" |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
88 # specifically, replace the middle letter of the context with the germline allele |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
89 #print "$germline[0], $germline[1]\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
90 # if ($germline[0] ne $germline[1]) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
91 # print "germline/reference mismatch, line number $line_count\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
92 # if ($coordinate != 1) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
93 # substr($context,1,1)= $germline[1]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
94 # } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
95 # else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
96 # substr($context,0,1)= $germline[1]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
97 # } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
98 # } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
99 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
100 print OutputTrinucleotideContext "$_\t$context"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
101 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
102 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
103 ############################### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
104 # new section: forming the data structure |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
105 ############################### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
106 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
107 # to create N_N contexts for data structure, context_code is defined as the trinucleotide context with a blank middle allele |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
108 my $context_code=$context; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
109 $context_code =~ s/\n|\r//; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
110 substr($context_code,1,1) = "_"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
111 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
112 # create variables for mutated_from and mutated_to nucleotides |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
113 my $mutated_from = $line[3]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
114 my $mutated_to = $line[4]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
115 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
116 # creating genotype variable from column 10 of VCF |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
117 my $genotype = $line[9]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
118 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
119 # incrementing each genotype |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
120 # $genotype_hash{$genotype} = $genotype_hash{$genotype} + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
121 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
122 # splitting heterozygosity by comma, defining heterozygosity total |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
123 my @zygotes = split (',', $mutated_to); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
124 my $zygotes_length = scalar(@zygotes); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
125 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
126 # identify heterozygosity, choose one at random to use. Count heterozygosity instances |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
127 if ($zygotes_length > 1) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
128 my $zygotesRand = $zygotes_length*rand(); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
129 my $zygotesRound = round($zygotesRand) - 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
130 $zygotes_total = $zygotes_total + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
131 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
132 $mutated_to = $zygotes[$zygotesRound]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
133 # print "@zygotes\t$mutated_to\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
134 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
135 # print "@zygotes\t$mutated_to\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
136 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
137 # my $round_rand_test = round(rand()); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
138 # print $round_rand_test; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
139 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
140 # define length of insertions and deletions |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
141 my $insertion_length; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
142 my $deletion_length; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
143 if ($mutated_from eq "-") { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
144 $insertion_length = length( $mutated_to ); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
145 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
146 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
147 $insertion_length = length( $mutated_to ) - 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
148 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
149 if ($mutated_to eq "-") { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
150 $deletion_length = length( $mutated_from ); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
151 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
152 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
153 $deletion_length = length( $mutated_from ) - 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
154 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
155 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
156 # context_codes are totalled |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
157 $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to} = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to} + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
158 $context_tally_across_mutated_to{$context_code}{$mutated_from} = $context_tally_across_mutated_to{$context_code}{$mutated_from} + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
159 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
160 # insertion and deletion lengths are totalled |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
161 if ($insertion_length > $deletion_length) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
162 $insertion_hash{$insertion_length} = $insertion_hash{$insertion_length} + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
163 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
164 if ($deletion_length > $insertion_length) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
165 $deletion_hash{$deletion_length} = $deletion_hash{$deletion_length} + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
166 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
167 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
168 # total insertions and deletions |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
169 if ($insertion_length != $deletion_length) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
170 if ($insertion_length > $deletion_length) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
171 $insertion_total = $insertion_total + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
172 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
173 elsif ($deletion_length > $insertion_length) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
174 $deletion_total = $deletion_total + 1; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
175 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
176 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
177 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
178 # Find variant annotation and exonic consequence in ANNOVAR outfile |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
179 my $annotation = $line[7]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
180 if ( $annotation =~ /Func.refGene=(.{1,30});Gene\.refGene/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
181 # print "$1\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
182 $annotation_hash{$1}++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
183 $annotation_total++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
184 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
185 if ( $annotation =~ /ExonicFunc.refGene=(.{1,30});AAChange\.refGene/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
186 # print "$1\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
187 $exonic_consequence_hash{$1}++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
188 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
189 if ( $annotation =~ /Func.refGene=.{0,15}intronic\;/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
190 $intronic++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
191 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
192 if ( $annotation !~ /Func.refGene=ncRNA_exonic/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
193 if ( $annotation =~ /Func.refGene=.{0,15}exonic\;/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
194 $exonic++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
195 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
196 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
197 if ( $annotation =~ /Func.refGene=.{0,15}intergenic\;/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
198 $intergenic++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
199 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
200 elsif ( $annotation =~ /Func.refGene=.{0,15}ncRNA_splicing\;/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
201 $intergenic++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
202 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
203 elsif ( $annotation =~ /Func.refGene=.{0,15}upstream\;/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
204 $intergenic++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
205 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
206 elsif ( $annotation =~ /Func.refGene=.{0,15}downstream\;/ ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
207 $intergenic++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
208 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
209 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
210 $location{$coordinate}++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
211 # Reading input gff file, incrementing gff variant region hash |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
212 # while (<HumanGFF>) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
213 # $_ =~ s/\n|\r//; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
214 # my @line = split('\t', $_); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
215 # my $region_name = "$line[3]-$line[4]"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
216 # if ($coordinate >= $line[3] && $coordinate <= $line[4]) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
217 # $gff_hash{$region_name}++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
218 # $gffMatch++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
219 # print "$coordinate $region_name\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
220 # } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
221 # } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
222 #print "$region_name, $gff_hash{$region_name}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
223 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
224 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
225 # to keep track of progress |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
226 # 1000000 for LARGE dbsnp vcfs, 10000 for smaller vcf/tsv tumor mutation files |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
227 unless ($line_count%10000) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
228 print "processed $line_count lines\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
229 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
230 $line_count++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
231 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
232 # end working through the input file |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
233 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
234 # print total number of mutations |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
235 my $mutation_total = $line_count; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
236 print "Number of Mutations -- $mutation_total\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
237 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
238 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
239 ################### Reading the input gff and creating custom BED file #################### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
240 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
241 my $gffBED = "vars.bed"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
242 open(my $bed_handle, '>', $gffBED) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
243 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
244 # Print BED file Header |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
245 print $bed_handle "START\tEND\tVariant_Frequency\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
246 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
247 # Reading input gff file, incrementing gff variant region hash |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
248 while (<HumanGFF>) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
249 $_ =~ s/\n|\r//; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
250 my @line = split('\t', $_); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
251 my $region_name = "$line[3]-$line[4]"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
252 my $region_length = $line[4] - $line[3]; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
253 my $region_freq = 0; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
254 foreach my $coordinate (sort(keys %location)) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
255 if ($coordinate >= $line[3] && $coordinate <= $line[4]) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
256 $gff_hash{$region_name}++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
257 $gffMatch++; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
258 # print "$coordinate $region_name\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
259 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
260 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
261 if ($gff_hash{$region_name} == 0) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
262 print $bed_handle "$line[3]\t$line[4]\t$region_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
263 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
264 if ($gff_hash{$region_name} > 0) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
265 $region_freq = $gff_hash{$region_name} / $region_length; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
266 print $bed_handle "$line[3]\t$line[4]\t$region_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
267 print "Region $region_name variant frequency -- $region_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
268 print "Total variants in region $region_name -- $gff_hash{$region_name}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
269 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
270 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
271 #print "$region_name, $gff_hash{$region_name}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
272 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
273 print "GFF Match -- $gffMatch\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
274 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
275 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
276 ######################### open files for writing ########################## |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
277 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
278 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
279 # my $genotype_name = "zygosity.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
280 # open(my $genotype_handle, '>', $genotype_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
281 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
282 my $insertion_file_name = "SSM_insLength.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
283 open(my $insertion_prob_handle, '>', $insertion_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
284 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
285 my $deletion_file_name = "SSM_delLength.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
286 open(my $deletion_prob_handle, '>', $deletion_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
287 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
288 my $overall_file_name = "SSM_overall.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
289 open(my $overall_prob_handle, '>', $overall_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
290 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
291 my $heterozygosity_file_name = "heterozygosity.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
292 open(my $heterozygosity_prob_handle, '>', $heterozygosity_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
293 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
294 my $annotation_file_name = "annofreq.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
295 open(my $annotation_handle, '>', $annotation_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
296 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
297 my $exonic_con_file_name = "exonic_consequences.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
298 open(my $exonic_con_handle, '>', $exonic_con_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
299 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
300 my $intronic_file_name = "intronic_vars.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
301 open(my $intronic_handle, '>', $intronic_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
302 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
303 my $exonic_file_name = "exonic_vars.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
304 open(my $exonic_handle, '>', $exonic_file_name) || die ("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
305 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
306 my $intergenic_file_name = "intergenic_vars.prob"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
307 open(my $intergenic_handle, '>', $intergenic_file_name) || die ("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
308 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
309 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
310 ######################### Calculate frequency models ####################### |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
311 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
312 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
313 # calculate zygosity ratio frequency, print to file |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
314 # foreach my $genotype (sort(keys %genotype_hash)) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
315 # my $zygosity_frequency; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
316 # $zygosity_frequency = $genotype_hash{$genotype}/$mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
317 # print $genotype_handle "$genotype\t$zygosity_frequency\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
318 # print "Genotype, $genotype -- $genotype_hash{$genotype}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
319 # } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
320 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
321 # print annotation and exonic consequence frequencies |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
322 foreach $1 (sort(keys %annotation_hash)) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
323 my $annotation_frequency; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
324 $annotation_frequency = $annotation_hash{$1}/$mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
325 print "$1 -- $annotation_hash{$1}, $annotation_frequency\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
326 print $annotation_handle "$1\t$annotation_frequency\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
327 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
328 foreach $1 (sort(keys %exonic_consequence_hash)) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
329 my $exonic_con_freq; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
330 if ( $1 ne "." ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
331 $exonic_con_freq = $exonic_consequence_hash{$1}/$mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
332 print "Exonic Consequence: $1 -- $exonic_consequence_hash{$1}, $exonic_con_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
333 print $exonic_con_handle "$1\t$exonic_con_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
334 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
335 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
336 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
337 # Calculating exonic, intronic, and intergenic frequencies, printing to files |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
338 my $intronic_freq; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
339 my $exonic_freq; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
340 my $intergenic_freq; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
341 $intronic_freq = $intronic/$mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
342 $exonic_freq = $exonic/$mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
343 $intergenic_freq = $intergenic/$mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
344 print $intronic_handle "$intronic_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
345 print $exonic_handle "$exonic_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
346 print $intergenic_handle "$intergenic_freq\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
347 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
348 print "Intronic -- $intronic\nExonic -- $exonic\nIntergenic -- $intergenic\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
349 #print "Total Annotations -- $annotation_total\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
350 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
351 # print overall likelihood file headers |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
352 print $overall_prob_handle "mutation_type\tprobability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
353 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
354 # print insertions and deletion probabilities out of all mutations |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
355 my $insertion_prob_all = $insertion_total / $mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
356 my $deletion_prob_all = $deletion_total / $mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
357 print $overall_prob_handle "insertion\t$insertion_prob_all\ndeletion\t$deletion_prob_all\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
358 # print $overall_prob_handle "Deletion Probability -- $deletion_prob_all\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
359 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
360 # print InDel totals |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
361 print "Insertions $insertion_total\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
362 print "Deletions $deletion_total\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
363 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
364 # print insertion and deletion headers |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
365 print $insertion_prob_handle "insertion_length\tprobability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
366 print $deletion_prob_handle "deletion_length\tprobability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
367 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
368 # calculate InDel length totals and probability out of total number of insertions/deletions. Print probabilities to file. |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
369 foreach my $insertion_length (sort(keys %insertion_hash)) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
370 my $insertion_probability; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
371 $insertion_probability = $insertion_hash{$insertion_length}/$insertion_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
372 print $insertion_prob_handle "$insertion_length\t$insertion_probability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
373 # print "Insertion, $insertion_length, total , $insertion_hash{$insertion_length}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
374 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
375 foreach my $deletion_length (sort(keys %deletion_hash)) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
376 my $deletion_probability; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
377 $deletion_probability = $deletion_hash{$deletion_length}/$deletion_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
378 print $deletion_prob_handle "$deletion_length\t$deletion_probability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
379 # print "Deletion, $deletion_length, total, $deletion_hash{$deletion_length}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
380 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
381 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
382 # print heterozygosity frequency to file |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
383 my $zygote_frequency = $zygotes_total / $mutation_total; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
384 print $heterozygosity_prob_handle "$zygote_frequency\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
385 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
386 print "heterozygous alleles -- $zygotes_total\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
387 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
388 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
389 # define nucleotide array |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
390 my @nucleotides = ("A", "C", "G", "T"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
391 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
392 foreach my $nt1 (@nucleotides) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
393 foreach my $nt3 (@nucleotides) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
394 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
395 # define the output file name and open it for writing |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
396 my $trinucleotide_SNP_probability_file_name = "Context".$nt1."-".$nt3.".trinuc"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
397 open(my $trinuc_prob_handle, '>', $trinucleotide_SNP_probability_file_name) || die("Could not open file!"); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
398 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
399 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
400 # print trinucleotide contexts and corresponding totals for every mutated_to nucleotide |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
401 my $context_code=$nt1."_".$nt3; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
402 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
403 #foreach my $mutated_from_nucl_key (keys %{ $trinucleotide_context_data{$context_code} }) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
404 foreach my $mutated_from (@nucleotides) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
405 # define the "mutated_to" keys in trinuc context hash |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
406 # my $mutated_to_nucl_key; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
407 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
408 # the sum is only across mutated_to, and will be redefined for each mutated_from |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
409 my $context_sum_across_mutated_to = 0; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
410 my $context_sum_across_indel = 0; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
411 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
412 # print "\nRaw counts for mutated_from $mutated_from \n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
413 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
414 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
415 # foreach $mutated_to_nucl_key (keys %{ $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key} }) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
416 foreach my $mutated_to (@nucleotides) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
417 my $mutated_from_length = length( $mutated_from ); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
418 my $mutated_to_length = length( $mutated_to ); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
419 if ( $mutated_from_length == 1 ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
420 if ( $mutated_from ne "-" ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
421 if ( $mutated_to_length == 1 ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
422 if ( $mutated_to ne "-" ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
423 # print "$context_code, $mutated_from_nucl_key, $mutated_to_nucl_key -- $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key}{$mutated_to_nucl_key}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
424 $context_sum_across_mutated_to = $context_sum_across_mutated_to + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
425 }# end if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
426 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
427 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
428 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
429 }# end if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
430 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
431 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
432 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
433 }# end if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
434 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
435 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
436 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
437 }# end if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
438 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
439 $context_sum_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
440 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
441 # print "$context_code, $mutated_from, $mutated_to-- $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
442 }# end of loop over mutated_to |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
443 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
444 # print "\nProbabilities for mutated_from $mutated_from:\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
445 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
446 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
447 foreach my $mutated_to (@nucleotides) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
448 #foreach $mutated_to_nucl_key (keys %{ $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key} }) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
449 my $mutated_from_length = length( $mutated_from); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
450 my $mutated_to_length = length( $mutated_to); |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
451 if ( $mutated_from_length == 1 ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
452 if ( $mutated_from ne "-" ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
453 if ( $mutated_to_length == 1 ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
454 if ( $mutated_to ne "-" ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
455 my $SNP_probability; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
456 if ( $context_sum_across_mutated_to == 0 ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
457 $SNP_probability = 0; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
458 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
459 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
460 $SNP_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_mutated_to; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
461 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
462 if ( $mutated_to eq "T" ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
463 print $trinuc_prob_handle "$SNP_probability"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
464 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
465 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
466 # print "$context_code, $mutated_from, $mutated_to, context_sum_across_mutated_to=$context_sum_across_mutated_to -- $SNP_probability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
467 print $trinuc_prob_handle "$SNP_probability\t"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
468 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
469 }# end of if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
470 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
471 my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
472 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
473 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
474 }# end of if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
475 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
476 # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
477 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
478 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
479 }# end of if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
480 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
481 # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
482 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
483 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
484 }# end of if statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
485 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
486 my $indel_probability; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
487 if ( $context_sum_across_indel = 0 ) { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
488 $indel_probability = 0; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
489 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
490 else { |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
491 # $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
492 # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
493 } |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
494 }# end else statement |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
495 }# end of loop over mutated_to |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
496 print $trinuc_prob_handle "\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
497 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
498 }# end of loop over mutated_from |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
499 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
500 # print "\n\n"; |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
501 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
502 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
503 }# end loop over nt3 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
504 }# end loop over nt1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
505 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
506 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
507 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
508 |