annotate pyPRADA_1.2/make_intragenic_junctions.pl @ 3:f17965495ec9 draft default tip

Uploaded
author siyuan
date Tue, 11 Mar 2014 12:14:01 -0400
parents acc2ca1a3ba4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 #!/usr/bin/perl
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 use strict;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 use warnings;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 ##################################
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 # make_intragenic_junctions.pl
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 #
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 # This is an extension of MFB's perl code make_exon_junctions.pl
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 # Generates "abnormal" junctions between exons of a gene
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 # In the code GeneA, GeneB should be input as same gene
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 #
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12 # RV last modified 4/9/2013
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 ##################################
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 if ($#ARGV!=5) {die "usage is 'perl make_exon_junctions.pl GeneA GeneB ref.annotation ref.map ref.fasta junL > output'\n";}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 my $geneA = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 my $geneB = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 my $annotations = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 my $map = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 my $fasta = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 my $overlap = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 # Get transcript Ensembl IDs and orientations for GeneA and GeneB
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 my @ensembA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 my @ensembB;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 my @orientationA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 my @orientationB;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 my @seqidA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 my @seqidB;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 my $testerA=0; my $testerB=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 open (ANN, "<$annotations") or die "can't open Ensembl annotations\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 while (my $text = <ANN>) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 chomp $text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 my @line = split " ", $text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 if ($line[3] eq $geneA) {push @ensembA, $line[1]; push @orientationA, $line[4]; push @seqidA, $line[0]; $testerA++;}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 if ($line[3] eq $geneB) {push @ensembB, $line[1]; push @orientationB, $line[4]; push @seqidB, $line[0]; $testerB++;}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 close (ANN);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 if ($testerA==0 || $testerB==0) {die "couldn't find one of the genes\n";}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 # Get exon lengths for each transcript for GeneA and GeneB
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 my @exon_length_A;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 my @exon_length_B;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 my $chr_A;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 my $chr_B;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 my @exon_end_A;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 my @exon_start_B;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 open (MAP, "<$map") or die "can't open Ensembl map\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 while (my $text = <MAP>) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 chomp $text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 my @line = split " ", $text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 my $ensID = pop (@line);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 pop @line;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 for (my $i=0; $i<=$#ensembA; $i++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 if ($ensembA[$i] eq $ensID) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 $chr_A = $line[0];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 my $num_exons = ($#line+1)/3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 for (my $exonA=0; $exonA<$num_exons; $exonA++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 my $length = $line[3*$exonA + 2] - $line[3*$exonA + 1] + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 if ($orientationA[$i] == 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 push @{$exon_length_A[$i]}, $length;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 push @{$exon_end_A[$i]}, $line[3*$exonA + 2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 unshift @{$exon_length_A[$i]}, $length;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 unshift @{$exon_end_A[$i]}, $line[3*$exonA + 1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 for (my $i=0; $i<=$#ensembB; $i++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 if ($ensembB[$i] eq $ensID) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 $chr_B = $line[0];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 my $num_exons = ($#line+1)/3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 for (my $exonB=0; $exonB<$num_exons; $exonB++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 my $length = $line[3*$exonB + 2] - $line[3*$exonB + 1] + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 if ($orientationB[$i] == 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 push @{$exon_length_B[$i]}, $length;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 push @{$exon_start_B[$i]}, $line[3*$exonB + 1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 unshift @{$exon_length_B[$i]}, $length;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 unshift @{$exon_start_B[$i]}, $line[3*$exonB + 2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 close (MAP);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 # Get sequence for each transcript (take reverse complement when necessary)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 my @sequenceA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 my @sequenceB;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 open (FHSEQ, "<$fasta") or die "can't open Ensembl map\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 my $top = <FHSEQ>;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 my $readID = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 while (my $text = <FHSEQ>) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 chomp $text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 if ($text =~ ">") { #WTG changed to update for specific ENST ID number which might not be in the same order in the fasta as in the annotation
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 $text=~ s/>//;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 $readID=$text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 for (my $i=0; $i<=$#seqidA; $i++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 if ($seqidA[$i] == $readID) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 if ($sequenceA[$i]) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 $sequenceA[$i] = $sequenceA[$i].$text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 else {$sequenceA[$i] = $text;}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 for (my $i=0; $i<=$#seqidB; $i++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 if ($seqidB[$i] == $readID) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 if ($sequenceB[$i]) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 $sequenceB[$i] = $sequenceB[$i].$text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 else {$sequenceB[$i] = $text;}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 close (FHSEQ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 for (my $txtA=0; $txtA<=$#sequenceA; $txtA++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 if ($orientationA[$txtA]==-1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 $sequenceA[$txtA] = rc($sequenceA[$txtA]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 for (my $txtB=0; $txtB<=$#sequenceB; $txtB++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 if ($orientationB[$txtB]==-1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 $sequenceB[$txtB] = rc($sequenceB[$txtB]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 # Print sequences for each hypothetical exon junction (for each pair of transcripts)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 my %junctions;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 my %junctions_known;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 for (my $txtA=0; $txtA<=$#sequenceA; $txtA++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 for (my $txtB=0; $txtB<=$#sequenceB; $txtB++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 my $num_exon_A = $#{$exon_length_A[$txtA]}+1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 my $num_exon_B = $#{$exon_length_B[$txtB]}+1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 my $running_pos_A=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 for (my $exonA=0; $exonA<$num_exon_A; $exonA++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 $running_pos_A += $exon_length_A[$txtA][$exonA];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 my $junction_start = $exon_end_A[$txtA][$exonA];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 my $start = $running_pos_A - $overlap;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 my $seqA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 if ($start >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 $seqA = substr($sequenceA[$txtA], $start, $overlap);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 $start=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 my $tmp_length = $running_pos_A;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 $seqA = substr($sequenceA[$txtA], $start, $tmp_length);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 my $running_pos_B=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 for (my $exonB=0; $exonB<$num_exon_B; $exonB++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 my $junction_end = $exon_start_B[$txtB][$exonB];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 my $start = $running_pos_B;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 my $seqB = substr($sequenceB[$txtB], $start, $overlap);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 $running_pos_B += $exon_length_B[$txtB][$exonB];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 my $key= ">chr$chr_A\.$junction_start\.chr$chr_B\.$junction_end\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 my $junction = $seqA.$seqB;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 #print ">chr$chr_A\.$junction_start\.chr$chr_B\.$junction_end\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 #print "$junction\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 $junctions{$key}=$junction;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 for (my $txtA=0; $txtA<=$#sequenceA; $txtA++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 my $num_exon_A = $#{$exon_length_A[$txtA]};
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 my $running_pos_A=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 for (my $exonA=0; $exonA<$num_exon_A; $exonA++) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 $running_pos_A += $exon_length_A[$txtA][$exonA];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 my $junction_start = $exon_end_A[$txtA][$exonA];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191 my $start = $running_pos_A - $overlap;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 my $seqA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 if ($start >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194 $seqA = substr($sequenceA[$txtA], $start, $overlap);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196 else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197 $start=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 my $tmp_length = $running_pos_A;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199 $seqA = substr($sequenceA[$txtA], $start, $tmp_length);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 my $running_pos_B=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 my $junction_end = $exon_start_B[$txtA][$exonA+1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203 my $start1 = $running_pos_B;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204 my $seqB = substr($sequenceA[$txtA], $start1, $overlap);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 #$running_pos_B += $exon_length_A[$txtA][$exonA+1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206 my $key= ">chr$chr_A\.$junction_start\.chr$chr_A\.$junction_end\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207 my $junction = $seqA.$seqB;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 #print "$junction\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209 $junctions_known{$key}=$junction;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 my @disc;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215 my %count = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 foreach my $element (keys (%junctions_known), keys (%junctions)) { $count{$element}++ }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217 foreach my $element (keys %count) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 if($count{$element} == 1){
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
219 #print $element;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
220 push @disc , $element;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
221 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
222 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
223
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
224 foreach my $junc(@disc){
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
225 if($junctions_known{$junc}){print $junc,$junctions_known{$junc},"\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
226 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
227 else {print $junc,$junctions{$junc},"\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
228 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
229
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
230 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
231
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
232
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
233 ##################
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
234 ##################
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
235 ### SUBROUTINES
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
236 ##################
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
237 ##################
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
238
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
239 sub rc{
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
240 my $dna = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
241 my $revcom = reverse($dna);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
242 $revcom =~ tr/ACGTacgt/TGCAtgca/;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
243 return $revcom;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
244 }