annotate GeneModel.pm @ 1:4f6952e0af48 default tip

CREST - add crest.loc.sample
author Jim Johnson <jj@umn.edu>
date Wed, 08 Feb 2012 16:08:01 -0600
parents acc8d8bfeb9a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1 package GeneModel;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
2 use strict;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
3 use Carp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
4 use Data::Dumper;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
5 use Tree::Interval;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
6 use Tree::DAG_Node;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
7 use English;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
8 use Gene;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
9
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
10 sub new {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
11 my $class = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
12 my $obj = {};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
13 $obj->{FMODEL} = {};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
14 $obj->{RMODEL} = {};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
15 if (@_) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
16 my %arg = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
17 $obj->{FMODEL} = $arg{-FMODEL} if($arg{-FMODEL});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
18 $obj->{RMODEL} = $arg{-RMODEL} if($arg{-RMODEL});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
19 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
20 return bless $obj, $class;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
21 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
22
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
23 sub get_all_chr {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
24 my $self = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
25 return keys %{$self->{FMODEL}};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
26 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
27
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
28 sub add_gene {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
29 my ($self, $gene) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
30 if($gene->strand eq '+') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
31 $self->{FMODEL}->{$gene->chr}->insert([$gene->start, $gene->end], $gene);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
32 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
33 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
34 $self->{FMODEL}->{$gene->chr}->insert([$gene->start, $gene->end], $gene);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
35 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
36 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
37
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
38 sub remove_gene {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
39 my $self = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
40 my $gene = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
41 if($gene->strand eq '+') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
42 $self->{FMODEL}->{$gene->chr}->remove([$gene->start, $gene->end], $gene);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
43 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
44 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
45 $self->{RMODEL}->{$gene->chr}->remove([$gene->start, $gene->end], $gene);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
46 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
47 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
48
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
49 sub n_genes {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
50 my $self = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
51 my $n = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
52 foreach my $c (keys %{$self->{FMODEL}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
53 $n += $self->{FMODEL}->{$c}->size;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
54 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
55 foreach my $c (keys %{$self->{RMODEL}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
56 $n += $self->{RMODEL}->{$c}->size;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
57 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
58 return $n;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
59 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
60
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
61 sub sub_model {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
62 my ($self, $chr, $strand) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
63 if($strand eq '+' ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
64 return $self->{FMODEL}->{$chr};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
65 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
66 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
67 return $self->{RMODEL}->{$chr};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
68 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
69 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
70 sub look_up_gene {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
71 my ($self, $chr, $gene) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
72 if($gene->isa('Gene')) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
73 if($gene->strand eq '+') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
74 return $self->{FMODEL}->{$chr}->lookup([$gene->start, $gene->end]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
75 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
76 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
77 return $self->{RMODEL}->{$chr}->lookup([$gene->start, $gene->end]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
78 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
79 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
80 elsif(ref($gene) eq 'ARRAY') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
81 return (
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
82 $self->{FMODEL}->{$chr}->lookup([$gene->start, $gene->end]),
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
83 $self->{RMODEL}->{$chr}->lookup([$gene->start, $gene->end]) );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
84 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
85 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
86 croak "Not implemented look_up_gene for $gene";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
87 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
88 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
89
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
90 sub overlap_genes {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
91 my ($self, $chr, $fea, $ort) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
92 my @genes;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
93
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
94 if(ref($fea) eq 'ARRAY') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
95 @genes = $self->_overlap_array($chr, $fea, $ort);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
96 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
97 elsif($fea->isa('Transcript')) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
98 foreach my $e (@{$fea->exons}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
99 push @genes, $self->_overlap_array($chr, $e,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
100 $fea->strand ? $fea->strand : undef);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
101 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
102 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
103 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
104 croak "Not implemented overlap for $fea";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
105 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
106 my @rtn;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
107 foreach my $g (@genes) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
108 push @rtn, $g if($g->overlap($fea));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
109 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
110 return @rtn;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
111 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
112
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
113 sub _overlap_array {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
114 my($self, $chr, $a, $ort) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
115 if($ort) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
116 return $ort == '+' ? $self->{FMODEL}->{$chr}->intersect($a) :
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
117 $self->{RMODEL}->{$chr}->intersect($a);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
118 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
119 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
120 return ($self->{FMODEL}->{$chr}->intersect($a), $self->{RMODEL}->{$chr}->intersect($a));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
121 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
122 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
123
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
124 sub model {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
125 my ($self, $chr, $strand, $val) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
126 croak "Please specify chr and strand for GeneModel"
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
127 unless $chr && $strand;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
128 my $model = $strand eq '+' ? $self->{FMODEL} : $self->{RMODEL};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
129
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
130 if($val) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
131 croak "The model for each chrom must be a Tree::Interval"
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
132 unless $val->isa('Tree::Interval');
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
133 $model->{$chr} = $val;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
134 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
135 return $model->{$chr};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
136 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
137
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
138 sub from_file {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
139 my ($self, $file, $format) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
140 croak "$format is not implemented yet!" unless uc($format) eq "REFFLAT";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
141 print STDERR "Processing gene model file, please wait ... ";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
142 my $transcripts = _refFlat2Transcripts($file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
143 my $model = _Transcript2GeneModel($transcripts);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
144 foreach my $c (keys %{$model}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
145 foreach my $s (keys %{$model->{$c}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
146 if($s eq '+') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
147 $self->{FMODEL}->{$c} = $model->{$c}{$s};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
148 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
149 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
150 $self->{RMODEL}->{$c} = $model->{$c}{$s};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
151 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
152 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
153 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
154 print STDERR "Done.\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
155 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
156
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
157 sub _refFlat2Transcripts {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
158 my $file = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
159 my %transcripts;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
160
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
161 open my $FILE, "<$file" or croak "Can't open $file: $OS_ERROR";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
162 while( my $line = <$FILE> ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
163 next if($line =~ /^#/); #annotation line
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
164 my ($name, $refseq_id, $chr, $strand, $txStart, $txEnd, $cdsStart, $cdsEnd, $exonCount,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
165 $exonStarts, $exonEnds) = split(/\s+/, $line);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
166 my $type = $cdsEnd == $cdsStart ? "NR" : "NM";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
167 my @starts = split /,/, $exonStarts;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
168 my @ends = split /,/, $exonEnds;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
169 if(scalar @starts != scalar @ends) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
170 warn "Number of exon starts is not same as exon ends!";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
171 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
172 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
173 $transcripts{$chr} = {} unless (exists $transcripts{$chr});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
174 $transcripts{$chr}->{$strand} = [] unless (exists $transcripts{$chr}->{$strand});
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
175 my @exons;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
176 for( my $i = 0; $i < $exonCount; $i++) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
177 push @exons, [$starts[$i], $ends[$i]];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
178 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
179
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
180 my $t = Transcript->new(-START => $starts[0], -END => $ends[$#ends], -CHR => $chr,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
181 -STRAND => $strand eq '+' ? 1 : -1, -NAME => $name, -REFSEQ_ID => $refseq_id,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
182 -EXONS => \@exons, -TYPE => $type);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
183 push @{$transcripts{$chr}->{$strand}}, $t;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
184 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
185 #sort the list of transcripts
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
186 foreach my $c (keys %transcripts) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
187 foreach my $s (keys %{$transcripts{$c}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
188 my @tmp = @{$transcripts{$c}->{$s}};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
189 @tmp = sort { $a->start <=> $b->start || $a->end <=> $b->end } @tmp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
190 $transcripts{$c}->{$s} = \@tmp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
191 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
192 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
193 return \%transcripts;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
194 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
195
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
196 sub _Transcript2GeneModel {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
197 my $transcripts = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
198 my %genemodel;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
199
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
200 foreach my $c (keys %{$transcripts}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
201 $genemodel{$c} = {};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
202 foreach my $s (keys %{$transcripts->{$c}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
203 my $tree = $genemodel{$c}->{$s} = Tree::Interval->new();
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
204 foreach my $t (@{$transcripts->{$c}{$s}}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
205 my @ol = $tree->intersect([$t->start, $t->end]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
206 if(scalar @ol == 0 ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
207 my $gene = Gene->new();
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
208 $gene->add_transcript($t);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
209 $tree->insert([$t->start, $t->end], $gene);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
210 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
211 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
212 my @real_ol;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
213 foreach my $g (@ol) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
214 push @real_ol, $g->val if($g->val->overlap($t));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
215 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
216 if(scalar @real_ol == 0) { #gene in gene?
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
217 # print STDERR "Gene in Gene: ", $t->name, " in ", $ol[0]->val->name, " \n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
218 my $gene = Gene->new();
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
219 $gene->add_transcript($t);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
220 $tree->insert([$t->start, $t->end], $gene);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
221 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
222 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
223 if(scalar @real_ol == 1 ) { # transcript belongs to gene
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
224 my $g = $real_ol[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
225 # print STDERR "Same Gene group: ", $t->name, " in ", $real_ol[0]->name, " \n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
226 if($t->start < $g->start || $t->end > $g->end) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
227 $tree->delete([$g->start, $g->end], $g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
228 $g->add_transcript($t);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
229 $tree->insert([$g->start, $g->end], $g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
230 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
231 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
232 $g->add_transcript($t);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
233 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
234 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
235 # a transcript make more genes same
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
236 # print STDERR "Join Gene group: ", $t->name, " join ", $real_ol[0]->name, "and", $real_ol[1]->name, " \n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
237 my $gene = Gene->new();
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
238 $gene->add_transcript($t);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
239 foreach my $g (@real_ol) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
240 $tree->delete([$g->start, $g->end], $g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
241 foreach my $tp (@{$g->transcripts}) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
242 $gene->add_transcript($tp);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
243 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
244 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
245 $tree->insert([$gene->start, $gene->end], $gene);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
246 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
247 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
248 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
249 return \%genemodel;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
250 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
251
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
252 sub _print_tree {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
253 return;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
254 my $tree = shift;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
255 my $t = Tree::DAG_Node->lol_to_tree($tree->root->as_lol);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
256 local $, = "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
257 print @{$t->draw_ascii_tree}, "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
258 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
259 1;