annotate rDiff/src/tools/sanitize_genes.m @ 3:29a698dc5c7e default tip

Merge multiple heads.
author Dave Bouvier <dave@bx.psu.edu>
date Mon, 27 Jan 2014 14:15:36 -0500
parents 0f80a5141704
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
1 function [genes]=sanitize_genes(genes,CFG)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
2 %This function removes trnascript and genes which have a invalid
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
3 %structure and recomputes the splicegraph
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
4
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
5
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
6 %Mark genes with eronous exon definitions
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
7 RM_GENES_IDX=[]; %genes to keep
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
8 for i=1:size(genes,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
9 %remove transcripts which have a length smaller than
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
10 %readlength
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
11 RM_TR_IDX=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
12 START_MIN=inf;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
13 STOP_MAX=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
14 for j=1:size(genes(i).transcripts,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
15 if sum(genes(i).exons{j}(:,2)-genes(i).exons{j}(:,1))< CFG.sequenced_length
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
16 RM_TR_IDX=[RM_TR_IDX,j];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
17 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
18 START_MIN=min(START_MIN,genes(i).exons{j}(1,1));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
19 STOP_MAX=max(STOP_MAX,genes(i).exons{j}(end,2));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
20 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
21 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
22 if ~isempty(RM_TR_IDX)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
23 genes(i).exons(RM_TR_IDX)=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
24 genes(i).transcripts(RM_TR_IDX)=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
25 genes(i).start=START_MIN;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
26 genes(i).stop=STOP_MAX;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
27 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
28 if genes(i).start>START_MIN
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
29 genes(i).start=START_MIN;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
30 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
31 if genes(i).stop<STOP_MAX
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
32 genes(i).stop=STOP_MAX;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
33 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
34
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
35
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
36 %Check if exons are eronous
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
37 CHECK=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
38
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
39 if size(genes(i).transcripts,2)==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
40 CHECK=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
41 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
42
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
43 for j=1:size(genes(i).transcripts,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
44 for k=1:(size(genes(i).exons{j},1)-1)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
45 if (genes(i).exons{j}(k,2)> genes(i).exons{j}(k+1,1))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
46 CHECK=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
47 break;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
48 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
49 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
50
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
51 if isempty(genes(i).exons{j})
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
52 CHECK=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
53 break;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
54 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
55 if CHECK==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
56 break
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
57 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
58 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
59
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
60 if genes(i).stop-genes(i).start<=CFG.sequenced_length
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
61 CHECK=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
62 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
63 if CHECK==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
64 RM_GENES_IDX=[RM_GENES_IDX;i];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
65 genes(i).do_not_quant=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
66 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
67 genes(i).do_not_quant=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
68 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
69 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
70 %genes(RM_GENES_IDX)=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
71
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
72 % Create splicegraph
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
73 for i=1:size(genes,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
74 gene=genes(i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
75 ALL_EXO=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
76
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
77 for j=1:size(gene.exons,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
78 ALL_EXO=[ALL_EXO;gene.exons{j}];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
79 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
80 ALL_EXO=unique(ALL_EXO,'rows');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
81 GRAPH=zeros(size(ALL_EXO,1),size(ALL_EXO,1));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
82 for j=1:size(gene.exons,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
83 for k=1:(size(gene.exons{j},1)-1)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
84 [A,B]=intersect(ALL_EXO,gene.exons{j}(k,:),'rows');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
85 [A,C]=intersect(ALL_EXO,gene.exons{j}(k+1,:),'rows');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
86 GRAPH(B,C)=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
87 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
88 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
89 GRAPH=GRAPH+GRAPH';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
90 genes(i).splicegraph{1}=ALL_EXO';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
91 genes(i).splicegraph{2}=GRAPH;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
92 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
93
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
94
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
95