0
|
1 function [genes]=sanitize_genes(genes,CFG)
|
|
2 %This function removes trnascript and genes which have a invalid
|
|
3 %structure and recomputes the splicegraph
|
|
4
|
|
5
|
|
6 %Mark genes with eronous exon definitions
|
|
7 RM_GENES_IDX=[]; %genes to keep
|
|
8 for i=1:size(genes,2)
|
|
9 %remove transcripts which have a length smaller than
|
|
10 %readlength
|
|
11 RM_TR_IDX=[];
|
|
12 START_MIN=inf;
|
|
13 STOP_MAX=0;
|
|
14 for j=1:size(genes(i).transcripts,2)
|
|
15 if sum(genes(i).exons{j}(:,2)-genes(i).exons{j}(:,1))< CFG.sequenced_length
|
|
16 RM_TR_IDX=[RM_TR_IDX,j];
|
|
17 else
|
|
18 START_MIN=min(START_MIN,genes(i).exons{j}(1,1));
|
|
19 STOP_MAX=max(STOP_MAX,genes(i).exons{j}(end,2));
|
|
20 end
|
|
21 end
|
|
22 if ~isempty(RM_TR_IDX)
|
|
23 genes(i).exons(RM_TR_IDX)=[];
|
|
24 genes(i).transcripts(RM_TR_IDX)=[];
|
|
25 genes(i).start=START_MIN;
|
|
26 genes(i).stop=STOP_MAX;
|
|
27 end
|
|
28 if genes(i).start>START_MIN
|
|
29 genes(i).start=START_MIN;
|
|
30 end
|
|
31 if genes(i).stop<STOP_MAX
|
|
32 genes(i).stop=STOP_MAX;
|
|
33 end
|
|
34
|
|
35
|
|
36 %Check if exons are eronous
|
|
37 CHECK=1;
|
|
38
|
|
39 if size(genes(i).transcripts,2)==0
|
|
40 CHECK=0;
|
|
41 end
|
|
42
|
|
43 for j=1:size(genes(i).transcripts,2)
|
|
44 for k=1:(size(genes(i).exons{j},1)-1)
|
|
45 if (genes(i).exons{j}(k,2)> genes(i).exons{j}(k+1,1))
|
|
46 CHECK=0;
|
|
47 break;
|
|
48 end
|
|
49 end
|
|
50
|
|
51 if isempty(genes(i).exons{j})
|
|
52 CHECK=0;
|
|
53 break;
|
|
54 end
|
|
55 if CHECK==0
|
|
56 break
|
|
57 end
|
|
58 end
|
|
59
|
|
60 if genes(i).stop-genes(i).start<=CFG.sequenced_length
|
|
61 CHECK=0;
|
|
62 end
|
|
63 if CHECK==0
|
|
64 RM_GENES_IDX=[RM_GENES_IDX;i];
|
|
65 genes(i).do_not_quant=1;
|
|
66 else
|
|
67 genes(i).do_not_quant=0;
|
|
68 end
|
|
69 end
|
|
70 %genes(RM_GENES_IDX)=[];
|
|
71
|
|
72 % Create splicegraph
|
|
73 for i=1:size(genes,2)
|
|
74 gene=genes(i);
|
|
75 ALL_EXO=[];
|
|
76
|
|
77 for j=1:size(gene.exons,2)
|
|
78 ALL_EXO=[ALL_EXO;gene.exons{j}];
|
|
79 end
|
|
80 ALL_EXO=unique(ALL_EXO,'rows');
|
|
81 GRAPH=zeros(size(ALL_EXO,1),size(ALL_EXO,1));
|
|
82 for j=1:size(gene.exons,2)
|
|
83 for k=1:(size(gene.exons{j},1)-1)
|
|
84 [A,B]=intersect(ALL_EXO,gene.exons{j}(k,:),'rows');
|
|
85 [A,C]=intersect(ALL_EXO,gene.exons{j}(k+1,:),'rows');
|
|
86 GRAPH(B,C)=1;
|
|
87 end
|
|
88 end
|
|
89 GRAPH=GRAPH+GRAPH';
|
|
90 genes(i).splicegraph{1}=ALL_EXO';
|
|
91 genes(i).splicegraph{2}=GRAPH;
|
|
92 end
|
|
93
|
|
94
|
|
95 |