Mercurial > repos > vipints > rdiff
diff rDiff/src/tools/sanitize_genes.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rDiff/src/tools/sanitize_genes.m Thu Feb 14 23:38:36 2013 -0500 @@ -0,0 +1,95 @@ +function [genes]=sanitize_genes(genes,CFG) +%This function removes trnascript and genes which have a invalid +%structure and recomputes the splicegraph + + +%Mark genes with eronous exon definitions +RM_GENES_IDX=[]; %genes to keep +for i=1:size(genes,2) + %remove transcripts which have a length smaller than + %readlength + RM_TR_IDX=[]; + START_MIN=inf; + STOP_MAX=0; + for j=1:size(genes(i).transcripts,2) + if sum(genes(i).exons{j}(:,2)-genes(i).exons{j}(:,1))< CFG.sequenced_length + RM_TR_IDX=[RM_TR_IDX,j]; + else + START_MIN=min(START_MIN,genes(i).exons{j}(1,1)); + STOP_MAX=max(STOP_MAX,genes(i).exons{j}(end,2)); + end + end + if ~isempty(RM_TR_IDX) + genes(i).exons(RM_TR_IDX)=[]; + genes(i).transcripts(RM_TR_IDX)=[]; + genes(i).start=START_MIN; + genes(i).stop=STOP_MAX; + end + if genes(i).start>START_MIN + genes(i).start=START_MIN; + end + if genes(i).stop<STOP_MAX + genes(i).stop=STOP_MAX; + end + + + %Check if exons are eronous + CHECK=1; + + if size(genes(i).transcripts,2)==0 + CHECK=0; + end + + for j=1:size(genes(i).transcripts,2) + for k=1:(size(genes(i).exons{j},1)-1) + if (genes(i).exons{j}(k,2)> genes(i).exons{j}(k+1,1)) + CHECK=0; + break; + end + end + + if isempty(genes(i).exons{j}) + CHECK=0; + break; + end + if CHECK==0 + break + end + end + + if genes(i).stop-genes(i).start<=CFG.sequenced_length + CHECK=0; + end + if CHECK==0 + RM_GENES_IDX=[RM_GENES_IDX;i]; + genes(i).do_not_quant=1; + else + genes(i).do_not_quant=0; + end +end +%genes(RM_GENES_IDX)=[]; + +% Create splicegraph +for i=1:size(genes,2) + gene=genes(i); + ALL_EXO=[]; + + for j=1:size(gene.exons,2) + ALL_EXO=[ALL_EXO;gene.exons{j}]; + end + ALL_EXO=unique(ALL_EXO,'rows'); + GRAPH=zeros(size(ALL_EXO,1),size(ALL_EXO,1)); + for j=1:size(gene.exons,2) + for k=1:(size(gene.exons{j},1)-1) + [A,B]=intersect(ALL_EXO,gene.exons{j}(k,:),'rows'); + [A,C]=intersect(ALL_EXO,gene.exons{j}(k+1,:),'rows'); + GRAPH(B,C)=1; + end + end + GRAPH=GRAPH+GRAPH'; + genes(i).splicegraph{1}=ALL_EXO'; + genes(i).splicegraph{2}=GRAPH; +end + + + \ No newline at end of file