comparison rDiff/src/variance/estimate_variance_helper.m @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0f80a5141704
1 function [VARIANCE]=estimate_variance_helper(CFG,SAMPLE_IX,COUNTS_PER_GENE,GENE_EXPRESSION_MATRIX)
2
3
4 %compute means and variances
5 %Get the number of of positions to be used in MEANS and VARS
6 LENGTH=0;
7 for i=1:size(COUNTS_PER_GENE,1)
8 try
9 TEMP_SUM=COUNTS_PER_GENE{i, SAMPLE_IX(1)};
10 for j=2:length(SAMPLE_IX)
11 TEMP_SUM=TEMP_SUM+COUNTS_PER_GENE{i,SAMPLE_IX(j)};
12 end
13 LENGTH=LENGTH+sum(TEMP_SUM>0);
14 catch
15 LENGTH=LENGTH;
16 end
17 end
18
19 %Compute the means and variances, normalizing for gene expression
20 MEANS=zeros(1,LENGTH);
21 VARS=zeros(1,LENGTH);
22 COUNTER=1;
23 for i=1:size(COUNTS_PER_GENE,1)
24 try
25 TS=GENE_EXPRESSION_MATRIX(i, SAMPLE_IX);
26 S=length(TS)*TS/sum(TS);
27
28 TEMP_SUM=sparse(zeros(length(SAMPLE_IX),size(COUNTS_PER_GENE{i, SAMPLE_IX(1)},2)));
29 %if S(1)==0
30 % TEMP_SUM(1,:)=0;
31 %else
32 % TEMP_SUM(1,:)=COUNTS_PER_GENE{i, SAMPLE_IX(1)}/S(1);
33 %end
34 for j=1:length(SAMPLE_IX)
35 if S(j)==0
36 TEMP_SUM(j,:)=0;
37 else
38 TEMP_SUM(j,:)=COUNTS_PER_GENE{i,SAMPLE_IX(j)}/S(j);
39 end
40 end
41 TEMP_SUM=TEMP_SUM(:,sum(TEMP_SUM,1)>0);
42 MEANS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=mean(TEMP_SUM(:,sum(TEMP_SUM,1)>0),1);
43 VARS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=var(TEMP_SUM(:,sum(TEMP_SUM,1)>0));
44 COUNTER=COUNTER+size(TEMP_SUM,2);
45 catch
46 LENGTH=LENGTH;
47 end
48 end
49
50 %filter those which have a zeros mean
51 NONZERO_IDX=MEANS>0;
52 MEANS=MEANS(NONZERO_IDX);
53 VARS=VARS(NONZERO_IDX);
54 % Subsample the mean variance pairs to reduce the number of
55 % samples which have a low mean
56 [MEANS,POS]=sort(MEANS);
57 MAX_VAL=max(MEANS);
58 MIN_VAL=min(MEANS);
59 BOUNDS=exp(linspace(log(MIN_VAL),log(MAX_VAL), CFG.variance_samplebins+1));
60 SAMPLE=[];
61 for i=1:CFG.variance_samplebins
62 NR_IN_BIN=length((find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')));
63 if NR_IN_BIN==0
64 continue;
65 elseif NR_IN_BIN<= CFG.variance_samples_per_bin
66 SAMPLE=[SAMPLE find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')];
67 else
68 CHUNK_SAMPLE=find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last');
69 TEMP_SAMPLE=randperm(NR_IN_BIN);
70 SAMPLE=[SAMPLE CHUNK_SAMPLE(TEMP_SAMPLE(1:CFG.variance_samples_per_bin))];
71 end
72 end
73 VARS=VARS(POS);
74
75 %estimate variance function
76 [VARIANCE]=estimate_variance(full(MEANS(SAMPLE))',full(VARS(SAMPLE)'));
77 return