Mercurial > repos > vipints > rdiff
comparison rDiff/src/variance/estimate_variance_helper.m @ 0:0f80a5141704
version 0.3 uploaded
| author | vipints |
|---|---|
| date | Thu, 14 Feb 2013 23:38:36 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0f80a5141704 |
|---|---|
| 1 function [VARIANCE]=estimate_variance_helper(CFG,SAMPLE_IX,COUNTS_PER_GENE,GENE_EXPRESSION_MATRIX) | |
| 2 | |
| 3 | |
| 4 %compute means and variances | |
| 5 %Get the number of of positions to be used in MEANS and VARS | |
| 6 LENGTH=0; | |
| 7 for i=1:size(COUNTS_PER_GENE,1) | |
| 8 try | |
| 9 TEMP_SUM=COUNTS_PER_GENE{i, SAMPLE_IX(1)}; | |
| 10 for j=2:length(SAMPLE_IX) | |
| 11 TEMP_SUM=TEMP_SUM+COUNTS_PER_GENE{i,SAMPLE_IX(j)}; | |
| 12 end | |
| 13 LENGTH=LENGTH+sum(TEMP_SUM>0); | |
| 14 catch | |
| 15 LENGTH=LENGTH; | |
| 16 end | |
| 17 end | |
| 18 | |
| 19 %Compute the means and variances, normalizing for gene expression | |
| 20 MEANS=zeros(1,LENGTH); | |
| 21 VARS=zeros(1,LENGTH); | |
| 22 COUNTER=1; | |
| 23 for i=1:size(COUNTS_PER_GENE,1) | |
| 24 try | |
| 25 TS=GENE_EXPRESSION_MATRIX(i, SAMPLE_IX); | |
| 26 S=length(TS)*TS/sum(TS); | |
| 27 | |
| 28 TEMP_SUM=sparse(zeros(length(SAMPLE_IX),size(COUNTS_PER_GENE{i, SAMPLE_IX(1)},2))); | |
| 29 %if S(1)==0 | |
| 30 % TEMP_SUM(1,:)=0; | |
| 31 %else | |
| 32 % TEMP_SUM(1,:)=COUNTS_PER_GENE{i, SAMPLE_IX(1)}/S(1); | |
| 33 %end | |
| 34 for j=1:length(SAMPLE_IX) | |
| 35 if S(j)==0 | |
| 36 TEMP_SUM(j,:)=0; | |
| 37 else | |
| 38 TEMP_SUM(j,:)=COUNTS_PER_GENE{i,SAMPLE_IX(j)}/S(j); | |
| 39 end | |
| 40 end | |
| 41 TEMP_SUM=TEMP_SUM(:,sum(TEMP_SUM,1)>0); | |
| 42 MEANS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=mean(TEMP_SUM(:,sum(TEMP_SUM,1)>0),1); | |
| 43 VARS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=var(TEMP_SUM(:,sum(TEMP_SUM,1)>0)); | |
| 44 COUNTER=COUNTER+size(TEMP_SUM,2); | |
| 45 catch | |
| 46 LENGTH=LENGTH; | |
| 47 end | |
| 48 end | |
| 49 | |
| 50 %filter those which have a zeros mean | |
| 51 NONZERO_IDX=MEANS>0; | |
| 52 MEANS=MEANS(NONZERO_IDX); | |
| 53 VARS=VARS(NONZERO_IDX); | |
| 54 % Subsample the mean variance pairs to reduce the number of | |
| 55 % samples which have a low mean | |
| 56 [MEANS,POS]=sort(MEANS); | |
| 57 MAX_VAL=max(MEANS); | |
| 58 MIN_VAL=min(MEANS); | |
| 59 BOUNDS=exp(linspace(log(MIN_VAL),log(MAX_VAL), CFG.variance_samplebins+1)); | |
| 60 SAMPLE=[]; | |
| 61 for i=1:CFG.variance_samplebins | |
| 62 NR_IN_BIN=length((find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'))); | |
| 63 if NR_IN_BIN==0 | |
| 64 continue; | |
| 65 elseif NR_IN_BIN<= CFG.variance_samples_per_bin | |
| 66 SAMPLE=[SAMPLE find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')]; | |
| 67 else | |
| 68 CHUNK_SAMPLE=find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'); | |
| 69 TEMP_SAMPLE=randperm(NR_IN_BIN); | |
| 70 SAMPLE=[SAMPLE CHUNK_SAMPLE(TEMP_SAMPLE(1:CFG.variance_samples_per_bin))]; | |
| 71 end | |
| 72 end | |
| 73 VARS=VARS(POS); | |
| 74 | |
| 75 %estimate variance function | |
| 76 [VARIANCE]=estimate_variance(full(MEANS(SAMPLE))',full(VARS(SAMPLE)')); | |
| 77 return |
