| 0 | 1 function [VARIANCE]=estimate_variance_helper(CFG,SAMPLE_IX,COUNTS_PER_GENE,GENE_EXPRESSION_MATRIX) | 
|  | 2 | 
|  | 3 | 
|  | 4 %compute means and variances | 
|  | 5 %Get the number of of positions to be used in MEANS and VARS | 
|  | 6 LENGTH=0; | 
|  | 7 for i=1:size(COUNTS_PER_GENE,1) | 
|  | 8     try | 
|  | 9         TEMP_SUM=COUNTS_PER_GENE{i, SAMPLE_IX(1)}; | 
|  | 10         for j=2:length(SAMPLE_IX) | 
|  | 11             TEMP_SUM=TEMP_SUM+COUNTS_PER_GENE{i,SAMPLE_IX(j)}; | 
|  | 12         end | 
|  | 13         LENGTH=LENGTH+sum(TEMP_SUM>0); | 
|  | 14     catch | 
|  | 15         LENGTH=LENGTH; | 
|  | 16     end | 
|  | 17 end | 
|  | 18 | 
|  | 19 %Compute the means and variances, normalizing for gene expression | 
|  | 20 MEANS=zeros(1,LENGTH); | 
|  | 21 VARS=zeros(1,LENGTH); | 
|  | 22 COUNTER=1; | 
|  | 23 for i=1:size(COUNTS_PER_GENE,1) | 
|  | 24     try | 
|  | 25         TS=GENE_EXPRESSION_MATRIX(i, SAMPLE_IX); | 
|  | 26         S=length(TS)*TS/sum(TS); | 
|  | 27 | 
|  | 28         TEMP_SUM=sparse(zeros(length(SAMPLE_IX),size(COUNTS_PER_GENE{i, SAMPLE_IX(1)},2))); | 
|  | 29 	%if S(1)==0 | 
|  | 30 	%  TEMP_SUM(1,:)=0; | 
|  | 31 	%else | 
|  | 32 	%  TEMP_SUM(1,:)=COUNTS_PER_GENE{i, SAMPLE_IX(1)}/S(1); | 
|  | 33         %end | 
|  | 34 	for j=1:length(SAMPLE_IX) | 
|  | 35 	  if S(j)==0 | 
|  | 36 	    TEMP_SUM(j,:)=0; | 
|  | 37 	  else | 
|  | 38             TEMP_SUM(j,:)=COUNTS_PER_GENE{i,SAMPLE_IX(j)}/S(j); | 
|  | 39 	  end | 
|  | 40 	end | 
|  | 41         TEMP_SUM=TEMP_SUM(:,sum(TEMP_SUM,1)>0); | 
|  | 42         MEANS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=mean(TEMP_SUM(:,sum(TEMP_SUM,1)>0),1); | 
|  | 43         VARS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=var(TEMP_SUM(:,sum(TEMP_SUM,1)>0)); | 
|  | 44         COUNTER=COUNTER+size(TEMP_SUM,2); | 
|  | 45     catch | 
|  | 46         LENGTH=LENGTH; | 
|  | 47     end | 
|  | 48 end | 
|  | 49 | 
|  | 50 %filter those which have a zeros mean | 
|  | 51 NONZERO_IDX=MEANS>0; | 
|  | 52 MEANS=MEANS(NONZERO_IDX); | 
|  | 53 VARS=VARS(NONZERO_IDX); | 
|  | 54 % Subsample the mean variance pairs to reduce the number of | 
|  | 55 % samples which have a low mean | 
|  | 56 [MEANS,POS]=sort(MEANS); | 
|  | 57 MAX_VAL=max(MEANS); | 
|  | 58 MIN_VAL=min(MEANS); | 
|  | 59 BOUNDS=exp(linspace(log(MIN_VAL),log(MAX_VAL), CFG.variance_samplebins+1)); | 
|  | 60 SAMPLE=[]; | 
|  | 61 for i=1:CFG.variance_samplebins | 
|  | 62     NR_IN_BIN=length((find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'))); | 
|  | 63     if NR_IN_BIN==0 | 
|  | 64         continue; | 
|  | 65     elseif NR_IN_BIN<= CFG.variance_samples_per_bin | 
|  | 66         SAMPLE=[SAMPLE find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')]; | 
|  | 67     else | 
|  | 68         CHUNK_SAMPLE=find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'); | 
|  | 69         TEMP_SAMPLE=randperm(NR_IN_BIN); | 
|  | 70         SAMPLE=[SAMPLE CHUNK_SAMPLE(TEMP_SAMPLE(1:CFG.variance_samples_per_bin))]; | 
|  | 71     end | 
|  | 72 end | 
|  | 73 VARS=VARS(POS); | 
|  | 74 | 
|  | 75 %estimate variance function | 
|  | 76 [VARIANCE]=estimate_variance(full(MEANS(SAMPLE))',full(VARS(SAMPLE)')); | 
|  | 77 return |