view rDiff/src/tests/rDiff_poisson.m @ 2:233c30f91d66

updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 07:15:44 -0400
parents 0f80a5141704
children
line wrap: on
line source

function [P_VALUE, RET_STRUCT]= rDiff_poisson(CFG,gene,Counts_rDiff_parametric,Gene_expression)

% Calculates the p-Values of a poisson test on each
% alternative regions and combines the p-values using Bonferroni's correction


%Initialize gene.name
NR_OF_TRANS=size(gene.transcripts,2);
if NR_OF_TRANS<=1
    RET_STRUCT='NR_OF_TRANS too small';
    P_VALUE=1;
    return
end


%get the samples that are expressed (have more than 10 reads)
TEMP_SAMPLE1=and(CFG.SAMPLES==1,Gene_expression>=10);
TEMP_SAMPLE2=and(CFG.SAMPLES==2,Gene_expression>=10);
SAMPLE1=find(TEMP_SAMPLE1);
SAMPLE2=find(TEMP_SAMPLE2);

%Check wether Counts_rDiff_parametric is nonempty
for j=1:length(TEMP_SAMPLE1)
  TEMP_SAMPLE1(j)=and(not(isempty(Counts_rDiff_parametric{j})), ...
		      TEMP_SAMPLE1(j));
end
for j=1:length(TEMP_SAMPLE2)
  TEMP_SAMPLE2(j)=and(not(isempty(Counts_rDiff_parametric{j})), ...
		      TEMP_SAMPLE2(j));
end

SAMPLE1=find(TEMP_SAMPLE1);
SAMPLE2=find(TEMP_SAMPLE2);



SAMPLE_LENGTH1=length(SAMPLE1);
SAMPLE_LENGTH2=length(SAMPLE2);

if min(SAMPLE_LENGTH1,SAMPLE_LENGTH2)==0
    RET_STRUCT='SAMPLE_LENGTH too small';
    P_VALUE=1;
    return
end
 
% Get the region counts
region_counts_1=zeros(SAMPLE_LENGTH1,length(Counts_rDiff_parametric{1,1}));
for j=1:SAMPLE_LENGTH1
    region_counts_1(j,:)=Counts_rDiff_parametric{1,SAMPLE1(j)};
end
region_counts_2=zeros(SAMPLE_LENGTH2,length(Counts_rDiff_parametric{1,1}));
for j=1:SAMPLE_LENGTH2
    region_counts_2(j,:)=Counts_rDiff_parametric{1,SAMPLE2(j)};
end

% Get the gene expression
gene_expression_1=sum(Gene_expression(SAMPLE1));
gene_expression_2=sum(Gene_expression(SAMPLE2));

%compute the p-values
OBSERVED_COUNTS=[sum(region_counts_1,1);sum(region_counts_2,1)];
TOTAL_COUNTS=sum(OBSERVED_COUNTS,1);

%calculate gene expression ratio
LAMBDA=gene_expression_1/(gene_expression_2+gene_expression_1);


P_LIST=ones(1,size(TOTAL_COUNTS,2));
%Iterate over the regions
SKIPPED_TESTS=0;
for i=1:length(P_LIST)
    if sum(TOTAL_COUNTS(i))==0
        SKIPPED_TESTS=SKIPPED_TESTS+1;
        continue
    end
    
    MEAN=LAMBDA*TOTAL_COUNTS(i);
    VARIANCE=(TOTAL_COUNTS(i)*LAMBDA*(1-LAMBDA)).^0.5;      
    Y=sum(((MEAN-OBSERVED_COUNTS(1,i))./VARIANCE).^2).^0.5;
    
    %Calculate the p-value
    P_VALUE=1-gammainc((Y^2)/2,1/2);
    
    P_LIST(i)=P_VALUE;
end
if length(P_LIST)-SKIPPED_TESTS<=0
  P_VALUE=1;
else
  P_VALUE=min(P_LIST)*(length(P_LIST)-SKIPPED_TESTS);
end

RET_STRUCT={};
return