diff rDiff/src/perform_nonparametric_tests.m @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rDiff/src/perform_nonparametric_tests.m	Thu Feb 14 23:38:36 2013 -0500
@@ -0,0 +1,184 @@
+function []=perform_nonparametric_tests(CFG,genes,variance_function_nonparametric_1, variance_function_nonparametric_2)
+
+
+%Get the gene expression
+fprintf('Loading gene expression\n')
+if isempty(CFG.Counts_gene_expression)
+    EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab'];
+else
+    EXPR_TAB_FILENAME=CFG.Counts_gene_expression;
+end
+
+try
+    Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1);
+    Gene_expression=Gene_expression.data;
+catch
+    error(['Could not open: ' EXPR_TAB_FILENAME])
+end
+  
+
+%Get the counts
+fprintf('Loading nonparametric region counts\n')
+if isempty(CFG.Counts_rDiff_nonparametric)
+    IN_FILENAME=[CFG.out_base 'Nonparametric_region_counts.mat'];
+    load(IN_FILENAME,'Counts_rDiff_nonparametric')
+else
+    IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_nonparametric];
+    load(IN_FILENAMEc,'Counts_rDiff_nonparametric') 
+end
+
+
+
+if CFG.use_rproc
+   JB_NR=1;
+   JOB_INFO = rproc_empty();
+end
+
+PAR.variance_function_nonparametric_1=variance_function_nonparametric_1;
+PAR.variance_function_nonparametric_2=variance_function_nonparametric_2;
+if 1==1
+%%% Perform the test
+% configuration
+if not(CFG.use_rproc)
+    fprintf('Performing nonparametric testing\n')
+end
+%define the splits of the genes for the jobs
+idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; 
+% submit jobs to cluster
+
+for i = 1:CFG.rproc_num_jobs
+    PAR.genes = genes(idx(idx(:,2)==i,1)); 
+    PAR.Counts_rDiff_nonparametric=Counts_rDiff_nonparametric(idx(idx(:,2)==i,1),:);
+    PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:);
+    CFG.rproc_memreq = 5000;
+    CFG.rproc_par.mem_req_resubmit = [5000 10000 32000];
+    CFG.rproc_par.identifier = sprintf('Pnp.%i-',i);  
+    CFG.outfile_prefix=[CFG.out_base_temp 'P_values_nonparametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)];
+    PAR.CFG=CFG;
+    if CFG.use_rproc
+        fprintf(1, 'Submitting job %i to cluster\n',i);
+        JOB_INFO(JB_NR) = rproc('get_nonparametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time); 
+        JB_NR=JB_NR+1;
+    else
+        get_nonparametric_tests_caller(PAR);
+    end
+end
+if CFG.use_rproc
+    [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1);
+end
+end
+% Get the test results
+
+%%% Generate the output files
+fprintf('Reading temporary results\n')
+P_values_rDiff_nonparametric=ones(size(genes,2),1);
+P_values_rDiff_mmd=ones(size(genes,2),1);
+
+
+P_values_rDiff_nonparametric_error_flag=cell(size(genes,2),1);
+P_values_rDiff_mmd_error_flag=cell(size(genes,2),1);
+NAMES=cell(size(genes,2),1);
+%Field containing the errors
+ERRORS_NR=[];
+idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; 
+% Iterate over the result files to load the data from the count files
+for j = 1:CFG.rproc_num_jobs
+    IN_FILENAME=[CFG.out_base_temp 'P_values_nonparametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)];
+    IDX=idx(idx(:,2)==j,1);
+    try
+        load(IN_FILENAME)
+        for k=1:length(IDX)
+            if isempty(P_VALS{k,1}) %Gene was not tested for
+                                          %some reason
+                P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
+                P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
+            else
+                if not(isempty(P_VALS{k,1}))
+                    NAMES{IDX(k)}=P_VALS{k,1};
+                end
+                COUNTER=2;
+                %Get the results from rDiff.mmd
+                if CFG.perform_mmd
+                    if not(isempty(P_VALS{k,COUNTER}))
+                        P_values_rDiff_mmd(IDX(k))=P_VALS{k,COUNTER}{1};
+                        if (isempty(P_VALS{k,COUNTER}{2}))
+                            P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
+                        else
+                            P_values_rDiff_mmd_error_flag{IDX(k)}='OK';
+                        end
+                    end
+                    COUNTER=COUNTER+1;
+                end
+                
+                %Get the results from rDiff.parametric
+                if CFG.perform_nonparametric
+                    if not(isempty(P_VALS{k,COUNTER}))
+                        P_values_rDiff_nonparametric(IDX(k))=P_VALS{k,COUNTER}{1};
+			if length(P_VALS{k,COUNTER})>1
+			  if iscell(P_VALS{k,COUNTER}{2}) && length(P_VALS{k,COUNTER}{2}{3})>3
+			    P_values_rDiff_nonparametric(IDX(k))=min(10*min(P_VALS{k,COUNTER}{2}{3})+max(P_VALS{k,COUNTER}{2}{3})*(10/(CFG.bootstraps+1)),1);
+			  end
+			end
+                        if (isempty(P_VALS{k,COUNTER}{2}))
+                            P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
+                        else
+                            P_values_rDiff_nonparametric_error_flag{IDX(k)}='OK';
+                        end
+                    end;
+                end
+                
+            end
+        end
+    catch
+        for k=1:length(IDX)
+            P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
+            P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
+        end
+        warning(['There was a problem loading: ' IN_FILENAME ])
+        ERRORS_NR=[ERRORS_NR;j];
+    end
+end
+if not(isempty(ERRORS_NR))
+    warning('There have been problems loading some of the parametric test result files');
+end
+
+fprintf('Writing output files\n')
+%Generate P-value table for rDiff.nonparametric
+if CFG.perform_nonparametric
+%Open file handler
+    P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_nonparametric.tab'];
+    fid=fopen(P_TABLE_FNAME,'w');
+    
+    %print header
+    fprintf(fid,'gene\tp-value\ttest-status\n');
+    
+    %print lines
+    for j=1:size(genes,2)
+        fprintf(fid,'%s',NAMES{j});
+        fprintf(fid,'\t%f\t%s\n',P_values_rDiff_nonparametric(j),P_values_rDiff_nonparametric_error_flag{j});
+    end
+    %close file handler
+    fclose(fid)
+end
+
+
+
+%Generate P-value table for rDiff.mmd
+if CFG.perform_mmd
+%Open file handler
+    P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_mmd.tab'];
+    fid=fopen(P_TABLE_FNAME,'w');
+    
+    %print header
+    fprintf(fid,'gene\tp-value\ttest-status\n');
+    
+    %print lines
+    for j=1:size(genes,2)
+        fprintf(fid,'%s',NAMES{j});
+        fprintf(fid,'\t%f\t%s\n',P_values_rDiff_mmd(j),P_values_rDiff_mmd_error_flag{j});
+    end
+    %close file handler
+    fclose(fid)
+end
+
+