Mercurial > repos > vipints > rdiff
comparison rDiff/src/perform_parametric_tests.m @ 0:0f80a5141704
version 0.3 uploaded
| author | vipints |
|---|---|
| date | Thu, 14 Feb 2013 23:38:36 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0f80a5141704 |
|---|---|
| 1 function []=perform_parametric_tests(CFG,genes,variance_function_parametric_1, variance_function_parametric_2) | |
| 2 | |
| 3 | |
| 4 | |
| 5 %Get the gene expression | |
| 6 fprintf('Loading gene expression\n') | |
| 7 if isempty(CFG.Counts_gene_expression) | |
| 8 EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab']; | |
| 9 else | |
| 10 EXPR_TAB_FILENAME=CFG.Counts_gene_expression; | |
| 11 end | |
| 12 | |
| 13 try | |
| 14 Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1); | |
| 15 Gene_expression=Gene_expression.data; | |
| 16 catch | |
| 17 error(['Could not open: ' EXPR_TAB_FILENAME]) | |
| 18 end | |
| 19 | |
| 20 %Get the counts | |
| 21 fprintf('Loading alternative region counts\n') | |
| 22 if isempty(CFG.Counts_rDiff_parametric) | |
| 23 IN_FILENAME=[CFG.out_base 'Alternative_region_counts.mat']; | |
| 24 load(IN_FILENAME,'Counts_rDiff_parametric') | |
| 25 else | |
| 26 IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_parametric]; | |
| 27 load(IN_FILENAMEc,'Counts_rDiff_parametric') | |
| 28 end | |
| 29 | |
| 30 | |
| 31 | |
| 32 if CFG.use_rproc | |
| 33 JB_NR=1; | |
| 34 JOB_INFO = rproc_empty(); | |
| 35 end | |
| 36 | |
| 37 PAR.variance_function_parametric_1=variance_function_parametric_1; | |
| 38 PAR.variance_function_parametric_2=variance_function_parametric_2; | |
| 39 | |
| 40 %%% Perform the test | |
| 41 % configuration | |
| 42 if not(CFG.use_rproc) | |
| 43 fprintf('Performing parametric testing\n') | |
| 44 end | |
| 45 | |
| 46 %define the splits of the genes for the jobs | |
| 47 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; | |
| 48 % submit jobs to cluster | |
| 49 for i = 1:CFG.rproc_num_jobs | |
| 50 PAR.genes = genes(idx(idx(:,2)==i,1)); | |
| 51 PAR.Counts_rDiff_parametric=Counts_rDiff_parametric(idx(idx(:,2)==i,1),:); | |
| 52 PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:); | |
| 53 CFG.rproc_memreq = 5000; | |
| 54 CFG.rproc_par.mem_req_resubmit = [5000 10000 32000]; | |
| 55 CFG.rproc_par.identifier = sprintf('Pp.%i-',i); | |
| 56 CFG.outfile_prefix=[CFG.out_base_temp 'P_values_parametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)]; | |
| 57 PAR.CFG=CFG; | |
| 58 if CFG.use_rproc | |
| 59 fprintf(1, 'Submitting job %i to cluster\n',i); | |
| 60 JOB_INFO(JB_NR) = rproc('get_parametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time); | |
| 61 JB_NR=JB_NR+1; | |
| 62 else | |
| 63 get_parametric_tests_caller(PAR); | |
| 64 end | |
| 65 end | |
| 66 if CFG.use_rproc | |
| 67 [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1); | |
| 68 end | |
| 69 | |
| 70 % Get the test results | |
| 71 | |
| 72 | |
| 73 | |
| 74 | |
| 75 %%% Generate the output files | |
| 76 fprintf('Reading temporary results\n') | |
| 77 P_values_rDiff_parametric=ones(size(genes,2),1); | |
| 78 P_values_rDiff_poisson=ones(size(genes,2),1); | |
| 79 | |
| 80 | |
| 81 P_values_rDiff_parametric_error_flag=cell(size(genes,2),1); | |
| 82 P_values_rDiff_poisson_error_flag=cell(size(genes,2),1); | |
| 83 NAMES=cell(size(genes,2),1); | |
| 84 %Field containing the errors | |
| 85 ERRORS_NR=[]; | |
| 86 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; | |
| 87 % Iterate over the result files to load the data from the count files | |
| 88 for j = 1:CFG.rproc_num_jobs | |
| 89 IN_FILENAME=[CFG.out_base_temp 'P_values_parametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)]; | |
| 90 | |
| 91 IDX=idx(idx(:,2)==j,1); | |
| 92 try | |
| 93 load(IN_FILENAME) | |
| 94 for k=1:length(IDX) | |
| 95 if isempty(P_VALS{k,1}) %Gene was not tested for | |
| 96 %some reason | |
| 97 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED'; | |
| 98 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED'; | |
| 99 else | |
| 100 if not(isempty(P_VALS{k,1})) | |
| 101 NAMES{IDX(k)}=P_VALS{k,1}; | |
| 102 end | |
| 103 COUNTER=2; | |
| 104 %Get the results from rDiff.parametric | |
| 105 if CFG.perform_parametric | |
| 106 if not(isempty(P_VALS{k,COUNTER})) | |
| 107 P_values_rDiff_parametric(IDX(k))=P_VALS{k,COUNTER}{1}; | |
| 108 if not(isempty(P_VALS{k,COUNTER}{2})) | |
| 109 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED'; | |
| 110 else | |
| 111 P_values_rDiff_parametric_error_flag{IDX(k)}='OK'; | |
| 112 end | |
| 113 COUNTER=COUNTER+1; | |
| 114 end | |
| 115 end | |
| 116 | |
| 117 %Get the results from rDiff.poisson | |
| 118 if CFG.perform_poisson | |
| 119 if not(isempty(P_VALS{k,COUNTER})) | |
| 120 P_values_rDiff_poisson(IDX(k))=P_VALS{k,COUNTER}{1}; | |
| 121 if not(isempty(P_VALS{k,COUNTER}{2})) | |
| 122 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED'; | |
| 123 else | |
| 124 P_values_rDiff_poisson_error_flag{IDX(k)}='OK'; | |
| 125 end | |
| 126 end | |
| 127 end | |
| 128 end | |
| 129 end | |
| 130 catch | |
| 131 for k=1:length(IDX) | |
| 132 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED'; | |
| 133 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED'; | |
| 134 end | |
| 135 warning(['There was a problem loading: ' IN_FILENAME ]) | |
| 136 ERRORS_NR=[ERRORS_NR;j]; | |
| 137 end | |
| 138 end | |
| 139 if not(isempty(ERRORS_NR)) | |
| 140 warning('There have been problems loading some of the parametric test result files'); | |
| 141 end | |
| 142 | |
| 143 fprintf('Writing output files\n') | |
| 144 %Generate P-value table for rDiff.nonparametric | |
| 145 if CFG.perform_parametric | |
| 146 %Open file handler | |
| 147 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_parametric.tab']; | |
| 148 fid=fopen(P_TABLE_FNAME,'w'); | |
| 149 | |
| 150 %print header | |
| 151 fprintf(fid,'gene\tp-value\ttest-status\n'); | |
| 152 | |
| 153 %print lines | |
| 154 for j=1:size(genes,2) | |
| 155 fprintf(fid,'%s',NAMES{j}); | |
| 156 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_parametric(j),P_values_rDiff_parametric_error_flag{j}); | |
| 157 end | |
| 158 %close file handler | |
| 159 fclose(fid); | |
| 160 end | |
| 161 | |
| 162 | |
| 163 | |
| 164 %Generate P-value table for rDiff.poisson | |
| 165 if CFG.perform_poisson | |
| 166 %Open file handler | |
| 167 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_poisson.tab']; | |
| 168 fid=fopen(P_TABLE_FNAME,'w'); | |
| 169 | |
| 170 %print header | |
| 171 fprintf(fid,'gene\tp-value\ttest-status\n'); | |
| 172 | |
| 173 %print lines | |
| 174 for j=1:size(genes,2) | |
| 175 fprintf(fid,'%s',NAMES{j}); | |
| 176 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_poisson(j),P_values_rDiff_poisson_error_flag{j}); | |
| 177 end | |
| 178 %close file handler | |
| 179 fclose(fid); | |
| 180 end | |
| 181 | |
| 182 |
