Mercurial > repos > vipints > rdiff
comparison rDiff/src/perform_parametric_tests.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0f80a5141704 |
---|---|
1 function []=perform_parametric_tests(CFG,genes,variance_function_parametric_1, variance_function_parametric_2) | |
2 | |
3 | |
4 | |
5 %Get the gene expression | |
6 fprintf('Loading gene expression\n') | |
7 if isempty(CFG.Counts_gene_expression) | |
8 EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab']; | |
9 else | |
10 EXPR_TAB_FILENAME=CFG.Counts_gene_expression; | |
11 end | |
12 | |
13 try | |
14 Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1); | |
15 Gene_expression=Gene_expression.data; | |
16 catch | |
17 error(['Could not open: ' EXPR_TAB_FILENAME]) | |
18 end | |
19 | |
20 %Get the counts | |
21 fprintf('Loading alternative region counts\n') | |
22 if isempty(CFG.Counts_rDiff_parametric) | |
23 IN_FILENAME=[CFG.out_base 'Alternative_region_counts.mat']; | |
24 load(IN_FILENAME,'Counts_rDiff_parametric') | |
25 else | |
26 IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_parametric]; | |
27 load(IN_FILENAMEc,'Counts_rDiff_parametric') | |
28 end | |
29 | |
30 | |
31 | |
32 if CFG.use_rproc | |
33 JB_NR=1; | |
34 JOB_INFO = rproc_empty(); | |
35 end | |
36 | |
37 PAR.variance_function_parametric_1=variance_function_parametric_1; | |
38 PAR.variance_function_parametric_2=variance_function_parametric_2; | |
39 | |
40 %%% Perform the test | |
41 % configuration | |
42 if not(CFG.use_rproc) | |
43 fprintf('Performing parametric testing\n') | |
44 end | |
45 | |
46 %define the splits of the genes for the jobs | |
47 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; | |
48 % submit jobs to cluster | |
49 for i = 1:CFG.rproc_num_jobs | |
50 PAR.genes = genes(idx(idx(:,2)==i,1)); | |
51 PAR.Counts_rDiff_parametric=Counts_rDiff_parametric(idx(idx(:,2)==i,1),:); | |
52 PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:); | |
53 CFG.rproc_memreq = 5000; | |
54 CFG.rproc_par.mem_req_resubmit = [5000 10000 32000]; | |
55 CFG.rproc_par.identifier = sprintf('Pp.%i-',i); | |
56 CFG.outfile_prefix=[CFG.out_base_temp 'P_values_parametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)]; | |
57 PAR.CFG=CFG; | |
58 if CFG.use_rproc | |
59 fprintf(1, 'Submitting job %i to cluster\n',i); | |
60 JOB_INFO(JB_NR) = rproc('get_parametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time); | |
61 JB_NR=JB_NR+1; | |
62 else | |
63 get_parametric_tests_caller(PAR); | |
64 end | |
65 end | |
66 if CFG.use_rproc | |
67 [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1); | |
68 end | |
69 | |
70 % Get the test results | |
71 | |
72 | |
73 | |
74 | |
75 %%% Generate the output files | |
76 fprintf('Reading temporary results\n') | |
77 P_values_rDiff_parametric=ones(size(genes,2),1); | |
78 P_values_rDiff_poisson=ones(size(genes,2),1); | |
79 | |
80 | |
81 P_values_rDiff_parametric_error_flag=cell(size(genes,2),1); | |
82 P_values_rDiff_poisson_error_flag=cell(size(genes,2),1); | |
83 NAMES=cell(size(genes,2),1); | |
84 %Field containing the errors | |
85 ERRORS_NR=[]; | |
86 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; | |
87 % Iterate over the result files to load the data from the count files | |
88 for j = 1:CFG.rproc_num_jobs | |
89 IN_FILENAME=[CFG.out_base_temp 'P_values_parametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)]; | |
90 | |
91 IDX=idx(idx(:,2)==j,1); | |
92 try | |
93 load(IN_FILENAME) | |
94 for k=1:length(IDX) | |
95 if isempty(P_VALS{k,1}) %Gene was not tested for | |
96 %some reason | |
97 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED'; | |
98 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED'; | |
99 else | |
100 if not(isempty(P_VALS{k,1})) | |
101 NAMES{IDX(k)}=P_VALS{k,1}; | |
102 end | |
103 COUNTER=2; | |
104 %Get the results from rDiff.parametric | |
105 if CFG.perform_parametric | |
106 if not(isempty(P_VALS{k,COUNTER})) | |
107 P_values_rDiff_parametric(IDX(k))=P_VALS{k,COUNTER}{1}; | |
108 if not(isempty(P_VALS{k,COUNTER}{2})) | |
109 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED'; | |
110 else | |
111 P_values_rDiff_parametric_error_flag{IDX(k)}='OK'; | |
112 end | |
113 COUNTER=COUNTER+1; | |
114 end | |
115 end | |
116 | |
117 %Get the results from rDiff.poisson | |
118 if CFG.perform_poisson | |
119 if not(isempty(P_VALS{k,COUNTER})) | |
120 P_values_rDiff_poisson(IDX(k))=P_VALS{k,COUNTER}{1}; | |
121 if not(isempty(P_VALS{k,COUNTER}{2})) | |
122 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED'; | |
123 else | |
124 P_values_rDiff_poisson_error_flag{IDX(k)}='OK'; | |
125 end | |
126 end | |
127 end | |
128 end | |
129 end | |
130 catch | |
131 for k=1:length(IDX) | |
132 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED'; | |
133 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED'; | |
134 end | |
135 warning(['There was a problem loading: ' IN_FILENAME ]) | |
136 ERRORS_NR=[ERRORS_NR;j]; | |
137 end | |
138 end | |
139 if not(isempty(ERRORS_NR)) | |
140 warning('There have been problems loading some of the parametric test result files'); | |
141 end | |
142 | |
143 fprintf('Writing output files\n') | |
144 %Generate P-value table for rDiff.nonparametric | |
145 if CFG.perform_parametric | |
146 %Open file handler | |
147 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_parametric.tab']; | |
148 fid=fopen(P_TABLE_FNAME,'w'); | |
149 | |
150 %print header | |
151 fprintf(fid,'gene\tp-value\ttest-status\n'); | |
152 | |
153 %print lines | |
154 for j=1:size(genes,2) | |
155 fprintf(fid,'%s',NAMES{j}); | |
156 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_parametric(j),P_values_rDiff_parametric_error_flag{j}); | |
157 end | |
158 %close file handler | |
159 fclose(fid); | |
160 end | |
161 | |
162 | |
163 | |
164 %Generate P-value table for rDiff.poisson | |
165 if CFG.perform_poisson | |
166 %Open file handler | |
167 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_poisson.tab']; | |
168 fid=fopen(P_TABLE_FNAME,'w'); | |
169 | |
170 %print header | |
171 fprintf(fid,'gene\tp-value\ttest-status\n'); | |
172 | |
173 %print lines | |
174 for j=1:size(genes,2) | |
175 fprintf(fid,'%s',NAMES{j}); | |
176 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_poisson(j),P_values_rDiff_poisson_error_flag{j}); | |
177 end | |
178 %close file handler | |
179 fclose(fid); | |
180 end | |
181 | |
182 |