comparison rDiff/src/perform_nonparametric_tests.m @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0f80a5141704
1 function []=perform_nonparametric_tests(CFG,genes,variance_function_nonparametric_1, variance_function_nonparametric_2)
2
3
4 %Get the gene expression
5 fprintf('Loading gene expression\n')
6 if isempty(CFG.Counts_gene_expression)
7 EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab'];
8 else
9 EXPR_TAB_FILENAME=CFG.Counts_gene_expression;
10 end
11
12 try
13 Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1);
14 Gene_expression=Gene_expression.data;
15 catch
16 error(['Could not open: ' EXPR_TAB_FILENAME])
17 end
18
19
20 %Get the counts
21 fprintf('Loading nonparametric region counts\n')
22 if isempty(CFG.Counts_rDiff_nonparametric)
23 IN_FILENAME=[CFG.out_base 'Nonparametric_region_counts.mat'];
24 load(IN_FILENAME,'Counts_rDiff_nonparametric')
25 else
26 IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_nonparametric];
27 load(IN_FILENAMEc,'Counts_rDiff_nonparametric')
28 end
29
30
31
32 if CFG.use_rproc
33 JB_NR=1;
34 JOB_INFO = rproc_empty();
35 end
36
37 PAR.variance_function_nonparametric_1=variance_function_nonparametric_1;
38 PAR.variance_function_nonparametric_2=variance_function_nonparametric_2;
39 if 1==1
40 %%% Perform the test
41 % configuration
42 if not(CFG.use_rproc)
43 fprintf('Performing nonparametric testing\n')
44 end
45 %define the splits of the genes for the jobs
46 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
47 % submit jobs to cluster
48
49 for i = 1:CFG.rproc_num_jobs
50 PAR.genes = genes(idx(idx(:,2)==i,1));
51 PAR.Counts_rDiff_nonparametric=Counts_rDiff_nonparametric(idx(idx(:,2)==i,1),:);
52 PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:);
53 CFG.rproc_memreq = 5000;
54 CFG.rproc_par.mem_req_resubmit = [5000 10000 32000];
55 CFG.rproc_par.identifier = sprintf('Pnp.%i-',i);
56 CFG.outfile_prefix=[CFG.out_base_temp 'P_values_nonparametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)];
57 PAR.CFG=CFG;
58 if CFG.use_rproc
59 fprintf(1, 'Submitting job %i to cluster\n',i);
60 JOB_INFO(JB_NR) = rproc('get_nonparametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time);
61 JB_NR=JB_NR+1;
62 else
63 get_nonparametric_tests_caller(PAR);
64 end
65 end
66 if CFG.use_rproc
67 [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1);
68 end
69 end
70 % Get the test results
71
72 %%% Generate the output files
73 fprintf('Reading temporary results\n')
74 P_values_rDiff_nonparametric=ones(size(genes,2),1);
75 P_values_rDiff_mmd=ones(size(genes,2),1);
76
77
78 P_values_rDiff_nonparametric_error_flag=cell(size(genes,2),1);
79 P_values_rDiff_mmd_error_flag=cell(size(genes,2),1);
80 NAMES=cell(size(genes,2),1);
81 %Field containing the errors
82 ERRORS_NR=[];
83 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
84 % Iterate over the result files to load the data from the count files
85 for j = 1:CFG.rproc_num_jobs
86 IN_FILENAME=[CFG.out_base_temp 'P_values_nonparametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)];
87 IDX=idx(idx(:,2)==j,1);
88 try
89 load(IN_FILENAME)
90 for k=1:length(IDX)
91 if isempty(P_VALS{k,1}) %Gene was not tested for
92 %some reason
93 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
94 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
95 else
96 if not(isempty(P_VALS{k,1}))
97 NAMES{IDX(k)}=P_VALS{k,1};
98 end
99 COUNTER=2;
100 %Get the results from rDiff.mmd
101 if CFG.perform_mmd
102 if not(isempty(P_VALS{k,COUNTER}))
103 P_values_rDiff_mmd(IDX(k))=P_VALS{k,COUNTER}{1};
104 if (isempty(P_VALS{k,COUNTER}{2}))
105 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
106 else
107 P_values_rDiff_mmd_error_flag{IDX(k)}='OK';
108 end
109 end
110 COUNTER=COUNTER+1;
111 end
112
113 %Get the results from rDiff.parametric
114 if CFG.perform_nonparametric
115 if not(isempty(P_VALS{k,COUNTER}))
116 P_values_rDiff_nonparametric(IDX(k))=P_VALS{k,COUNTER}{1};
117 if length(P_VALS{k,COUNTER})>1
118 if iscell(P_VALS{k,COUNTER}{2}) && length(P_VALS{k,COUNTER}{2}{3})>3
119 P_values_rDiff_nonparametric(IDX(k))=min(10*min(P_VALS{k,COUNTER}{2}{3})+max(P_VALS{k,COUNTER}{2}{3})*(10/(CFG.bootstraps+1)),1);
120 end
121 end
122 if (isempty(P_VALS{k,COUNTER}{2}))
123 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
124 else
125 P_values_rDiff_nonparametric_error_flag{IDX(k)}='OK';
126 end
127 end;
128 end
129
130 end
131 end
132 catch
133 for k=1:length(IDX)
134 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
135 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
136 end
137 warning(['There was a problem loading: ' IN_FILENAME ])
138 ERRORS_NR=[ERRORS_NR;j];
139 end
140 end
141 if not(isempty(ERRORS_NR))
142 warning('There have been problems loading some of the parametric test result files');
143 end
144
145 fprintf('Writing output files\n')
146 %Generate P-value table for rDiff.nonparametric
147 if CFG.perform_nonparametric
148 %Open file handler
149 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_nonparametric.tab'];
150 fid=fopen(P_TABLE_FNAME,'w');
151
152 %print header
153 fprintf(fid,'gene\tp-value\ttest-status\n');
154
155 %print lines
156 for j=1:size(genes,2)
157 fprintf(fid,'%s',NAMES{j});
158 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_nonparametric(j),P_values_rDiff_nonparametric_error_flag{j});
159 end
160 %close file handler
161 fclose(fid)
162 end
163
164
165
166 %Generate P-value table for rDiff.mmd
167 if CFG.perform_mmd
168 %Open file handler
169 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_mmd.tab'];
170 fid=fopen(P_TABLE_FNAME,'w');
171
172 %print header
173 fprintf(fid,'gene\tp-value\ttest-status\n');
174
175 %print lines
176 for j=1:size(genes,2)
177 fprintf(fid,'%s',NAMES{j});
178 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_mmd(j),P_values_rDiff_mmd_error_flag{j});
179 end
180 %close file handler
181 fclose(fid)
182 end
183
184