0
|
1 function []=perform_nonparametric_tests(CFG,genes,variance_function_nonparametric_1, variance_function_nonparametric_2)
|
|
2
|
|
3
|
|
4 %Get the gene expression
|
|
5 fprintf('Loading gene expression\n')
|
|
6 if isempty(CFG.Counts_gene_expression)
|
|
7 EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab'];
|
|
8 else
|
|
9 EXPR_TAB_FILENAME=CFG.Counts_gene_expression;
|
|
10 end
|
|
11
|
|
12 try
|
|
13 Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1);
|
|
14 Gene_expression=Gene_expression.data;
|
|
15 catch
|
|
16 error(['Could not open: ' EXPR_TAB_FILENAME])
|
|
17 end
|
|
18
|
|
19
|
|
20 %Get the counts
|
|
21 fprintf('Loading nonparametric region counts\n')
|
|
22 if isempty(CFG.Counts_rDiff_nonparametric)
|
|
23 IN_FILENAME=[CFG.out_base 'Nonparametric_region_counts.mat'];
|
|
24 load(IN_FILENAME,'Counts_rDiff_nonparametric')
|
|
25 else
|
|
26 IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_nonparametric];
|
|
27 load(IN_FILENAMEc,'Counts_rDiff_nonparametric')
|
|
28 end
|
|
29
|
|
30
|
|
31
|
|
32 if CFG.use_rproc
|
|
33 JB_NR=1;
|
|
34 JOB_INFO = rproc_empty();
|
|
35 end
|
|
36
|
|
37 PAR.variance_function_nonparametric_1=variance_function_nonparametric_1;
|
|
38 PAR.variance_function_nonparametric_2=variance_function_nonparametric_2;
|
|
39 if 1==1
|
|
40 %%% Perform the test
|
|
41 % configuration
|
|
42 if not(CFG.use_rproc)
|
|
43 fprintf('Performing nonparametric testing\n')
|
|
44 end
|
|
45 %define the splits of the genes for the jobs
|
|
46 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
|
|
47 % submit jobs to cluster
|
|
48
|
|
49 for i = 1:CFG.rproc_num_jobs
|
|
50 PAR.genes = genes(idx(idx(:,2)==i,1));
|
|
51 PAR.Counts_rDiff_nonparametric=Counts_rDiff_nonparametric(idx(idx(:,2)==i,1),:);
|
|
52 PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:);
|
|
53 CFG.rproc_memreq = 5000;
|
|
54 CFG.rproc_par.mem_req_resubmit = [5000 10000 32000];
|
|
55 CFG.rproc_par.identifier = sprintf('Pnp.%i-',i);
|
|
56 CFG.outfile_prefix=[CFG.out_base_temp 'P_values_nonparametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)];
|
|
57 PAR.CFG=CFG;
|
|
58 if CFG.use_rproc
|
|
59 fprintf(1, 'Submitting job %i to cluster\n',i);
|
|
60 JOB_INFO(JB_NR) = rproc('get_nonparametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time);
|
|
61 JB_NR=JB_NR+1;
|
|
62 else
|
|
63 get_nonparametric_tests_caller(PAR);
|
|
64 end
|
|
65 end
|
|
66 if CFG.use_rproc
|
|
67 [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1);
|
|
68 end
|
|
69 end
|
|
70 % Get the test results
|
|
71
|
|
72 %%% Generate the output files
|
|
73 fprintf('Reading temporary results\n')
|
|
74 P_values_rDiff_nonparametric=ones(size(genes,2),1);
|
|
75 P_values_rDiff_mmd=ones(size(genes,2),1);
|
|
76
|
|
77
|
|
78 P_values_rDiff_nonparametric_error_flag=cell(size(genes,2),1);
|
|
79 P_values_rDiff_mmd_error_flag=cell(size(genes,2),1);
|
|
80 NAMES=cell(size(genes,2),1);
|
|
81 %Field containing the errors
|
|
82 ERRORS_NR=[];
|
|
83 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
|
|
84 % Iterate over the result files to load the data from the count files
|
|
85 for j = 1:CFG.rproc_num_jobs
|
|
86 IN_FILENAME=[CFG.out_base_temp 'P_values_nonparametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)];
|
|
87 IDX=idx(idx(:,2)==j,1);
|
|
88 try
|
|
89 load(IN_FILENAME)
|
|
90 for k=1:length(IDX)
|
|
91 if isempty(P_VALS{k,1}) %Gene was not tested for
|
|
92 %some reason
|
|
93 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
|
|
94 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
|
|
95 else
|
|
96 if not(isempty(P_VALS{k,1}))
|
|
97 NAMES{IDX(k)}=P_VALS{k,1};
|
|
98 end
|
|
99 COUNTER=2;
|
|
100 %Get the results from rDiff.mmd
|
|
101 if CFG.perform_mmd
|
|
102 if not(isempty(P_VALS{k,COUNTER}))
|
|
103 P_values_rDiff_mmd(IDX(k))=P_VALS{k,COUNTER}{1};
|
|
104 if (isempty(P_VALS{k,COUNTER}{2}))
|
|
105 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
|
|
106 else
|
|
107 P_values_rDiff_mmd_error_flag{IDX(k)}='OK';
|
|
108 end
|
|
109 end
|
|
110 COUNTER=COUNTER+1;
|
|
111 end
|
|
112
|
|
113 %Get the results from rDiff.parametric
|
|
114 if CFG.perform_nonparametric
|
|
115 if not(isempty(P_VALS{k,COUNTER}))
|
|
116 P_values_rDiff_nonparametric(IDX(k))=P_VALS{k,COUNTER}{1};
|
|
117 if length(P_VALS{k,COUNTER})>1
|
|
118 if iscell(P_VALS{k,COUNTER}{2}) && length(P_VALS{k,COUNTER}{2}{3})>3
|
|
119 P_values_rDiff_nonparametric(IDX(k))=min(10*min(P_VALS{k,COUNTER}{2}{3})+max(P_VALS{k,COUNTER}{2}{3})*(10/(CFG.bootstraps+1)),1);
|
|
120 end
|
|
121 end
|
|
122 if (isempty(P_VALS{k,COUNTER}{2}))
|
|
123 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
|
|
124 else
|
|
125 P_values_rDiff_nonparametric_error_flag{IDX(k)}='OK';
|
|
126 end
|
|
127 end;
|
|
128 end
|
|
129
|
|
130 end
|
|
131 end
|
|
132 catch
|
|
133 for k=1:length(IDX)
|
|
134 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
|
|
135 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
|
|
136 end
|
|
137 warning(['There was a problem loading: ' IN_FILENAME ])
|
|
138 ERRORS_NR=[ERRORS_NR;j];
|
|
139 end
|
|
140 end
|
|
141 if not(isempty(ERRORS_NR))
|
|
142 warning('There have been problems loading some of the parametric test result files');
|
|
143 end
|
|
144
|
|
145 fprintf('Writing output files\n')
|
|
146 %Generate P-value table for rDiff.nonparametric
|
|
147 if CFG.perform_nonparametric
|
|
148 %Open file handler
|
|
149 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_nonparametric.tab'];
|
|
150 fid=fopen(P_TABLE_FNAME,'w');
|
|
151
|
|
152 %print header
|
|
153 fprintf(fid,'gene\tp-value\ttest-status\n');
|
|
154
|
|
155 %print lines
|
|
156 for j=1:size(genes,2)
|
|
157 fprintf(fid,'%s',NAMES{j});
|
|
158 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_nonparametric(j),P_values_rDiff_nonparametric_error_flag{j});
|
|
159 end
|
|
160 %close file handler
|
|
161 fclose(fid)
|
|
162 end
|
|
163
|
|
164
|
|
165
|
|
166 %Generate P-value table for rDiff.mmd
|
|
167 if CFG.perform_mmd
|
|
168 %Open file handler
|
|
169 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_mmd.tab'];
|
|
170 fid=fopen(P_TABLE_FNAME,'w');
|
|
171
|
|
172 %print header
|
|
173 fprintf(fid,'gene\tp-value\ttest-status\n');
|
|
174
|
|
175 %print lines
|
|
176 for j=1:size(genes,2)
|
|
177 fprintf(fid,'%s',NAMES{j});
|
|
178 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_mmd(j),P_values_rDiff_mmd_error_flag{j});
|
|
179 end
|
|
180 %close file handler
|
|
181 fclose(fid)
|
|
182 end
|
|
183
|
|
184
|