Mercurial > repos > bgruening > bg_statistical_hypothesis_testing
comparison statistical_hypothesis_testing.py @ 0:178b22349b79 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/statistics commit 7c5002672919ca1e5eacacb835a4ce66ffa19656
author | bgruening |
---|---|
date | Mon, 21 Nov 2022 18:08:27 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:178b22349b79 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 | |
5 """ | |
6 import argparse | |
7 | |
8 import numpy as np | |
9 from scipy import stats | |
10 | |
11 | |
def columns_to_values(args, line):
    """Extract integer samples from a tab-separated line.

    Parameters
    ----------
    args : iterable of lists of int
        Groups of 1-based column indices, one group per sample.
    line : str
        A single tab-separated record (a trailing newline is harmless:
        ``int`` strips surrounding whitespace).

    Returns
    -------
    list of lists of int
        One inner list of parsed column values per group in ``args``.
    """
    # Split once: the column layout is invariant across the index groups.
    cols = line.split("\t")
    return [[int(cols[idx - 1]) for idx in group] for group in args]
22 | |
23 | |
def main():
    """Apply one scipy.stats test (chosen via --test_id) to every line of a
    tabular input file, appending the resulting statistics as extra columns.

    Sample selection (all 1-based column indices):
      --sample_one_cols / --sample_two_cols : comma-separated column lists.
      --sample_cols : semicolon-separated groups of comma-separated columns,
        for tests taking an arbitrary number of samples (bartlett, kruskal,
        f_oneway, levene, fligner, median_test, ...).

    The remaining options map one-to-one onto keyword arguments of the
    individual scipy.stats functions; which ones are honoured depends
    entirely on the selected --test_id.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument(
        "-o", "--outfile", required=True, help="Path to the output file."
    )
    # NOTE(review): the three help strings below look copy-pasted from a
    # chemistry tool ("smi, sdf, inchi") — they actually take column lists.
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument(
        "--sample_cols",
        help="Input format, like smi, sdf, inchi,separate arrays using ;",
    )
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument(
        "--fisher",
        action="store_true",
        default=False,
        help="if true then Fisher definition is used",
    )
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument(
        "--inclusive1",
        action="store_true",
        default=False,
        help="if false,lower_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive2",
        action="store_true",
        default=False,
        help="if false,higher_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive",
        action="store_true",
        default=False,
        help="if false,limit will be ignored",
    )
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        # NOTE(review): this default is the *string* "False", which is truthy,
        # unlike every other flag above (default=False) — likely a bug; when
        # the flag is omitted, kendalltau receives a truthy value.
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument(
        "--correction",
        action="store_true",
        default=False,
        help="continuity correction ",
    )
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument(
        "--b", type=int, default=0, help="The number of bins to use for the histogram"
    )
    parser.add_argument(
        "--N", type=int, default=0, help="Score that is compared to the elements in a."
    )
    parser.add_argument(
        "--ddof", type=int, default=0, help="Degrees of freedom correction"
    )
    parser.add_argument(
        "--score",
        type=int,
        default=0,
        help="Score that is compared to the elements in a.",
    )
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    # mf/nf double as the (lower, upper) limit pair for the trimmed-statistics
    # tests; a branch below treats "nf == 0 and mf == 0" as "no limits given".
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument(
        "--new",
        type=float,
        default=0.0,
        help="Value to put in place of values in a outside of bounds",
    )
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument(
        "--base",
        type=float,
        default=1.6,
        help="The logarithmic base to use, defaults to e",
    )
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    # 0/1 flags recording which sample selections were supplied on the
    # command line; they gate the per-line extraction below.
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(list(map(int, sample.split(","))))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    # One scipy.stats call per input line; results are appended to that
    # line's columns and written straight back out.
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        # Giant dispatch on --test_id. NOTE(review): several branches call
        # functions removed from modern SciPy (nanmean, nanmedian,
        # signaltonoise, threshold, histogram, histogram2, itemfreq-era
        # APIs) — confirm the pinned scipy version for this tool.
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(
                list(map(float, sample_one))
            )
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(list(map(float, sample_one)))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(list(map(float, sample_one)))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(list(map(float, sample_one)))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(list(map(float, sample_one)))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(list(map(float, sample_one)))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = np.unique(list(map(float, sample_one)), return_counts=True)
            for i in freq:
                elements = ",".join(list(map(str, i)))
                cols.append(elements)
        # NOTE(review): the next two branches duplicate the earlier
        # "nanmedian" and "variation" cases and are unreachable.
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(list(map(float, sample_one)))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(list(map(float, sample_one)))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, list(map(float, sample_one)))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(list(map(float, sample_one)))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(list(map(float, sample_one)), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(list(map(float, sample_one)), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(
                list(map(float, sample_one)), dist=args.dist
            )
            cols.append(A2)
            for i in critical:
                cols.append(i)
            cols.append(",")
            for i in sig:
                cols.append(i)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(list(map(float, sample_one)), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(list(map(float, sample_one)), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(list(map(float, sample_one)), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(
                list(map(float, sample_one)),
                axis=args.axis,
                fisher=args.fisher,
                bias=args.bias,
            )
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(list(map(float, sample_one)), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(list(map(float, sample_one)))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(list(map(float, sample_one)), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(list(map(float, sample_one)))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(list(map(float, sample_one)), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(list(map(float, sample_one)), ddof=args.ddof)
            for i in z:
                cols.append(i)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(list(map(float, sample_one)), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(
                list(map(float, sample_one)), score=args.score, kind=args.kind
            )
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(
                list(map(float, sample_one)), alpha=args.alpha
            )
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            # NOTE(review): high=args.n reuses the integer "number of trials"
            # option as the upper clipping factor — confirm this is intended
            # (args.m, a float, is used for low).
            c, c_low, c_up = stats.sigmaclip(
                list(map(float, sample_one)), low=args.m, high=args.n
            )
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                list(map(float, sample_one)),
                cdf=args.cdf,
                N=args.N,
                alternative=args.alternative,
                mode=args.mode,
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                list(map(float, sample_one)),
                correction=args.correction,
                lambda_=args.lambda_,
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        # Trimmed statistics: (mf, nf) == (0, 0) is the sentinel for
        # "no limits supplied" (note the defaults are 2.0 / 99.9, so this
        # only happens when the user passes zeros explicitly).
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(list(map(float, sample_one)))
            else:
                mean = stats.tmean(
                    list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                )
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(list(map(float, sample_one)))
            else:
                min = stats.tmin(
                    list(map(float, sample_one)),
                    lowerlimit=mf,
                    inclusive=args.inclusive,
                )
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(list(map(float, sample_one)))
            else:
                max = stats.tmax(
                    list(map(float, sample_one)),
                    upperlimit=nf,
                    inclusive=args.inclusive,
                )
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(list(map(float, sample_one)))
            else:
                var = stats.tvar(
                    list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                )
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(list(map(float, sample_one)))
            else:
                std = stats.tstd(
                    list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                )
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(list(map(float, sample_one)))
            else:
                s = stats.tsem(
                    list(map(float, sample_one)), (mf, nf), (inclusive1, inclusive2)
                )
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    interpolation_method=args.interpolation,
                )
            else:
                s = stats.scoreatpercentile(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    (mf, nf),
                    interpolation_method=args.interpolation,
                )
            for i in s:
                cols.append(i)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(
                    list(map(float, sample_one)), args.b
                )
            else:
                rel, low_range, binsize, ex = stats.relfreq(
                    list(map(float, sample_one)), args.b, (mf, nf)
                )
            for i in rel:
                cols.append(i)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    statistic=args.statistic,
                    bins=args.b,
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(list(map(float, sample_one)), newval=args.new)
            else:
                o = stats.threshold(
                    list(map(float, sample_one)), mf, nf, newval=args.new
                )
            for i in o:
                cols.append(i)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(
                list(map(float, sample_one)), proportiontocut=args.proportiontocut
            )
            for i in o:
                cols.append(i)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(
                list(map(float, sample_one)),
                proportiontocut=args.proportiontocut,
                tail=args.tail,
            )
            for i in t1:
                cols.append(i)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(
                    list(map(float, sample_one)), args.b
                )
            else:
                hi, low_range, binsize, ex = stats.histogram(
                    list(map(float, sample_one)), args.b, (mf, nf)
                )
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(
                    list(map(float, sample_one)), args.b
                )
            else:
                cum, low_range, binsize, ex = stats.cumfreq(
                    list(map(float, sample_one)), args.b, (mf, nf)
                )
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(list(map(float, sample_one)))
            else:
                ma = stats.boxcox_normmax(
                    list(map(float, sample_one)), (mf, nf), method=args.method
                )
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            # imbda == 0 means "find lambda": boxcox then returns the
            # transformed data, the fitted lambda and the CI; otherwise it
            # returns only the transformed data.
            if imbda == 0:
                box, ma, ci = stats.boxcox(
                    list(map(float, sample_one)), alpha=args.alpha
                )
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(
                    list(map(float, sample_one)), imbda, alpha=args.alpha
                )
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(
                list(map(float, sample_one)), list(map(float, sample_two))
            )
            for i in h2:
                cols.append(i)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(
                list(map(float, sample_one)), list(map(float, sample_two))
            )
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            # NOTE(review): passes map objects (not lists) and then iterates
            # t/prob as arrays — assumes sample_two supplies array-like
            # popmeans; confirm against the tool's wrapper.
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for i in t:
                cols.append(i)
            for i in prob:
                cols.append(i)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(
                list(map(float, sample_one)), list(map(float, sample_two))
            )
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                list(map(float, sample_one)), list(map(float, sample_two))
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(
                list(map(float, sample_one)), list(map(float, sample_two))
            )
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(
                list(map(float, sample_one)), list(map(float, sample_two))
            )
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(
                list(map(float, sample_one)), list(map(float, sample_two))
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                list(map(float, sample_one)),
                list(map(float, sample_two)),
                use_continuity=args.mwu_use_continuity,
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(
                list(map(float, sample_one)),
                list(map(float, sample_two)),
                ddof=args.ddof,
            )
            for i in z:
                cols.append(i)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                list(map(float, sample_one)),
                list(map(float, sample_two)),
                equal_var=args.equal_var,
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(
                list(map(float, sample_one)),
                list(map(float, sample_two)),
                axis=args.axis,
            )
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(
                list(map(float, sample_one)),
                list(map(float, sample_two)),
                axis=args.axis,
            )
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value = stats.shapiro(list(map(float, sample_one)))
            cols.append(W)
            cols.append(p_value)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                list(map(float, sample_one)),
                list(map(float, sample_two)),
                initial_lexsort=args.initial_lexsort,
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(
                list(map(float, sample_one)),
                list(map(float, sample_two)),
                base=args.base,
            )
            cols.append(s)
        # Tests below accept an optional second sample (sample2 flag).
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(
                    list(map(float, sample_one)), list(map(float, sample_two))
                )
            else:
                rho, p_value = stats.spearmanr(list(map(float, sample_one)))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    list(map(float, sample_one)),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    ddof=args.ddof,
                )
            else:
                rho, p_value = stats.chisquare(
                    list(map(float, sample_one)), ddof=args.ddof
                )
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    ddof=args.ddof,
                    lambda_=args.lambda_,
                )
            else:
                stat, p_value = stats.power_divergence(
                    list(map(float, sample_one)), ddof=args.ddof, lambda_=args.lambda_
                )
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(
                    list(map(float, sample_one)),
                    list(map(float, sample_two)),
                    alpha=args.alpha,
                )
            else:
                mpe, met, lo, up = stats.theilslopes(
                    list(map(float, sample_one)), alpha=args.alpha
                )
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    list(map(float, sample_one)),
                    method=args.med,
                    weights=list(map(float, sample_two)),
                )
            else:
                stat, p_value = stats.combine_pvalues(
                    list(map(float, sample_one)), method=args.med
                )
            cols.append(stat)
            cols.append(p_value)
        # Tests below take an arbitrary number of samples via --sample_cols
        # (b_samples is only bound when that option was supplied).
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for i in ob:
                elements = ",".join(list(map(str, i)))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(
                center=args.center, proportiontocut=args.proportiontocut, *b_samples
            )
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(
                center=args.center, proportiontocut=args.proportiontocut, *b_samples
            )
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties,
                correction=args.correction,
                lambda_=args.lambda_,
                *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            # NOTE(review): the raw table object is appended AND then its
            # rows are appended again as comma-joined strings — confirm the
            # double output is intended.
            cols.append(table)
            for i in table:
                elements = ",".join(list(map(str, i)))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(list(map(str, cols))))
    outfile.close()
771 | |
772 | |
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()