Mercurial > repos > bebatut > normalize_dataset
comparison normalize_dataset.py @ 0:72633301cc0d draft default tip
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
author | bebatut |
---|---|
date | Fri, 15 Apr 2016 08:42:40 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:72633301cc0d |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 import sys | |
5 import os | |
6 import argparse | |
7 import re | |
8 | |
def isfloat(value):
    """Report whether ``value`` can be converted by the built-in ``float()``.

    Anything ``float()`` accepts counts as numeric, including surrounding
    whitespace, exponent notation, and the special spellings ``nan`` and
    ``inf``.

    :param value: object to probe, normally one cell of a tabular file.
    :return: ``True`` when ``float(value)`` succeeds, ``False`` otherwise.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (e.g. '' or 'abc').
        # TypeError: objects float() cannot take at all (e.g. None).
        return False
    return True
15 | |
def _to_float(cell):
    """Return ``cell`` parsed as a float, or ``None`` when it is not numeric."""
    try:
        return float(cell)
    except ValueError:
        return None


def _share(value, total, factor):
    """Return ``factor * value / total`` without failing on a zero total."""
    if total == 0:
        # A zero value in an all-zero row/column is reported as 0.0; a
        # non-zero value whose total cancels to zero has no defined share.
        return 0.0 if value == 0 else float('nan')
    return factor * value / total


def normalize_dataset(args):
    """Normalize the numeric cells of a tab-separated file by row or column.

    Every cell that parses as a float is divided by the sum of the numeric
    cells of its row (``args.normalization == 'row'``) or of its column
    (``args.normalization == 'column'``). With ``args.format == 'percentage'``
    the result is additionally multiplied by 100. Non-numeric cells (labels,
    headers, empty cells) are copied through unchanged. For any other
    ``args.normalization`` value the output file is created but left empty.

    :param args: object exposing ``input_file`` and ``output_file`` (paths),
        ``normalization`` (``'row'`` or ``'column'``) and ``format``
        (``'proportion'`` or ``'percentage'``), e.g. an argparse namespace.
    :return: ``None``; the result is written to ``args.output_file``.
    """
    with open(args.input_file, 'r') as input_file:
        # Strip only the line terminator: slicing off the last character
        # would eat real data when the final line has no trailing newline.
        rows = [line.rstrip('\n').split('\t') for line in input_file]

    # Parse each cell once; None marks a cell that must be copied verbatim.
    parsed = [[_to_float(cell) for cell in row] for row in rows]
    factor = 100 if args.format == 'percentage' else 1
    by_row = args.normalization == 'row'
    by_column = args.normalization == 'column'

    column_totals = []
    if by_column:
        for numbers in parsed:
            # Rows may be ragged, so grow the totals to the widest row seen
            # instead of trusting the width of the first line.
            missing = len(numbers) - len(column_totals)
            if missing > 0:
                column_totals.extend([0] * missing)
            for index, number in enumerate(numbers):
                if number is not None:
                    column_totals[index] += number

    with open(args.output_file, 'w') as output_file:
        if not (by_row or by_column):
            return

        for cells, numbers in zip(rows, parsed):
            row_total = 0
            if by_row:
                # Plain left-to-right accumulation keeps the written values
                # independent of how the built-in sum() rounds floats.
                for number in numbers:
                    if number is not None:
                        row_total += number

            rendered = []
            for index, (cell, number) in enumerate(zip(cells, numbers)):
                if number is None:
                    rendered.append(cell)
                else:
                    total = row_total if by_row else column_totals[index]
                    rendered.append(str(_share(number, total, factor)))
            output_file.write('\t'.join(rendered) + '\n')
65 | |
if __name__ == '__main__':
    # Command-line entry point: all four options are mandatory, and the two
    # mode options only accept the listed keywords.
    cli = argparse.ArgumentParser()
    for path_option in ('--input_file', '--output_file'):
        cli.add_argument(path_option, required=True)
    cli.add_argument('--normalization', choices=['column', 'row'],
                     required=True)
    cli.add_argument('--format', choices=['proportion', 'percentage'],
                     required=True)
    normalize_dataset(cli.parse_args())