comparison normalize_dataset.py @ 0:72633301cc0d draft default tip

planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
author bebatut
date Fri, 15 Apr 2016 08:42:40 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:72633301cc0d
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import os
6 import argparse
7 import re
8
def isfloat(value):
    """Tell whether ``value`` can be converted by the built-in ``float()``.

    Anything ``float()`` accepts counts as numeric, which includes strings
    with surrounding whitespace and the special spellings ``'nan'`` and
    ``'inf'``.

    Args:
        value: The object to test, typically one cell of a tabular file.

    Returns:
        True if ``float(value)`` succeeds, False otherwise.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported types such as None or a list.
        # OverflowError: integers too large to be represented as a float.
        return False
    return True
15
def _parse_cell(cell):
    """Return ``cell`` converted to a float, or None when it is not numeric."""
    try:
        return float(cell)
    except ValueError:
        return None


def _format_row(cells, totals, scale):
    """Return one output line where each numeric cell is divided by its total.

    ``totals`` is aligned with ``cells`` (one divisor per cell). Non-numeric
    cells are copied through unchanged.
    """
    rendered = []
    for cell, total in zip(cells, totals):
        value = _parse_cell(cell)
        if value is None:
            rendered.append(cell)
        elif total == 0:
            # The share of a zero total is undefined; write 0.0 instead of
            # aborting the whole run with ZeroDivisionError.
            rendered.append(str(0.0))
        else:
            rendered.append(str(scale * value / total))
    return '\t'.join(rendered) + '\n'


def normalize_dataset(args):
    """Normalize the numeric cells of a tab-separated file by row or by column.

    Every cell that ``float()`` can parse is replaced by its share of the sum
    of the numeric cells in the same row (``'row'``) or the same column
    (``'column'``). All other cells (labels, headers, empty cells) are written
    back unchanged. Each output line is terminated by a newline.

    Args:
        args: Object exposing the following attributes:
            input_file: Path of the tab-separated file to read.
            output_file: Path of the file to write (overwritten).
            normalization: ``'row'`` or ``'column'``. Any other value leaves
                the output file empty.
            format: ``'percentage'`` multiplies every share by 100; any other
                value writes plain proportions.

    Notes:
        * Numeric cells whose row/column total is 0 are written as ``0.0``.
        * A final line without a trailing newline is handled like any other
          line (no character is dropped from its last cell).
        * In column mode rows may have different widths, and an empty input
          file produces an empty output file.
    """
    with open(args.input_file, 'r') as input_file:
        # Strip only the line terminator so the data itself is never truncated.
        rows = [line.rstrip('\r\n').split('\t') for line in input_file]

    scale = 100 if args.format == 'percentage' else 1

    column_sum = []
    if args.normalization == 'column':
        for cells in rows:
            # Grow the accumulator on demand so ragged rows are supported.
            missing = len(cells) - len(column_sum)
            if missing > 0:
                column_sum.extend([0] * missing)
            for i, cell in enumerate(cells):
                value = _parse_cell(cell)
                if value is not None:
                    column_sum[i] += value

    with open(args.output_file, 'w') as output_file:
        for cells in rows:
            if args.normalization == 'row':
                row_sum = sum(
                    value for value in map(_parse_cell, cells)
                    if value is not None)
                totals = [row_sum] * len(cells)
            elif args.normalization == 'column':
                totals = column_sum
            else:
                # Unknown mode: nothing is written, the output stays empty.
                break
            output_file.write(_format_row(cells, totals, scale))
65
if __name__ == '__main__':
    # Command-line entry point: all four options are mandatory, and the two
    # mode switches only accept a closed set of values.
    cli = argparse.ArgumentParser()
    for path_flag in ('--input_file', '--output_file'):
        cli.add_argument(path_flag, required=True)
    for mode_flag, allowed in (
        ('--normalization', ['column', 'row']),
        ('--format', ['proportion', 'percentage']),
    ):
        cli.add_argument(mode_flag, required=True, choices=allowed)
    normalize_dataset(cli.parse_args())