annotate normalize_dataset.py @ 0:72633301cc0d draft default tip

planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
author bebatut
date Fri, 15 Apr 2016 08:42:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
1 #!/usr/bin/env python
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
2 # -*- coding: utf-8 -*-
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
3
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
4 import sys
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
5 import os
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
6 import argparse
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
7 import re
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
8
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
def isfloat(value):
    """Tell whether ``value`` can be converted by the built-in ``float()``.

    Anything ``float()`` accepts counts as numeric, which includes strings
    with surrounding whitespace and the special spellings ``nan`` / ``inf``.

    Args:
        value: The object to probe, typically one text cell of a table.

    Returns:
        True when ``float(value)`` succeeds, False otherwise.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (e.g. "abc" or "").
        # TypeError: objects float() cannot take at all (e.g. None); a
        # predicate should answer False for those instead of raising.
        return False
    return True
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
15
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
def _to_float(text):
    """Return ``text`` converted by ``float()``, or None if it is not numeric."""
    try:
        return float(text)
    except (TypeError, ValueError):
        return None


def _share(value, total, as_percentage):
    """Format ``value`` relative to ``total`` as a proportion or percentage."""
    if total == 0:
        # The share is undefined when the total is zero; report 0.0 rather
        # than aborting the whole run with a ZeroDivisionError.
        return str(0.0)
    if as_percentage:
        return str(100 * value / total)
    return str(value / total)


def normalize_dataset(args):
    """Normalize the numeric cells of a tab-separated file by row or column.

    The input is read completely, then every cell that ``float()`` accepts is
    replaced by its share of the sum of the numeric cells in the same row
    (``row`` mode) or in the same column (``column`` mode). Cells that are not
    numeric (labels, headers, empty cells) are copied unchanged and do not
    contribute to any sum. Note that a numeric-looking label or header cell is
    treated like any other number.

    Args:
        args: Object with the attributes
            ``input_file``: path of the tab-separated file to read;
            ``output_file``: path of the file to write (overwritten);
            ``normalization``: ``'row'`` or ``'column'``; for any other value
            the output file is created but left empty;
            ``format``: ``'percentage'`` multiplies each share by 100, any
            other value writes plain proportions.

    Returns:
        None. The result is written to ``args.output_file``, one
        newline-terminated line per input line.
    """
    with open(args.input_file, 'r') as input_file:
        # rstrip('\n') instead of line[:-1]: a final line without a trailing
        # newline must not lose its last character.
        rows = [line.rstrip('\n').split('\t') for line in input_file]

    # Parse every cell once; None marks a non-numeric cell.
    numbers = [[_to_float(cell) for cell in row] for row in rows]

    mode = args.normalization
    as_percentage = args.format == 'percentage'

    column_totals = []
    if mode == 'column':
        # Size the totals to the widest row so ragged input cannot overflow
        # the list, and so an empty input simply yields no totals.
        column_totals = [0] * max([len(row) for row in rows] or [0])
        for parsed_row in numbers:
            for index, number in enumerate(parsed_row):
                if number is not None:
                    column_totals[index] += number

    with open(args.output_file, 'w') as output_file:
        if mode not in ('row', 'column'):
            return

        for row, parsed_row in zip(rows, numbers):
            if mode == 'row':
                # Plain left-to-right accumulation keeps the written digits
                # independent of how the interpreter implements sum().
                row_total = 0
                for number in parsed_row:
                    if number is not None:
                        row_total += number
                totals = [row_total] * len(row)
            else:
                totals = column_totals

            cells = [
                cell if number is None
                else _share(number, total, as_percentage)
                for cell, number, total in zip(row, parsed_row, totals)
            ]
            output_file.write('\t'.join(cells) + '\n')
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
65
72633301cc0d planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: all four options are mandatory.
    parser = argparse.ArgumentParser()

    # Paths of the table to read and of the normalized table to write.
    for file_option in ('--input_file', '--output_file'):
        parser.add_argument(file_option, required=True)

    # Direction in which the sums are computed.
    parser.add_argument(
        '--normalization', choices=['column', 'row'], required=True)
    # Whether shares are written as fractions or multiplied by 100.
    parser.add_argument(
        '--format', choices=['proportion', 'percentage'], required=True)

    args = parser.parse_args()
    normalize_dataset(args)