Mercurial > repos > bebatut > normalize_dataset
annotate normalize_dataset.py @ 0:72633301cc0d draft default tip
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
author | bebatut |
---|---|
date | Fri, 15 Apr 2016 08:42:40 -0400 |
parents | |
children |
rev | line source |
---|---|
0
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
1 #!/usr/bin/env python |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
3 |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
4 import sys |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
5 import os |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
6 import argparse |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
7 import re |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
8 |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
def isfloat(value):
    """Tell whether ``value`` can be converted with ``float()``.

    Anything ``float()`` accepts counts as numeric, which includes
    strings with surrounding whitespace and the special spellings
    ``'nan'`` and ``'inf'``.

    Args:
        value: The object to probe, normally one field of a tabular line.

    Returns:
        True if ``float(value)`` succeeds, False otherwise.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: objects float() cannot handle at all (e.g. None);
        # these are reported as "not a float" instead of crashing.
        return False
    return True
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
15 |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
def _parse_number(text):
    """Return ``text`` as a float, or None when it is not numeric."""
    try:
        return float(text)
    except ValueError:
        return None


def _scaled(value, total, scale):
    """Return ``scale * value / total``, or 0.0 when ``total`` is zero."""
    if total == 0:
        # A zero total has no defined proportion; emit 0.0 rather than
        # aborting the whole run with ZeroDivisionError.
        return 0.0
    return scale * value / total


def _format_row(cells, numbers, totals, scale):
    """Render one output line: numeric cells normalized, others verbatim."""
    rendered = []
    for cell, number, total in zip(cells, numbers, totals):
        if number is None:
            rendered.append(cell)
        else:
            rendered.append(str(_scaled(number, total, scale)))
    return '\t'.join(rendered)


def normalize_dataset(args):
    """Normalize the numeric cells of a tab-separated file.

    Every cell that parses as a float is divided by the sum of the numeric
    cells of its row (``args.normalization == 'row'``) or of its column
    (``args.normalization == 'column'``). Non-numeric cells (labels,
    headers, empty fields) are copied unchanged. With
    ``args.format == 'percentage'`` the result is multiplied by 100;
    any other format value yields plain proportions.

    Edge cases:
        * A final line without a trailing newline keeps its last character.
        * A row or column whose numeric cells sum to zero yields 0.0 for
          each of its numeric cells instead of raising ZeroDivisionError.
        * An empty input file produces an empty output file.
        * Rows may have differing numbers of fields in column mode.

    Args:
        args: Object exposing ``input_file`` and ``output_file`` (paths)
            plus ``normalization`` and ``format`` (strings), such as the
            namespace returned by ``argparse``.

    Returns:
        None. The normalized table is written to ``args.output_file``.
    """
    with open(args.input_file, 'r') as input_file:
        # Strip only the line terminator; slicing off the last character
        # would corrupt a final line that has no trailing newline.
        rows = [line.rstrip('\n').split('\t') for line in input_file]

    # Parse each cell once; None marks a non-numeric cell.
    parsed = [[_parse_number(cell) for cell in cells] for cells in rows]
    scale = 100 if args.format == 'percentage' else 1

    column_totals = []
    if args.normalization == 'column':
        for numbers in parsed:
            # Grow the totals so that rows wider than earlier ones fit.
            missing = len(numbers) - len(column_totals)
            if missing > 0:
                column_totals.extend([0] * missing)
            for index, number in enumerate(numbers):
                if number is not None:
                    column_totals[index] += number

    with open(args.output_file, 'w') as output_file:
        for cells, numbers in zip(rows, parsed):
            if args.normalization == 'row':
                row_total = sum(n for n in numbers if n is not None)
                totals = [row_total] * len(cells)
            elif args.normalization == 'column':
                totals = column_totals
            else:
                # Unknown mode: nothing is written, the output stays empty.
                continue
            output_file.write(
                _format_row(cells, numbers, totals, scale) + '\n')
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
65 |
72633301cc0d
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
bebatut
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: all four options are mandatory.
    cli = argparse.ArgumentParser()
    for path_option in ('--input_file', '--output_file'):
        cli.add_argument(path_option, required=True)
    # Direction of the normalization: per column or per row.
    cli.add_argument('--normalization', choices=['column', 'row'],
                     required=True)
    # Output scale: fractions of the total, or the same values times 100.
    cli.add_argument('--format', choices=['proportion', 'percentage'],
                     required=True)
    normalize_dataset(cli.parse_args())