Mercurial > repos > bebatut > normalize_dataset
comparison normalize_dataset.py @ 0:72633301cc0d draft default tip
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
| author | bebatut |
|---|---|
| date | Fri, 15 Apr 2016 08:42:40 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:72633301cc0d |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import sys | |
| 5 import os | |
| 6 import argparse | |
| 7 import re | |
| 8 | |
def isfloat(value):
    """Tell whether ``value`` can be converted by ``float()``.

    Args:
        value: Usually one tab-separated cell (a string), but any object
            is accepted.

    Returns:
        True when ``float(value)`` succeeds, False otherwise.  Anything
        ``float()`` accepts counts as numeric, including surrounding
        whitespace and the spellings ``nan`` / ``inf``.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number ("abc", "").
        # TypeError: an object float() cannot take at all (None, a list).
        return False
    return True
| 15 | |
def _to_float(cell):
    """Return ``cell`` converted to float, or None when it is not numeric."""
    try:
        return float(cell)
    except (TypeError, ValueError):
        return None


def _column_totals(rows):
    """Sum the numeric cells of every column over all rows."""
    totals = []
    for cells in rows:
        # Rows may be wider than the ones seen so far; grow on demand
        # instead of trusting the width of the first row.
        missing = len(cells) - len(totals)
        if missing > 0:
            totals.extend([0] * missing)
        for index, cell in enumerate(cells):
            value = _to_float(cell)
            if value is not None:
                totals[index] += value
    return totals


def _write_normalized_row(output_file, cells, totals, scale):
    """Write one tab-separated line, dividing numeric cells by their total."""
    rendered = []
    for cell, total in zip(cells, totals):
        value = _to_float(cell)
        if value is None:
            # Labels, headers and empty cells are copied through untouched.
            rendered.append(cell)
        elif total == 0:
            # A zero total cannot be divided by; emit 0.0 rather than
            # aborting with ZeroDivisionError and a half-written file.
            rendered.append(str(0.0))
        else:
            rendered.append(str(scale * value / total))
    output_file.write('\t'.join(rendered) + '\n')


def normalize_dataset(args):
    """Normalize the numeric cells of a tab-separated file by row or column.

    The whole input is read first, then the output file is written.  Cells
    that cannot be parsed as a float are copied unchanged.  Every numeric
    cell is divided by the sum of the numeric cells of its row or column,
    and multiplied by 100 when percentages are requested.  Cells whose row
    or column sums to zero are written as ``0.0``.

    Args:
        args: Object exposing the attributes
            ``input_file`` (path of the tab-separated input),
            ``output_file`` (path to write),
            ``normalization`` (``'row'`` or ``'column'``) and
            ``format`` (``'percentage'``; any other value yields
            proportions).

    Any other ``normalization`` value leaves the output file empty.
    """
    with open(args.input_file, 'r') as input_file:
        # Strip only the line terminator: slicing off the last character
        # would eat real data when the final line has no trailing newline.
        rows = [line.rstrip('\r\n').split('\t') for line in input_file]

    scale = 100 if args.format == 'percentage' else 1

    with open(args.output_file, 'w') as output_file:
        if args.normalization == 'row':
            for cells in rows:
                parsed = [_to_float(cell) for cell in cells]
                row_total = sum(v for v in parsed if v is not None)
                _write_normalized_row(
                    output_file, cells, [row_total] * len(cells), scale)
        elif args.normalization == 'column':
            # An empty input simply yields no totals and no output lines.
            totals = _column_totals(rows)
            for cells in rows:
                _write_normalized_row(output_file, cells, totals, scale)
| 65 | |
if __name__ == '__main__':
    # Script entry point: all four options are mandatory; the parsed
    # namespace is handed straight to normalize_dataset().
    cli = argparse.ArgumentParser()
    for path_option in ('--input_file', '--output_file'):
        cli.add_argument(path_option, required=True)
    cli.add_argument(
        '--normalization', choices=['column', 'row'], required=True)
    cli.add_argument(
        '--format', choices=['proportion', 'percentage'], required=True)
    normalize_dataset(cli.parse_args())
