Mercurial > repos > bebatut > normalize_dataset
view normalize_dataset.py @ 0:72633301cc0d draft default tip
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 21b25425f77162c0edae4dd87b3a9e33608c5a95-dirty
author | bebatut |
---|---|
date | Fri, 15 Apr 2016 08:42:40 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Normalize the numeric cells of a tab-separated table.

Every cell that parses as a float is divided by the sum of the numeric cells
of its row (``--normalization row``) or of its column
(``--normalization column``).  Cells that do not parse as floats (labels,
headers, empty cells) are copied to the output unchanged.
"""

import sys
import os
import argparse
import re


def isfloat(value):
    """Return True when ``float(value)`` succeeds, False on ValueError."""
    try:
        float(value)
    except ValueError:
        return False
    return True


def _parse_row(line):
    """Split one raw input line into a list of ``(text, number)`` pairs.

    ``text`` is the cell exactly as read; ``number`` is its float value, or
    None when the cell is not numeric.
    """
    # Strip only the line terminator.  Slicing with [:-1] would eat the last
    # real character of a final line that has no trailing newline.
    cells = line.rstrip('\n').split('\t')
    return [(cell, float(cell) if isfloat(cell) else None) for cell in cells]


def _row_sum(row):
    """Return the sum of the numeric cells of one parsed row."""
    # Plain left-to-right accumulation: keeps the printed digits identical
    # across Python versions (sum() on floats is compensated since 3.12).
    total = 0
    for _, number in row:
        if number is not None:
            total += number
    return total


def _column_sums(rows):
    """Return the per-column sums of the numeric cells of all parsed rows."""
    totals = []
    for row in rows:
        # Grow on demand so ragged tables (and empty files) do not raise
        # IndexError; the width is not assumed from the first line.
        missing = len(row) - len(totals)
        if missing > 0:
            totals.extend([0] * missing)
        for index, (_, number) in enumerate(row):
            if number is not None:
                totals[index] += number
    return totals


def _scale(number, total, as_percentage):
    """Return *number* as a proportion (or percentage) of *total*.

    A zero total has no defined share; 0.0 is returned instead of raising
    ZeroDivisionError, so an all-zero row or column does not abort the run
    and leave a truncated output file.
    """
    if total == 0:
        return 0.0
    if as_percentage:
        return 100 * number / total
    return number / total


def _format_row(row, totals, as_percentage):
    """Return the output line for *row*; ``totals[i]`` divides cell ``i``.

    *totals* may be longer than *row*; the extra entries are ignored.
    """
    fields = []
    for (text, number), total in zip(row, totals):
        if number is None:
            fields.append(text)
        else:
            fields.append(str(_scale(number, total, as_percentage)))
    return '\t'.join(fields) + '\n'


def normalize_dataset(args):
    """Read ``args.input_file``, normalize it and write ``args.output_file``.

    Parameters (attributes of *args*, as produced by the argparse parser
    defined at the bottom of this script):
        input_file: path of the tab-separated table to read.
        output_file: path of the normalized table to write.
        normalization: 'row' or 'column' -- which sum each numeric cell is
            divided by.  Any other value produces an empty output file.
        format: 'percentage' multiplies every proportion by 100; any other
            value writes plain proportions.

    Numeric cells whose row/column sum is zero are written as ``0.0``.
    """
    as_percentage = args.format == 'percentage'

    with open(args.input_file, 'r') as input_file:
        rows = [_parse_row(line) for line in input_file]

    with open(args.output_file, 'w') as output_file:
        if args.normalization == 'row':
            for row in rows:
                totals = [_row_sum(row)] * len(row)
                output_file.write(_format_row(row, totals, as_percentage))
        elif args.normalization == 'column':
            # Column sums need the whole table before any line is written.
            totals = _column_sums(rows)
            for row in rows:
                output_file.write(_format_row(row, totals, as_percentage))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', required=True)
    parser.add_argument('--output_file', required=True)
    parser.add_argument('--normalization', required=True,
                        choices=['column', 'row'])
    parser.add_argument('--format', required=True,
                        choices=['proportion', 'percentage'])
    args = parser.parse_args()

    normalize_dataset(args)