Mercurial > repos > prog > lcmsmatching
diff list-file-cols.py @ 5:fb9c0409d85c draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author | prog |
---|---|
date | Wed, 19 Apr 2017 10:00:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/list-file-cols.py Wed Apr 19 10:00:05 2017 -0400 @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# vi: fdm=marker + +import csv +import re +import argparse + +# Get file cols {{{1 +################################################################ + +def get_file_cols(file, preferred): + + cols = [] + + with open(file if isinstance(file, str) else file.get_file_name(), 'r') as f: + + # Read file header + reader = csv.reader(f, delimiter = "\t", quotechar='"') + header = reader.next() + + preferred = preferred.split(',') + + # Determine default value + perfect_matches = [] + partial_matches = [] + for p in preferred: + for c in header: + if c == p: + perfect_matches.append(c) # Perfect match ! + elif re.match(p, c): + partial_matches.append(c) # Keep this partial match in case we find no perfect match + + ordered_cols = perfect_matches + partial_matches + for c in header: + if not c in ordered_cols: + ordered_cols.append(c) + ordered_cols.append('NA') + + default = 0 + if len(perfect_matches) + len(partial_matches) == 0: + default = len(ordered_cols) - 1 + + # Build list of cols + for i, c in enumerate(ordered_cols): + cols.append( (c, c, i == default) ) + + return cols + +# Main {{{1 +################################################################ + +if __name__ == '__main__': + + # Parse command line arguments + parser = argparse.ArgumentParser(description='Script for getting column names in a csv file.') + parser.add_argument('-f', help = 'CSV File (separator must be TAB)', dest = 'file', required = True) + parser.add_argument('-p', help = 'List (comma separated values) of preferred column names for default one.', dest = 'preferred', required = True) + args = parser.parse_args() + args_dict = vars(args) + + print(get_file_cols(**args_dict))