comparison list-file-cols.py @ 5:fb9c0409d85c draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author prog
date Wed, 19 Apr 2017 10:00:05 -0400
parents
children
comparison
equal deleted inserted replaced
4:b34c14151f25 5:fb9c0409d85c
1 #!/usr/bin/env python
2 # vi: fdm=marker
3
4 import csv
5 import re
6 import argparse
7
8 # Get file cols {{{1
9 ################################################################
10
def get_file_cols(file, preferred):
    """Build the list of selectable column names from a TAB-separated file.

    Only the first row (the header) of the file is read.

    Parameters:
        file: Either a path given as ``str``, or an object exposing a
            ``get_file_name()`` method that returns the path
            (presumably a Galaxy dataset object -- confirm with the caller).
        preferred: Comma separated list of preferred column names. Each
            name is first compared for equality with the header names,
            then used as a regular expression anchored at the start of the
            header name (``re.match``). Empty names are ignored.

    Returns:
        A list of ``(name, value, selected)`` tuples where ``name`` and
        ``value`` are both the column name. Perfect matches come first,
        then partial matches, then the remaining header columns, and
        finally the special entry ``'NA'``. Each column is listed once.
        The first entry is selected when at least one preferred name
        matched, otherwise ``'NA'`` is selected.
    """

    path = file if isinstance(file, str) else file.get_file_name()

    with open(path, 'r') as f:

        # Read file header. The builtin next() works with both Python 2
        # and Python 3 (reader.next() only exists in Python 2), and the
        # default value avoids a StopIteration on an empty file.
        reader = csv.reader(f, delimiter="\t", quotechar='"')
        header = next(reader, [])

    # An empty pattern would match every column with re.match, so skip
    # empty names (empty string or stray commas in the list).
    names = [p for p in preferred.split(',') if p]

    # Collect matches, in order of preference
    perfect_matches = []
    partial_matches = []
    for p in names:
        for c in header:
            if c == p:
                perfect_matches.append(c)  # Perfect match !
            elif re.match(p, c):
                # Keep this partial match in case we find no perfect match
                partial_matches.append(c)

    has_match = bool(perfect_matches or partial_matches)

    # Order columns: perfect matches, partial matches, then the rest of the
    # header. A column may match several preferred names, so remove
    # duplicates while keeping the first occurrence.
    ordered_cols = []
    seen = set()
    for c in perfect_matches + partial_matches + header:
        if c not in seen:
            seen.add(c)
            ordered_cols.append(c)
    ordered_cols.append('NA')

    # Default is the best match, or 'NA' (last entry) when nothing matched
    default = 0 if has_match else len(ordered_cols) - 1

    # Build list of cols
    return [(c, c, i == default) for i, c in enumerate(ordered_cols)]
48
49 # Main {{{1
50 ################################################################
51
if __name__ == '__main__':

    # Describe the command line interface: both options are mandatory
    cli = argparse.ArgumentParser(description='Script for getting column names in a csv file.')
    cli.add_argument(
        '-f',
        dest='file',
        required=True,
        help='CSV File (separator must be TAB)',
    )
    cli.add_argument(
        '-p',
        dest='preferred',
        required=True,
        help='List (comma separated values) of preferred column names for default one.',
    )
    options = cli.parse_args()

    # Print the list of (name, value, selected) tuples for the given file
    print(get_file_cols(file=options.file, preferred=options.preferred))