Mercurial > repos > prog > lcmsmatching
comparison list-file-cols.py @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author | prog |
---|---|
date | Fri, 22 Feb 2019 16:04:22 -0500 |
parents | fb9c0409d85c |
children |
comparison
equal
deleted
inserted
replaced
5:fb9c0409d85c | 6:f86fec07f392 |
---|---|
1 #!/usr/bin/env python | |
2 # vi: fdm=marker | |
3 | |
4 import csv | |
5 import re | |
6 import argparse | |
7 | |
8 # Get file cols {{{1 | |
9 ################################################################ | |
10 | |
def get_file_cols(file, preferred):
    """List the columns of a TAB-separated file, preferred columns first.

    Only the first row of the file (the header) is read.

    Parameters:
        file: Path of the file as a string, or any object exposing a
            ``get_file_name()`` method that returns the path
            (presumably a Galaxy dataset object; confirm against caller).
        preferred: Comma-separated list of preferred column names. Each
            item is first compared for equality with the column names and
            otherwise used as a regular expression anchored at the start
            of the column name (``re.match``).

    Returns:
        A list of ``(c, c, is_default)`` tuples, one per distinct column
        name plus a trailing ``'NA'`` entry. Columns equal to a preferred
        name come first, then columns matching a preferred pattern, then
        the remaining columns in header order. Exactly one entry has
        ``is_default`` set to True: the first entry when at least one
        column matched, otherwise the ``'NA'`` entry.
    """

    path = file if isinstance(file, str) else file.get_file_name()

    with open(path, 'r') as f:

        # Read file header. The builtin next() works on both Python 2 and
        # Python 3 (reader.next() only exists on Python 2); the default
        # value makes an empty file yield an empty header instead of
        # raising StopIteration.
        reader = csv.reader(f, delimiter="\t", quotechar='"')
        header = next(reader, [])

    patterns = preferred.split(',')

    # Look for preferred columns
    perfect_matches = []
    partial_matches = []
    for p in patterns:
        for c in header:
            if c == p:
                perfect_matches.append(c)  # Perfect match !
            elif re.match(p, c):
                partial_matches.append(c)  # Keep this partial match in case we find no perfect match

    # Order columns: perfect matches, partial matches, then the others.
    # A column may match several preferred patterns (or appear several
    # times in the header), so each name is kept only once.
    ordered_cols = []
    for c in perfect_matches + partial_matches + header:
        if c not in ordered_cols:
            ordered_cols.append(c)
    ordered_cols.append('NA')

    # Determine default value: best match if any, otherwise 'NA'
    if perfect_matches or partial_matches:
        default = 0
    else:
        default = len(ordered_cols) - 1

    # Build list of cols
    return [(c, c, i == default) for i, c in enumerate(ordered_cols)]
48 | |
49 # Main {{{1 | |
50 ################################################################ | |
51 | |
if __name__ == '__main__':

    # Command line interface: both options are mandatory
    cli = argparse.ArgumentParser(description='Script for getting column names in a csv file.')
    cli.add_argument('-f', dest='file', required=True,
                     help='CSV File (separator must be TAB)')
    cli.add_argument('-p', dest='preferred', required=True,
                     help='List (comma separated values) of preferred column names for default one.')
    options = cli.parse_args()

    # Option destinations are named after the parameters of get_file_cols()
    print(get_file_cols(file=options.file, preferred=options.preferred))