Mercurial > repos > prog > lcmsmatching
annotate list-file-cols.py @ 5:fb9c0409d85c draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author | prog |
---|---|
date | Wed, 19 Apr 2017 10:00:05 -0400 |
parents | |
children |
rev | line source |
---|---|
5
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
1 #!/usr/bin/env python |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
2 # vi: fdm=marker |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
3 |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
4 import csv |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
5 import re |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
6 import argparse |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
7 |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
8 # Get file cols {{{1 |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
9 ################################################################ |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
10 |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
def get_file_cols(file, preferred):
    """List the column names of a TAB-separated file, preferred ones first.

    Only the first row (the header) of the file is read.

    Parameters:
        file: either a path (``str``) or an object exposing a
            ``get_file_name()`` method that returns the path.
        preferred: comma-separated list of preferred column names. Each name
            is first compared for equality with the header columns, then
            tried as a regular expression anchored at the start of the
            column name (``re.match``).

    Returns:
        A list of ``(name, name, is_default)`` tuples, in this order: exact
        matches, regex matches, remaining header columns, and a final
        ``'NA'`` entry. Each column name appears only once. ``is_default`` is
        True for exactly one entry: the first one when at least one preferred
        name matched, the trailing ``'NA'`` otherwise.
        NOTE(review): the tuple layout looks like a Galaxy dynamic-options
        list (label, value, selected) -- confirm against the tool XML.
    """

    path = file if isinstance(file, str) else file.get_file_name()

    # Read file header
    with open(path, 'r') as f:
        reader = csv.reader(f, delimiter="\t", quotechar='"')
        # The builtin next() works on both Python 2.6+ and Python 3, whereas
        # reader.next() only exists on Python 2. The [] default makes an
        # empty file behave like a file without any column.
        header = next(reader, [])

    # Determine which columns match the preferred names
    perfect_matches = []
    partial_matches = []
    for p in preferred.split(','):
        for c in header:
            if c == p:
                perfect_matches.append(c)  # Perfect match !
            elif re.match(p, c):
                # Keep this partial match in case we find no perfect match
                partial_matches.append(c)

    # Order columns (perfect matches, partial matches, then the rest),
    # keeping only the first occurrence of each name: a column can match
    # several preferred names and must not be listed more than once.
    ordered_cols = []
    for c in perfect_matches + partial_matches + header:
        if c not in ordered_cols:
            ordered_cols.append(c)
    ordered_cols.append('NA')

    # Default is the best match if there is one, the trailing 'NA' otherwise
    if perfect_matches or partial_matches:
        default = 0
    else:
        default = len(ordered_cols) - 1

    # Build list of cols
    return [(c, c, i == default) for i, c in enumerate(ordered_cols)]
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
48 |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
49 # Main {{{1 |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
50 ################################################################ |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
51 |
fb9c0409d85c
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
prog
parents:
diff
changeset
|
if __name__ == '__main__':

    # Command-line entry point: both options are mandatory and are passed
    # on to get_file_cols() under their 'file' and 'preferred' names.
    cli = argparse.ArgumentParser(description='Script for getting column names in a csv file.')
    cli.add_argument('-f', dest='file', required=True,
                     help='CSV File (separator must be TAB)')
    cli.add_argument('-p', dest='preferred', required=True,
                     help='List (comma separated values) of preferred column names for default one.')
    options = cli.parse_args()

    # Print the resulting list of columns on standard output
    print(get_file_cols(**vars(options)))