Mercurial > repos > earlhaminst > replace_chromosome_names
annotate replace_chromosome_names.py @ 1:6c0373cc070f draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit c8418d1d7657dacae7f81fe4aa0aba051873d4ad
author | earlhaminst |
---|---|
date | Tue, 13 Jun 2017 14:43:07 -0400 |
parents | 97c11d04cd4c |
children |
rev | line source |
---|---|
0
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
1 #!/usr/bin/env python |
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
2 from __future__ import print_function |
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
3 |
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
4 import argparse |
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
5 import sys |
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
6 |
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
7 |
97c11d04cd4c
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff
changeset
|
def _parse_cols(cols_spec):
    """Convert a comma-separated list of 1-based column numbers to 0-based indexes.

    Raises ValueError if an entry is not an integer or is smaller than 1.
    """
    indexes = []
    for token in cols_spec.split(','):
        col = int(token)
        if col < 1:
            # 0 would otherwise become index -1 and silently address the
            # last column of every line.
            raise ValueError("column indexes start from 1, got '%s'" % token.strip())
        indexes.append(col - 1)
    return indexes


def _load_mapping(mapping_file):
    """Read the tab-separated mapping table into a dict (old name -> new name).

    Only the first two columns of each line are used. Raises Exception if a
    line has fewer than 2 tab-separated columns.
    """
    map_dict = dict()
    for line in mapping_file:
        line = line.rstrip('\r\n')
        line_cols = line.split('\t')
        if len(line_cols) < 2:
            raise Exception("Line '%s' in mapping table file does not contain 2 tab-separated columns" % line)
        map_dict[line_cols[0]] = line_cols[1]
    return map_dict


def main():
    """Replace chromosome names in selected columns of a tabular file.

    Command-line arguments are read from sys.argv:
      --cols          comma-separated 1-based column numbers to rewrite
      -m              mapping table (old name <TAB> new name)
      --comment-char  lines starting with this prefix are copied unchanged
      -o              output file (defaults to standard output)
      INPUT           tabular input file

    A line is written to the output only if every selected column has a
    non-empty replacement in the mapping table. Lines with an unmapped value,
    or with fewer columns than requested, are reported on standard error and
    skipped.
    """
    parser = argparse.ArgumentParser(description='Replace chromosome names in a tabular (e.g. VCF) file using a mapping table.')
    parser.add_argument('--cols', required=True, help='comma-separated list of column indexes (starting from 1) on which to perform the replacement')
    parser.add_argument('-m', dest='mapping_file', type=argparse.FileType(), required=True, help='mapping table file. Must contain 2 tab-separated columns')
    parser.add_argument('--comment-char', help='lines starting with this character will be directly printed to the output file')
    parser.add_argument('-o', dest='output', type=argparse.FileType('w'), default=sys.stdout, help='output file. If not specified, writes on standard output')
    parser.add_argument('input', metavar='INPUT', type=argparse.FileType(), help='tabular input file')
    args = parser.parse_args()

    try:
        try:
            cols_to_map = _parse_cols(args.cols)
        except ValueError as e:
            # Report bad --cols values as a usage error instead of a traceback.
            parser.error('invalid --cols value: %s' % e)
        min_cols = max(cols_to_map) + 1

        map_dict = _load_mapping(args.mapping_file)

        for line in args.input:
            line = line.rstrip('\r\n')
            if args.comment_char and line.startswith(args.comment_char):
                print(line, file=args.output)
                continue

            line_cols = line.split('\t')
            if len(line_cols) < min_cols:
                # Blank or truncated lines cannot be indexed at every
                # requested column; skip them like unmapped lines.
                print('Line has fewer than %d columns, skipping line: %s' % (min_cols, line), file=sys.stderr)
                continue

            for col_to_map in cols_to_map:
                old_value = line_cols[col_to_map]
                new_value = map_dict.get(old_value, '')
                if not new_value:
                    print('%s has no valid mapping, skipping line: %s' % (old_value, line), file=sys.stderr)
                    break
                line_cols[col_to_map] = new_value
            else:
                # Reached only when no column triggered the break above.
                mapped_line = '\t'.join(line_cols)
                print(mapped_line, file=args.output)
    finally:
        # argparse.FileType opens the handles eagerly; release them here but
        # leave the process-wide standard streams open.
        for handle in (args.mapping_file, args.input, args.output):
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()


if __name__ == "__main__":
    main()