annotate replace_chromosome_names.py @ 0:97c11d04cd4c draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
author earlhaminst
date Thu, 18 May 2017 14:17:48 -0400
parents
children 6c0373cc070f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
1 #!/usr/bin/env python
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
2 from __future__ import print_function
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
3
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
4 import argparse
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
5 import sys
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
6
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
7
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
def _read_mapping(mapping_file):
    """Build the old-name -> new-name dictionary from the mapping table.

    Each line of ``mapping_file`` must contain at least 2 tab-separated
    columns; the first is the name to replace, the second its replacement.
    Later lines override earlier ones for the same first column.

    Raises Exception if a line has fewer than 2 tab-separated columns.
    """
    map_dict = dict()
    for line in mapping_file:
        line = line.rstrip('\r\n')
        line_cols = line.split('\t')
        if len(line_cols) < 2:
            raise Exception("Line '%s' in mapping table file does not contain 2 tab-separated columns" % line)
        map_dict[line_cols[0]] = line_cols[1]
    return map_dict


def _parse_column_indexes(cols):
    """Convert a comma-separated list of 1-based column numbers to 0-based indexes.

    Raises ValueError if an item is not an integer or is smaller than 1.
    """
    indexes = []
    for item in cols.split(','):
        index = int(item) - 1
        if index < 0:
            # A negative index would silently address a column counted from
            # the end of the line instead of reporting the mistake.
            raise ValueError("Invalid column index '%s': column indexes start from 1" % item.strip())
        indexes.append(index)
    return indexes


def main(argv=None):
    """Replace chromosome names in a tabular file using a mapping table.

    Reads the tab-separated INPUT file line by line. Lines starting with
    ``--comment-char`` (when given) are copied unchanged; for every other
    line, the value in each column listed in ``--cols`` is replaced by its
    counterpart in the mapping table, or left as is when the table has no
    entry for it. The result is written to ``-o`` (standard output by
    default), one line per input line, terminated by a newline.

    :param argv: list of command line arguments to parse; when None (the
        default) the arguments are taken from ``sys.argv``.
    :raises Exception: if a mapping table line has fewer than 2 columns.
    :raises ValueError: if ``--cols`` contains a non-integer or an index
        smaller than 1.
    :raises IndexError: if a non-comment input line has fewer columns than
        one of the requested column indexes.
    """
    parser = argparse.ArgumentParser(description='Replace chromosome names in a tabular (e.g. VCF) file using a mapping table.')
    parser.add_argument('--cols', required=True, help='comma-separated list of column indexes (starting from 1) on which to perform the replacement')
    parser.add_argument('-m', dest='mapping_file', type=argparse.FileType(), required=True, help='mapping table file. Must contain 2 tab-separated columns')
    parser.add_argument('--comment-char', help='lines starting with this character will be directly printed to the output file')
    parser.add_argument('-o', dest='output', type=argparse.FileType('w'), default=sys.stdout, help='output file. If not specified, writes on standard output')
    parser.add_argument('input', metavar='INPUT', type=argparse.FileType(), help='tabular input file')
    args = parser.parse_args(argv)

    try:
        map_dict = _read_mapping(args.mapping_file)
        cols_to_map = _parse_column_indexes(args.cols)

        for line_number, line in enumerate(args.input, 1):
            line = line.rstrip('\r\n')
            if args.comment_char and line.startswith(args.comment_char):
                print(line, file=args.output)
                continue
            line_cols = line.split('\t')
            for col_to_map in cols_to_map:
                if col_to_map >= len(line_cols):
                    # Same exception type as a plain out-of-range access,
                    # but pointing at the offending line.
                    raise IndexError("Line %d of input file has only %d column(s), cannot replace column %d" % (line_number, len(line_cols), col_to_map + 1))
                old_value = line_cols[col_to_map]
                line_cols[col_to_map] = map_dict.get(old_value, old_value)
            print('\t'.join(line_cols), file=args.output)
    finally:
        # argparse.FileType opens the files eagerly and never closes them.
        # The standard streams are not ours to close.
        for handle in (args.mapping_file, args.input, args.output):
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
38
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
39
97c11d04cd4c planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
earlhaminst
parents:
diff changeset
# Run the replacement only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()