Mercurial > repos > gregory-minevich > check_snpeff_candidates
comparison checkSnpEffCandidates.py @ 2:6bd8660f3a8f
Uploaded
author | gregory-minevich |
---|---|
date | Mon, 26 Mar 2012 19:37:13 -0400 |
parents | a3873bb68495 |
children |
comparison
equal
deleted
inserted
replaced
1:bc7cc93ef659 | 2:6bd8660f3a8f |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import sys | |
4 import optparse | |
5 import csv | |
6 import re | |
7 | |
def main(argv=None):
    """Parse the command line and annotate a snpEff file with candidate types.

    Args:
        argv: Optional list of argument strings to parse. When None,
            optparse falls back to ``sys.argv[1:]``, so a plain ``main()``
            call behaves as before.

    Exits with status 2 and a usage message (through ``parser.error``) when
    any of the three required options is missing, rather than failing later
    with an opaque error from ``open(None)``.
    """
    parser = optparse.OptionParser()
    parser.add_option('-s', '--snpeff_file', dest='snpeff_file',
                      action='store', type='string', default=None,
                      help="Path to the snpEff file")
    parser.add_option('-c', '--candidate_list', dest='candidate_list',
                      action='store', type='string', default=None,
                      help="Two column tabular list of candidate Gene ID, Type")
    parser.add_option('-o', '--output', dest='output',
                      action='store', type='string', default=None,
                      help="Output file name")
    options, _ = parser.parse_args(argv)

    # All three options are effectively mandatory: each one is a file path
    # that is opened unconditionally further down.
    required = (
        ('--snpeff_file', options.snpeff_file),
        ('--candidate_list', options.candidate_list),
        ('--output', options.output),
    )
    missing = [flag for flag, value in required if not value]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))

    candidates = parse_candidate_list(candidate_list=options.candidate_list)
    mark_snpeff_file(snpeff_file=options.snpeff_file, output=options.output,
                     candidates=candidates)
20 | |
def skip_and_write_headers(writer=None, reader=None, i_file=None):
    """Copy the leading '#' header rows to ``writer`` and skip past them.

    Args:
        writer: csv writer that receives the header rows unchanged.
        reader: csv reader built on top of ``i_file``.
        i_file: The seekable file object ``reader`` iterates over.

    On return ``reader`` is positioned at the first row that is not a
    header, so the caller can keep iterating over the data rows.
    """
    # Count the leading comment rows. A for loop (instead of repeated
    # reader.next() calls) ends cleanly when the file is empty or consists
    # of comments only, and the emptiness check keeps a blank line from
    # raising IndexError on row[0].
    comment = 0
    for row in reader:
        if not row or not row[0].startswith('#'):
            break
        comment += 1

    # Counting consumed the first data row as well, so rewind the underlying
    # file and replay exactly the header rows through the writer.
    i_file.seek(0)
    for _ in range(comment):
        writer.writerow(next(reader))
32 | |
def parse_candidate_list(candidate_list=""):
    """Read a two-column, tab-separated candidate file into a dict.

    Args:
        candidate_list: Path to the tabular file; column 0 is the gene ID and
            column 1 is the gene type.

    Returns:
        dict mapping gene ID to gene type. When a gene ID appears more than
        once, the last occurrence wins.

    Raises:
        IndexError: If a non-blank row has fewer than two columns.
    """
    # 'rU' is what Python 2 needs for universal newlines, but open() rejects
    # the 'U' flag from Python 3.11 on, where text mode already translates
    # newlines by default.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'

    candidates = {}
    # The with block closes the file even if a malformed row raises.
    with open(candidate_list, mode) as i_file:
        for row in csv.reader(i_file, delimiter='\t'):
            if not row:
                # Blank lines (e.g. a trailing newline) carry no candidate.
                continue
            candidates[row[0]] = row[1]

    return candidates
46 | |
def mark_snpeff_file(snpeff_file="", output="", candidates=None):
    """Write a copy of a snpEff file with a candidate-type column appended.

    Header rows (those starting with '#') are copied unchanged by
    skip_and_write_headers. Every following row gets one extra trailing
    field: the type stored in ``candidates`` for the gene ID found in
    column 9 (0-based), or an empty string when that gene is not a candidate.

    Args:
        snpeff_file: Path to the tab-separated snpEff input file.
        output: Path of the tab-separated file to create.
        candidates: dict mapping gene ID to gene type; None is treated as
            an empty mapping.
    """
    gene_id_column = 9

    if candidates is None:
        candidates = {}

    # The csv module wants binary output on Python 2 but a text file opened
    # with newline='' on Python 3; the 'U' read flag is rejected by open()
    # from Python 3.11 on.
    if sys.version_info[0] < 3:
        in_mode, out_mode, out_kwargs = 'rU', 'wb', {}
    else:
        in_mode, out_mode, out_kwargs = 'r', 'w', {'newline': ''}

    # with blocks guarantee both files are closed if a row fails to process.
    with open(snpeff_file, in_mode) as i_file:
        with open(output, out_mode, **out_kwargs) as o_file:
            reader = csv.reader(i_file, delimiter='\t')
            writer = csv.writer(o_file, delimiter='\t')

            skip_and_write_headers(writer=writer, reader=reader, i_file=i_file)

            for row in reader:
                if row:
                    # Rows too short to hold a gene ID cannot match a
                    # candidate; they get the empty marker instead of
                    # raising IndexError.
                    gene_id = row[gene_id_column] if len(row) > gene_id_column else None
                    row.append(candidates.get(gene_id, ''))
                # Blank rows are passed through untouched.
                writer.writerow(row)
68 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()