annotate checkSnpEffCandidates.py @ 2:6bd8660f3a8f

Uploaded
author gregory-minevich
date Mon, 26 Mar 2012 19:37:13 -0400
parents a3873bb68495
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
1 #!/usr/bin/python
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
2
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
3 import sys
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
4 import optparse
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
5 import csv
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
6 import re
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
7
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
def main():
    """Parse the command line and mark candidate genes in a snpEff file.

    Options:
      -s/--snpeff_file     path to the snpEff file to read
      -c/--candidate_list  two column tabular list of candidate Gene ID, Type
      -o/--output          path of the file to write

    The candidate list is loaded with parse_candidate_list() and then
    handed, together with the input and output paths, to mark_snpeff_file().

    Exits through OptionParser.error() (usage message on stderr, status 2)
    when any of the three options is missing, rather than letting a None
    path reach open() and fail with a traceback.
    """
    parser = optparse.OptionParser()
    parser.add_option('-s', '--snpeff_file', dest = 'snpeff_file', action = 'store', type = 'string', default = None, help = "Path to the snpEff file")
    parser.add_option('-c', '--candidate_list', dest = 'candidate_list', action = 'store', type = 'string', default = None, help = "Two column tabular list of candidate Gene ID, Type")
    parser.add_option('-o', '--output', dest = 'output', action = 'store', type = 'string', default = None, help = "Output file name")
    (options, args) = parser.parse_args()

    # optparse has no notion of a required option, so check explicitly.
    for name in ('snpeff_file', 'candidate_list', 'output'):
        if not getattr(options, name):
            parser.error("missing required option --%s" % name)

    candidates = parse_candidate_list(candidate_list = options.candidate_list)
    mark_snpeff_file(snpeff_file = options.snpeff_file, output = options.output, candidates = candidates)
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
20
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
def skip_and_write_headers(writer = None, reader = None, i_file = None):
    """Copy the leading '#' header rows from reader to writer.

    writer -- csv writer that receives the header rows
    reader -- csv reader built on top of i_file
    i_file -- the seekable file object that reader iterates over

    The header is the run of rows at the start of the input whose first
    field starts with '#'. It ends at the first row that does not, at the
    first blank row, or at end of input.

    On return, i_file has been rewound and exactly the header rows have
    been re-read, so the next row produced by reader is the first row
    after the header.
    """
    # First pass: count the header rows. Iterating (rather than calling
    # reader.next()) ends cleanly on an empty or all-comment file, and the
    # "not row" test keeps a blank row from raising IndexError.
    comment = 0
    for row in reader:
        if not row or not row[0].startswith('#'):
            break
        comment += 1

    # The counting pass consumed one row too many (the first non-header
    # row), so rewind and replay only the header rows into the writer.
    i_file.seek(0)
    for _ in range(comment):
        writer.writerow(next(reader))
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
32
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
def parse_candidate_list(candidate_list = ""):
    """Load the candidate gene list into a dict.

    candidate_list -- path to a tab-delimited file whose first column is
                      used as the gene ID and whose second column is used
                      as the gene type

    Returns a dict mapping gene ID to gene type. When a gene ID occurs
    more than once, the last occurrence wins. Rows with fewer than two
    columns (for example a trailing blank line) are skipped.
    """
    # The csv module wants universal-newline text input. Python 2 spells
    # that 'rU'; Python 3 spells it newline='' ('U' was removed in 3.11).
    if sys.version_info[0] < 3:
        i_file = open(candidate_list, 'rU')
    else:
        i_file = open(candidate_list, 'r', newline = '')

    candidates = {}
    # "with" closes the file even if a row fails to parse.
    with i_file:
        for row in csv.reader(i_file, delimiter = '\t'):
            if len(row) < 2:
                continue
            candidates[row[0]] = row[1]

    return candidates
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
46
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
def mark_snpeff_file(snpeff_file = "", output = "", candidates = None):
    """Copy a snpEff file to output, appending a candidate-type column.

    snpeff_file -- path to the tab-delimited snpEff file to read
    output      -- path of the tab-delimited file to write
    candidates  -- dict mapping gene ID to gene type; None is treated as
                   an empty dict

    Leading '#' header rows are copied unchanged by
    skip_and_write_headers(). Every following non-blank row gets one extra
    trailing field: the gene type looked up from the row's tenth column
    (index 9), or an empty string when that value is not in candidates or
    the row has fewer than ten columns. Blank rows are copied through
    as-is.
    """
    if candidates is None:
        candidates = {}

    # The csv module needs different open modes per major version:
    # Python 2 reads 'rU' and writes 'wb'; Python 3 reads and writes text
    # with newline='' ('U' was removed in 3.11 and 'wb' rejects str).
    py2 = sys.version_info[0] < 3

    if py2:
        i_file = open(snpeff_file, 'rU')
    else:
        i_file = open(snpeff_file, 'r', newline = '')

    # Nested "with" blocks close both files even if a row fails part-way.
    with i_file:
        if py2:
            o_file = open(output, 'wb')
        else:
            o_file = open(output, 'w', newline = '')

        with o_file:
            reader = csv.reader(i_file, delimiter = '\t')
            writer = csv.writer(o_file, delimiter = '\t')

            skip_and_write_headers(writer = writer, reader = reader, i_file = i_file)

            for row in reader:
                if row:
                    # Short rows have no gene ID column; mark them empty
                    # instead of raising IndexError.
                    gene_id = row[9] if len(row) > 9 else None
                    row.append(candidates.get(gene_id, ''))
                writer.writerow(row)
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
68
a3873bb68495 Uploaded
gregory-minevich
parents:
diff changeset
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()