annotate dgidb_annotator.py @ 2:792f3cb0eff4 draft

Uploaded
author devteam
date Tue, 25 Feb 2014 14:19:17 -0500
parents 8cc7cf4bd833
children c5bb987015c5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8c6dc9da6c89 Uploaded
devteam
parents:
diff changeset
1 '''
8c6dc9da6c89 Uploaded
devteam
parents:
diff changeset
2 Annotates a tabular file with information from the Drug-Gene Interaction (DGI) database.
8c6dc9da6c89 Uploaded
devteam
parents:
diff changeset
3 '''
8c6dc9da6c89 Uploaded
devteam
parents:
diff changeset
4
1
8cc7cf4bd833 Uploaded
devteam
parents: 0
diff changeset
5 import optparse, json, urllib2, sys, re
0
8c6dc9da6c89 Uploaded
devteam
parents:
diff changeset
6
8c6dc9da6c89 Uploaded
devteam
parents:
diff changeset
def _extract_gene_name(line, gene_name_col):
    '''
    Return the gene name held in column gene_name_col (0-based) of a
    tab-separated line, or '' when the line has no such column.

    Some annotations may be of the form
    <gene_name>(<splicing_info>) or <gene_name>;<gene_name>(splicing_info);
    only the first gene name, without the parenthesised part, is returned.
    '''
    try:
        entry = line.split('\t')[gene_name_col]
    except IndexError:
        # Short or blank line: there is no gene to look up.
        return ''
    return entry.strip().split(';')[0].split('(')[0]


def _format_interactions(result):
    '''
    Turn one entry of the response's 'matchedTerms' list into a list of
    tab-joined annotation strings, one string per interaction.

    Each string holds: gene name, gene long name, comma-joined gene
    categories, interaction type, drug name and source.
    '''
    gene_fields = [
        result['geneName'],
        result['geneLongName'],
        ','.join(result['geneCategories']),
    ]
    return [
        '\t'.join(gene_fields + [
            interaction['interactionType'],
            interaction['drugName'],
            interaction['source'],
        ])
        for interaction in result['interactions']
    ]


def __main__():
    '''
    Annotate a tabular file with drug-gene interactions from DGIdb.

    Reads tab-separated lines from the file given as the first positional
    argument (or from stdin when none is given), queries the DGIdb
    interactions service once for all distinct gene names found in the
    column selected with -g/--gene-name-col (1-based), and prints each input
    line once per interaction with the interaction fields appended.  Lines
    without any interaction are printed unchanged only when -a/--print-all
    is given.

    Exits through parser.error() (status 2) when the gene name column is
    missing or is not an integer.
    '''
    # -- Parse command line. --
    parser = optparse.OptionParser()
    parser.add_option('-g', '--gene-name-col', dest='gene_name_col', help='column of gene names')
    parser.add_option('-a', '--print-all', dest='print_all', action='store_true', help='print all lines, even though without a result')
    parser.add_option('-e', '--expert-curated', dest='expert_curated', action='store_true', help='use only expert curated results')
    (options, args) = parser.parse_args()
    if options.gene_name_col is None:
        parser.error('the gene name column (-g/--gene-name-col) is required')
    try:
        # Columns are 1-based on the command line, 0-based internally.
        gene_name_col = int(options.gene_name_col) - 1
    except ValueError:
        parser.error('the gene name column must be an integer')

    # -- Read input. --
    input_file = open(args[0], 'r') if args else sys.stdin
    try:
        # Drop only the line terminator: stripping tabs as well would shift
        # the columns of lines that start or end with an empty field.
        lines = [line.rstrip('\r\n') for line in input_file]
    finally:
        if input_file is not sys.stdin:
            input_file.close()

    # The same cleaned-up gene name is used both to build the query and to
    # look up the results, so annotated names such as GENE(splicing) match.
    genes = [_extract_gene_name(line, gene_name_col) for line in lines]
    query_genes = sorted(set(gene for gene in genes if gene))

    # -- Make connection and get results. --
    # Processed results live in their own dict (search term -> list of
    # annotation strings) instead of being mixed into the raw response.
    annotations = {}
    if query_genes:
        query_str = 'http://dgidb.genome.wustl.edu/api/v1/interactions.json?genes=%s' % ','.join(query_genes)
        if options.expert_curated:
            query_str += '&source_trust_levels=Expert%20curated'
        response = urllib2.urlopen(query_str)
        try:
            results = response.read()
        finally:
            response.close()

        for result in json.loads(results)['matchedTerms']:
            annotations[result['searchTerm']] = _format_interactions(result)

    # -- Annotate input file and produce output. --
    for line, gene in zip(lines, genes):
        gene_annotations = annotations.get(gene)
        if gene_annotations:
            for annotation in gene_annotations:
                print(line + '\t' + annotation)
        elif options.print_all:
            # Also covers matched genes that came back with no interactions.
            print(line)
8c6dc9da6c89 Uploaded
devteam
parents:
diff changeset
66
1
8cc7cf4bd833 Uploaded
devteam
parents: 0
diff changeset
# Run the annotator only when this file is executed as a script.
if __name__ == "__main__":
    __main__()