comparison dgidb_annotator.py @ 1:8cc7cf4bd833 draft

Uploaded
author devteam
date Tue, 25 Feb 2014 14:16:43 -0500
parents 8c6dc9da6c89
children 792f3cb0eff4
comparison
equal deleted inserted replaced
0:8c6dc9da6c89 1:8cc7cf4bd833
1 ''' 1 '''
2 Annotates a tabular file with information from the Drug-Gene Interaction (DGI) database. 2 Annotates a tabular file with information from the Drug-Gene Interaction (DGI) database.
3 ''' 3 '''
4 4
5 import optparse, json, urllib2, sys 5 import optparse, json, urllib2, sys, re
6 6
7 def __main__(): 7 def __main__():
8 # -- Parse command line. -- 8 # -- Parse command line. --
9 parser = optparse.OptionParser() 9 parser = optparse.OptionParser()
10 parser.add_option('-g', '--gene-name-col', dest='gene_name_col', help='column of gene names') 10 parser.add_option('-g', '--gene-name-col', dest='gene_name_col', help='column of gene names')
23 23
24 # Get gene list. 24 # Get gene list.
25 gene_list = [] 25 gene_list = []
26 lines = [] 26 lines = []
27 for line in input_file: 27 for line in input_file:
28 gene_list.append( line.split('\t')[gene_name_col].strip() ) 28 entry = line.split('\t')[gene_name_col].strip()
29 # Some annotations may be of the form
30 # <gene_name>(<splicing_info>) or <gene_name>;<gene_name>(<splicing_info>)
31 gene_list.append(entry.split(';')[0].split('(')[0])
29 lines.append(line.strip()) 32 lines.append(line.strip())
30 33
31 # Query for results. 34 # Query for results.
32 query_str = 'http://dgidb.genome.wustl.edu/api/v1/interactions.json?genes=%s' % ','.join(set(gene_list)) 35 query_str = 'http://dgidb.genome.wustl.edu/api/v1/interactions.json?genes=%s' % ','.join(set(gene_list))
33 if options.expert_curated: 36 if options.expert_curated:
34 query_str += '&source_trust_levels=Expert%20curated' 37 query_str += '&source_trust_levels=Expert%20curated'
38 print query_str
35 results = urllib2.urlopen(query_str).read() 39 results = urllib2.urlopen(query_str).read()
36 results_dict = json.loads(results) 40 results_dict = json.loads(results)
37 41
38 # Process results. 42 # Process results.
39 matched_results = results_dict['matchedTerms'] 43 matched_results = results_dict['matchedTerms']