comparison cravat_annotate/cravat_annotate.py @ 20:c5b3d80c43e6 draft

Uploaded
author in_silico
date Tue, 12 Jun 2018 14:06:22 -0400
parents dd9181024296
children
comparison
equal deleted inserted replaced
19:ec55c91879e8 20:c5b3d80c43e6
1 """
2 A galaxy wrapper for the /rest/service/query API endpoint on Cravat.
3 """
4
5
import argparse
import json
import os
import re
import sys

import requests
11
12
# URL of the Cravat REST service that every variant query is sent to.
endpoint = "http://cravat.us/CRAVAT/rest/service/query"


# Separator placed between the values of one row in a Cravat input file.
delimiter = '\t'
19
20
# Default column indices for interpreting one row of a Cravat file as a query.
# Each query attribute maps to the index of the column that holds its value.
cr_mapping = dict(
    chromosome=1,
    position=2,
    strand=3,
    reference=4,
    alternate=5,
)
29
30
# The attributes that are necessary in order to submit a query.
query_keys = [
    'chromosome',
    'position',
    'strand',
    'reference',
    'alternate',
]
35
36
# Keys expected in the server's JSON response, kept in a list so the Galaxy
# output has the same column order from run to run.
# Any additional key returned by the Cravat server is appended to this list
# and included in the output.
# NOTE(review): "gnomAD AF (Amrican)" looks like a misspelling of "American".
# It is kept as-is because it must match the server's key exactly - confirm
# against a real response before changing it.
response_keys = [
    "Chromosome",
    "Position",
    "Strand",
    "Reference base(s)",
    "Alternate base(s)",
    "HUGO symbol",
    "S.O. transcript",
    "Sequence ontology protein change",
    "Sequence ontology",
    "S.O. all transcripts",
    "gnomAD AF",
    "gnomAD AF (African)",
    "gnomAD AF (Amrican)",
    "gnomAD AF (Ashkenazi Jewish)",
    "gnomAD AF (East Asian)",
    "gnomAD AF (Finnish)",
    "gnomAD AF (Non-Finnish European)",
    "gnomAD AF (Other)",
    "gnomAD AF (South Asian)",
    "1000 Genomes AF",
    "ESP6500 AF (average)",
    "ESP6500 AF (European American)",
    "ESP6500 AF (African American)",
    "COSMIC transcript",
    "COSMIC protein change",
    "COSMIC variant count [exact nucleotide change]",
    "cosmic_site_nt",
    "CGL driver class",
    "TARGET",
    "dbSNP",
    "cgc_role",
    "cgc_inheritance",
    "cgc_tumor_type_somatic",
    "cgc_tumor_type_germline",
    "ClinVar",
    "ClinVar disease identifier",
    "ClinVar XRef",
    "GWAS Phenotype (GRASP)",
    "GWAS PMID (GRASP)",
    "Protein 3D variant",
]
52
53
def get_args():
    """Parse the command-line arguments of this wrapper.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``. ``input`` is
        required; ``output`` is ``None`` when the option is not given.
    """
    parser = argparse.ArgumentParser()
    # The help texts describe what this script does with each path: rows of
    # the input file are queried and the responses are written to the output.
    parser.add_argument('--input',
                        '-i',
                        required=True,
                        help='Input path to the Cravat file whose rows are queried')
    parser.add_argument('--output',
                        '-o',
                        default=None,
                        help='Output path to write the query results to')
    return parser.parse_args()
65
66
def format_chromosome(chrom):
    """Return the chromosome entry formatted for use as a query attribute.

    The value is converted to a string first, so a non-string entry such as
    the integer ``1`` is accepted. A ``'chr'`` prefix is added unless the
    value already starts with one.
    """
    chrom = str(chrom)
    if chrom.startswith('chr'):
        return chrom
    return 'chr' + chrom
72
73
def get_query_string(row):
    """Build the query string sent to the Cravat server for one row.

    ``row`` maps the Cravat headers to that row's values. The chromosome,
    position, strand, reference and alternate values are joined, in that
    order, with underscores.
    """
    ordered_fields = ('chromosome', 'position', 'strand', 'reference', 'alternate')
    return '_'.join(row[field] for field in ordered_fields)
79
80
def _format_value(val):
    """Return the text written to the output file for one response value."""
    if val is None:
        # A missing or null value becomes an empty cell.
        return ""
    try:
        # Format standardization for numerics.
        return format(float(val), ".4f")
    except (TypeError, ValueError, OverflowError):
        # Not a number: write the value's text unchanged.
        return str(val)


def query(in_path, out_path):
    """Query each line of the Cravat file at in_path on the Cravat server.

    One tab-separated line of response values is written to the file at
    out_path for every non-blank input line. Columns follow the order of
    ``response_keys``. Keys the server returns that are not yet in
    ``response_keys`` are appended to that module-level list, so later rows
    can have more columns than earlier ones. Expected keys missing from a
    response, and null values, are written as empty cells.

    Raises:
        requests.RequestException: if the server answers with a status code
            other than 200 or with an empty body.
    """
    with open(in_path, 'r') as in_file, \
            open(out_path, 'w') as out_file:

        for line in in_file:
            line = line.strip()
            if not line:
                # A blank line has no columns to index into; skip it.
                continue
            fields = line.split(delimiter)
            # row maps the cravat column headers to their values in this line
            row = {header: fields[index] for header, index in cr_mapping.items()}
            row['chromosome'] = format_chromosome(row['chromosome'])
            query_string = get_query_string(row)
            call = requests.get(endpoint, params={'mutation': query_string})
            if call.status_code != 200 or call.text == "":
                raise requests.RequestException("Bad Server Response. Respone code: '{}', Response Text: '{}'".format(call.status_code, call.text))
            json_response = json.loads(call.text)
            # Register any key the server returned that is not yet expected.
            for key in json_response:
                if key not in response_keys:
                    response_keys.append(key)
            # Emit values in response_keys order to standardize column order.
            cells = [_format_value(json_response.get(key)) for key in response_keys]
            out_file.write("\t".join(cells) + "\n")
120
121
if __name__ == "__main__":
    # Script entry point: parse the CLI arguments and run the queries.
    cli_args = get_args()
    if cli_args.output is None:
        # No output path given: write into the directory of the input file.
        base, _ = os.path.split(cli_args.input)
        cli_args.output = os.path.join(base, "cravat_converted.txt")
    query(cli_args.input, cli_args.output)