Mercurial > repos > in_silico > cravat_annotate_mutations
comparison cravat_annotate_mutations-18ce5c6169ef/cravat_annotate.py @ 5:6b7ce75ea2f8 draft
Uploaded
author | in_silico |
---|---|
date | Wed, 19 Jul 2017 14:49:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:27c0b0f841c8 | 5:6b7ce75ea2f8 |
---|---|
1 import requests | |
2 import json | |
3 import sys | |
4 from __builtin__ import False | |
#Annotates every mutation listed in a Galaxy input file through the CRAVAT
#REST query service and writes the results as one tab-separated table.
#Usage: cravat_annotate.py <input_file> <output_file>

#address of the CRAVAT single-mutation query service
CRAVAT_QUERY_URL = 'http://staging.cravat.us/CRAVAT/rest/service/query'

#seconds to wait on the server before giving up, so a stalled connection
#cannot hang the job forever
REQUEST_TIMEOUT = 120

#manually sets the order of the Keys to the same as CRAVAT Server
KNOWN_KEYS = [
    "Chromosome", "Position", "Strand", "Reference base(s)", "Alternate base(s)", "HUGO symbol",
    "Sequence ontology transcript", "Sequence ontology protein change", "Sequence ontology",
    "Sequence ontology all transcripts", "ExAC total allele frequency",
    "ExAC allele frequency (African/African American)", "ExAC allele frequency (Latino)",
    "ExAC allele frequency (East Asian)", "ExAC allele frequency (Finnish)",
    "ExAC allele frequency (Non-Finnish European)", "ExAC allele frequency (Other)",
    "ExAC allele frequency (South Asian)", "1000 Genomes allele frequency",
    "ESP6500 allele frequency (European American)", "ESP6500 allele frequency (African American)",
    "Transcript in COSMIC", "Protein sequence change in COSMIC",
    "Occurrences in COSMIC [exact nucleotide change]", "Mappability Warning", "Driver Genes",
    "TARGET", "dbSNP", "MuPIT Link",
]

#text type of the running interpreter (the script was written for Python 2)
try:
    STRING_TYPES = basestring
except NameError:
    STRING_TYPES = str


def to_text(value):
    """Return value as text; a missing value (None) becomes an empty cell."""
    if value is None:
        return ""
    if isinstance(value, STRING_TYPES):
        return value
    return str(value)


def format_value(value):
    """Return the stripped text of value, rounded to four decimals if numeric.

    Anything that float() cannot parse is returned as stripped text unchanged.
    """
    text = to_text(value).strip()
    try:
        return '%.4f' % float(text)
    except ValueError:
        #not a number, keep the text as it is
        return text


def build_row(json_data, keys):
    """Return the list of output cells for one response, ordered like keys.

    The first two columns are copied as they are, every other column goes
    through format_value(). Keys missing from json_data give empty cells so
    the row always has exactly len(keys) columns.
    """
    row = [to_text(json_data.get(key)) for key in keys[:2]]
    row.extend(format_value(json_data.get(key)) for key in keys[2:])
    return row


def query_cravat(mutation):
    """Query the CRAVAT server for one mutation and return the decoded JSON.

    Raises requests.HTTPError on a non-2xx answer instead of trying to parse
    an error page as JSON.
    """
    call = requests.get(CRAVAT_QUERY_URL, params={'mutation': mutation},
                        timeout=REQUEST_TIMEOUT)
    call.raise_for_status()
    #puts the string of data into a json dictionary
    return json.loads(call.text)


def annotate(in_file, out_file, fetch=None):
    """Annotate each mutation line of in_file and write the table to out_file.

    in_file  -- iterable of input lines (whitespace separated genomic coordinates)
    out_file -- object with a write() method receiving the tab-separated table
    fetch    -- callable taking the mutation string and returning a dictionary,
                defaults to query_cravat

    The responses are collected first so the single header line can list every
    key seen in any response; all rows then share the same columns. Nothing is
    written when the input holds no mutation.
    """
    if fetch is None:
        fetch = query_cravat

    keys = list(KNOWN_KEYS)
    records = []
    #loops through the input file line by line
    for line in in_file:
        #strips the input line of \n (new line) and replaces every run of whitespace with an underscore
        mutation = "_".join(line.split())
        #a blank line holds no mutation to look up
        if not mutation:
            continue
        json_data = fetch(mutation)
        print(json_data)
        #keys the server sent that are not in the fixed list go to the end
        for key in json_data:
            if key not in keys:
                keys.append(key)
        records.append(json_data)

    if not records:
        return

    #writes out the keys of the dictionary, only once
    out_file.write('\t'.join(keys) + '\n')
    for json_data in records:
        row = build_row(json_data, keys)
        #print for debugging
        for key, value in zip(keys[2:], row[2:]):
            print("key[" + key + "] value[" + value + "]")
        out_file.write('\t'.join(row) + '\n')


def main():
    """Read the input and output file names given by galaxy and run the annotation."""
    if len(sys.argv) < 3:
        sys.exit("usage: cravat_annotate.py <input_file> <output_file>")
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    #opens each file, in to read, out to write; both are closed even on error
    with open(input_filename, "r") as in_file, open(output_filename, "w") as out_file:
        annotate(in_file, out_file)


if __name__ == "__main__":
    main()