Mercurial > repos > in_silico > cravat_annotate_mutations
diff cravat_annotate_mutations-18ce5c6169ef/cravat_annotate.py @ 5:6b7ce75ea2f8 draft
Uploaded
author | in_silico |
---|---|
date | Wed, 19 Jul 2017 14:49:03 -0400 |
parents | |
children |
line wrap: on
line diff
"""Annotate mutations with the CRAVAT web service (Galaxy tool script).

Usage: cravat_annotate.py <input_file> <output_file>

Each non-blank line of the input file is one mutation given as
whitespace-separated genomic coordinates.  The fields are joined with
underscores, sent to the CRAVAT query endpoint, and the returned JSON
object is written to the output file as one tab-separated row.  A header
row with the column names is written before the first data row.
"""
import json
import logging
import sys

# CRAVAT REST endpoint queried once per input mutation.
CRAVAT_QUERY_URL = 'http://staging.cravat.us/CRAVAT/rest/service/query'

# Seconds to wait on the server before giving up, so a stalled connection
# cannot hang the job forever.
REQUEST_TIMEOUT = 120

# Column order is set by hand to match the order shown by the CRAVAT server.
# Any additional key found in the first response is appended after these.
STANDARD_COLUMNS = (
    "Chromosome", "Position", "Strand", "Reference base(s)",
    "Alternate base(s)", "HUGO symbol",
    "Sequence ontology transcript", "Sequence ontology protein change",
    "Sequence ontology",
    "Sequence ontology all transcripts", "ExAC total allele frequency",
    "ExAC allele frequency (African/African American)",
    "ExAC allele frequency (Latino)",
    "ExAC allele frequency (East Asian)", "ExAC allele frequency (Finnish)",
    "ExAC allele frequency (Non-Finnish European)",
    "ExAC allele frequency (Other)",
    "ExAC allele frequency (South Asian)", "1000 Genomes allele frequency",
    "ESP6500 allele frequency (European American)",
    "ESP6500 allele frequency (African American)",
    "Transcript in COSMIC", "Protein sequence change in COSMIC",
    "Occurrences in COSMIC [exact nucleotide change]", "Mappability Warning",
    "Driver Genes",
    "TARGET", "dbSNP", "MuPIT Link",
)

# The first two columns (Chromosome, Position) are written exactly as
# received; every later column goes through format_value().
UNFORMATTED_COLUMNS = 2

logger = logging.getLogger(__name__)


def fetch_annotation(mutation):
    """Query the CRAVAT server for one mutation and return the decoded JSON.

    mutation -- the underscore-joined coordinate string for one input line.

    Raises requests.HTTPError on a 4xx/5xx response and ValueError when the
    response body is not valid JSON.
    """
    # Imported here so the formatting helpers in this module can be imported
    # without the third-party HTTP dependency installed.
    import requests

    response = requests.get(CRAVAT_QUERY_URL,
                            params={'mutation': mutation},
                            timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return json.loads(response.text)


def format_value(value):
    """Return one annotation value as the text of an output cell.

    Values that parse as a number are rendered with four decimals; anything
    else is written as its stripped text.  A missing value (None) becomes an
    empty cell.
    """
    if value is None:
        return ""
    try:
        text = value.strip()
    except AttributeError:
        # Not a string (e.g. a JSON number): use the value as it is.
        text = value
    try:
        return '%.4f' % float(text)
    except (TypeError, ValueError):
        return "%s" % (text,)


def build_columns(annotation):
    """Return the output columns: the standard ones, then any extra keys."""
    columns = list(STANDARD_COLUMNS)
    columns.extend(key for key in annotation if key not in STANDARD_COLUMNS)
    return columns


def build_row(annotation, columns):
    """Return the cell texts of one output row, in the order of columns."""
    cells = []
    for index, column in enumerate(columns):
        value = annotation.get(column)
        if index < UNFORMATTED_COLUMNS:
            cells.append("" if value is None else "%s" % (value,))
        else:
            cells.append(format_value(value))
    return cells


def annotate(in_file, out_file, fetch=fetch_annotation):
    """Write one tab-separated annotation row per mutation in in_file.

    in_file  -- iterable of input lines, one mutation per line.
    out_file -- writable text file that receives the header and the rows.
    fetch    -- callable mapping a mutation string to its annotation dict;
                defaults to the CRAVAT web service.

    Blank input lines are skipped.  The column set is fixed by the first
    response so that every row lines up with the header.
    """
    columns = None
    for line in in_file:
        # Collapse every run of whitespace (including the trailing newline)
        # into single underscores, the form the query expects.
        mutation = "_".join(line.split())
        if not mutation:
            continue

        annotation = fetch(mutation)
        logger.debug("response for %s: %r", mutation, annotation)

        if columns is None:
            columns = build_columns(annotation)
            out_file.write('\t'.join(columns) + '\n')
        else:
            unexpected = [key for key in annotation if key not in columns]
            if unexpected:
                # The header is already written, so these cannot get a column.
                logger.warning("ignoring keys absent from the header for %s: %s",
                               mutation, ", ".join(unexpected))

        out_file.write('\t'.join(build_row(annotation, columns)) + '\n')


def main(argv=None):
    """Run the tool with the input and output paths passed by Galaxy."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit("usage: %s <input_file> <output_file>" % argv[0])
    # Both files are closed even when a request or a write fails.
    with open(argv[1], "r") as in_file, open(argv[2], "w") as out_file:
        annotate(in_file, out_file)


if __name__ == "__main__":
    main()