Mercurial > repos > in_silico > cravat_annotate_mutations
view cravat_annotate_mutations-18ce5c6169ef/cravat_annotate.py @ 6:6f4223b8a428 draft
Deleted selected files
author | in_silico |
---|---|
date | Wed, 19 Jul 2017 14:49:31 -0400 |
parents | 6b7ce75ea2f8 |
children |
line wrap: on
line source
"""Annotate mutations through the CRAVAT REST service (Galaxy tool script).

Usage: cravat_annotate.py <input_file> <output_file>

Each non-blank line of the input file is one mutation written as
whitespace-separated genomic coordinates.  The fields are joined with
underscores, sent to the CRAVAT query endpoint, and the JSON reply is written
to the output file as one tab-separated row.  A header row with the column
names is written once, before the first data row.
"""
import json
import logging
import sys

# Endpoint queried once per mutation.
CRAVAT_QUERY_URL = 'http://staging.cravat.us/CRAVAT/rest/service/query'

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the job forever.
REQUEST_TIMEOUT = 60

# Column order is set manually to match the CRAVAT server's own report.
CRAVAT_COLUMNS = (
    "Chromosome", "Position", "Strand", "Reference base(s)",
    "Alternate base(s)", "HUGO symbol",
    "Sequence ontology transcript", "Sequence ontology protein change",
    "Sequence ontology",
    "Sequence ontology all transcripts", "ExAC total allele frequency",
    "ExAC allele frequency (African/African American)",
    "ExAC allele frequency (Latino)",
    "ExAC allele frequency (East Asian)", "ExAC allele frequency (Finnish)",
    "ExAC allele frequency (Non-Finnish European)",
    "ExAC allele frequency (Other)",
    "ExAC allele frequency (South Asian)", "1000 Genomes allele frequency",
    "ESP6500 allele frequency (European American)",
    "ESP6500 allele frequency (African American)",
    "Transcript in COSMIC", "Protein sequence change in COSMIC",
    "Occurrences in COSMIC [exact nucleotide change]",
    "Mappability Warning", "Driver Genes",
    "TARGET", "dbSNP", "MuPIT Link",
)

# The first two columns (chromosome and position) are written verbatim; every
# later column goes through format_value().
RAW_COLUMN_COUNT = 2

logger = logging.getLogger(__name__)


def raw_text(value):
    """Return *value* as text without reformatting; ``None`` becomes ''."""
    return '' if value is None else '%s' % (value,)


def format_value(value):
    """Return the text written for one cell after the raw columns.

    Strings are stripped of surrounding whitespace.  Anything that parses as a
    float is rendered with four decimals; every other value is written as-is.
    ``None`` (JSON ``null`` or a missing key) becomes an empty cell.
    """
    if value is None:
        return ''
    try:
        value = value.strip()
    except AttributeError:
        # Non-string JSON scalar (e.g. a number): nothing to strip.
        pass
    try:
        return '%.4f' % float(value)
    except (TypeError, ValueError):
        # Not numeric (gene symbols, rs ids, links, ...): keep the text.
        return '%s' % (value,)


def column_order(record):
    """Return the output columns for *record*.

    The fixed CRAVAT columns come first, followed by any additional keys of
    *record* in the order the mapping yields them.
    """
    columns = list(CRAVAT_COLUMNS)
    columns.extend(key for key in record if key not in CRAVAT_COLUMNS)
    return columns


def format_row(record, columns):
    """Return *record* as one tab-separated line (no trailing newline).

    Keys missing from *record* produce empty cells so that every row has
    exactly ``len(columns)`` fields.
    """
    cells = [raw_text(record.get(name)) for name in columns[:RAW_COLUMN_COUNT]]
    cells.extend(
        format_value(record.get(name)) for name in columns[RAW_COLUMN_COUNT:]
    )
    return '\t'.join(cells)


def iter_mutations(in_file):
    """Yield one query string per non-blank line of *in_file*.

    Surrounding whitespace (including the newline) is dropped and each run of
    whitespace between fields is replaced by a single underscore.
    """
    for line in in_file:
        mutation = '_'.join(line.split())
        if mutation:
            # Blank lines would otherwise be sent as an empty query.
            yield mutation


def query_cravat(mutation):
    """Query the CRAVAT server for *mutation* and return the decoded JSON.

    Raises whatever ``requests`` raises on connection problems, timeouts or a
    non-success HTTP status, and ``ValueError`` if the body is not valid JSON.
    """
    # Imported here so the pure formatting helpers above stay importable (and
    # testable) on machines without the third-party HTTP client installed.
    import requests

    response = requests.get(
        CRAVAT_QUERY_URL,
        params={'mutation': mutation},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()
    return json.loads(response.text)


def write_rows(records, out_file):
    """Write *records* to *out_file* as a tab-separated table.

    The header is written once, from the first record.  The column list is
    frozen at that point so that every later row lines up with the header.
    Nothing is written when *records* is empty.
    """
    columns = None
    for record in records:
        logger.debug("CRAVAT response: %r", record)
        if columns is None:
            columns = column_order(record)
            out_file.write('\t'.join(columns) + '\n')
        out_file.write(format_row(record, columns) + '\n')


def annotate(in_file, out_file):
    """Annotate every mutation read from *in_file* and write to *out_file*."""
    write_rows((query_cravat(m) for m in iter_mutations(in_file)), out_file)


def main(argv=None):
    """Script entry point: ``argv[1]`` is the input path, ``argv[2]`` the output."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit('usage: cravat_annotate.py <input_file> <output_file>')
    input_filename, output_filename = argv[1], argv[2]
    # Context managers close both files even when a query fails midway.
    with open(input_filename, "r") as in_file, \
            open(output_filename, "w") as out_file:
        annotate(in_file, out_file)


if __name__ == "__main__":
    main()