# HG changeset patch # User in_silico # Date 1530136455 14400 # Node ID ae65a43ed6586320d1101ebab1f5d31ec1fd3079 # Parent 435b5bb17d01812ca6e286a35c9035d385bc9397 Uploaded diff -r 435b5bb17d01 -r ae65a43ed658 cravat_annotate/cravat_annotate.py --- a/cravat_annotate/cravat_annotate.py Wed Jun 27 17:54:05 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,128 +0,0 @@ -""" -A galaxy wrapper for the /rest/service/query API endpoint on Cravat. -""" - - -import requests -import json -import sys -import re -import argparse -import ipdb - - -# The endpoint that CravatQuerys are submitted to -endpoint = 'http://www.cravat.us/CRAVAT/rest/service/query' - - -# The value delimiter used in the Cravat input file to delimit values -delimiter = "\t" - - -# Defualt indices for intepretting a cravat file's row of data in to a CravatQuery -cr_mapping = { - 'chromosome': 1, - 'position': 2, - 'strand': 3, - 'reference': 4, - 'alternate': 5 -} - - -# The neccessary attributes neeeded to submit a query. -query_keys = [ - 'chromosome', 'position', 'strand', 'reference', 'alternate' -] - - -# Expected response keys from server. Ordered in list so that galaxy output has uniform column ordering run-to-run. -# If cravat server returns additional keys, they are appended to and included in output. -response_keys = [ - "Chromosome", "Position", "Strand", "Reference base(s)", "Alternate base(s)", - "HUGO symbol", "S.O. transcript", "Sequence ontology protein change", "Sequence ontology", - "S.O. all transcripts", "gnomAD AF", "gnomAD AF (African)", "gnomAD AF (Amrican)", - "gnomAD AF (Ashkenazi Jewish)", "gnomAD AF (East Asian)", "gnomAD AF (Finnish)", - "gnomAD AF (Non-Finnish European)", "gnomAD AF (Other)", "gnomAD AF (South Asian)", - "1000 Genomes AF", "ESP6500 AF (average)", "ESP6500 AF (European American)", - "ESP6500 AF (African American)", "COSMIC transcript", "COSMIC protein change", - "COSMIC variant count [exact nucleotide change]", "cosmic_site_nt", "CGL driver class", - "TARGET", "dbSNP", "cgc_role", "cgc_inheritance", "cgc_tumor_type_somatic", - "cgc_tumor_type_germline", "ClinVar", "ClinVar disease identifier", "ClinVar XRef", - "GWAS Phenotype (GRASP)", "GWAS PMID (GRASP)", "Protein 3D variant" -] - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument('--input', - '-i', - required = True, - help='Input path to a cravat file for querying',) - parser.add_argument('--output', - '-o', - default = None, - help = 'Output path to write results from query') - return parser.parse_args() - - -def format_chromosome(chrom): - """ : Ensure chromosome entry is propely formatted for use as querying attribute. """ - if chrom[0:3] == 'chr': - return chrom - return 'chr' + str(chrom) - - -def get_query_string(row): - """ : From a row dict, return a query string for the Cravat server. - : The row dict is cravat headeres associated to their values of that row. - """ - return '_'.join([ row['chromosome'], row['position'], row['strand'], row['reference'], row['alternate'] ]) - - -def query(in_path, out_path): - """ : From a Cravat the file at in_path, query each line on the Cravat server. - : Write the response values to file at out_path. - """ - - with open(in_path, 'r') as in_file, \ - open(out_path, 'w') as out_file: - - for line in in_file: - line = line.strip().split('\t') - # row is dict of cravat col headers assioted values in this line - row = { header: line[index] for header, index in cr_mapping.items() } - row['chromosome'] = format_chromosome(row['chromosome']) - query_string = get_query_string(row) - call = requests.get(endpoint, params={ 'mutation': query_string }) - if call.status_code != 200 or call.text == "": - raise requests.RequestException("Bad Server Response. Respone code: '{}', Response Text: '{}'".format(call.status_code, call.text)) - json_response = json.loads(call.text) - # See if server returned additional json key-val paris not expected in response_keys - for key in json_response: - if key not in response_keys: - response_keys.append(key) - # Write key in order of response_keys to standardize order of output columns - wrote = False - for key in response_keys: - if key not in json_response: - val = None - val = json_response[key] - # Format standardization for numerics - try: - val = float(val) - val = format(val, ".4f") - except: - pass - if wrote: - out_file.write("\t") - out_file.write(val) - wrote = True - out_file.write("\n") - - -if __name__ == "__main__": - cli_args = get_args() - if cli_args.output == None: - base, _ = os.path.split(cli_args.input) - cli_args.output = os.path.join(base, "cravat_converted.txt") - query(cli_args.input, cli_args.output) diff -r 435b5bb17d01 -r ae65a43ed658 cravat_annotate/cravat_annotate.xml --- a/cravat_annotate/cravat_annotate.xml Wed Jun 27 17:54:05 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ - - Queries CRAVAT for cancer annotation - cravat_annotate.py -i $input -o $output - - - - - - - - - - - - - - - - - - This tool queries CRAVAT for cancer annotation. - - - - diff -r 435b5bb17d01 -r ae65a43ed658 cravat_submit/cravat_submit.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cravat_submit/cravat_submit.py Wed Jun 27 17:54:15 2018 -0400 @@ -0,0 +1,103 @@ +import requests +import json +import time +import urllib +import sys +import csv + +input_filename = sys.argv[1] +input_select_bar = sys.argv[2] +output_filename = sys.argv[3] + +# HACK: Input args corrections. +if input_select_bar == "None": + # The server represents an analyses of None as ""; however, submitting a blank string on command line throws off arg position + input_select_bar = "" + # The server represents the "Vest and Chasm" analyses as "VEST;CHASM; however, galaxy converts the semi-colon to an 'X'. Switch it back. +elif input_select_bar == "VESTXCHASM": + input_select_bar = "VEST;CHASM" + +write_header = True + +#plugs in params to given URL +submit = requests.post('http://cravat.us/CRAVAT/rest/service/submit', files={'inputfile':open(input_filename)}, data={'email':'znylund@insilico.us.com', 'analyses': input_select_bar}) +#,'analysis':input_select_bar,'functionalannotation': "on"}) +#Makes the data a json dictionary, takes out only the job ID +jobid = json.loads(submit.text)['jobid'] +#out_file.write(jobid) +submitted = json.loads(submit.text)['status'] +#out_file.write('\t' + submitted) + +#loops until we find a status equal to Success, then breaks +while True: + check = requests.get('http://staging.cravat.us/CRAVAT/rest/service/status', params={'jobid': jobid}) + status = json.loads(check.text)['status'] + resultfileurl = json.loads(check.text)['resultfileurl'] + #out_file.write(str(status) + ', ') + if status == 'Success': + #out_file.write('\t' + resultfileurl) + break + else: + time.sleep(2) + +#out_file.write('\n') + +#creates three files +file_1 = time.strftime("%H:%M") + '_Z_Variant_Result.tsv' +file_2 = time.strftime("%H:%M") + '_Z_Additional_Details.tsv' +file_3 = time.strftime("%H:%M") + 'Combined_Variant_Results.tsv' + + +#Download the two results +urllib.urlretrieve("http://cravat.us/CRAVAT/results/" + jobid + "/" + "Variant.Result.tsv", file_1) +urllib.urlretrieve("http://cravat.us/CRAVAT/results/" + jobid + "/" + "Variant_Additional_Details.Result.tsv", file_2) + +headers = [] +duplicates = [] + +#opens the Variant Result file and the Variant Additional Details file as csv readers, then opens the output file (galaxy) as a writer +with open(file_1) as tsvin_1, open(file_2) as tsvin_2, open(output_filename, 'wb') as tsvout: + tsvreader_1 = csv.reader(tsvin_1, delimiter='\t') + tsvreader_2 = csv.reader(tsvin_2, delimiter='\t') + tsvout = csv.writer(tsvout, delimiter='\t') + +#loops through each row in the Variant Additional Details file + for row in tsvreader_2: + #sets row_2 equal to the same row in Variant Result file + row_2 = tsvreader_1.next() + #checks if row is empty or if the first term contains '#' + if row == [] or row[0][0] == '#': + continue + #checks if the row begins with input line + if row[0] == 'Input line': + #Goes through each value in the headers list in VAD + for value in row: + #Adds each value into headers + headers.append(value) + #Loops through the Keys in VR + for value in row_2: + #Checks if the value is already in headers + if value in headers: + continue + #else adds the header to headers + else: + headers.append(value) + + print headers + tsvout.writerow(headers) + + + else: + + cells = [] + #Goes through each value in the next list + for value in row: + #adds it to cells + cells.append(value) + #Goes through each value from the VR file after position 11 (After it is done repeating from VAD file) + for value in row_2[11:]: + #adds in the rest of the values to cells + cells.append(value) + + print cells + tsvout.writerow(cells) \ No newline at end of file diff -r 435b5bb17d01 -r ae65a43ed658 cravat_submit/cravat_submit.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cravat_submit/cravat_submit.xml Wed Jun 27 17:54:15 2018 -0400 @@ -0,0 +1,34 @@ + + Submits, checks for, and retrieves data for cancer annotation + cravat_submit.py $input $dropdown $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool submits, checks for, and retrieves data for cancer annotation. + + +