Mercurial > repos > in_silico > cravat_score_and_annotate
view cravat_submit/cravat_submit.py @ 21:67a13940d0bf draft default tip
Uploaded
author | in_silico |
---|---|
date | Thu, 16 Aug 2018 15:10:43 -0400 |
parents | 275d45d14350 |
children |
line wrap: on
line source
"""Submit a variant file to the CRAVAT web service and merge its result files.

The script posts an input file to the CRAVAT production server, polls the job
status until it reports 'Success', downloads the "Variant.Result.tsv" and
"Variant_Additional_Details.Result.tsv" files, and joins them on the
'Input line' column into a single tab-separated output file.
"""
from __future__ import print_function

import argparse
import collections
import csv
import json
import sys
import time

try:
    import requests
except ImportError:
    # Only submit() talks to the network; deferring the failure keeps the
    # parsing/merging helpers importable without the third-party package.
    requests = None

try:
    # Python 3.0+
    from urllib.request import urlretrieve
except ImportError:
    # Python 2.7
    from urllib import urlretrieve

# Address sent to the CRAVAT server together with every submitted job.
# SECURITY: the password of this mailbox used to be written here in plain
# text. Credentials must never live in source control; keep them in a secrets
# store instead.
email = 'cravatgalaxy@gmail.com'


class CravatSubmissionException(Exception):
    """Raised when a CRAVAT submission or its result files cannot be processed."""


class CravatSubmission(object):
    """Helper that submits a job to CRAVAT and merges the downloaded results."""

    def get_cmd_args(self, argv):
        """Parse the command line.

        :param argv: Full argument vector, *including* the program name; the
            leading 'path' positional exists only to consume that first item.
        :return: The ``argparse.Namespace`` with ``path``, ``input``,
            ``output`` and ``analysis`` attributes.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('path',
                            help="Path to python module")
        parser.add_argument('--input', '-i',
                            required=True,
                            help='Input path to a cravat file for querying')
        parser.add_argument('--output', '-o',
                            default=None,
                            help='Output path to write results from query')
        # NOTE(review): the help text advertises 'NONE', but
        # is_valid_analysis() accepts the empty string instead - confirm
        # which spelling callers are expected to pass.
        parser.add_argument('--analysis', '-a',
                            required=True,
                            help="Cravat analysis. Should be 'VEST', 'CHASM', 'NONE', or 'VEST;CHASM'")
        return parser.parse_args(argv)

    def is_valid_analysis(self, analysis):
        """Return True if *analysis* is one of the recognized analysis values."""
        return analysis in ("VEST", "CHASM", "VEST;CHASM", "")

    def is_skippable(self, s):
        """Return True if the line *s* is a header/comment or blank line.

        :param s: A single line of text.
        :raises CravatSubmissionException: If *s* is not a ``str``.
        """
        if not isinstance(s, str):
            raise CravatSubmissionException("is_skippable accepts a string")
        if s == "" or s.isspace():
            return True
        return s[0] == "#" or s.startswith('"#For more information on CRAVAT')

    def parse(self, s, sep='\t'):
        """Convert a text line into a list of values split on *sep*."""
        return s.strip().split(sep)

    def unparse(self, array, sep='\t', newline='\n'):
        """Convert a sequence of values into a writable line ending in *newline*."""
        return sep.join([str(value) for value in array]) + newline

    def get_headers(self, path, pattern='Input line', sep='\t'):
        """Return the column headers of a Results/Details file.

        The header row is the first line that starts with *pattern*.

        :param path: File to scan.
        :param pattern: Prefix identifying the header line.
        :param sep: Column separator used to split the header line.
        :return: List of header names, or ``None`` if no line matches.
        """
        with open(path, 'r') as f:
            for line in f:
                if line.startswith(pattern):
                    # Honor the caller's separator (previously ignored).
                    return self.parse(line, sep)
        return None

    def create_index(self, path, prop='Input line'):
        """Map each data line's *prop* value to its seek position in *path*.

        The index lets lines of two files be associated without reading either
        file fully into memory. Entries are kept in file order.

        :param path: File to index.
        :param prop: Header name whose column value becomes the index key.
        :return: ``OrderedDict`` of ``{value: file position}``.
        :raises CravatSubmissionException: If the file has no header line or
            *prop* is not one of its headers.
        """
        headers = self.get_headers(path)
        if headers is None:
            raise CravatSubmissionException("No header line found in file: " + str(path))
        if prop not in headers:
            raise CravatSubmissionException("Index retrievel property not found in headers")
        prop_loc = headers.index(prop)
        # Ordered so that consumers iterate rows in the order they appear in
        # the file, regardless of the interpreter's dict ordering.
        index = collections.OrderedDict()
        with open(path, 'r') as f:
            pos = 0
            # readline() (not file iteration) is required here: iterating a
            # text file disables tell() on Python 3.
            for line in iter(f.readline, ""):
                if not self.is_skippable(line):
                    parsed = self.parse(line)
                    if parsed != headers:
                        index[parsed[prop_loc]] = pos
                pos = f.tell()
        return index

    def get_header_val_dict(self, headers, vals):
        """Associate header keys with values; extra items on either side are dropped."""
        return dict(zip(headers, vals))

    def write_results(self, results_path, details_path, out_path, write_headers=True):
        """Merge the Results and Details files of a submission into *out_path*.

        Rows are joined on the 'Input line' column. Columns are the Results
        headers followed by any Details headers not already present; missing
        values are written as the string 'None'.

        :param results_path: Path of the downloaded Results file.
        :param details_path: Path of the downloaded Details file.
        :param out_path: Path of the merged file to write.
        :param write_headers: Whether to emit the header row first.
        :raises CravatSubmissionException: If either file has no header line.
        """
        results_headers = self.get_headers(results_path)
        details_headers = self.get_headers(details_path)
        if results_headers is None or details_headers is None:
            raise CravatSubmissionException("Unable to intepret headers in Results or Details submission files")
        # Work on a copy: results_headers is still needed unmodified to label
        # the values of each Results row below.
        headers = list(results_headers)
        for header in details_headers:
            if header not in headers:
                headers.append(header)
        results_index = self.create_index(results_path)
        details_index = self.create_index(details_path)
        with open(results_path, 'r') as results_file, \
                open(details_path, 'r') as details_file, \
                open(out_path, 'w') as out_file:
            if write_headers:
                out_file.write(self.unparse(headers))
            for line_id, file_pos in results_index.items():
                results_file.seek(file_pos)
                results_vals = self.parse(results_file.readline())
                row = self.get_header_val_dict(results_headers, results_vals)
                details_pos = details_index.get(line_id)
                if details_pos is not None:
                    details_file.seek(details_pos)
                    details_vals = self.parse(details_file.readline())
                    # On a repeated column, the Details value overwrites the
                    # Results value.
                    row.update(self.get_header_val_dict(details_headers, details_vals))
                out_file.write(self.unparse([row.get(header, 'None') for header in headers]))

    def submit(self, in_path, analysis):
        """Submit a job to the production CRAVAT server and download its results.

        :param in_path: Path of the input file to upload.
        :param analysis: Analysis value accepted by ``is_valid_analysis``.
        :return: Tuple ``(results_path, details_path)`` of the downloaded files.
        :raises ValueError: If *analysis* is not a recognized value.
        :raises CravatSubmissionException: If the 'requests' package is missing.
        """
        if not self.is_valid_analysis(analysis):
            raise ValueError("Did not get valid analyses.")
        if requests is None:
            raise CravatSubmissionException("The 'requests' package is required to submit jobs to CRAVAT")
        # Upload in binary mode and close the handle once the request is sent.
        with open(in_path, 'rb') as in_file:
            response = requests.post('http://cravat.us/CRAVAT/rest/service/submit',
                                     files={'inputfile': in_file},
                                     data={'email': email, 'analyses': analysis})
        jobid = json.loads(response.text)['jobid']
        # Poll the job status until it is 'Success'.
        # NOTE(review): there is no exit for a failed job or a timeout, so a
        # job that never succeeds keeps this loop running - confirm the
        # failure statuses the service reports before adding one.
        while True:
            check = requests.get('http://cravat.us/CRAVAT/rest/service/status',
                                 params={'jobid': jobid})
            status = json.loads(check.text)['status']
            if status == 'Success':
                break
            time.sleep(2)
        # Download the completed job's result files to timestamped local files.
        timestamp = time.strftime("%Y-%m-%d_%H-%M-%S_")
        results_path = 'Z_Variant_Result' + timestamp + '.tsv'
        details_path = 'Z_Additional_Details' + timestamp + '.tsv'
        urlretrieve("http://cravat.us/CRAVAT/results/" + jobid + "/" + "Variant.Result.tsv",
                    filename=results_path)
        urlretrieve("http://cravat.us/CRAVAT/results/" + jobid + "/" + "Variant_Additional_Details.Result.tsv",
                    filename=details_path)
        return results_path, details_path


def main(argv=None):
    """Run a submission from the command line and write the merged output.

    :param argv: Argument vector including the program name; defaults to
        ``sys.argv``.
    """
    submission = CravatSubmission()
    cmd_args = submission.get_cmd_args(sys.argv if argv is None else argv)
    # Galaxy converts semi-colons to X's. Switch it back.
    analysis = cmd_args.analysis
    if analysis == "VESTXCHASM":
        analysis = "VEST;CHASM"
    results_path, details_path = submission.submit(cmd_args.input, analysis)
    submission.write_results(results_path, details_path, cmd_args.output)


if __name__ == "__main__":
    main()