Mercurial repository: repos/in_silico/cravat_score_and_annotate
File: cravat_submit/cravat_submit.py
Changeset: 15:0835042eb731 (draft), uploaded
Author: in_silico
Date: Mon, 30 Jul 2018 13:32:05 -0400
Comparison against parent changeset 14:45b91fdd18ce.
from __future__ import print_function

import argparse
import csv
import json
import sys
import time

import requests

try:
    # Python 2
    from urllib import urlretrieve
except ImportError:
    # Python 3
    from urllib.request import urlretrieve
10 | |
# SECURITY NOTE(review): plaintext account credentials are committed to source
# control here — they should live in a secrets store or deployment config,
# and this password should be rotated.
"""
Tool's email:
usernmae: cravatgalaxy@gmail.com
password: chicken_quesadilla
"""

# Sender address attached to every CRAVAT job submission (used by
# CravatSubmission.submit when POSTing to the CRAVAT REST service).
email = 'cravatgalaxy@gmail.com'
18 | |
class CravatSubmissionException(Exception):
    """Error raised for problems encountered while preparing, indexing, or
    merging CRAVAT submission files."""

    def __init__(self, msg):
        # Delegate to Exception so str(exc) yields the message unchanged.
        super(CravatSubmissionException, self).__init__(msg)
22 | |
class CravatSubmission(object):
    """Submit a variant file to the public CRAVAT server, poll until the job
    finishes, download the Results/Details TSVs, and merge them into a single
    tab-separated output file.
    """

    def get_cmd_args(self, argv):
        """Parse command-line arguments.

        argv is expected in sys.argv form: argv[0] (the script path) is
        deliberately consumed by the positional 'path' argument.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('path',
                            help="Path to python module")
        parser.add_argument('--input',
                            '-i',
                            required=True,
                            help='Input path to a cravat file for querying')
        parser.add_argument('--output',
                            '-o',
                            default=None,
                            help='Output path to write results from query')
        parser.add_argument('--analysis',
                            '-a',
                            required=True,
                            help="Cravat analysis. Should be 'VEST', 'CHASM', 'NONE', or 'VEST;CHASM'")
        return parser.parse_args(argv)

    def is_valid_analysis(self, analysis):
        """Test if analysis is a recognized value.

        'NONE' is accepted in addition to the empty string so the value
        documented in the --analysis help text actually validates.
        """
        return analysis in ("VEST", "CHASM", "VEST;CHASM", "", "NONE")

    def is_skippable(self, s):
        """Test if a line (str) is skippable: blank, whitespace-only, or a
        comment/boilerplate header line.

        Raises CravatSubmissionException if s is not a string.
        """
        if not isinstance(s, str):
            raise CravatSubmissionException("is_skippable accepts a string")
        return (s == ""
                or s[0] == "#"
                or s.startswith('"#For more information on CRAVAT')
                or s.isspace())

    def parse(self, s, sep='\t'):
        """Convert a string line to a list of values."""
        return s.strip().split(sep)

    def unparse(self, array, sep='\t', newline='\n'):
        """Convert a list of values to a writable string line."""
        return sep.join([str(i) for i in array]) + newline

    def get_headers(self, path, pattern='Input line', sep='\t'):
        """Return the header row of a Results/Details file from a finished
        CRAVAT submission, or None if no line starts with *pattern*.
        """
        with open(path, 'r') as f:
            for line in f:
                if line.startswith(pattern):
                    # Honor the sep argument (it was previously accepted but
                    # silently ignored).
                    return self.parse(line, sep)
        return None

    def create_index(self, path, prop='Input line'):
        """Create an index of seek/tell positions keyed by the value of the
        *prop* column. Used to correlate lines between two files without
        reading both files into memory.

        Raises CravatSubmissionException if the file has no header row or the
        header row lacks *prop* (previously a missing header crashed with
        TypeError on `prop not in None`).
        """
        headers = self.get_headers(path)
        if headers is None or prop not in headers:
            raise CravatSubmissionException("Index retrieval property not found in headers")
        prop_loc = headers.index(prop)
        index = {}
        with open(path, 'r') as f:
            pos = 0
            line = f.readline()
            while line != "":
                if not self.is_skippable(line):
                    parsed = self.parse(line)
                    # Skip the header row itself.
                    if parsed != headers:
                        index[parsed[prop_loc]] = pos
                pos = f.tell()
                line = f.readline()
        return index

    def get_header_val_dict(self, headers, vals):
        """Associate an array of header keys to an array of values."""
        return {header: val for (header, val) in zip(headers, vals)}

    def write_results(self, results_path, details_path, out_path, write_headers=True):
        """Merge the Results and Details files from a CRAVAT submission into a
        single TSV written to out_path.

        Missing values are written as the literal string 'None'. Raises
        CravatSubmissionException if either input file lacks a header row.
        """
        results_headers = self.get_headers(results_path)
        details_headers = self.get_headers(details_path)
        if results_headers is None or details_headers is None:
            raise CravatSubmissionException("Unable to interpret headers in Results or Details submission files")
        # Union of the two header rows, Results first, order preserved,
        # without mutating results_headers in place.
        headers = list(results_headers)
        for h in details_headers:
            if h not in headers:
                headers.append(h)
        results_index = self.create_index(results_path)
        details_index = self.create_index(details_path)
        with open(results_path, 'r') as results_file, \
                open(details_path, 'r') as details_file, \
                open(out_path, 'w') as out_file:
            if write_headers:
                out_file.write(self.unparse(headers))
            for line_id, file_pos in results_index.items():
                results_file.seek(file_pos)
                results_vals = self.parse(results_file.readline())
                results_dict = self.get_header_val_dict(results_headers, results_vals)
                if line_id in details_index:
                    details_file.seek(details_index[line_id])
                    details_vals = self.parse(details_file.readline())
                    # On a repeated header, the Details value overwrites the
                    # Results value.
                    results_dict.update(self.get_header_val_dict(details_headers, details_vals))
                out_file.write(self.unparse([results_dict.get(h, 'None') for h in headers]))

    def submit(self, in_path, analysis):
        """POST a job to the production CRAVAT server, poll until it
        completes, and download the two result files locally.

        Returns (results_path, details_path). Raises ValueError for an
        unrecognized analysis and CravatSubmissionException if the server
        reports the job failed.
        """
        if not self.is_valid_analysis(analysis):
            raise ValueError("Did not get valid analyses.")
        if analysis == 'NONE':
            # The server expects an empty analyses field for annotation-only
            # runs; 'NONE' is only a command-line convenience.
            analysis = ''
        # Close the upload handle when the request finishes (it was leaked).
        with open(in_path) as in_file:
            submit = requests.post('http://cravat.us/CRAVAT/rest/service/submit',
                                   files={'inputfile': in_file},
                                   data={'email': email,
                                         'analyses': analysis})
        jobid = json.loads(submit.text)['jobid']
        # Poll until the job succeeds; bail out on a reported failure instead
        # of looping forever.
        while True:
            check = requests.get('http://cravat.us/CRAVAT/rest/service/status', params={'jobid': jobid})
            status = json.loads(check.text)['status']
            print(status)
            if status == 'Success':
                break
            # NOTE(review): 'Error' is the assumed failure string — confirm
            # against the CRAVAT status endpoint.
            if status == 'Error':
                raise CravatSubmissionException("CRAVAT job %s finished with status 'Error'" % jobid)
            time.sleep(2)
        # Download completed job results to local files.
        timestamp = time.strftime("%Y-%m-%d_%H-%M-%S_")
        results_path = 'Z_Variant_Result' + timestamp + '.tsv'
        details_path = 'Z_Additional_Details' + timestamp + '.tsv'
        self._download(jobid, 'Variant.Result.tsv', results_path)
        self._download(jobid, 'Variant_Additional_Details.Result.tsv', details_path)
        return results_path, details_path

    def _download(self, jobid, remote_name, local_path):
        """Stream one result file to local_path via requests (urlretrieve from
        `urllib` is Python-2-only)."""
        resp = requests.get('http://cravat.us/CRAVAT/results/%s/%s' % (jobid, remote_name),
                            stream=True)
        resp.raise_for_status()
        with open(local_path, 'wb') as f:
            for chunk in resp.iter_content(8192):
                f.write(chunk)
161 | |
if __name__ == "__main__":
    # Drive a full submit-and-merge run from the command line.
    submission = CravatSubmission()
    args = submission.get_cmd_args(sys.argv)
    # Galaxy converts semi-colons to X's; restore the intended separator.
    analysis = "VEST;CHASM" if args.analysis == "VESTXCHASM" else args.analysis
    results_path, details_path = submission.submit(args.input, analysis)
    submission.write_results(results_path, details_path, args.output)