diff cravat_submit/cravat_submit.py @ 13:2c9208fe16a0 draft

Uploaded
author in_silico
date Mon, 30 Jul 2018 13:22:46 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cravat_submit/cravat_submit.py	Mon Jul 30 13:22:46 2018 -0400
@@ -0,0 +1,170 @@
+from __future__ import print_function
+import requests
+import json
+import time
+from urllib.request import urlretrieve
+import sys
+import csv
+import argparse
+
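+# NOTE(review): the string below stores the tool mailbox's password in plain
+# text in source control. Move it to a secret store and rotate the password.
+"""
+Tool's email:
+username: cravatgalaxy@gmail.com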
+password: chicken_quesadilla
+"""
+
+# Address sent as the 'email' form field of every job submitted by
+# CravatSubmission.submit().
+email = 'cravatgalaxy@gmail.com'
+
+class CravatSubmissionException(Exception):
+    """Error raised by CravatSubmission helpers; carries a single message."""
+
+    def __init__(self, message):
+        # Delegate straight to the base class so str(exc) is the message.
+        Exception.__init__(self, message)
+
+class CravatSubmission(object):
+    """Submit a variant file to the CRAVAT REST service and merge its results.
+
+    The methods fall into three groups: command-line parsing
+    (``get_cmd_args``), tab-separated file helpers (``parse``, ``unparse``,
+    ``get_headers``, ``create_index``, ``write_results``) and the remote job
+    round trip (``submit``).  ``submit`` relies on the module-level ``email``
+    constant and on the ``requests`` package imported by this module.
+    """
+
+    def get_cmd_args(self, argv):
+        """Parse ``argv`` and return the argparse namespace.
+
+        ``argv`` is parsed in full, so its first element is consumed by the
+        positional ``path`` argument; the script entry point of this module
+        passes ``sys.argv`` unchanged.
+        """
+        parser = argparse.ArgumentParser()
+        parser.add_argument('path',
+                            help="Path to python module")
+        parser.add_argument('--input', '-i',
+                            required=True,
+                            help='Input path to a cravat file for querying')
+        parser.add_argument('--output', '-o',
+                            default=None,
+                            help='Output path to write results from query')
+        parser.add_argument('--analysis', '-a',
+                            required=True,
+                            help="Cravat analysis. Should be 'VEST', 'CHASM', 'NONE', or 'VEST;CHASM'")
+        return parser.parse_args(argv)
+
+    def is_valid_analysis(self, analysis):
+        """Return True when ``analysis`` is one of the recognized values.
+
+        The empty string is the accepted spelling of "no analysis".
+        """
+        analyses = ["VEST", "CHASM", "VEST;CHASM", ""]
+        return analysis in analyses
+
+    def is_skippable(self, s):
+        """Return True when line ``s`` is blank, whitespace-only or a comment.
+
+        Comment lines are those starting with ``#`` or with the quoted
+        ``"#For more information on CRAVAT`` banner.
+
+        Raises:
+            CravatSubmissionException: if ``s`` is not a ``str``.
+        """
+        if not isinstance(s, str):
+            raise CravatSubmissionException("is_skippable accepts a string")
+        return (s == ""
+                or s.isspace()
+                or s.startswith("#")
+                or s.startswith('"#For more information on CRAVAT'))
+
+    def parse(self, s, sep='\t'):
+        """Split one line of text into its ``sep``-separated fields.
+
+        Only the line terminator is removed.  Stripping all surrounding
+        whitespace would also remove leading/trailing tabs and thereby drop
+        empty first/last columns, shifting values against their headers.
+        """
+        return s.rstrip('\r\n').split(sep)
+
+    def unparse(self, array, sep='\t', newline='\n'):
+        """Join ``array`` (values are passed through ``str``) into one line."""
+        return sep.join(str(value) for value in array) + newline
+
+    def get_headers(self, path, pattern='Input line', sep='\t'):
+        """Return the parsed header row of a Results/Details file.
+
+        The header row is the first line that starts with ``pattern``; it is
+        split on ``sep``.  Returns ``None`` when no such line exists.
+        """
+        with open(path, 'r') as f:
+            for line in f:
+                if line.startswith(pattern):
+                    return self.parse(line, sep)
+        return None
+
+    def create_index(self, path, prop='Input line'):
+        """Map each row's ``prop`` value to the file offset of that row.
+
+        The offsets can be handed to ``seek`` later, which lets two files be
+        joined on ``prop`` without holding either of them in memory.  Comment,
+        blank and header lines are not indexed; when a value repeats, the
+        offset of its last occurrence wins.
+
+        Raises:
+            CravatSubmissionException: if the file has no header row or the
+                header row does not contain ``prop``.
+        """
+        headers = self.get_headers(path)
+        # A file without a header row yields None; report that the same way as
+        # a missing column instead of failing on "in None".
+        if headers is None or prop not in headers:
+            raise CravatSubmissionException("Index retrievel property not found in headers")
+        prop_loc = headers.index(prop)
+        index = {}
+        with open(path, 'r') as f:
+            # readline() rather than "for line in f" so that tell() stays
+            # usable between reads.
+            pos = 0
+            line = f.readline()
+            while line != "":
+                if not self.is_skippable(line):
+                    parsed = self.parse(line)
+                    if parsed != headers:
+                        index[parsed[prop_loc]] = pos
+                pos = f.tell()
+                line = f.readline()
+        return index
+
+    def get_header_val_dict(self, headers, vals):
+        """Pair ``headers`` with ``vals`` positionally and return the dict.
+
+        Pairing stops at the shorter of the two sequences.
+        """
+        return dict(zip(headers, vals))
+
+    def write_results(self, results_path, details_path, out_path, write_headers=True):
+        """Join the Results and Details files on 'Input line' into ``out_path``.
+
+        One output row is written per indexed Results row.  Columns are the
+        Results headers followed by any Details headers not already present;
+        a column with no value for a row is written as the text ``None``.
+
+        Raises:
+            CravatSubmissionException: if either file lacks a header row.
+        """
+        results_headers = self.get_headers(results_path)
+        details_headers = self.get_headers(details_path)
+        if results_headers is None or details_headers is None:
+            raise CravatSubmissionException("Unable to intepret headers in Results or Details submission files")
+        # Work on a copy: extending the merged list must not also grow
+        # results_headers, which is still used to label Results values below.
+        headers = list(results_headers)
+        for header in details_headers:
+            if header not in headers:
+                headers.append(header)
+        results_index = self.create_index(results_path)
+        details_index = self.create_index(details_path)
+        with open(results_path, 'r') as results_file, \
+                open(details_path, 'r') as details_file, \
+                open(out_path, 'w') as out_file:
+            if write_headers:
+                out_file.write(self.unparse(headers))
+            for line_id, file_pos in results_index.items():
+                results_file.seek(file_pos)
+                results_vals = self.parse(results_file.readline())
+                row = self.get_header_val_dict(results_headers, results_vals)
+                if line_id in details_index:
+                    details_file.seek(details_index[line_id])
+                    details_vals = self.parse(details_file.readline())
+                    # On a repeated entry, the Details value will overwrite Results value
+                    row.update(self.get_header_val_dict(details_headers, details_vals))
+                out_file.write(self.unparse([row.get(header, 'None') for header in headers]))
+
+    def submit(self, in_path, analysis):
+        """Submit ``in_path`` to the production CRAVAT server and fetch results.
+
+        Posts the file, polls the job status every two seconds until it is
+        'Success', then downloads the two result files into the current
+        directory under timestamped names.
+
+        Returns:
+            Tuple ``(results_path, details_path)`` of the downloaded files.
+
+        Raises:
+            ValueError: if ``analysis`` is not a recognized value.
+            requests.HTTPError: if the server answers with an HTTP error.
+        """
+        if not self.is_valid_analysis(analysis):
+            raise ValueError("Did not get valid analyses.")
+        # Create post request to submit job to CRAVAT production server.  The
+        # input file is closed as soon as the upload has been sent.
+        with open(in_path) as input_file:
+            submit = requests.post('http://cravat.us/CRAVAT/rest/service/submit',
+                                   files={'inputfile': input_file},
+                                   data={'email': email,
+                                         'analyses': analysis})
+        # Surface HTTP failures here rather than as a JSON/KeyError below.
+        submit.raise_for_status()
+        # Check job run status in loop until status is 'Success'
+        jobid = json.loads(submit.text)['jobid']
+        # NOTE(review): this loop only ends on 'Success'.  If the service can
+        # report a terminal failure status the loop never returns; confirm the
+        # possible status values and add an exit for them.
+        while True:
+            check = requests.get('http://cravat.us/CRAVAT/rest/service/status',
+                                 params={'jobid': jobid})
+            check.raise_for_status()
+            status = json.loads(check.text)['status']
+            print(status)
+            if status == 'Success':
+                break
+            time.sleep(2)
+        # Download completed job results to local files
+        timestamp = time.strftime("%Y-%m-%d_%H-%M-%S_")
+        results_path = 'Z_Variant_Result' + timestamp + '.tsv'
+        details_path = 'Z_Additional_Details' + timestamp + '.tsv'
+        base_url = "http://cravat.us/CRAVAT/results/" + jobid + "/"
+        urlretrieve(base_url + "Variant.Result.tsv",
+                    filename=results_path)
+        urlretrieve(base_url + "Variant_Additional_Details.Result.tsv",
+                    filename=details_path)
+        return results_path, details_path
+
+if __name__ == "__main__":
+    submission = CravatSubmission()
+    # sys.argv is passed whole: its first element is consumed by the parser's
+    # positional 'path' argument.
+    cmd_args = submission.get_cmd_args(sys.argv)
+    # --output defaults to None and write_results() opens it for writing, so
+    # reject a missing path now instead of after the remote job has finished.
+    if cmd_args.output is None:
+        sys.exit("cravat_submit: --output is required to write the merged results")
+    # Galaxy converts semi-colons to X's. Switch it back
+    analysis = cmd_args.analysis
+    if analysis == "VESTXCHASM":
+        analysis = "VEST;CHASM"
+    elif analysis == "NONE":
+        # The help text advertises 'NONE', but is_valid_analysis() only accepts
+        # the empty string for "no analysis".
+        analysis = ""
+    results_path, details_path = submission.submit(cmd_args.input, analysis)
+    submission.write_results(results_path, details_path, cmd_args.output)
\ No newline at end of file