Mercurial > repos > in_silico > cravat_score_and_annotate

from __future__ import print_function
import requests
import json
import time
try:
    # Python 3.0+
    from urllib.request import urlretrieve
except:
    # Python 2.7
    from urllib import urlretrieve
import sys
import csv
import argparse

"""
Tool's email:
username: cravatgalaxy@gmail.com
password: chicken_quesadilla
"""

# Address sent as the 'email' form field of every job posted by
# CravatSubmission.submit.
email = 'cravatgalaxy@gmail.com'

class CravatSubmissionException(Exception):
    """: Error raised by CravatSubmission for invalid input or unusable result files."""

    def __init__(self, message):
        # A message is mandatory; it is handed unchanged to the base Exception.
        Exception.__init__(self, message)

class CravatSubmission(object):
    """
    : Submit a variant file to the CRAVAT REST service, wait for the job to finish,
    : download the Results/Details tables and merge them into one tab-separated file.
    """

    def get_cmd_args(self, argv):
        """
        : Parse command-line arguments.
        : `argv` is parsed as given, so when the full sys.argv is passed the leading
        : script path is consumed by the positional 'path' argument.
        : Returns the argparse namespace (path, input, output, analysis).
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('path',
                            help="Path to python module")
        parser.add_argument('--input',
                            '-i',
                            required=True,
                            help='Input path to a cravat file for querying')
        parser.add_argument('--output',
                            '-o',
                            default=None,
                            help='Output path to write results from query')
        parser.add_argument('--analysis',
                            '-a',
                            required=True,
                            help="Cravat analysis. Should be 'VEST', 'CHASM', 'NONE', or 'VEST;CHASM'")
        return parser.parse_args(argv)

    def is_valid_analysis(self, analysis):
        """: Test if analysis is a recognized value ("" stands for no analysis)"""
        return analysis in ("VEST", "CHASM", "VEST;CHASM", "")

    def is_skippable(self, s):
        """
        : Test if a line is skippable, a.k.a. a comment/header or blank line.
        : Only str is accepted; anything else raises CravatSubmissionException.
        """
        if not isinstance(s, str):
            raise CravatSubmissionException("is_skippable accepts a string")
        if s == "" or s.isspace():
            return True
        return s.startswith(("#", '"#For more information on CRAVAT'))

    def parse(self, s, sep='\t'):
        """
        : Convert string line to an array of values.
        : Note that strip() removes all surrounding whitespace, so empty fields at
        : the very start or end of a tab-separated line are dropped.
        """
        return s.strip().split(sep)

    def unparse(self, array, sep='\t', newline='\n'):
        """: Convert an array of values to a writable string line"""
        return sep.join(str(value) for value in array) + newline

    def get_headers(self, path, pattern='Input line', sep='\t'):
        """
        : Get the headers from a Results/Details file obtained from a finished Cravat submission.
        : The header is the first line starting with `pattern`; it is split on `sep`.
        : Returns None when no such line exists.
        """
        with open(path, 'r') as f:
            for line in f:
                if line.startswith(pattern):
                    # Honour the caller's separator instead of always splitting on tabs.
                    return self.parse(line, sep)
            return None

    def create_index(self, path, prop='Input line'):
        """
        : Create an index of seek/tell positions in file associated to a line value. Used to record
        : the location of lines between two files that are associated with each other without reading
        : entire files into memory.
        : Returns an ordered mapping {value of column `prop`: position of that line}, in file order.
        : Raises CravatSubmissionException when the file has no header line or lacks `prop`.
        """
        # Local import so the block does not depend on the file's import section.
        from collections import OrderedDict

        headers = self.get_headers(path)
        if headers is None:
            raise CravatSubmissionException("Unable to find a header line in " + str(path))
        if prop not in headers:
            raise CravatSubmissionException("Index retrievel property not found in headers")
        prop_loc = headers.index(prop)
        # OrderedDict keeps rows in file order on every Python version, so the
        # merged output is not shuffled on interpreters with unordered dicts.
        index = OrderedDict()
        with open(path, 'r') as f:
            pos = 0
            # readline() (rather than iterating the file) keeps tell() usable.
            for line in iter(f.readline, ""):
                if not self.is_skippable(line):
                    parsed = self.parse(line)
                    if parsed != headers:
                        index[parsed[prop_loc]] = pos
                pos = f.tell()
        return index

    def get_header_val_dict(self, headers, vals):
        """: Associate an array of header keys to an array of values (extra items on either side are ignored)."""
        return dict(zip(headers, vals))

    def write_results(self, results_path, details_path, out_path, write_headers=True):
        """
        : Using the paths to the Results and Details file from a Cravat Submission,
        : write the output file.
        : Each Results row is joined with the Details row sharing its 'Input line' value;
        : columns without a value are written as 'None'.
        : Raises CravatSubmissionException when either file has no header line.
        """
        results_headers = self.get_headers(results_path)
        details_headers = self.get_headers(details_path)
        if results_headers is None or details_headers is None:
            raise CravatSubmissionException("Unable to intepret headers in Results or Details submission files")
        # Build the merged header list as a copy: results_headers must stay
        # untouched because it is still needed to label the Results values.
        headers = list(results_headers)
        for header in details_headers:
            if header not in headers:
                headers.append(header)
        results_index = self.create_index(results_path)
        details_index = self.create_index(details_path)
        with open(results_path, 'r') as results_file, \
                open(details_path, 'r') as details_file, \
                open(out_path, 'w') as out_file:
            if write_headers:
                out_file.write(self.unparse(headers))
            for line_id, file_pos in results_index.items():
                results_file.seek(file_pos)
                results_vals = self.parse(results_file.readline())
                row = self.get_header_val_dict(results_headers, results_vals)
                if line_id in details_index:
                    details_file.seek(details_index[line_id])
                    details_vals = self.parse(details_file.readline())
                    # On a repeated entry, the Details value will overwrite Results value
                    row.update(self.get_header_val_dict(details_headers, details_vals))
                out_file.write(self.unparse([row.get(header, 'None') for header in headers]))

    def submit(self, in_path, analysis, poll_interval=2, timeout=None):
        """
        : Make a POST request to submit a job to production CRAVAT server, poll until the
        : job status is 'Success', then download the two result tables to the working directory.
        : poll_interval -- seconds to sleep between status checks.
        : timeout       -- give up after this many seconds of polling; None waits indefinitely.
        : Returns (results_path, details_path) of the downloaded files.
        : Raises ValueError for an unrecognized analysis, CravatSubmissionException on timeout,
        : and the requests HTTP error when the server answers with an error status code.
        """
        if not self.is_valid_analysis(analysis):
            raise ValueError("Did not get valid analyses.")
        # Upload in binary mode and close the handle as soon as the request is sent.
        with open(in_path, 'rb') as input_file:
            response = requests.post('http://cravat.us/CRAVAT/rest/service/submit',
                                     files={'inputfile': input_file},
                                     data={'email': email,
                                           'analyses': analysis})
        response.raise_for_status()
        jobid = json.loads(response.text)['jobid']
        # Check job run status in loop until status is 'Success'.
        # NOTE(review): every other status keeps polling; if the service reports a
        # terminal failure status, only `timeout` ends the loop -- confirm the
        # status values and fail fast on them.
        deadline = None if timeout is None else time.time() + timeout
        while True:
            check = requests.get('http://cravat.us/CRAVAT/rest/service/status',
                                 params={'jobid': jobid})
            check.raise_for_status()
            status = json.loads(check.text)['status']
            if status == 'Success':
                break
            if deadline is not None and time.time() >= deadline:
                raise CravatSubmissionException(
                    "Timed out waiting for CRAVAT job " + str(jobid) +
                    " (last status: " + str(status) + ")")
            time.sleep(poll_interval)
        # Download completed job results to local files
        timestamp = time.strftime("%Y-%m-%d_%H-%M-%S_")
        results_path = 'Z_Variant_Result' + timestamp + '.tsv'
        details_path = 'Z_Additional_Details' + timestamp + '.tsv'
        job_url = "http://cravat.us/CRAVAT/results/" + jobid + "/"
        urlretrieve(job_url + "Variant.Result.tsv", filename=results_path)
        urlretrieve(job_url + "Variant_Additional_Details.Result.tsv", filename=details_path)
        return results_path, details_path

if __name__ == "__main__":
    # Script entry point: parse the CLI, run the CRAVAT job and write the
    # merged Results/Details table to the requested output path.
    submission = CravatSubmission()
    cmd_args = submission.get_cmd_args(sys.argv)
    # Normalise the analysis value before validation:
    #  - Galaxy converts semi-colons to X's, so switch "VESTXCHASM" back.
    #  - The --analysis help advertises 'NONE', but is_valid_analysis only
    #    recognises the empty string for "no analysis"; without this mapping
    #    'NONE' is rejected with a ValueError.
    cli_aliases = {"VESTXCHASM": "VEST;CHASM", "NONE": ""}
    analysis = cli_aliases.get(cmd_args.analysis, cmd_args.analysis)
    results_path, details_path = submission.submit(cmd_args.input, analysis)
    submission.write_results(results_path, details_path, cmd_args.output)
author	in_silico
date	Thu, 16 Aug 2018 15:10:43 -0400
parents	275d45d14350
children