# view cravat_submit/ @ 21:67a13940d0bf draft default tip
#
# author in_silico
# date Thu, 16 Aug 2018 15:10:43 -0400
# parents 275d45d14350
# line wrap: on
# line source

from __future__ import print_function

import argparse
import csv
import json
import sys
import time

try:
    # Python 3.0+
    from urllib.request import urlretrieve
except ImportError:
    # Python 2.7
    from urllib import urlretrieve

import requests

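# Tool's email:
# password: chicken_quesadilla
# NOTE(review): account credentials should not be kept in source control;
# move them to configuration or an environment variable.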

# Address sent as the 'email' form field of the job submission request made
# by CravatSubmission.submit.
# NOTE(review): empty in this copy of the file -- confirm the intended value.
email = ''

class CravatSubmissionException(Exception):
    """Error raised by CravatSubmission when its input files cannot be used."""

    def __init__(self, message):
        # A single, mandatory message is forwarded unchanged to Exception.
        Exception.__init__(self, message)

class CravatSubmission(object):
    """Submit a variant file to a CRAVAT server and merge the downloaded reports.

    Typical use is ``submit()`` followed by ``write_results()``; the other
    methods are small line-parsing helpers shared by those two steps.
    """

    # NOTE(review): the service endpoints are empty strings in this copy of
    # the file -- fill in the real CRAVAT URLs before running a submission.
    SUBMIT_URL = ''
    STATUS_URL = ''
    RESULTS_URL = ''

    # Values accepted by is_valid_analysis.
    # NOTE(review): the --analysis help text mentions 'NONE', which is not
    # accepted here; presumably '' is the "no analysis" value -- confirm.
    ANALYSES = ("VEST", "CHASM", "VEST;CHASM", "")

    def get_cmd_args(self, argv):
        """Parse the command line of the tool.

        :param argv: full argument vector, *including* the script path as its
            first element (the caller passes ``sys.argv`` unchanged).
        :return: namespace with ``path``, ``input``, ``output``, ``analysis``.
        """
        parser = argparse.ArgumentParser()
        # Positional argument that swallows argv[0] (the script path).
        parser.add_argument('path',
                            help="Path to python module")
        # NOTE(review): the long option names follow the attributes read by
        # the script entry point; the short aliases are assumed -- confirm
        # against the tool wrapper that builds the command line.
        parser.add_argument('--input', '-i',
                            required=True,
                            help='Input path to a cravat file for querying')
        parser.add_argument('--output', '-o',
                            default=None,
                            help='Output path to write results from query')
        parser.add_argument('--analysis', '-a',
                            help="Cravat analysis. Should be 'VEST', 'CHASM', 'NONE', or 'VEST;CHASM'")
        return parser.parse_args(argv)

    def is_valid_analysis(self, analysis):
        """Return True if ``analysis`` is one of the recognized values."""
        return analysis in self.ANALYSES

    def is_skippable(self, s):
        """Return True if line ``s`` is blank, whitespace-only or a comment line.

        :raises CravatSubmissionException: if ``s`` is not a ``str``.
        """
        if not isinstance(s, str):
            raise CravatSubmissionException("is_skippable accepts a string")
        return (s == ""
                or s.startswith("#")
                or s.startswith('"#For more information on CRAVAT')
                or s.isspace())

    def parse(self, s, sep='\t'):
        """Convert a text line to a list of values split on ``sep``."""
        return s.strip().split(sep)

    def unparse(self, array, sep='\t', newline='\n'):
        """Convert a sequence of values to a writable, terminated text line."""
        return sep.join(str(value) for value in array) + newline

    def get_headers(self, path, pattern='Input line', sep='\t'):
        """Return the column names of a Results/Details file.

        The header is the first line that starts with ``pattern``.

        :return: list of column names, or ``None`` when no such line exists.
        """
        with open(path, 'r') as f:
            for line in f:
                if line.startswith(pattern):
                    # Honour the caller's separator (it used to be ignored).
                    return self.parse(line, sep)
        return None

    def create_index(self, path, prop='Input line'):
        """Map each row's ``prop`` value to the file offset where the row starts.

        The index lets rows of two files be matched up later with ``seek``
        instead of reading either file into memory.

        :raises CravatSubmissionException: if the file has no header line or
            the header does not contain ``prop``.
        """
        headers = self.get_headers(path)
        if headers is None or prop not in headers:
            raise CravatSubmissionException("Index retrievel property not found in headers")
        prop_loc = headers.index(prop)
        index = {}
        with open(path, 'r') as f:
            pos = f.tell()
            # readline() (not file iteration) keeps tell() usable between lines.
            for line in iter(f.readline, ""):
                if not self.is_skippable(line):
                    parsed = self.parse(line)
                    if parsed != headers:
                        index[parsed[prop_loc]] = pos
                pos = f.tell()
        return index

    def get_header_val_dict(self, headers, vals):
        """Pair header names with values; surplus items on either side are dropped."""
        return dict(zip(headers, vals))

    def write_results(self, results_path, details_path, out_path, write_headers=True):
        """Merge a Results file and a Details file into one tab-separated file.

        One output row is written per indexed Results row, in Results-file
        order.  Columns are the Results columns followed by the Details
        columns not already present; a column with no value is written as the
        text ``None``.

        :raises CravatSubmissionException: if either header cannot be found.
        """
        results_headers = self.get_headers(results_path)
        details_headers = self.get_headers(details_path)
        if results_headers is None or details_headers is None:
            raise CravatSubmissionException("Unable to intepret headers in Results or Details submission files")
        # Build the merged header on a copy so results_headers stays intact.
        headers = list(results_headers)
        for name in details_headers:
            if name not in headers:
                headers.append(name)
        results_index = self.create_index(results_path)
        details_index = self.create_index(details_path)
        with open(results_path, 'r') as results_file, \
                open(details_path, 'r') as details_file, \
                open(out_path, 'w') as out_file:
            if write_headers:
                out_file.write(self.unparse(headers))
            # Walk rows by file offset so output order does not depend on
            # dict ordering.
            ordered_rows = sorted(results_index.items(), key=lambda item: item[1])
            for line_id, file_pos in ordered_rows:
                results_file.seek(file_pos)
                results_vals = self.parse(results_file.readline())
                row = self.get_header_val_dict(results_headers, results_vals)
                if line_id in details_index:
                    details_file.seek(details_index[line_id])
                    details_vals = self.parse(details_file.readline())
                    # On a repeated entry, the Details value will overwrite Results value
                    row.update(self.get_header_val_dict(details_headers, details_vals))
                out_file.write(self.unparse([row.get(header, 'None') for header in headers]))

    def submit(self, in_path, analysis, poll_interval=2):
        """Submit a job to the CRAVAT server, wait for it, download its reports.

        :param in_path: path of the input file uploaded as ``inputfile``.
        :param analysis: one of the values accepted by ``is_valid_analysis``.
        :param poll_interval: seconds to wait between status checks.
            NOTE(review): the original delay is not visible in this copy of
            the file; 2 seconds is an assumption.
        :return: ``(results_path, details_path)`` of the downloaded files.
        :raises ValueError: if ``analysis`` is not a recognized value.
        """
        if not self.is_valid_analysis(analysis):
            raise ValueError("Did not get valid analyses.")
        # Create post request to submit job to CRAVAT production server; the
        # upload handle is closed as soon as the request has been sent.
        with open(in_path) as in_file:
            submit = requests.post(self.SUBMIT_URL,
                                   files={'inputfile': in_file},
                                   data={'email': email,
                                         'analyses': analysis})
        # Check job run status in loop until status is 'Success'
        # NOTE(review): a job that never reports 'Success' keeps this loop
        # running forever -- consider handling the server's failure statuses.
        jobid = json.loads(submit.text)['jobid']
        while True:
            check = requests.get(self.STATUS_URL, params={'jobid': jobid})
            status = json.loads(check.text)['status']
            if status == 'Success':
                break
            time.sleep(poll_interval)
        # Download completed job results to local files
        timestamp = time.strftime("%Y-%m-%d_%H-%M-%S_")
        results_path = 'Z_Variant_Result' + timestamp + '.tsv'
        details_path = 'Z_Additional_Details' + timestamp + '.tsv'
        urlretrieve(self.RESULTS_URL + jobid + "/" + "Variant.Result.tsv",
                    results_path)
        urlretrieve(self.RESULTS_URL + jobid + "/" + "Variant_Additional_Details.Result.tsv",
                    details_path)
        return results_path, details_path

if __name__ == "__main__":
    # Script entry point: submit the input file, then merge the two
    # downloaded reports into the requested output file.
    submission = CravatSubmission()
    cmd_args = submission.get_cmd_args(sys.argv)
    # Galaxy converts semi-colons to X's. Switch it back
    requested = cmd_args.analysis
    analysis = "VEST;CHASM" if requested == "VESTXCHASM" else requested
    results_path, details_path = submission.submit(cmd_args.input, analysis)
    submission.write_results(results_path, details_path, cmd_args.output)