diff chasm_webservice/chasm_webservice.py @ 0:99d838cef41a draft default tip

Uploaded
author saket-choudhary
date Tue, 07 Oct 2014 19:23:27 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chasm_webservice/chasm_webservice.py	Tue Oct 07 19:23:27 2014 -0400
@@ -0,0 +1,285 @@
+#!/usr/bin/python
+"""
+The MIT License (MIT)
+
+Copyright (c) 2014 Saket Choudhary, <saketkc@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the 'Software'), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+"""
+import sys
+import requests
+import argparse
+import time
+from functools import wraps
+import json
+import zipfile
+import tempfile
+import ntpath
+import shutil
+import xlrd
+import csv
+import os
+# Lookup from an integer index to a CSV file name (presumably the worksheet
+# index of an .xls report -- confirm).
+# NOTE(review): nothing in the visible code reads this mapping; verify that
+# it is still needed before relying on it.
+sheet_map = {0: 'Variant_Analysis.csv',
+             1: 'Amino_Acid_Level_Analysis.csv', 2: 'Gene_Level_Analysis.csv'}
+
+
+def retry(ExceptionToCheck, tries=40000, delay=3, backoff=2, logger=None):
+    '''Retry calling the decorated function using an exponential backoff.
+
+    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
+
+    :param ExceptionToCheck: the exception to check. may be a tuple of
+        exceptions to check
+    :type ExceptionToCheck: Exception or tuple
+    :param tries: number of times to try (not retry) before giving up
+    :type tries: int
+    :param delay: initial delay between retries in seconds
+    :type delay: int
+    :param backoff: backoff multiplier e.g. value of 2 will double the delay
+        each retry
+    :type backoff: int
+    :param logger: logger to use. If None, print
+    :type logger: logging.Logger instance
+    '''
+    def deco_retry(f):
+
+        @wraps(f)
+        def f_retry(*args, **kwargs):
+            mtries, mdelay = tries, delay
+            while mtries > 1:
+                try:
+                    return f(*args, **kwargs)
+                except ExceptionToCheck, e:
+                    #msg = '%s, Retrying in %d seconds...' % (str(e), mdelay)
+                    msg = 'Retrying in %d seconds...' % (mdelay)
+                    if logger:
+                        logger.warning(msg)
+                    else:
+                        print msg
+                    time.sleep(mdelay)
+                    mtries -= 1
+                    mdelay *= backoff
+            return f(*args, **kwargs)
+
+        return f_retry  # true decorator
+
+    return deco_retry
+# Values accepted by the --cancertype command-line option; the selected value
+# is sent to the server as the 'chasmclassifier' form field.
+# NOTE(review): 'Kidney-Papiallary_Cell' looks like a misspelling of
+# 'Papillary' -- confirm against the classifier names the server accepts
+# before changing it, since the string is transmitted verbatim.
+CANCERTYPES = ['Bladder', 'Blood-Lymphocyte', 'Blood-Myeloid',
+               'Brain-Cerebellum', 'Brain-Glioblastoma_Multiforme',
+               'Brain-Lower_Grade_Glioma', 'Breast', 'Cervix',
+               'Colon', 'Head_and_Neck', 'Kidney-Chromophobe',
+               'Kidney-Clear_Cell', 'Kidney-Papiallary_Cell',
+               'Liver-Nonviral', 'Liver-Viral', 'Lung-Adenocarcinoma',
+               'Lung-Squamous_Cell', 'Melanoma', 'Other', 'Ovary',
+               'Pancreas', 'Prostate-Adenocarcinoma', 'Rectum',
+               'Skin', 'Stomach', 'Thyroid', 'Uterus']
+
+# Endpoint that CHASMWeb.make_request POSTs the job submission to.
+__URL__ = 'http://www.cravat.us/rest/service/submit'
+
+
+def stop_err(msg):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit()
+
+
+class CHASMWeb:
+
+    def __init__(self,
+                 mutationbox=None, filepath=None,
+                 is_hg_18=None, analysis_type=None,
+                 analysis_program=None, chosendb=None,
+                 cancer_type=None, email=None,
+                 annotate_genes=None, text_reports=None,
+                 mupit_out=None):
+        self.mutationbox = mutationbox
+        self.filepath = filepath
+        self.is_hg_18 = is_hg_18
+        self.analysis_type = analysis_type
+        self.analysis_program = analysis_program
+        self.chosendb = chosendb
+        self.email = email
+        self.annotate_genes = annotate_genes
+        self.cancer_type = cancer_type
+        self.email = email
+        self.annotate_genes = annotate_genes
+        self.text_reports = text_reports
+        self.mupit_input = mupit_out
+
+    def make_request(self):
+        data = {
+            'mutations  ': self.mutationbox,
+            'hg18': self.is_hg_18,
+            'analysistype': self.analysis_type,
+            'analysisitem': self.analysis_program,
+            'chasmclassifier': self.cancer_type,
+            'geneannotation': self.annotate_genes,
+            'email': self.email,
+            'tsvreport': 'on',  # self.text_reports,
+            'mupitinput': self.mupit_input,
+        }
+        stripped_data = {}
+
+        for key, value in data.iteritems():
+            if value is True:
+                value = 'on'
+            if value is not None and value is not False:
+                stripped_data[key] = value
+
+        if not self.mutationbox:
+            file_payload = {'inputfile': open(self.filepath)}
+            request = requests.post(
+                __URL__, data=stripped_data, files=file_payload)
+        else:
+            request = requests.post(
+                __URL__, data=stripped_data, files=dict(foo='bar'))
+        print request.text
+        job_id = json.loads(request.text)['jobid']
+        return job_id
+
+    @retry(requests.exceptions.HTTPError)
+    def zip_exists(self, job_id):
+        print job_id
+        url = 'http://www.cravat.us/results/%s/%s.zip' % (job_id, job_id)
+        zip_download_request = requests.request('GET', url)
+        if zip_download_request.status_code == 404:
+            raise requests.HTTPError()
+        else:
+            return url
+
+    def download_zip(self, url, job_id):
+        self.tmp_dir = tempfile.mkdtemp()
+        r = requests.get(url, stream=True)
+        if r.status_code == 200:
+            self.path = os.path.join(self.tmp_dir, job_id + '.zip')
+            with open(self.path, 'wb') as f:
+                for chunk in r.iter_content(128):
+                    f.write(chunk)
+        else:
+            self.path = None
+        return self.path
+
+    def move_files(self, file_map):
+        fh = open(self.path, 'rb')
+        zip_files = zipfile.ZipFile(fh)
+        for name in zip_files.namelist():
+            filename = ntpath.basename(name)
+            extension = ntpath.splitext(filename)[-1]
+            source_file = zip_files.open(name)
+            if extension == '.txt':
+                target_file = open(file_map['error.txt'], 'wb')
+            elif filename != 'SnvGet Feature Description.xls' and extension != '.xls':
+                target_file = open(file_map[filename], 'wbb')
+            else:
+                target_file = None
+            if target_file:
+                with source_file, target_file:
+                    shutil.copyfileobj(source_file, target_file)
+            if filename == 'SnvGet Feature Description.xls':
+                with xlrd.open_workbook(source_file) as wb:
+                    sheet_names = wb.sheet_names()
+                    for name in sheet_names:
+                        sh = wb.sheet_by_name(name)
+                        name_shortened = name.replace(' ').strip() + '.csv'
+                        with open(name_shortened, 'wb') as f:
+                            c = csv.writer(f)
+                            for r in range(sh.nrows):
+                                c.writerow(sh.row_values(r))
+        shutil.rmtree(self.tmp_dir)
+        fh.close()
+
+
+def main(params):
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--input',
+                        type=str, dest='mutationbox',
+                        help='Input variants')
+    parser.add_argument('--path', type=str,
+                        dest='input_file_location',
+                        help='Input file location')
+    parser.add_argument('--hg18', dest='hg18',
+                        action='store_true')
+    parser.add_argument('--analysis_type', dest='analysis_type',
+                        type=str,
+                        choices=['driver', 'functional',
+                                 'geneannotationonly'],
+                        default='driver')
+    parser.add_argument('--chosendb', dest='chosendb',
+                        type=str, nargs='*',
+                        choices=['CHASM', 'SnvGet'],
+                        default='CHASM')
+    parser.add_argument('--cancertype', dest='cancer_type',
+                        type=str, choices=CANCERTYPES,
+                        required=True)
+    parser.add_argument('--email', dest='email',
+                        required=True, type=str)
+    parser.add_argument('--annotate', dest='annotate',
+                        action='store_true', default=None)
+    parser.add_argument('--tsv_report', dest='tsv_report',
+                        action='store_true', default=None)
+    parser.add_argument('--mupit_out', dest='mupit_out',
+                        action='store_true', default=None)
+    parser.add_argument('--gene_analysis_out', dest='gene_analysis_out',
+                        type=str, required=True)
+    parser.add_argument('--variant_analysis_out',
+                        dest='variant_analysis_out',
+                        type=str, required=True)
+    parser.add_argument('--amino_acid_level_analysis_out',
+                        dest='amino_acid_level_analysis_out',
+                        type=str, required=True,)
+    parser.add_argument('--codon_level_analysis_out',
+                        dest='codon_level_analysis_out',
+                        type=str, required=True,)
+    parser.add_argument('--error_file', dest='error_file_out',
+                        type=str, required=True)
+    parser.add_argument('--snv_box_out', dest='snv_box_out',
+                        type=str, required=False)
+    parser.add_argument('--snv_features', dest='snv_features_out',
+                        type=str, required=False)
+    args = parser.parse_args(params)
+    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
+                         filepath=args.input_file_location,
+                         is_hg_18=args.hg18,
+                         analysis_type=args.analysis_type,
+                         chosendb=args.chosendb,
+                         cancer_type=args.cancer_type,
+                         email=args.email,
+                         annotate_genes=args.annotate,
+                         text_reports=args.tsv_report,
+                         mupit_out=args.mupit_out)
+    job_id = chasm_web.make_request()
+    file_map = {'Amino_Acid_Level_Analysis.Result.tsv': args.amino_acid_level_analysis_out,
+                'SNVBox.tsv': args.snv_box_out,
+                'Variant_Analysis.Result.tsv': args.variant_analysis_out,
+                'Gene_Level_Analysis.Result.tsv': args.gene_analysis_out,
+                'SnvGet Feature Description.xls': args.snv_features_out,
+                'error.txt': args.error_file_out,
+                'Codon_Level_Analysis.Result.tsv': args.codon_level_analysis_out,
+                }
+    url = chasm_web.zip_exists(job_id)
+    download = chasm_web.download_zip(url, job_id)
+    if download:
+        chasm_web.move_files(file_map=file_map)
+    else:
+        stop_err('Unable to download from the server')
+
+# Script entry point: pass everything after the program name to main().
+if __name__ == '__main__':
+    main(sys.argv[1:])