changeset 4:5cc3210369ac draft

Uploaded
author saket-choudhary
date Thu, 04 Sep 2014 17:46:51 -0400
parents 5717f4376968
children 697e66764209
files sift_web/README.rst sift_web/sift_web.py sift_web/sift_web.xml sift_web/tool_dependencies.xml
diffstat 4 files changed, 200 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sift_web/README.rst	Thu Sep 04 17:46:51 2014 -0400
@@ -0,0 +1,42 @@
+Galaxy wrapper for the SIFT webservice 
+===================================================
+
+This tool is copyright 2014 by Saket Choudhary <saketkc@gmail.com>, Indian Institute of Technology Bombay.
+All rights reserved. MIT licensed.
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Citations
+===========
+
+
+If you use this Galaxy tool in work leading to a scientific publication, please cite:
+
+Kumar P, Henikoff S, Ng PC. Predicting the effects of coding non-synonymous variants on protein function using the SIFT algorithm. Nat Protoc. 2009;4(7):1073-81.
+
+Ng PC, Henikoff S. Predicting the Effects of Amino Acid Substitutions on Protein Function. Annu Rev Genomics Hum Genet. 2006;7:61-80.
+
+Ng PC, Henikoff S. SIFT: predicting amino acid changes that affect protein function. Nucleic Acids Res. 2003 Jul 1;31(13):3812-4. 
+
+Ng PC, Henikoff S. Accounting for Human Polymorphisms Predicted to Affect Protein Function. Genome Res. 2002 Mar;12(3):436-46.  
+
+Ng PC, Henikoff S. Predicting Deleterious Amino Acid Substitutions. Genome Res. 2001 May;11(5):863-74.  
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sift_web/sift_web.py	Thu Sep 04 17:46:51 2014 -0400
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+import requests
+import argparse
+import sys
+from functools import wraps
+import time
+from bs4 import BeautifulSoup
+
+# URL that SIFTWeb.upload() POSTs the variant list to.
+__url__ = 'http://provean.jcvi.org/genome_prg_2.php'
+
+
+def stop_err(msg, err=1):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit(err)
+
+
+def retry(ExceptionToCheck, tries=10, delay=3, backoff=2, logger=None):
+    """Retry calling the decorated function using an exponential backoff.
+
+    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
+
+    :param ExceptionToCheck: the exception to check. may be a tuple of
+        exceptions to check
+    :type ExceptionToCheck: Exception or tuple
+    :param tries: number of times to try (not retry) before giving up
+    :type tries: int
+    :param delay: initial delay between retries in seconds
+    :type delay: int
+    :param backoff: backoff multiplier e.g. value of 2 will double the delay
+        each retry
+    :type backoff: int
+    :param logger: logger to use. If None, print
+    :type logger: logging.Logger instance
+    """
+    def deco_retry(f):
+
+        @wraps(f)
+        def f_retry(*args, **kwargs):
+            mtries, mdelay = tries, delay
+            while mtries > 1:
+                try:
+                    return f(*args, **kwargs)
+                except ExceptionToCheck, e:
+                    #msg = '%s, Retrying in %d seconds...' % (str(e), mdelay)
+                    msg = 'Retrying in %d seconds...' % (mdelay)
+                    if logger:
+                        logger.warning(msg)
+                    else:
+                        # print msg
+                        pass
+                    time.sleep(mdelay)
+                    mtries -= 1
+                    mdelay *= backoff
+            return f(*args, **kwargs)
+
+        return f_retry  # true decorator
+
+    return deco_retry
+
+
+class SIFTWeb:
+
+    def __init__(self):
+        self.full_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.tsv'
+        self.condensed_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.one.tsv'
+        self.summary_download_url = 'http://provean.jcvi.org/serve_file.php?VAR=g%s/%s.result.summary.tsv'
+        self.url_dict = {'full': self.full_download_url,
+                         'condensed': self.condensed_download_url,
+                         'summary': self.summary_download_url}
+
+    def upload(self, inputpath):
+        payload = {'table': 'human37_66'}
+        in_txt = open(inputpath, 'rb').read()
+        payload['CHR'] = in_txt
+        request = requests.post( __url__, data=payload)#, files={'CHR_file': open(path)})
+        return request.text
+
+    @retry(requests.exceptions.HTTPError)
+    def get_full_data(self, job_id, full_output):
+        r = requests.request(
+            'GET', (self.full_download_url) % (job_id, job_id))
+        if r.text != 'No file exists':
+            with open(full_output, 'wb') as f:
+                f.write(r.text)
+        else:
+            return requests.HTTPError()
+
+    @retry(requests.exceptions.HTTPError)
+    def get_condensed_data(self, job_id, condensed_output):
+        r = requests.request(
+            'GET', (self.condensed_download_url) % (job_id, job_id))
+        if r.text != 'No file exists':
+            with open(condensed_output, 'wb') as f:
+                f.write(r.text)
+        else:
+            raise(requests.HTTPError())
+
+    @retry(requests.exceptions.HTTPError)
+    def get_summary_data(self, job_id, summary_output):
+        r = requests.request(
+            'GET', (self.summary_download_url) % (job_id, job_id))
+        if r.text != 'No file exists':
+            with open(summary_output, 'wb') as f:
+                f.write(r.text)
+        else:
+            raise(requests.HTTPError())
+
+
+def main(params):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input', type=str, required=True)
+    parser.add_argument('--output1', type=str, required=True)
+    parser.add_argument('--output2', type=str, required=True)
+    parser.add_argument('--output3', type=str, required=True)
+    args = parser.parse_args(params)
+    sift_web = SIFTWeb()
+    content = sift_web.upload(args.input)
+    soup = BeautifulSoup(content)
+    p = soup.findAll('p')
+    job_id = p[1].string.split(':')[-1].replace(' ', '').replace(').', '')
+    sift_web.get_full_data(job_id, args.output1)
+    sift_web.get_condensed_data(job_id, args.output2)
+    sift_web.get_summary_data(job_id, args.output3)
+
+
+# Script entry point: pass the command-line arguments (without the program
+# name) to main().
+if __name__ == '__main__':
+    main(sys.argv[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sift_web/sift_web.xml	Thu Sep 04 17:46:51 2014 -0400
@@ -0,0 +1,21 @@
+<tool id="sift_web" name="SIFTWeb">
+    <description>SIFT/Provean web service</description>
+    <requirements>
+        <requirement type="package" version="2.2.1">requests</requirement>
+        <requirement type="package" version="4.1.0">beautifulsoup4</requirement>
+        <requirement type="python-module">requests</requirement>
+        <!-- NOTE(review): "python-package" does not appear to be a requirement type documented by Galaxy; confirm whether this entry is needed. -->
+        <requirement type="python-package">bs4</requirement>
+    </requirements>
+    <!-- Dataset paths are quoted so that unusual characters in them cannot split or alter the command line. -->
+    <command interpreter="python">
+        sift_web.py --input '$input' --output1 '$output1' --output2 '$output2' --output3 '$output3'
+    </command>
+    <inputs>
+        <param name="input" format="txt" type="data" label="Input Variants"/>
+    </inputs>
+    <outputs>
+        <!-- output1/2/3 receive the full, condensed and summary tables written by sift_web.py. -->
+        <data name="output1" format="tabular" label="${tool.name} on ${on_string}: full"/>
+        <data name="output2" format="tabular" label="${tool.name} on ${on_string}: condensed"/>
+        <data name="output3" format="tabular" label="${tool.name} on ${on_string}: summary"/>
+    </outputs>
+    <help>
+Submits the input variants to the SIFT/Provean web service at provean.jcvi.org
+and downloads the full, condensed and summary result tables of the job.
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sift_web/tool_dependencies.xml	Thu Sep 04 17:46:51 2014 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!-- Tool Shed repositories that provide the beautifulsoup4 4.1.0 and requests 2.2.1 packages, each pinned to a specific changeset revision. -->
+<tool_dependency>
+    <package name="beautifulsoup4" version="4.1.0">
+        <repository changeset_revision="4890592e10f8" name="package_beautifulsoup4_4_1_0" owner="saket-choudhary" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="requests" version="2.2.1">
+        <repository changeset_revision="04c9eef6c14b" name="package_requests_2_2_1" owner="saket-choudhary" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>