# HG changeset patch # User saket-choudhary # Date 1412724075 14400 # Node ID 09f68bdd1999f3c526da4cd7c16ddac529c5e16b Uploaded diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/README.rst Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,35 @@ +Galaxy wrapper for the Polyphen2 webservice +=================================================== + +This tool is copyright 2014 by Saket Choudhary, Indian Institute of Technology Bombay +All rights reserved. MIT licensed. + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Citations +=========== + + +If you use this Galaxy tool in work leading to a scientific publication please cite: + +Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR. Nat Methods 7(4):248-249 (2010). +"A method and server for predicting damaging missense mutations." 
diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/polyphen2_web.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/polyphen2_web.py Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,207 @@ +#!/usr/bin/python +from bs4 import BeautifulSoup +import argparse +import sys +import time +import os +import tempfile +import requests +import shutil +import csv +submission_url = 'http://genetics.bwh.harvard.edu/cgi-bin/ggi/ggi2.cgi' +result_url = 'http://genetics.bwh.harvard.edu' + +TIMEOUT = 60 * 60 * 24 +TIME_DELAY = 30 +MAX_TRIES = 900000000 + +# Genome assembly version used for chromosome +# coordinates of the SNPs in user input +UCSCDB = ['hg19', 'hg18'] +# Classifier model used for predictions. +MODELNAME = ['HumDiv', 'HumVar'] + +# Set of transcripts on which genomic SNPs will be mapped +SNPFILTER = { + 'All': 0, + 'Canonical': 1, + 'CCDS': 3, +} +# Functional SNP categories to include in genomic SNPs annotation report +SNPFUNCTION = ['c', 'm', ''] + + +def stop_err(msg, err=1): + sys.stderr.write('%s\n' % msg) + sys.exit(err) + + +class Polyphen2Web: + + def __init__(self, ucscdb=None, model_name=None, snp_filter=None, + snp_function=None, file_location=None, email=None): + self.ucscdb = ucscdb + self.model_name = model_name + self.snp_filter = snp_filter + self.snp_function = snp_function + self.file_location = file_location + self.notify_me = email + + def soupify(self, string): + return BeautifulSoup(string) + + def make_request(self): + in_txt = csv.reader(open(self.file_location, 'rb'), delimiter='\t') + tmp_dir = tempfile.mkdtemp() + path = os.path.join(tmp_dir, 'csv_file') + with open(path, 'wb') as fh: + a = csv.writer(fh) + a.writerows(in_txt) + contents = open(self.file_location, 'r').read().replace( + '\t', ' ').replace('::::::::::::::', '') + if self.snp_function == 'All': + self.snp_function = '' + payload = { + '_ggi_project': 'PPHWeb2', + '_ggi_origin': 'query', + '_ggi_batch': contents, + '_ggi_target_pipeline': '1', + 'MODELNAME': 
self.model_name, + 'UCSCDB': self.ucscdb, + 'SNPFILTER': SNPFILTER[self.snp_filter], + 'SNPFUNC': self.snp_function, + 'NOTIFYME': '', + + } + if self.notify_me: + payload['NOTIFYME'] = self.notify_me + request = requests.post(submission_url, data=payload) + content = request.content + soup = self.soupify(content) + sid_soup = soup.find('input', {'name': 'sid'}) + try: + sid = sid_soup['value'] + except: + sid = None + shutil.rmtree(tmp_dir) + return sid + + def poll_for_files(self, sid, + max_tries=MAX_TRIES, + time_delay=TIME_DELAY, + timeout=TIMEOUT): + payload = { + '_ggi_project': 'PPHWeb2', + '_ggi_origin': 'manage', + '_ggi_target_manage': 'Refresh', + 'sid': sid, + } + content = None + tries = 0 + url_dict = None + while True: + tries += 1 + if tries > max_tries: + stop_err('Number of tries exceeded!') + request = requests.post(submission_url, data=payload) + content = request.content + soup = self.soupify(content) + all_tables = soup.findAll('table') + if all_tables: + try: + running_jobs_table = all_tables[-2] + except: + running_jobs_table = None + if running_jobs_table: + rows = running_jobs_table.findAll('tr') + if len(rows) == 1: + row = rows[0] + hrefs = row.findAll('a') + # print hrefs + if len(hrefs) >= 3: + short_txt = hrefs[0]['href'] + # print short_txt + path = short_txt.split('-')[0] + full_txt = result_url + path + '-full.txt' + log_txt = result_url + path + '-log.txt' + snps_txt = result_url + path + '-snps.txt' + short_txt = result_url + path + \ + '-short.txt' # short_txt + url_dict = { + 'full_file': full_txt, + 'snps_file': snps_txt, + 'log_file': log_txt, + 'short_file': short_txt, + } + return url_dict + time.sleep(time_delay) + return url_dict + + def save_to_files(self, url_dict, args): + tmp_dir = tempfile.mkdtemp() + for key, value in url_dict.iteritems(): + r = requests.get(value, stream=True) + if r.status_code == 200: + path = os.path.join(tmp_dir, key) + with open(path, 'wb') as f: + for chunk in r.iter_content(128): + 
f.write(chunk) + shutil.move(path, args[key]) + if os.path.exists(tmp_dir): + shutil.rmtree(tmp_dir) + return True + + +def main(args): + parser = argparse.ArgumentParser() + parser.add_argument('-u', + '--ucscdb', + dest='ucscdb', + choices=UCSCDB, + required=True, type=str) + parser.add_argument('-m', '--model', + dest='modelname', choices=MODELNAME, + required=True, type=str) + parser.add_argument('-fl', '--filter', + '--snpfilter', dest='snpfilter', + choices=SNPFILTER.keys(), + required=True, type=str) + parser.add_argument('-i', '--input', + dest='input', nargs='?', + required=True, type=str, + default=sys.stdin) + parser.add_argument('-e', '--email', + dest='email', + required=False, default=None) + parser.add_argument('--log', dest='log_file', + required=True, default=None, type=str) + parser.add_argument('--short', dest='short_file', + required=True, default=None, type=str) + parser.add_argument('--full', dest='full_file', + required=True, default=None, type=str) + parser.add_argument('--snp', dest='snps_file', + required=True, default=None, type=str) + parser.add_argument('--function', dest='snpfunction', + required=True, type=str) + args_s = vars(parser.parse_args(args)) + polyphen2_web = Polyphen2Web(ucscdb=args_s['ucscdb'], + model_name=args_s['modelname'], + snp_filter=args_s['snpfilter'], + snp_function=args_s['snpfunction'], + file_location=args_s['input'], + email=args_s['email']) + sid = polyphen2_web.make_request() + if not sid: + stop_err( + 'Something went wrong! 
The tracking id could not be retrieved.') + url_dict = polyphen2_web.poll_for_files(sid) + locations = {} + if not url_dict: + stop_err('There was error downloading the output files!') + for key in url_dict.keys(): + locations[key] = args_s[key] + polyphen2_web.save_to_files(url_dict, locations) + return True + +if __name__ == '__main__': + main(sys.argv[1:]) diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/polyphen2_web.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/polyphen2_web.xml Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,153 @@ + + Compute functional impact of SNVs + + beautifulsoup4 + bs4 + + + polyphen2_web.py --ucscdb $ucscdb + --model $model + --filter $filter + --function $function + --input $input + --log $log_file + --full $full_file + --short $short_file + --snp $snp_file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + **What it does** + This tool interacts with the Web Version of Polyphen2 hosted at http://genetics.bwh.harvard.edu/pph2/ + + PolyPhen-2 (Polymorphism Phenotyping v2) is a software tool which predicts possible impact of amino acid substitutions + on the structure and function of human proteins using straightforward physical and evolutionary comparative considerations. + + .. class:: infomark + + *Classifier model* used by the probabilistic predictor: + + -HumDiv is preferred for evaluating rare alleles, dense mapping of regions identified by genome-wide association studies, + and analysis of natural selection. HumDiv model uses 5% / 10% FPR thresholds for “probably damaging” / “possibly damaging” predictions + + + -HumVar is better suited for diagnostics of Mendelian diseases which requires distinguishing mutations with drastic effects + from all the remaining human variation, including abundant mildly deleterious alleles. + HumVar model uses 10% / 20% FPR thresholds for “probably damaging” / “possibly damaging” predictions + + .. 
class:: infomark + + *Transcripts* A set of Transcripts on which genomic SNPs will be mapped: + + + -*All* includes all UCSC knownGene transcripts (highly redundant) + + -*Canonical* includes UCSC knownCanonical subset + + -*CCDS* further restricts knownCanonical subset to those transcripts which are also annotated as part of NCBI CCDS. + + + .. class:: infomark + + *Annotations* for the following functional categories of genomic SNPs will be included in the output: + + + -*All*: coding-synon, introns, nonsense, missense, utr-3, utr-5. + + + -*Coding*: coding-synon, nonsense, missense + + + -*Missense*: missense. + + + + .. class:: warningmark + + Note that PolyPhen-2 predictions are always produced for missense mutations only. + + + .. class:: infomark + + + Input format: + + + chr22:30421786 A/T + + chr22:29446079 A/G + + chr22:40814500 A/G + + chr22:40815256 C/T + + + + **Citations** + + If you use this tool please cite: + + Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR. Nat Methods 7(4):248-249 (2010). + "A method and server for predicting damaging missense mutations." + + + + + + + + + + diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen2_full.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen2_full.txt Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,23 @@ +#o_acc o_pos o_aa1 o_aa2 rsid acc pos aa1 aa2 nt1 nt2 prediction based_on effect pph2_class pph2_prob pph2_FPR pph2_TPR pph2_FDR site region PHAT dScore Score1 Score2 MSAv Nobs Nstruct Nfilt PDB_id PDB_pos PDB_ch ident length NormASA SecStr MapReg dVol dProp B-fact H-bonds AveNHet MinDHet AveNInt MinDInt AveNSit MinDSit Transv CodPos CpG MinDJxn PfamHit IdPmax IdPSNP IdQmin +Q13615-2 1170 N I ? Q13615-2 1170 N I A T probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 ? ? ? +2.214 -1.705 -3.919 2 37 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 1 0 -2313 ? 1.268 ? 
47.09 # chr22:30421786|AT|uc003agu.3+|MTMR3|NP_694690 +Q13615 1198 N I ? Q13615 1198 N I A T probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 NO NO ? +2.296 -1.580 -3.876 2 38 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 1 0 -3099 NO 1.010 ? 45.58 # chr22:30421786|AT|uc003agv.3+|MTMR3|NP_066576 +Q13615-3 1161 N I ? Q13615-3 1161 N I A T probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 ? ? ? +2.214 -1.705 -3.919 2 37 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 1 0 -3099 ? 1.275 ? 47.37 # chr22:30421786|AT|uc003agw.3+|MTMR3|NP_694691 +Q9ULT6 637 H R ? Q9ULT6 637 H R A G benign alignment ? neutral 0.002 0.704 0.987 0.452 NO NO ? +0.398 -2.258 -2.656 2 47 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 2 +858 NO 20.363 20.363 77.46 # chr22:29446079|AG|uc003aeg.2+|ZNRF3|NP_115549 +Q9ULT6 637 H R ? Q9ULT6 637 H R A G benign alignment ? neutral 0.002 0.704 0.987 0.452 NO NO ? +0.398 -2.258 -2.656 2 47 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 2 -1599 NO 20.363 20.363 77.46 # chr22:29446079|AG|uc003aeh.1+|ZNRF3|NP_115549 +Q969V6 648 S C ? Q969V6 648 S C A T possibly damaging alignment ? deleterious 0.89 0.0639 0.821 0.0953 NO COMPBIAS ? +2.837 -1.909 -4.746 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.320 ? 90.33 # chr22:40814500|TA|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R A C benign alignment ? neutral 0.167 0.131 0.92 0.162 NO COMPBIAS ? +1.814 -1.909 -3.723 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 2 +123 NO 2.525 ? 90.33 # chr22:40814500|TG|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S C ? Q969V6 648 S C A T possibly damaging alignment ? deleterious 0.89 0.0639 0.821 0.0953 NO COMPBIAS ? +2.837 -1.909 -4.746 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.320 ? 90.33 # chr22:40814500|TA|uc003ayw.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R A C benign alignment ? neutral 0.167 0.131 0.92 0.162 NO COMPBIAS ? +1.814 -1.909 -3.723 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
1 0 2 +123 NO 2.525 ? 90.33 # chr22:40814500|TG|uc003ayw.1-|MKL1|NP_065882 +E7ER32 648 S C ? E7ER32 648 S C A T possibly damaging alignment ? deleterious 0.953 0.0514 0.788 0.0812 NO NO ? +2.837 -1.909 -4.746 2 33 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.255 ? 87.22 # chr22:40814500|TA|uc010gye.1-|MKL1| +E7ER32 648 S R ? E7ER32 648 S R A C benign alignment ? neutral 0.337 0.111 0.901 0.142 NO NO ? +1.814 -1.909 -3.723 2 33 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 2 +123 NO 2.402 ? 87.22 # chr22:40814500|TG|uc010gye.1-|MKL1| +B0QY83 598 S C ? B0QY83 598 S C A T possibly damaging alignment_mz ? deleterious 0.726 0.0797 0.856 0.112 NO NO ? +2.847 -1.931 -4.778 3 31 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.615 ? 91.49 # chr22:40814500|TA|uc010gyf.1-|MKL1|NP_065882 +B0QY83 598 S R ? B0QY83 598 S R A C benign alignment_mz ? neutral 0.047 0.168 0.942 0.195 NO NO ? +1.674 -1.931 -3.605 3 31 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 2 +123 NO 5.560 ? 91.49 # chr22:40814500|TG|uc010gyf.1-|MKL1|NP_065882 +Q969V6 396 A T ? Q969V6 396 A T G A benign alignment ? neutral 0.009 0.233 0.961 0.247 NO NO ? +0.097 -1.540 -1.637 2 39 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 0 1 +879 NO 21.659 21.659 88.08 # chr22:40815256|CT|uc003ayv.1-|MKL1|NP_065882 +Q969V6 396 A T ? Q969V6 396 A T G A benign alignment ? neutral 0.009 0.233 0.961 0.247 NO NO ? +0.097 -1.540 -1.637 2 39 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 0 1 +879 NO 21.659 21.659 88.08 # chr22:40815256|CT|uc003ayw.1-|MKL1|NP_065882 +E7ER32 396 A T ? E7ER32 396 A T G A benign alignment ? neutral 0.009 0.233 0.961 0.247 NO NO ? +0.097 -1.540 -1.637 2 39 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 0 1 +879 NO 20.554 20.554 83.58 # chr22:40815256|CT|uc010gye.1-|MKL1| +B0QY83 346 A T ? B0QY83 346 A T G A benign alignment_mz ? neutral 0.008 0.239 0.963 0.252 NO NO ? +0.456 -1.547 -2.003 3 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
0 0 1 +879 NO 21.940 21.940 89.22 # chr22:40815256|CT|uc010gyf.1-|MKL1|NP_065882 +## Sources: +## Predictions: PolyPhen-2 v2.2.2r398 +## Sequences: UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011) +## Structures: PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures) +## Genes: UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009) diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen2_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen2_input.txt Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,5 @@ +chr22:30421786 A/T +chr22:29446079 A/G +chr22:40814500 A/G +chr22:40815256 C/T + diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen2_log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen2_log.txt Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,38 @@ +=========================== +Stage 1/7: Validating input +=========================== +No errors + +=============================== +Stage 2/7: Mapping genomic SNPs +=============================== +WARNING: (chr22:40814500 - uc003ayv.1) None of the input alleles (A/G) matches reference allele (T) +WARNING: (chr22:40814500 - uc003ayw.1) None of the input alleles (A/G) matches reference allele (T) +WARNING: (chr22:40814500 - uc010gye.1) None of the input alleles (A/G) matches reference allele (T) +WARNING: (chr22:40814500 - uc010gyf.1) None of the input alleles (A/G) matches reference allele (T) +Total errors/warnings: 4 + +============================ +Stage 3/7: Collecting output +============================ +No errors + +=============================================== +Stage 4/7: Building MSA and annotating proteins +=============================================== +No errors + +============================ +Stage 5/7: Collecting output +============================ +No errors + +===================== +Stage 6/7: Predicting +===================== +No errors + +============================= +Stage 7/7: Generating reports 
+============================= +No errors diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen2_short.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen2_short.txt Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,23 @@ +#o_acc o_pos o_aa1 o_aa2 rsid acc pos aa1 aa2 prediction pph2_prob pph2_FPR pph2_TPR +Q13615-2 1170 N I ? Q13615-2 1170 N I probably damaging 0.998 0.0112 0.273 # chr22:30421786|AT|uc003agu.3+|MTMR3|NP_694690 +Q13615 1198 N I ? Q13615 1198 N I probably damaging 0.998 0.0112 0.273 # chr22:30421786|AT|uc003agv.3+|MTMR3|NP_066576 +Q13615-3 1161 N I ? Q13615-3 1161 N I probably damaging 0.998 0.0112 0.273 # chr22:30421786|AT|uc003agw.3+|MTMR3|NP_694691 +Q9ULT6 637 H R ? Q9ULT6 637 H R benign 0.002 0.704 0.987 # chr22:29446079|AG|uc003aeg.2+|ZNRF3|NP_115549 +Q9ULT6 637 H R ? Q9ULT6 637 H R benign 0.002 0.704 0.987 # chr22:29446079|AG|uc003aeh.1+|ZNRF3|NP_115549 +Q969V6 648 S C ? Q969V6 648 S C possibly damaging 0.89 0.0639 0.821 # chr22:40814500|TA|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R benign 0.167 0.131 0.92 # chr22:40814500|TG|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S C ? Q969V6 648 S C possibly damaging 0.89 0.0639 0.821 # chr22:40814500|TA|uc003ayw.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R benign 0.167 0.131 0.92 # chr22:40814500|TG|uc003ayw.1-|MKL1|NP_065882 +E7ER32 648 S C ? E7ER32 648 S C possibly damaging 0.953 0.0514 0.788 # chr22:40814500|TA|uc010gye.1-|MKL1| +E7ER32 648 S R ? E7ER32 648 S R benign 0.337 0.111 0.901 # chr22:40814500|TG|uc010gye.1-|MKL1| +B0QY83 598 S C ? B0QY83 598 S C possibly damaging 0.726 0.0797 0.856 # chr22:40814500|TA|uc010gyf.1-|MKL1|NP_065882 +B0QY83 598 S R ? B0QY83 598 S R benign 0.047 0.168 0.942 # chr22:40814500|TG|uc010gyf.1-|MKL1|NP_065882 +Q969V6 396 A T ? Q969V6 396 A T benign 0.009 0.233 0.961 # chr22:40815256|CT|uc003ayv.1-|MKL1|NP_065882 +Q969V6 396 A T ? 
Q969V6 396 A T benign 0.009 0.233 0.961 # chr22:40815256|CT|uc003ayw.1-|MKL1|NP_065882 +E7ER32 396 A T ? E7ER32 396 A T benign 0.009 0.233 0.961 # chr22:40815256|CT|uc010gye.1-|MKL1| +B0QY83 346 A T ? B0QY83 346 A T benign 0.008 0.239 0.963 # chr22:40815256|CT|uc010gyf.1-|MKL1|NP_065882 +## Sources: +## Predictions: PolyPhen-2 v2.2.2r398 +## Sequences: UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011) +## Structures: PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures) +## Genes: UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009) diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen2_snp.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen2_snp.txt Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,28 @@ +#snp_pos str gene transcript ccid ccds cciden refa type ntpos nt1 nt2 flanks trv cpg jxdon jxacc exon cexon jxc dgn cdnpos frame cdn1 cdn2 aa1 aa2 aapos spmap spacc spname refs_acc dbrsid dbobsrvd dbavHet dbavHetSE dbRmPaPt comments +chr22:30421786 + MTMR3 uc003agu.3 ? CCDS13871.1 1 A/T missense 142629 A T AC 1 0 -2313 -168 20/20 18/18 ? 0 1170 1 AAC ATC N I 1170 1 Q13615-2 MTMR3_HUMAN NP_694690 rs75623810 A/T 0.016564 0.089485 A>A>A +chr22:30421786 + MTMR3 uc003agv.3 16552 CCDS13870.1 1 A/T missense 142629 A T AC 1 0 -3099 -168 20/20 18/18 ? 0 1198 1 AAC ATC N I 1198 1 Q13615 MTMR3_HUMAN NP_066576 rs75623810 A/T 0.016564 0.089485 A>A>A +chr22:30421786 + MTMR3 uc003agw.3 ? CCDS46682.1 1 A/T missense 142629 A T AC 1 0 -3099 -168 19/19 17/17 ? 0 1161 1 AAC ATC N I 1161 1 Q13615-3 MTMR3_HUMAN NP_694691 rs75623810 A/T 0.016564 0.089485 A>A>A +chr22:29446079 + ZNRF3 uc003aeg.2 16531 CCDS42999.1 1 A/G missense 166190 A G CC 0 2 +858 -895 8/9 7/8 ? 0 537 1 CAC CGC H R 637 1 Q9ULT6 ZNRF3_HUMAN NP_115549 rs62641746 A/G 0.030762 0.120144 A>A>A +chr22:29446079 + ZNRF3 uc003aeh.1 ? CCDS42999.1 0.982 A/G missense 63040 A G CC 0 2 -1599 -895 7/7 7/7 ? 
0 537 1 CAC CGC H R 637 1 Q9ULT6 ZNRF3_HUMAN NP_115549 rs62641746 A/G 0.030762 0.120144 A>A>A +chr22:40814500 - MKL1 uc003ayv.1 ? CCDS14003.1 1 T/A missense 44939 A T CG 1 0 +123 -889 9/12 9/12 ? 0 648 0 AGC TGC S C 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc003ayv.1 ? CCDS14003.1 1 T/G missense 44939 A C CG 1 2 +123 -889 9/12 9/12 ? 0 648 0 AGC CGC S R 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc003ayw.1 16752 CCDS14003.1 1 T/A missense 218191 A T CG 1 0 +123 -889 12/15 9/12 ? 0 648 0 AGC TGC S C 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc003ayw.1 16752 CCDS14003.1 1 T/G missense 218191 A C CG 1 2 +123 -889 12/15 9/12 ? 0 648 0 AGC CGC S R 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc010gye.1 ? ? ? T/A missense 218191 A T CG 1 0 +123 -889 12/15 9/12 ? 0 648 0 AGC TGC S C 648 1 E7ER32 E7ER32_HUMAN ? ? ? ? ? ? +chr22:40814500 - MKL1 uc010gye.1 ? ? ? T/G missense 218191 A C CG 1 2 +123 -889 12/15 9/12 ? 0 648 0 AGC CGC S R 648 1 E7ER32 E7ER32_HUMAN ? ? ? ? ? ? +chr22:40814500 - MKL1 uc010gyf.1 ? ? ? T/A missense 218191 A T CG 1 0 +123 -889 11/14 8/11 ? 0 598 0 AGC TGC S C 598 1 B0QY83 B0QY83_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc010gyf.1 ? ? ? T/G missense 218191 A C CG 1 2 +123 -889 11/14 8/11 ? 0 598 0 AGC CGC S R 598 1 B0QY83 B0QY83_HUMAN NP_065882 ? ? ? ? ? +chr22:40815256 - MKL1 uc003ayv.1 ? CCDS14003.1 1 C/T missense 44183 G A CC 0 1 +879 -133 9/12 9/12 ? 0 396 0 GCC ACC A T 396 1 Q969V6 MKL1_HUMAN NP_065882 rs34736200 G/A 0.047299 0.14633 A>A>A +chr22:40815256 - MKL1 uc003ayw.1 16752 CCDS14003.1 1 C/T missense 217435 G A CC 0 1 +879 -133 12/15 9/12 ? 0 396 0 GCC ACC A T 396 1 Q969V6 MKL1_HUMAN NP_065882 rs34736200 G/A 0.047299 0.14633 A>A>A +chr22:40815256 - MKL1 uc010gye.1 ? ? ? C/T missense 217435 G A CC 0 1 +879 -133 12/15 9/12 ? 0 396 0 GCC ACC A T 396 1 E7ER32 E7ER32_HUMAN ? 
rs34736200 G/A 0.047299 0.14633 A>A>A +chr22:40815256 - MKL1 uc010gyf.1 ? ? ? C/T missense 217435 G A CC 0 1 +879 -133 11/14 8/11 ? 0 346 0 GCC ACC A T 346 1 B0QY83 B0QY83_HUMAN NP_065882 rs34736200 G/A 0.047299 0.14633 A>A>A +## Totals: +## lines input 4 +## lines skipped 0 +## alleles annotated 17 +## missense 17 +## nonsense 0 +## coding-synon 0 +## intron 0 +## utr-3 0 +## utr-5 0 diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen_input.txt Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,7 @@ +Q92889 706 I T +Q92889 875 E G +XRCC1_HUMAN 399 R Q +NP_005792 59 L P +rs1799931 +chr1:1267483 G/A +chr1:1158631 A/C,G,T diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen_output_full.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen_output_full.tsv Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,20 @@ +#o_acc o_pos o_aa1 o_aa2 rsid acc pos aa1 aa2 nt1 nt2 prediction based_on effect pph2_class pph2_prob pph2_FPR pph2_TPR pph2_FDR site region PHAT dScore Score1 Score2 MSAv Nobs Nstruct Nfilt PDB_id PDB_pos PDB_ch ident length NormASA SecStr MapReg dVol dProp B-fact H-bonds AveNHet MinDHet AveNInt MinDInt AveNSit MinDSit Transv CodPos CpG MinDJxn PfamHit IdPmax IdPSNP IdQmin +Q92889 706 I T rs1800069 Q92889 706 I T T C probably damaging alignment ? deleterious 1 0.00026 0.00018 0.0109 NO NO ? +2.055 -1.216 -3.271 2 52 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 0 -100 PF02732.10 0.730 ? 14.63 +Q92889 875 E G rs1800124 Q92889 875 E G A G possibly damaging alignment ? deleterious 0.937 0.0566 0.801 0.0874 NO NO ? +1.645 -1.600 -3.245 2 51 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 0 -607 NO 3.009 ? 63.97 +XRCC1_HUMAN 399 R Q rs25487 P18887 399 R Q G A probably damaging alignment ? deleterious 0.979 0.0411 0.755 0.0687 NO NO ? +1.579 -1.999 -3.578 2 86 20 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 1 +4 NO 2.498 ? 
46.92 +NP_005792 59 L P rs3390 P41567 59 L P T C possibly damaging structure 1.1.1 deleterious 0.895 0.0631 0.82 0.0945 NO NO ? +1.235 -1.254 -2.489 2 104 4 1 2if1 72 A 1.00 113 0.007 H A -55 1.07 0.00 ? ? ? ? ? ? ? 0 1 0 +20 PF01253.17 45.533 45.533 92.04 +rs1799931 ? ? ? rs1799931 P11245 286 G E G A benign alignment ? neutral 0.317 0.112 0.903 0.144 NO NO ? +1.145 -2.309 -3.454 2 59 20 2 2pfr 286 A 1.00 289 0.172 S l 78 0.75 -0.03 ? ? ? ? ? ? ? 0 1 0 -863 NO 8.156 ? 82.41 +Q7RTX0 191 R H ? Q7RTX0 191 R H G A probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 NO NO ? +2.547 -1.839 -4.386 2 57 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 1 -164 PF01094.23 2.232 ? 72.89 # chr1:1267483|GA|uc010nyk.1+|TAS1R3|NP_689414 +Q9BRK5 190 D E ? Q9BRK5 190 D E T G possibly damaging alignment ? neutral 0.454 0.0996 0.889 0.132 NO NO ? +0.793 -1.779 -2.572 2 87 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 2 0 +8 NO 7.550 7.550 41.99 # chr1:1158631|AC|uc001adh.3-|SDF4|NP_057260 +Q9BRK5 190 D E ? Q9BRK5 190 D E T A possibly damaging alignment ? neutral 0.454 0.0996 0.889 0.132 NO NO ? +0.793 -1.779 -2.572 2 87 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 2 0 +8 NO 7.550 7.550 41.99 # chr1:1158631|AT|uc001adh.3-|SDF4|NP_057260 +Q9BRK5-6 190 D E ? Q9BRK5-6 190 D E T G probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 ? ? ? +1.566 -1.300 -2.866 2 46 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 2 0 +8 ? 2.573 ? 36.21 # chr1:1158631|AC|uc001adi.3-|SDF4|NP_057631 +Q9BRK5-6 190 D E ? Q9BRK5-6 190 D E T A probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 ? ? ? +1.566 -1.300 -2.866 2 46 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 2 0 +8 ? 2.573 ? 36.21 # chr1:1158631|AT|uc001adi.3-|SDF4|NP_057631 +Q9BRK5 190 D E ? Q9BRK5 190 D E T G possibly damaging alignment ? neutral 0.454 0.0996 0.889 0.132 NO NO ? +0.793 -1.779 -2.572 2 87 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
1 2 0 -580 NO 7.550 7.550 41.99 # chr1:1158631|AC|uc001adj.1-|SDF4| +Q9BRK5 190 D E ? Q9BRK5 190 D E T A possibly damaging alignment ? neutral 0.454 0.0996 0.889 0.132 NO NO ? +0.793 -1.779 -2.572 2 87 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 2 0 -580 NO 7.550 7.550 41.99 # chr1:1158631|AT|uc001adj.1-|SDF4| +Q9BRK5 190 D E ? Q9BRK5 190 D E T G possibly damaging alignment ? neutral 0.454 0.0996 0.889 0.132 NO NO ? +0.793 -1.779 -2.572 2 87 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 2 0 +8 NO 7.550 7.550 41.99 # chr1:1158631|AC|uc009vjv.2-|SDF4|NP_057260 +Q9BRK5 190 D E ? Q9BRK5 190 D E T A possibly damaging alignment ? neutral 0.454 0.0996 0.889 0.132 NO NO ? +0.793 -1.779 -2.572 2 87 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 2 0 +8 NO 7.550 7.550 41.99 # chr1:1158631|AT|uc009vjv.2-|SDF4|NP_057260 +## Sources: +## Predictions: PolyPhen-2 v2.2.2r398 +## Sequences: UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011) +## Structures: PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures) +## Genes: UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009) diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen_output_log.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen_output_log.tsv Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,36 @@ +=========================== +Stage 1/7: Validating input +=========================== +No errors + +=============================== +Stage 2/7: Mapping genomic SNPs +=============================== +No errors + +============================ +Stage 3/7: Collecting output +============================ +No errors + +=============================================== +Stage 4/7: Building MSA and annotating proteins +=============================================== +(XRCC1_HUMAN:399:R/Q) WARNING: find_gene: Swapped codons (CAG>CGG) in uc002owt.2 nucleotide sequence at position: 1195 +(XRCC1_HUMAN:399:R/Q) WARNING: Replaced reference AA residue (Q) with (R) in uc002owt.2 protein sequence at position: 399 +Total 
errors/warnings: 2 + +============================ +Stage 5/7: Collecting output +============================ +No errors + +===================== +Stage 6/7: Predicting +===================== +No errors + +============================= +Stage 7/7: Generating reports +============================= +No errors diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen_output_short.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen_output_short.tsv Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,20 @@ +#o_acc o_pos o_aa1 o_aa2 rsid acc pos aa1 aa2 prediction pph2_prob pph2_FPR pph2_TPR +Q92889 706 I T rs1800069 Q92889 706 I T probably damaging 1 0.00026 0.00018 +Q92889 875 E G rs1800124 Q92889 875 E G possibly damaging 0.937 0.0566 0.801 +XRCC1_HUMAN 399 R Q rs25487 P18887 399 R Q probably damaging 0.979 0.0411 0.755 +NP_005792 59 L P rs3390 P41567 59 L P possibly damaging 0.895 0.0631 0.82 +rs1799931 ? ? ? rs1799931 P11245 286 G E benign 0.317 0.112 0.903 +Q7RTX0 191 R H ? Q7RTX0 191 R H probably damaging 0.998 0.0112 0.273 # chr1:1267483|GA|uc010nyk.1+|TAS1R3|NP_689414 +Q9BRK5 190 D E ? Q9BRK5 190 D E possibly damaging 0.454 0.0996 0.889 # chr1:1158631|AC|uc001adh.3-|SDF4|NP_057260 +Q9BRK5 190 D E ? Q9BRK5 190 D E possibly damaging 0.454 0.0996 0.889 # chr1:1158631|AT|uc001adh.3-|SDF4|NP_057260 +Q9BRK5-6 190 D E ? Q9BRK5-6 190 D E probably damaging 0.998 0.0112 0.273 # chr1:1158631|AC|uc001adi.3-|SDF4|NP_057631 +Q9BRK5-6 190 D E ? Q9BRK5-6 190 D E probably damaging 0.998 0.0112 0.273 # chr1:1158631|AT|uc001adi.3-|SDF4|NP_057631 +Q9BRK5 190 D E ? Q9BRK5 190 D E possibly damaging 0.454 0.0996 0.889 # chr1:1158631|AC|uc001adj.1-|SDF4| +Q9BRK5 190 D E ? Q9BRK5 190 D E possibly damaging 0.454 0.0996 0.889 # chr1:1158631|AT|uc001adj.1-|SDF4| +Q9BRK5 190 D E ? Q9BRK5 190 D E possibly damaging 0.454 0.0996 0.889 # chr1:1158631|AC|uc009vjv.2-|SDF4|NP_057260 +Q9BRK5 190 D E ? 
Q9BRK5 190 D E possibly damaging 0.454 0.0996 0.889 # chr1:1158631|AT|uc009vjv.2-|SDF4|NP_057260 +## Sources: +## Predictions: PolyPhen-2 v2.2.2r398 +## Sequences: UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011) +## Structures: PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures) +## Genes: UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009) diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/test-data/polyphen_output_snp.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/test-data/polyphen_output_snp.tsv Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,24 @@ +#snp_pos str gene transcript ccid ccds cciden refa type ntpos nt1 nt2 flanks trv cpg jxdon jxacc exon cexon jxc dgn cdnpos frame cdn1 cdn2 aa1 aa2 aapos spmap spacc spname refs_acc dbrsid dbobsrvd dbavHet dbavHetSE dbRmPaPt comments +chr1:1267483 + TAS1R3 uc010nyk.1 59 CCDS30556.1 0.999 G/A missense 758 G A CT 0 1 -164 -80 3/6 3/6 ? 0 191 1 CGT CAT R H 191 1 Q7RTX0 TS1R3_HUMAN NP_689414 rs141717515 A/G 0.00063 0.017742 G>G>G +chr1:1158631 - SDF4 uc001adh.3 49 CCDS30553.1 1 A/C missense 8817 T G AG 1 0 +8 -107 4/7 3/6 ? 2 190 2 GAT GAG D E 190 1 Q9BRK5 CAB45_HUMAN NP_057260 ? ? ? ? ? +chr1:1158631 - SDF4 uc001adh.3 49 CCDS30553.1 1 A/G coding-synon 8817 T C AG 0 2 +8 -107 4/7 3/6 ? 2 190 2 GAT GAC D D 190 1 Q9BRK5 CAB45_HUMAN NP_057260 rs6603781 T/C 0.133948 0.221431 T>C>C +chr1:1158631 - SDF4 uc001adh.3 49 CCDS30553.1 1 A/T missense 8817 T A AG 1 0 +8 -107 4/7 3/6 ? 2 190 2 GAT GAA D E 190 1 Q9BRK5 CAB45_HUMAN NP_057260 ? ? ? ? ? +chr1:1158631 - SDF4 uc001adi.3 ? CCDS12.1 1 A/C missense 8817 T G AG 1 0 +8 -107 4/7 3/6 ? 2 190 2 GAT GAG D E 190 1 Q9BRK5-6 CAB45_HUMAN NP_057631 ? ? ? ? ? +chr1:1158631 - SDF4 uc001adi.3 ? CCDS12.1 1 A/G coding-synon 8817 T C AG 0 2 +8 -107 4/7 3/6 ? 2 190 2 GAT GAC D D 190 1 Q9BRK5-6 CAB45_HUMAN NP_057631 rs6603781 T/C 0.133948 0.221431 T>C>C +chr1:1158631 - SDF4 uc001adi.3 ? CCDS12.1 1 A/T missense 8817 T A AG 1 0 +8 -107 4/7 3/6 ? 
2 190 2 GAT GAA D E 190 1 Q9BRK5-6 CAB45_HUMAN NP_057631 ? ? ? ? ? +chr1:1158631 - SDF4 uc001adj.1 ? ? ? A/C missense 718 T G AG 1 0 -580 -107 2/2 2/2 ? 2 68 2 GAT GAG D E 190 1 Q9BRK5 CAB45_HUMAN ? ? ? ? ? ? +chr1:1158631 - SDF4 uc001adj.1 ? ? ? A/G coding-synon 718 T C AG 0 2 -580 -107 2/2 2/2 ? 2 68 2 GAT GAC D D 190 1 Q9BRK5 CAB45_HUMAN ? rs6603781 T/C 0.133948 0.221431 T>C>C +chr1:1158631 - SDF4 uc001adj.1 ? ? ? A/T missense 718 T A AG 1 0 -580 -107 2/2 2/2 ? 2 68 2 GAT GAA D E 190 1 Q9BRK5 CAB45_HUMAN ? ? ? ? ? ? +chr1:1158631 - SDF4 uc009vjv.2 ? ? ? A/C missense 8817 T G AG 1 0 +8 -107 3/6 2/5 ? 2 68 2 GAT GAG D E 190 1 Q9BRK5 CAB45_HUMAN NP_057260 ? ? ? ? ? +chr1:1158631 - SDF4 uc009vjv.2 ? ? ? A/G coding-synon 8817 T C AG 0 2 +8 -107 3/6 2/5 ? 2 68 2 GAT GAC D D 190 1 Q9BRK5 CAB45_HUMAN NP_057260 rs6603781 T/C 0.133948 0.221431 T>C>C +chr1:1158631 - SDF4 uc009vjv.2 ? ? ? A/T missense 8817 T A AG 1 0 +8 -107 3/6 2/5 ? 2 68 2 GAT GAA D E 190 1 Q9BRK5 CAB45_HUMAN NP_057260 ? ? ? ? ? +## Totals: +## lines input 2 +## lines skipped 0 +## alleles annotated 13 +## missense 9 +## nonsense 0 +## coding-synon 4 +## intron 0 +## utr-3 0 +## utr-5 0 diff -r 000000000000 -r 09f68bdd1999 polyphen2_web/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polyphen2_web/tool_dependencies.xml Tue Oct 07 19:21:15 2014 -0400 @@ -0,0 +1,6 @@ + + + + + +