# HG changeset patch # User saket-choudhary # Date 1412724307 14400 # Node ID fd66648ce5f98ac74025e2d552e59e8675bda6e1 Uploaded diff -r 000000000000 -r fd66648ce5f9 fathmm/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fathmm/README.rst Tue Oct 07 19:25:07 2014 -0400 @@ -0,0 +1,44 @@ +Galaxy wrapper for the FATHMM webservice +=================================================== + +This tool is copyright 2014 by Saket Choudhary, Indian Institute of Technology Bombay +All rights reserved. MIT licensed. + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Citations +=========== + + + If you use this tool in Galaxy, please cite : + + Shihab HA, Gough J, Cooper DN, Stenson PD, Barker GLA, Edwards KJ, Day INM, Gaunt, TR. (2013). + Predicting the Functional, Molecular and Phenotypic Consequences of Amino Acid Substitutions using + Hidden Markov Models. Hum. Mutat., 34:57-65 + + + Shihab HA, Gough J, Cooper DN, Day INM, Gaunt, TR. (2013). 
Predicting the Functional Consequences
+    of Cancer-Associated Amino Acid Substitutions. Bioinformatics 29:1504-1510.
+
+
+    Shihab HA, Gough J, Mort M, Cooper DN, Day INM, Gaunt, TR. (2014).
+    Ranking Non-Synonymous Single Nucleotide Polymorphisms based on Disease Concepts. In Press
diff -r 000000000000 -r fd66648ce5f9 fathmm/fathmm.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fathmm/fathmm.py Tue Oct 07 19:25:07 2014 -0400
@@ -0,0 +1,76 @@
+import requests
+import argparse
+import os
+import sys
+
+__base_url__ = "http://supfam3.cs.bris.ac.uk/FATHMM/"
+__submit_url__ = __base_url__ + "cgi-bin/submit.cgi"
+__result_url__ = __base_url__ + "cgi-bin/"
+__download_url__ = __base_url__ + "tmp/"
+__type__="CANCER" ##Hidden field to show which type of variants we are processing
+
+
+def stop_err(msg, err=1):
+    """Write msg to stderr and exit the process with status err."""
+    sys.stderr.write('%s\n' % msg)
+    sys.exit(err)
+
+
+def main_web(args):
+    """Submit the variants in args.input to FATHMM and save the result table.
+
+    args must provide ``input`` (path of the query file), ``output`` (path
+    the downloaded result is written to) and ``threshold`` (float or None).
+    Every failure is reported through stop_err, which exits the process.
+    """
+    # Validate explicitly: an assert is stripped when Python runs with -O
+    if not os.path.exists(args.input):
+        stop_err("Input file not found: %s" % args.input)
+    with open(args.input) as f:
+        contents = f.read().strip()
+    # Compare against None so that an explicit threshold of 0 is honoured
+    threshold = -0.75
+    if args.threshold is not None:
+        threshold = float(args.threshold)
+    data = {"weighted": __type__,
+            "batch": contents,
+            "threshold": threshold
+            }
+    response = requests.post(__submit_url__, data=data)
+    if response.status_code != 200:
+        # status_code is an int; format it instead of concatenating to a str
+        stop_err("Error processing request, got %s" % response.status_code)
+    # The reply is expected to carry a "window.location = ...session=<id>;" redirect
+    split_text = response.text.split("window.location = ")
+    try:
+        url = split_text[1]
+        url = url.split(";")[0]
+        url = url.split("session=")[1]
+        url = url.replace("'", "").replace("./", "")
+        url = __download_url__ + url + ".tab"
+    except IndexError:
+        stop_err("Unable to parse result id")
+    response = requests.get(url)
+    if response.status_code != 200:
+        stop_err("Error downloading results, got %s" % response.status_code)
+    # The output is opened in binary mode, so write the undecoded body bytes
+    with open(args.output, 'wb') as fp:
+        fp.write(response.content)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Process input output paths")
+    parser.add_argument('--input',
+                        type=str,
+                        required=True,
+                        help='Input file location')
+    parser.add_argument('--output',
+                        type=str,
+                        required=True,
+                        help='Output file location')
+    parser.add_argument('--threshold',
+                        type=float,
+                        required=False,
+                        help='Predictions with score less than threshold are possibly cancer causing')
+    args = parser.parse_args()
+    main_web(args)
diff -r 000000000000 -r fd66648ce5f9 fathmm/fathmm.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fathmm/fathmm.xml Tue Oct 07 19:25:07 2014 -0400
@@ -0,0 +1,65 @@
+
+    fathmm web service
+
+        requests
+        requests
+
+    fathmm.py --input $input --output $output --threshold $threshold
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    **What it does**
+
+
+    This script calls FATHMM(http://supfam3.cs.bris.ac.uk/FATHMM/about.html) Web API to fetch
+    predictions of the functional impact of mutations.
+
+    Input is a plain text file:
+
+    1. <protein> <substitution>
+
+    2. dbSNP rs identifiers
+
+
+    Where <protein> is the protein identifier and
+    <substitution> is the amino acid substitution in the conventional one letter format.
+    Multiple substitutions can be entered on a single line and should be separated by a comma.
+    SwissProt/TrEMBL, RefSeq and Ensembl protein identifiers are accepted:
+
+    P43026 L441P
+    ENSP00000325527 N548I,E1073K,C2307S
+
+
+
+    **Citations**
+
+    If you use this tool in Galaxy, please cite :
+
+    Shihab HA, Gough J, Cooper DN, Stenson PD, Barker GLA, Edwards KJ, Day INM, Gaunt, TR. (2013).
+    Predicting the Functional, Molecular and Phenotypic Consequences of Amino Acid Substitutions using
+    Hidden Markov Models. Hum. Mutat., 34:57-65
+
+    Shihab HA, Gough J, Cooper DN, Day INM, Gaunt, TR. (2013). Predicting the Functional Consequences
+    of Cancer-Associated Amino Acid Substitutions. Bioinformatics 29:1504-1510.
+
+    Shihab HA, Gough J, Mort M, Cooper DN, Day INM, Gaunt, TR. (2014).
+    Ranking Non-Synonymous Single Nucleotide Polymorphisms based on Disease Concepts.
In Press + + + + diff -r 000000000000 -r fd66648ce5f9 fathmm/test-data/fathmm_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fathmm/test-data/fathmm_input.txt Tue Oct 07 19:25:07 2014 -0400 @@ -0,0 +1,2 @@ +P43026 L441P +ENSP00000269305 E258A,R280G,G302E diff -r 000000000000 -r fd66648ce5f9 fathmm/test-data/fathmm_output.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fathmm/test-data/fathmm_output.tab Tue Oct 07 19:25:07 2014 -0400 @@ -0,0 +1,5 @@ +# dbSNP ID Protein ID Substitution Prediction Score Domain-Phenotype Association Warning HMM ID HMM Description HMM Pos. HMM Prob. W. HMM Prob. M. HMM Weights D. HMM Weights O. +1 P43026 L441P PASSENGER/OTHER 1.14 0040782 Cystine-knot cytokines 52 0.147630905777 0.0362233985562 2.0 5.0 +2 ENSP00000269305 E258A CANCER -9.61 P53 P53 DNA-binding domain 165 0.389792785613 0.0495541137229 1006.0 2.0 +3 ENSP00000269305 R280G CANCER -9.70 P53 P53 DNA-binding domain 187 0.410807723494 0.0287111422457 1006.0 2.0 +4 ENSP00000269305 G302E CANCER -8.16 81454 302 0.164704882077 0.095177662721 1316.0 5.0 diff -r 000000000000 -r fd66648ce5f9 fathmm/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fathmm/tool_dependencies.xml Tue Oct 07 19:25:07 2014 -0400 @@ -0,0 +1,6 @@ + + + + + +