changeset 0:e51722489ddb draft default tip

Uploaded
author saket-choudhary
date Tue, 07 Oct 2014 19:40:29 -0400
parents
children
files mutationassesor_web/README.rst mutationassesor_web/mutation_assesor.py mutationassesor_web/mutation_assesor.xml mutationassesor_web/test-data/ma_nucleotide_output.csv mutationassesor_web/test-data/ma_proper_nucleotide.csv mutationassesor_web/test-data/ma_proper_protein.csv mutationassesor_web/test-data/ma_protein_output.csv mutationassesor_web/test-data/mutationassessor_input.txt mutationassesor_web/test-data/mutationassessor_output.tsv mutationassesor_web/tool_dependencies.xml
diffstat 10 files changed, 262 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/README.rst	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,36 @@
+Galaxy wrapper for the Mutation Assessor webservice 
+===================================================
+
+This tool is copyright 2014 by Saket Choudhary <saketkc@gmail.com>, Indian Institute of Technology Bombay
+All rights reserved. MIT licensed.
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Citations
+===========
+
+
+If you use this Galaxy tool in work leading to a scientific publication please cite:
+
+Reva B, Antipin Y, Sander C. Nucleic Acids Research (2011) "Predicting the Functional Impact of Protein Mutations: Application to Cancer Genomics"
+
+Reva, B.A., Antipin, Y.A. and Sander, C. (2007) Genome Biol, 8, R232. "Determinants of protein function revealed by combinatorial entropy optimization"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/mutation_assesor.py	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+import sys
+import requests
+import os
+import argparse
+import re
+import csv
+import StringIO
+__url__ = 'http://mutationassessor.org/'
+
+
+def stop_err(msg, err=1):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit(err)
+
+
+def main_web(args):
+    assert os.path.exists(args.input)
+    with open(args.input) as f:
+        contents = f.read().strip()
+    if args.hg19 is True and args.protein is True:
+        stop_err('--hg19 option conflicts with --protein')
+    if args.protein is False:
+        ## Replace tabs/space with commas
+        re.sub('[\t\s]+', ',', contents)
+    if args.hg19:
+        ## Append hg19 to each line
+        lines = contents.split('\n')
+        contents = ('\n').join(
+            map((lambda x: 'hg19,' + x),
+                lines))
+
+    payload = {'vars': contents, 'tableQ': 1}
+    request = requests.post(__url__, data=payload)
+    response = request.text
+    if request.status_code != requests.codes.ok:
+        stop_err("""Error retrieving response from server.
+                 Server returned %s .
+                 Output: %s
+                 """ % (request.status_code, response))
+    r = StringIO.StringIO(response)
+    reader = csv.reader(r, delimiter=",")
+    csv.writer(open(args.output, "wb"), delimiter='\t').writerows(reader)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Process input output paths")
+    parser.add_argument('--input',
+                        type=str,
+                        required=True,
+                        help='Input file location')
+    parser.add_argument('--output',
+                        type=str,
+                        required=True,
+                        help='Output file locatio')
+    parser.add_argument('--log',
+                        type=str,
+                        required=False)
+    parser.add_argument('--hg19',
+                        action='store_true',
+                        help="""Use hg19 build.
+                        Appends 'hg19' to each input line""")
+    parser.add_argument('--protein',
+                        action='store_true',
+                        help='Inputs are in protein space')
+    args = parser.parse_args()
+    main_web(args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/mutation_assesor.xml	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,107 @@
+<tool id="mutationassesor_web" name="MutationAssesor">
+    <description>MutationAssesor web service</description>
+    <requirements>
+        <requirement type="package" version="2.2.1">requests</requirement>
+        <requirement type="python-module">requests</requirement>
+    </requirements>
+    <command interpreter="python">mutation_assesor.py --input $input --output $output
+        #if $options.protein == "yes"
+            --protein
+        #else
+            $options.hg19
+        #end if
+    </command>
+    <inputs>
+        <param name="input" format="txt" type="data" label="Input variants" />
+        <conditional name="options">
+            <param name="protein" type="select" label="Protein Input">
+                <option value="yes">Yes</option>
+                <option value="no">No</option>
+            </param>
+            <when value="no">
+                <param name="hg19" type="select" label="hg19">
+                    <option value="--hg19">Yes</option>
+                    <option value="">No</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- mutation_assesor.py writes its result with a tab delimiter, so the
+             dataset is declared with Galaxy's tab-separated "tabular" datatype. -->
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="ma_proper_nucleotide.csv"/>
+            <param name="hg19" value="--hg19"/>
+            <param name="protein" value="no"/>
+            <output name="output" file="ma_nucleotide_output.csv"/>
+        </test>
+        <test>
+            <param name="input" value="ma_proper_protein.csv"/>
+            <param name="protein" value="yes"/>
+            <output name="output" file="ma_protein_output.csv"/>
+        </test>
+        <test>
+            <param name="input" value="mutationassessor_input.txt"/>
+            <param name="protein" value="yes"/>
+            <output name="output" file="mutationassessor_output.tsv" lines_diff="2"/>
+        </test>
+    </tests>
+    <help>
+
+
+    **What it does**
+
+        This script calls the MutationAssessor (http://mutationassessor.org/) Web API to fetch
+        MutationAssessor scores and associated output.
+
+        Input is a tab-separated or comma-separated variants file. The MutationAssessor
+        server accepts a list of variants, one variant per line, plus optional text thrown in
+        which might be a description of the variants in genomic coordinates. The
+        variants are assumed to be coming from the '+' strand:
+        &lt;genome build&gt;,&lt;chromosome&gt;,&lt;position&gt;,&lt;reference allele&gt;,&lt;substituted allele&gt;
+
+
+        Genome build is optional. By default 'hg18' build is used.
+        Input needs to be formatted in the following format:
+
+        1. Nucleotide space:
+
+        13,32912555,G,T   BRCA2
+
+        7,55178574,G,A   GBM
+
+        7,55178574,G,A   GBM
+
+        Note that the tool takes care of prepending 'hg19' while running the tool, if you
+        select 'yes' under 'hg19' label
+
+        2. Protein Space
+            &lt;protein ID&gt; &lt;variant&gt; &lt;text&gt;, where &lt;protein ID&gt; can be :
+
+            1. Uniprot protein accession (i.e. EGFR_HUMAN)
+            2. NCBI Refseq protein ID (i.e. NP_005219)
+
+            EGFR_HUMAN R521K
+            EGFR_HUMAN R98Q Polymorphism
+            EGFR_HUMAN G719D disease
+            NP_000537 G356A
+            NP_000537 G360A dbSNP:rs35993958
+            NP_000537 S46A Abolishes phosphorylation
+
+
+
+    **Citations**
+
+        If you use this tool in Galaxy, please cite :
+            Reva B, Antipin Y, Sander C. Nucleic Acids Research (2011)
+            "Predicting the Functional Impact of Protein Mutations: Application to Cancer Genomics"
+
+            Reva, B.A., Antipin, Y.A. and Sander, C. (2007) Genome Biol, 8, R232.
+            "Determinants of protein function revealed by combinatorial entropy optimization"
+
+
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/test-data/ma_nucleotide_output.csv	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,4 @@
+	Mutation	AA variant	Gene	MSA	PDB	Func. Impact	FI score	Uniprot	Refseq	MSA height	Codon start position	Func. region	Protein bind.site	DNA/RNA bind.site	small.mol bind.site
+1	hg19,13,32912555,G,T	D1355Y	BRCA2	http://getma.org/?cm=msa&ty=f&p=BRCA2_HUMAN&rb=1247&re=1420&var=D1355Y		low	1.24	BRCA2_HUMAN	NP_000050	14	chr13:32912555				
+2	hg19,7,55178574,G,A									0					
+3	hg19,7,55178574,G,A									0					
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/test-data/ma_proper_nucleotide.csv	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,3 @@
+13,32912555,G,T   BRCA2
+7,55178574,G,A   GBM
+7,55178574,G,A   GBM
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/test-data/ma_proper_protein.csv	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,6 @@
+EGFR_HUMAN,R521K
+EGFR_HUMAN,R98Q,Polymorphism
+EGFR_HUMAN,G719D,disease
+NP_000537,G356A
+NP_000537,G360A,dbSNP:rs35993958
+NP_000537,S46A,Abolishes,phosphorylation
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/test-data/ma_protein_output.csv	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,7 @@
+	Mutation	AA variant	Gene	MSA	PDB	Func. Impact	FI score	Uniprot	Refseq	MSA height	Codon start position	Func. region	Protein bind.site	DNA/RNA bind.site	small.mol bind.site
+1	EGFR_HUMAN,R521K							EGFR_HUMAN,R521K		0					
+2	EGFR_HUMAN,R98Q,POLYMORPHISM							EGFR_HUMAN,R98Q,POLYMORPHISM		0					
+3	EGFR_HUMAN,G719D,DISEASE							EGFR_HUMAN,G719D,DISEASE		0					
+4	NP_000537,G356A								NP_000537,G356A	0					
+5	NP_000537,G360A,DBSNP:RS35993958								NP_000537,G360A,DBSNP:RS35993958	0					
+6	NP_000537,S46A,ABOLISHES,PHOSPHORYLATION								NP_000537,S46A,ABOLISHES,PHOSPHORYLATION	0					
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/test-data/mutationassessor_input.txt	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,13 @@
+EGFR_HUMAN	G719S
+EGFR_HUMAN	G724S
+EGFR_HUMAN	E734K
+EGFR_HUMAN	L747F
+EGFR_HUMAN	R748P
+EGFR_HUMAN	Q787R
+EGFR_HUMAN	T790M
+EGFR_HUMAN	L833V
+EGFR_HUMAN	V834L
+EGFR_HUMAN	L858R
+EGFR_HUMAN	L861Q
+EGFR_HUMAN	G873E
+EGFR_HUMAN	R962G
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/test-data/mutationassessor_output.tsv	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,14 @@
+	Mutation	AA variant	Gene	MSA	PDB	Func. Impact	FI score	Uniprot	Refseq	MSA height	Codon start position	Func. region	Protein bind.site	DNA/RNA bind.site	small.mol bind.site
+1	EGFR_HUMAN G719S	G719S	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=G719S	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=G719S	high	3.88	EGFR_HUMAN	NP_005219	700	chr7:55209201	1	1		0UN IRE FMM ANP CY7 HYZ 03P ITI DKI 685 T95 T74 ZZY M97 6XP 0K0 KRW 0JJ 0K1 P17 112 1N1 JIN STI P5C 585 S19 P16 VX6 P3Y SX7 ACK B90 AMP ZD6 STU 7PY BI9 BII ATP ADP 4ST VG8 YAM P1E 7X4 7X5 7X6 7X8 349 3JZ
+2	EGFR_HUMAN G724S	G724S	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=G724S	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=G724S	medium	2.7	EGFR_HUMAN	NP_005219	700	chr7:55209216	1			ANP 112 ATP AMP 057
+3	EGFR_HUMAN E734K	E734K	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=E734K	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=E734K	neutral	-0.08	EGFR_HUMAN	NP_005219	700	chr7:55209924				
+4	EGFR_HUMAN L747F	L747F	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=L747F	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=L747F	low	1.9	EGFR_HUMAN	NP_005219	700	chr7:55209963		1		
+5	EGFR_HUMAN R748P	R748P	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=R748P	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=R748P	low	1.155	EGFR_HUMAN	NP_005219	700	chr7:55209966		1		VX6 STU S19
+6	EGFR_HUMAN Q787R	Q787R	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=Q787R	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=Q787R	neutral	0.225	EGFR_HUMAN	NP_005219	700	chr7:55216555				
+7	EGFR_HUMAN T790M	T790M	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=T790M	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=T790M	low	1.17	EGFR_HUMAN	NP_005219	700	chr7:55216564	1	1		AQ4 AEE CY0 DJK IRE FMM ANP CY7 HYZ ITI POX 03P 03Q GW7 DKI 477 685 0JE T95 T74 ZZY PFY M97 6XP 0K0 VGH KRW 0JJ 0K1 PRC STI P17 112 1N1 7MP GIN JIN B91 919 9DP DB8 4ST 627 0JK P5C S19 P16 406 VX6 P3Y NIL SX7 0LI XY3 ACK PP1 X2M B90 AMP ZD6 X2K X2L ACP DBQ STU 057 7PY BII ATP ADP YAM P1E 7X4 7X5 7X6 7X8 349 3JZ 4JZ
+8	EGFR_HUMAN L833V	L833V	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=L833V	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=L833V	neutral	-1.13	EGFR_HUMAN	NP_005219	700	chr7:55226933		1		STI GIN B91 919 406 NIL 0LI
+9	EGFR_HUMAN V834L	V834L	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=V834L	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=V834L	low	1.87	EGFR_HUMAN	NP_005219	700	chr7:55226936		1		STI GIN 406 0LI XY3 PTR
+10	EGFR_HUMAN L858R	L858R	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=L858R	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=L858R	medium	3.32	EGFR_HUMAN	NP_005219	700	chr7:55227008		1		FMM CY7 HYZ ITI POX 03P GW7 BI9 ATP VG8 YAM
+11	EGFR_HUMAN L861Q	L861Q	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=L861Q	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=L861Q	neutral	0.735	EGFR_HUMAN	NP_005219	700	chr7:55227017		1		GIN PTR
+12	EGFR_HUMAN G873E	G873E	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=G873E	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=G873E	neutral	0.775	EGFR_HUMAN	NP_005219	700	chr7:55227053		1		
+13	EGFR_HUMAN R962G	R962G	EGFR	http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=R962G	http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=R962G	low	0.805	EGFR_HUMAN	NP_005219	700	chr7:55235538		1		
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutationassesor_web/tool_dependencies.xml	Tue Oct 07 19:40:29 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="requests" version="2.2.1">
+        <repository changeset_revision="04c9eef6c14b" name="package_requests_2_2_1" owner="saket-choudhary" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>