# HG changeset patch # User saket-choudhary # Date 1412724207 14400 # Node ID 99d838cef41ae93b412d88141d45ca2fccfc0393 Uploaded diff -r 000000000000 -r 99d838cef41a chasm_webservice/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/README.rst Tue Oct 07 19:23:27 2014 -0400 @@ -0,0 +1,34 @@ +Galaxy wrapper for the CHASM webservice +=================================================== + +This tool is copyright 2014 by Saket Choudhary, Indian Institute of Technology Bombay +All rights reserved. MIT licensed. + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Citations +=========== + + +If you use this Galaxy tool in work leading to a scientific publication please cite: + +Douville C, Carter H, Kim R, Niknafs N, Diekhans M, Stenson PD, Cooper DN, Ryan M, Karchin R (2013). CRAVAT: Cancer-Related Analysis of VAriants Toolkit. Bioinformatics, 29(5):647-648. 
# diff -r 000000000000 -r 99d838cef41a chasm_webservice/chasm_webservice.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/chasm_webservice/chasm_webservice.py Tue Oct 07 19:23:27 2014 -0400
# @@ -0,0 +1,285 @@
#!/usr/bin/python
"""
The MIT License (MIT)

Copyright (c) 2014 Saket Choudhary, 

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the 'Software'), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

"""
import sys
import requests
import argparse
import time
from functools import wraps
import json
import zipfile
import tempfile
import ntpath
import shutil
import xlrd
import csv
import os

# Sheet index -> CSV file name. Not referenced anywhere in this script; kept
# because it is a module-level name other code may import.
sheet_map = {0: 'Variant_Analysis.csv',
             1: 'Amino_Acid_Level_Analysis.csv',
             2: 'Gene_Level_Analysis.csv'}


def retry(ExceptionToCheck, tries=40000, delay=3, backoff=2, logger=None,
          max_delay=None):
    '''Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :param max_delay: upper bound in seconds for the delay between two
        attempts. None (the default) leaves the delay unbounded, which is the
        historical behaviour.
    :type max_delay: int or None
    '''
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck:
                    msg = 'Retrying in %d seconds...' % (mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
                    # Without a cap the delay doubles forever and quickly
                    # reaches hours or days between two attempts.
                    if max_delay is not None and mdelay > max_delay:
                        mdelay = max_delay
            # Last attempt: let any exception propagate to the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry


# Accepted values for --cancertype; the chosen value is sent verbatim as the
# 'chasmclassifier' form field.
# NOTE(review): 'Kidney-Papiallary_Cell' looks like a misspelling of
# 'Papillary'; left unchanged because it is a runtime value -- confirm
# against the web service and the tool XML before correcting it.
CANCERTYPES = ['Bladder', 'Blood-Lymphocyte', 'Blood-Myeloid',
               'Brain-Cerebellum', 'Brain-Glioblastoma_Multiforme',
               'Brain-Lower_Grade_Glioma', 'Breast', 'Cervix',
               'Colon', 'Head_and_Neck', 'Kidney-Chromophobe',
               'Kidney-Clear_Cell', 'Kidney-Papiallary_Cell',
               'Liver-Nonviral', 'Liver-Viral', 'Lung-Adenocarcinoma',
               'Lung-Squamous_Cell', 'Melanoma', 'Other', 'Ovary',
               'Pancreas', 'Prostate-Adenocarcinoma', 'Rectum',
               'Skin', 'Stomach', 'Thyroid', 'Uterus']

__URL__ = 'http://www.cravat.us/rest/service/submit'


def stop_err(msg):
    '''Write *msg* to stderr and terminate with a non-zero exit status.'''
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() reports success (status 0) to the calling process.
    sys.exit(1)


class CHASMWeb(object):
    '''Client for one job on the CRAVAT/CHASM web service.

    Typical use, as done by main(): make_request() -> zip_exists() ->
    download_zip() -> move_files().
    '''

    def __init__(self,
                 mutationbox=None, filepath=None,
                 is_hg_18=None, analysis_type=None,
                 analysis_program=None, chosendb=None,
                 cancer_type=None, email=None,
                 annotate_genes=None, text_reports=None,
                 mupit_out=None):
        '''Store the submission parameters.

        :param mutationbox: variants as text; when empty, *filepath* is
            uploaded instead
        :param filepath: path of the variant file to upload
        :param is_hg_18: sent as the 'hg18' form field
        :param analysis_type: sent as 'analysistype'
        :param analysis_program: sent as 'analysisitem'
        :param chosendb: stored only; not used by any method of this class
        :param cancer_type: sent as 'chasmclassifier'
        :param email: sent as 'email'
        :param annotate_genes: sent as 'geneannotation'
        :param text_reports: stored only; make_request always sends
            'tsvreport': 'on'
        :param mupit_out: sent as 'mupitinput'
        '''
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        self.mupit_input = mupit_out
        # Both are filled in by download_zip().
        self.tmp_dir = None
        self.path = None

    def make_request(self):
        '''Submit the job and return the job id reported by the service.

        Exits through stop_err() when the response is not JSON or carries no
        'jobid' entry.
        '''
        data = {
            # The original key was 'mutations ' (trailing space), which
            # posts a form field under the wrong name.
            'mutations': self.mutationbox,
            'hg18': self.is_hg_18,
            'analysistype': self.analysis_type,
            'analysisitem': self.analysis_program,
            'chasmclassifier': self.cancer_type,
            'geneannotation': self.annotate_genes,
            'email': self.email,
            # Always on: main() maps the *.tsv result files of the archive.
            'tsvreport': 'on',
            'mupitinput': self.mupit_input,
        }

        # Drop unset/disabled options and encode enabled flags as 'on'.
        stripped_data = {}
        for key, value in data.items():
            if value is None or value is False:
                continue
            stripped_data[key] = 'on' if value is True else value

        if self.mutationbox:
            # Dummy file part, presumably to force a multipart/form-data
            # request like the file-upload branch -- kept as in the original.
            request = requests.post(
                __URL__, data=stripped_data, files=dict(foo='bar'))
        else:
            # Close the upload handle once the request has been sent.
            with open(self.filepath, 'rb') as input_file:
                request = requests.post(
                    __URL__, data=stripped_data,
                    files={'inputfile': input_file})
        print(request.text)
        try:
            job_id = json.loads(request.text)['jobid']
        except (ValueError, KeyError, TypeError):
            stop_err('Unexpected response from the server: %s'
                     % request.text)
        return job_id

    # Poll at most once a minute instead of letting the delay double without
    # bound between two checks.
    @retry(requests.exceptions.HTTPError, max_delay=60)
    def zip_exists(self, job_id):
        '''Return the result archive URL once it no longer answers 404.

        Raises requests.HTTPError on a 404 so that the retry decorator polls
        again.
        '''
        print(job_id)
        url = 'http://www.cravat.us/results/%s/%s.zip' % (job_id, job_id)
        # stream=True: only the status line is needed here, so do not pull
        # the whole archive on every poll.
        zip_download_request = requests.get(url, stream=True)
        try:
            status_code = zip_download_request.status_code
        finally:
            zip_download_request.close()
        if status_code == 404:
            raise requests.HTTPError()
        return url

    def download_zip(self, url, job_id):
        '''Download *url* into a fresh temporary directory.

        :returns: path of the saved archive, or None when the server did not
            answer with status 200
        '''
        self.tmp_dir = tempfile.mkdtemp()
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            self.path = os.path.join(self.tmp_dir, job_id + '.zip')
            with open(self.path, 'wb') as f:
                for chunk in r.iter_content(8192):
                    f.write(chunk)
        else:
            # Nothing was saved: do not leave the empty directory behind.
            shutil.rmtree(self.tmp_dir, ignore_errors=True)
            self.tmp_dir = None
            self.path = None
        return self.path

    def move_files(self, file_map):
        '''Copy the members of the downloaded archive to their destinations.

        :param file_map: dict mapping an archive member's base name to the
            output path it is copied to. Every '.txt' member is written to
            file_map['error.txt']. Members without a (non-empty) mapping are
            skipped.

        'SnvGet Feature Description.xls' is converted to one CSV file per
        sheet in the current working directory. The temporary download
        directory is removed afterwards, even on failure.
        '''
        try:
            with open(self.path, 'rb') as fh:
                zip_files = zipfile.ZipFile(fh)
                for name in zip_files.namelist():
                    filename = ntpath.basename(name)
                    if not filename:
                        # Directory entry, nothing to copy.
                        continue
                    extension = ntpath.splitext(filename)[-1]

                    if filename == 'SnvGet Feature Description.xls':
                        self._write_sheets_as_csv(zip_files.read(name))
                        continue

                    if extension == '.txt':
                        target_path = file_map.get('error.txt')
                    elif extension != '.xls':
                        target_path = file_map.get(filename)
                    else:
                        target_path = None
                    if not target_path:
                        continue

                    with zip_files.open(name) as source_file:
                        with open(target_path, 'wb') as target_file:
                            shutil.copyfileobj(source_file, target_file)
        finally:
            shutil.rmtree(self.tmp_dir, ignore_errors=True)

    @staticmethod
    def _write_sheets_as_csv(workbook_bytes):
        '''Write every sheet of an .xls workbook to '<sheet name>.csv'.'''
        # xlrd reads from a path or from raw bytes, not from the file object
        # returned by ZipFile.open().
        with xlrd.open_workbook(file_contents=workbook_bytes) as wb:
            for sheet_name in wb.sheet_names():
                sh = wb.sheet_by_name(sheet_name)
                # The original called str.replace(' ') with a single
                # argument, which raises TypeError; spaces are presumably
                # meant to be dropped from the name -- confirm.
                name_shortened = sheet_name.replace(' ', '').strip() + '.csv'
                with open(name_shortened, 'wb') as f:
                    c = csv.writer(f)
                    for r in range(sh.nrows):
                        c.writerow(sh.row_values(r))


def main(params):
    '''Parse *params*, submit the job and collect its result files.

    :param params: list of command-line arguments, without the program name
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input',
                        type=str, dest='mutationbox',
                        help='Input variants')
    parser.add_argument('--path', type=str,
                        dest='input_file_location',
                        help='Input file location')
    parser.add_argument('--hg18', dest='hg18',
                        action='store_true')
    parser.add_argument('--analysis_type', dest='analysis_type',
                        type=str,
                        choices=['driver', 'functional',
                                 'geneannotationonly'],
                        default='driver')
    parser.add_argument('--chosendb', dest='chosendb',
                        type=str, nargs='*',
                        choices=['CHASM', 'SnvGet'],
                        default='CHASM')
    parser.add_argument('--cancertype', dest='cancer_type',
                        type=str, choices=CANCERTYPES,
                        required=True)
    parser.add_argument('--email', dest='email',
                        required=True, type=str)
    parser.add_argument('--annotate', dest='annotate',
                        action='store_true', default=None)
    parser.add_argument('--tsv_report', dest='tsv_report',
                        action='store_true', default=None)
    parser.add_argument('--mupit_out', dest='mupit_out',
                        action='store_true', default=None)
    parser.add_argument('--gene_analysis_out', dest='gene_analysis_out',
                        type=str, required=True)
    parser.add_argument('--variant_analysis_out',
                        dest='variant_analysis_out',
                        type=str, required=True)
    parser.add_argument('--amino_acid_level_analysis_out',
                        dest='amino_acid_level_analysis_out',
                        type=str, required=True,)
    parser.add_argument('--codon_level_analysis_out',
                        dest='codon_level_analysis_out',
                        type=str, required=True,)
    parser.add_argument('--error_file', dest='error_file_out',
                        type=str, required=True)
    parser.add_argument('--snv_box_out', dest='snv_box_out',
                        type=str, required=False)
    parser.add_argument('--snv_features', dest='snv_features_out',
                        type=str, required=False)
    args = parser.parse_args(params)

    # make_request() uploads --path whenever --input is empty; fail early
    # with a usage error rather than on open(None).
    if not args.mutationbox and not args.input_file_location:
        parser.error('one of -i/--input or --path is required')

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()
    # Archive member name -> requested output path.
    file_map = {
        'Amino_Acid_Level_Analysis.Result.tsv':
            args.amino_acid_level_analysis_out,
        'SNVBox.tsv': args.snv_box_out,
        'Variant_Analysis.Result.tsv': args.variant_analysis_out,
        'Gene_Level_Analysis.Result.tsv': args.gene_analysis_out,
        'SnvGet Feature Description.xls': args.snv_features_out,
        'error.txt': args.error_file_out,
        'Codon_Level_Analysis.Result.tsv': args.codon_level_analysis_out,
    }
    url = chasm_web.zip_exists(job_id)
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err('Unable to download from the server')


if __name__ == '__main__':
    main(sys.argv[1:])
# diff -r 000000000000 -r 99d838cef41a chasm_webservice/chasm_webservice.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/chasm_webservice/chasm_webservice.xml Tue Oct 07 19:23:27 2014 -0400
# @@ -0,0 +1,294 @@
# +
# diff -r 000000000000 -r
99d838cef41a chasm_webservice/test-data/chasm_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_input.txt Tue Oct 07 19:23:27 2014 -0400 @@ -0,0 +1,5 @@ +TR1 chr22 30421786 + A T +TR2 chr22 29446079 + A G +TR3 chr22 29446079 + A G +TR4 chr22 40814500 - A G +TR5 chr22 40815256 + C T diff -r 000000000000 -r 99d838cef41a chasm_webservice/test-data/chasm_output_aminoacids.tabular diff -r 000000000000 -r 99d838cef41a chasm_webservice/test-data/chasm_output_codons.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_codons.tabular Tue Oct 07 19:23:27 2014 -0400 @@ -0,0 +1,14 @@ +#Amino Acid Level Analysis Report +#2014-08-28 22:54:22.487446 +#CRAVAT version: 3.1 +#Analysis done at http://www.cravat.us. +#Input file: dataset_1.dat +#This report shows analysis results at amino acid level. +#Input coordinate was hg19 genomic. +#For more information on CRAVAT, visit http://www.cravat.us. +# +Transcript Amino acid position Sequence Ontology Reference amino acid(s) Protein sequence change HUGO symbol Other transcripts Occurrences in study [amino acid change] Transcript in COSMIC Protein sequence change in COSMIC Occurrences in COSMIC [amino acid change] Occurrences in COSMIC by primary sites [amino acid change] +NM_021090.3 1198 MS N I MTMR3 NM_153051.2:aAc>aTc:N1161I, NM_153050.2:aAc>aTc:N1170I, ENST00000323630:aAc>aTc:N1062I, ENST00000351488:aAc>aTc:N1161I, ENST00000333027:aAc>aTc:N1170I, ENST00000406629:aAc>aTc:N1170I, ENST00000401950:aAc>aTc:N1198I 1 0 +NM_001206998.1 637 MS H R ZNRF3 NM_032173.3:cAc>cGc:H537R, ENST00000406323:cAc>cGc:H537R, ENST00000332811:cAc>cGc:H537R, ENST00000402174:cAc>cGc:H537R, ENST00000544604:cAc>cGc:H637R 2 0 +NM_020831.3 648 MS S G MKL1 ENST00000396617:Agc>Ggc:S648G, ENST00000402042:Agc>Ggc:S598G, ENST00000407029:Agc>Ggc:S648G, ENST00000355630:Agc>Ggc:S648G 1 ENST00000355630 p.S648G (stomach 1) 1 stomach(1) +NM_020831.3 396 MS A T MKL1 
ENST00000396617:Gcc>Acc:A396T, ENST00000402042:Gcc>Acc:A346T, ENST00000407029:Gcc>Acc:A396T, ENST00000355630:Gcc>Acc:A396T 1 0 diff -r 000000000000 -r 99d838cef41a chasm_webservice/test-data/chasm_output_errors.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_errors.tabular Tue Oct 07 19:23:27 2014 -0400 @@ -0,0 +1,1 @@ +# End of input format error output. If nothing is above this line, there was no format error in the input. \ No newline at end of file diff -r 000000000000 -r 99d838cef41a chasm_webservice/test-data/chasm_output_genes.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_genes.tabular Tue Oct 07 19:23:27 2014 -0400 @@ -0,0 +1,15 @@ +#Gene Level Analysis Report +#2014-08-28 22:54:22.487967 +#CRAVAT version: 3.1 +#Analysis done at http://www.cravat.us. +#Input file: dataset_1.dat +#This report shows analysis results at gene level. +#The composite p-value (Stouffer's combined p-value) and composite FDR of a gene show how probable it is to get the same p-value distribution for the gene as that obtained from the input variants by chance. +#Input coordinate was hg19 genomic. +#Tissue type for CHASM analysis: Other +#For more information on CRAVAT, visit http://www.cravat.us. 
+# +HUGO symbol Driver Genes TARGET Best driver score from representative transcripts Occurrences in study [gene mutated] Occurrences in COSMIC [gene mutated] Occurrences in COSMIC by primary sites [gene mutated] MuPIT link +MTMR3 0.582 1 103 upper_aerodigestive_tract(2);large_intestine(24);haematopoietic_and_lymphoid_tissue(5);endometrium(12);urinary_tract(4);oesophagus(1);breast(6);skin(17);lung(17);ovary(5);NS(1);prostate(2);kidney(4);liver(3) +ZNRF3 0.524 2 73 upper_aerodigestive_tract(2);large_intestine(19);autonomic_ganglia(1);haematopoietic_and_lymphoid_tissue(2);endometrium(12);urinary_tract(2);lung(16);liver(5);skin(1);oesophagus(3);ovary(2);NS(1);prostate(2);kidney(3);breast(2) +MKL1 0.668 2 68 large_intestine(15);stomach(2);central_nervous_system(1);haematopoietic_and_lymphoid_tissue(3);endometrium(13);urinary_tract(1);lung(19);breast(3);skin(1);ovary(5);kidney(4);liver(1) diff -r 000000000000 -r 99d838cef41a chasm_webservice/test-data/chasm_output_variants.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_variants.tabular Tue Oct 07 19:23:27 2014 -0400 @@ -0,0 +1,16 @@ +#Variant Level Analysis Report +#2014-08-28 22:54:22.488508 +#CRAVAT version: 3.1 +#Analysis done at http://www.cravat.us. +#Input file: dataset_1.dat +#This report shows analysis results at variant level. +#Input coordinate was hg19 genomic. +#Tissue type for CHASM analysis: Other +#For more information on CRAVAT, visit http://www.cravat.us. 
+# +ID Chromosome Position Strand Reference base Alternate base Sample ID HUGO symbol Transcript Transcript strand Codon change Amino acid position Sequence Ontology Reference amino acid(s) Protein sequence change Other transcripts Mappability Warning Driver Genes TARGET Driver score of representative transcript (driver mutations close to 0) Empirical p-value FDR (Benjamini-Hochberg) (not available with less than 10 unique mutations) Best driver score and transcript All transcripts and driver scores dbSNP 1000 Genomes allele frequency ESP6500 allele frequency (European American) ESP6500 allele frequency (African American) Occurrences in study [exact nucleotide change] Transcript in COSMIC Protein sequence change in COSMIC Occurrences in COSMIC [exact nucleotide change] Occurrences in COSMIC by primary sites [exact nucleotide change] MuPIT Link +TR1 chr22 30421786 + A T No_sample_ID MTMR3 NM_021090.3 + aAc>aTc 1198 MS N I NM_153051.2:aAc>aTc:N1161I, NM_153050.2:aAc>aTc:N1170I, ENST00000323630:aAc>aTc:N1062I, ENST00000351488:aAc>aTc:N1161I, ENST00000333027:aAc>aTc:N1170I, ENST00000406629:aAc>aTc:N1170I, ENST00000401950:aAc>aTc:N1198I 0.582(NM_021090.3:N1198I:&) 0.2972 0.572(ENST00000351488:N1161I:=) ENST00000323630:N1062I:=(0.594),ENST00000406629:N1170I:=(0.602),ENST00000401950:N1198I:=(0.592),ENST00000351488:N1161I:=(0.572),NM_153050.2:N1170I:=(0.582),ENST00000333027:N1170I:=(0.582),NM_021090.3:N1198I:&(0.582),NM_153051.2:N1161I:=(0.582) rs75623810 0.00778388278388 0 0.0226963 1 +TR2 chr22 29446079 + A G No_sample_ID ZNRF3 NM_001206998.1 + cAc>cGc 637 MS H R NM_032173.3:cAc>cGc:H537R, ENST00000406323:cAc>cGc:H537R, ENST00000332811:cAc>cGc:H537R, ENST00000402174:cAc>cGc:H537R, ENST00000544604:cAc>cGc:H637R 0.524(NM_001206998.1:H637R:&) 0.1872 0.442(ENST00000544604:H637R:=) ENST00000544604:H637R:=(0.442),NM_032173.3:H537R:=(0.524),ENST00000402174:H537R:=(0.524),ENST00000406323:H537R:=(0.514),NM_001206998.1:H637R:&(0.524),ENST00000332811:H537R:=(0.524) rs62641746 0 
0.00598145 0.00104004 2 +TR3 chr22 29446079 + A G No_sample_ID ZNRF3 NM_001206998.1 + cAc>cGc 637 MS H R NM_032173.3:cAc>cGc:H537R, ENST00000406323:cAc>cGc:H537R, ENST00000332811:cAc>cGc:H537R, ENST00000402174:cAc>cGc:H537R, ENST00000544604:cAc>cGc:H637R 0.524(NM_001206998.1:H637R:&) 0.1872 0.442(ENST00000544604:H637R:=) ENST00000544604:H637R:=(0.442),NM_032173.3:H537R:=(0.524),ENST00000402174:H537R:=(0.524),ENST00000406323:H537R:=(0.514),NM_001206998.1:H637R:&(0.524),ENST00000332811:H537R:=(0.524) rs62641746 0 0.00598145 0.00104004 2 +TR4 chr22 40814500 - A G No_sample_ID MKL1 NM_020831.3 - Agc>Ggc 648 MS S G ENST00000396617:Agc>Ggc:S648G, ENST00000402042:Agc>Ggc:S598G, ENST00000407029:Agc>Ggc:S648G, ENST00000355630:Agc>Ggc:S648G 0.728(NM_020831.3:S648G:&) 0.6890 0.704(ENST00000402042:S598G:=) ENST00000407029:S648G:=(0.728),NM_020831.3:S648G:&(0.728),ENST00000396617:S648G:=(0.728),ENST00000355630:S648G:=(0.728),ENST00000402042:S598G:=(0.704) rs878756 0.419871794872 0.396977 0.860645 1 ENST00000355630 p.S648G (stomach 1) 1 stomach(1) +TR5 chr22 40815256 + C T No_sample_ID MKL1 NM_020831.3 - Gcc>Acc 396 MS A T ENST00000396617:Gcc>Acc:A396T, ENST00000402042:Gcc>Acc:A346T, ENST00000407029:Gcc>Acc:A396T, ENST00000355630:Gcc>Acc:A396T 0.668(NM_020831.3:A396T:&) 0.5102 0.668(ENST00000407029:A396T:=) ENST00000407029:A396T:=(0.668),NM_020831.3:A396T:&(0.668),ENST00000396617:A396T:=(0.668),ENST00000355630:A396T:=(0.668),ENST00000402042:A346T:=(0.67) rs34736200 0.0201465201465 0.00127937 0.0834091 1 diff -r 000000000000 -r 99d838cef41a chasm_webservice/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/tool_dependencies.xml Tue Oct 07 19:23:27 2014 -0400 @@ -0,0 +1,7 @@ + + + + + + +