Mercurial > repos > saket-choudhary > sift_web
changeset 5:697e66764209 draft default tip
Uploaded
author | saket-choudhary |
---|---|
date | Tue, 07 Oct 2014 19:19:11 -0400 |
parents | 5cc3210369ac |
children | |
files | sift_web/sift_web.py sift_web/sift_web.xml sift_web/test-data/sift_input.txt sift_web/test-data/sift_output_condensed.tsv sift_web/test-data/sift_output_full.tsv sift_web/test-data/sift_output_summary.tsv |
diffstat | 6 files changed, 128 insertions(+), 34 deletions(-) [+] |
line wrap: on
line diff
--- a/sift_web/sift_web.py Thu Sep 04 17:46:51 2014 -0400 +++ b/sift_web/sift_web.py Tue Oct 07 19:19:11 2014 -0400 @@ -15,26 +15,7 @@ def retry(ExceptionToCheck, tries=10, delay=3, backoff=2, logger=None): - """Retry calling the decorated function using an exponential backoff. - - http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ - original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry - - :param ExceptionToCheck: the exception to check. may be a tuple of - exceptions to check - :type ExceptionToCheck: Exception or tuple - :param tries: number of times to try (not retry) before giving up - :type tries: int - :param delay: initial delay between retries in seconds - :type delay: int - :param backoff: backoff multiplier e.g. value of 2 will double the delay - each retry - :type backoff: int - :param logger: logger to use. If None, print - :type logger: logging.Logger instance - """ def deco_retry(f): - @wraps(f) def f_retry(*args, **kwargs): mtries, mdelay = tries, delay @@ -53,9 +34,7 @@ mtries -= 1 mdelay *= backoff return f(*args, **kwargs) - return f_retry # true decorator - return deco_retry @@ -68,6 +47,7 @@ self.url_dict = {'full': self.full_download_url, 'condensed': self.condensed_download_url, 'summary': self.summary_download_url} + self.job_id = None def upload(self, inputpath): payload = {'table': 'human37_66'} @@ -77,19 +57,19 @@ return request.text @retry(requests.exceptions.HTTPError) - def get_full_data(self, job_id, full_output): + def get_full_data(self, full_output): r = requests.request( - 'GET', (self.full_download_url) % (job_id, job_id)) + 'GET', (self.full_download_url) % (self.job_id, self.job_id)) if r.text != 'No file exists': with open(full_output, 'wb') as f: f.write(r.text) else: - return requests.HTTPError() + raise(requests.HTTPError()) @retry(requests.exceptions.HTTPError) - def get_condensed_data(self, job_id, condensed_output): + def get_condensed_data(self, condensed_output): r = requests.request( - 'GET', (self.condensed_download_url) % (job_id, job_id)) + 'GET', (self.condensed_download_url) % (self.job_id, self.job_id)) if r.text != 'No file exists': with open(condensed_output, 'wb') as f: f.write(r.text) @@ -97,9 +77,9 @@ raise(requests.HTTPError()) @retry(requests.exceptions.HTTPError) - def get_summary_data(self, job_id, summary_output): + def get_summary_data(self, summary_output): r = requests.request( - 'GET', (self.summary_download_url) % (job_id, job_id)) + 'GET', (self.summary_download_url) % (self.job_id, self.job_id)) if r.text != 'No file exists': with open(summary_output, 'wb') as f: f.write(r.text) @@ -119,9 +99,10 @@ soup = BeautifulSoup(content) p = soup.findAll('p') job_id = p[1].string.split(':')[-1].replace(' ', '').replace(').', '') - sift_web.get_full_data(job_id, args.output1) - sift_web.get_condensed_data(job_id, args.output2) - sift_web.get_summary_data(job_id, args.output3) + sift_web.job_id = job_id + sift_web.get_full_data(args.output1) + sift_web.get_condensed_data(args.output2) + sift_web.get_summary_data(args.output3) if __name__ == '__main__':
--- a/sift_web/sift_web.xml Thu Sep 04 17:46:51 2014 -0400 +++ b/sift_web/sift_web.xml Tue Oct 07 19:19:11 2014 -0400 @@ -13,9 +13,17 @@ <param name="input" format="txt" type="data" label="Input Variants"/> </inputs> <outputs> - <data name="output1" format="tabular"/> - <data name="output2" format="tabular"/> - <data name="output3" format="tabular"/> + <data name="output1" format="tabular" label="${tool.name} on ${on_string}: Full Output"/> + <data name="output2" format="tabular" label="${tool.name} on ${on_string}: Condensed Output"/> + <data name="output3" format="tabular" label="${tool.name} on ${on_string}: Summary table"/> </outputs> + <tests> + <test> + <param name="input" value="sift_input.txt"/> + <output name="output1" file="sift_output_full.tsv"/> + <output name="output2" file="sift_output_condensed.tsv"/> + <output name="output3" file="sift_output_summary.tsv"/> + </test> + </tests> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sift_web/test-data/sift_input.txt Tue Oct 07 19:19:11 2014 -0400 @@ -0,0 +1,14 @@ +1,100382265,C,G,user comment 1 +1,100380997,A,G,user comment 2 +22,30163533,A,C +X,12905093,A,T +2,230633386,G,C +1,100382265,C,A +7,117199641,ATCA,. +7,117199647,TTT,. +10,50184923,TGG,. +12,121438957,ACC,. +1,43217995,G,GCCA +10,102762472,G,GGCG +9,117856130,T,G +9,117856135,C,G
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sift_web/test-data/sift_output_condensed.tsv Tue Oct 07 19:19:11 2014 -0400 @@ -0,0 +1,15 @@ +#ROW_NO. INPUT PROTEIN_ID LENGTH STRAND CODON_CHANGE POS RESIDUE_REF RESIDUE_ALT TYPE SCORE PREDICTION (cutoff=-2.5) #SEQ #CLUSTER SCORE PREDICTION (cutoff=0.05) MEDIAN_INFO #SEQ dbSNP_ID +1 1,100382265,C,G,user comment 1 ENSP00000294724 1532 1 TCC [C/G]GA CAT 1487 R G Single AA Change -1.75 Neutral 107 30 0.118 Tolerated 2.82 71 rs12118058 +2 1,100380997,A,G,user comment 2 ENSP00000294724 1532 1 GCA G[A/G]A AAA 1405 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.80 72 rs28730708 +3 22,30163533,A,C ENSP00000333278 132 1 ACG [A/C]GG GGA 49 R R Synonymous 0.00 Neutral 3 1 NA NA NA NA +4 X,12905093,A,T ENSP00000370034 1049 1 AAT G[A/T]A AGC 489 E V Single AA Change -1.99 Neutral 265 30 0.044 Damaging 2.87 154 +5 2,230633386,G,C ENSP00000373696 2040 -1 GAG [C/G]AG CAG 1958 Q E Single AA Change 0.46 Neutral 127 30 0.602 Tolerated 2.89 136 +6 1,100382265,C,A ENSP00000294724 1532 1 TCC [C/A]GA CAT 1487 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.82 71 +7 7,117199641,ATCA,. ENSP00000003084 1480 1 Frameshift NA NA 273 30 NA NA NA NA +8 7,117199647,TTT,. ENSP00000003084 1480 1 ATC [TTT/-] GGT 508 F . Deletion -13.79 Deleterious 273 30 NA NA NA NA rs121909001 +9 10,50184923,TGG,. ENSP00000320563 3184 1 GCC [TGG/-] GGC 3064 W . Deletion -6.39 Deleterious 101 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000443112 662 1 GAG [ACC/-] TTC 651 T . Deletion -3.66 Deleterious 158 30 NA NA NA NA +11 1,43217995,G,GCCA ENSP00000236040 804 -1 CCA [-/TGG] CAT 506 P PW Insertion -9.11 Deleterious 157 30 NA NA NA NA +12 10,102762472,G,GGCG ENSP00000359240 669 1 CAG [-/GCG] GAT 59 Q QA Insertion -3.26 Deleterious 136 30 NA NA NA NA +13 9,117856130,T,G record not found rs1513 +14 9,117856135,C,G record not found
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sift_web/test-data/sift_output_full.tsv Tue Oct 07 19:19:11 2014 -0400 @@ -0,0 +1,63 @@ +#ROW_NO. INPUT PROTEIN_ID LENGTH STRAND CODON_CHANGE POS RESIDUE_REF RESIDUE_ALT TYPE SCORE PREDICTION (cutoff=-2.5) #SEQ #CLUSTER SCORE PREDICTION (cutoff=0.05) MEDIAN_INFO #SEQ dbSNP_ID +1 1,100382265,C,G,user comment 1 ENSP00000294724 1532 1 TCC [C/G]GA CAT 1487 R G Single AA Change -1.75 Neutral 107 30 0.118 Tolerated 2.82 71 rs12118058 +1 1,100382265,C,G,user comment 1 ENSP00000354635 1515 1 TCC [C/G]GA CAT 1470 R G Single AA Change -1.75 Neutral 107 30 0.114 Tolerated 2.84 71 rs12118058 +1 1,100382265,C,G,user comment 1 ENSP00000354971 1516 1 TCC [C/G]GA CAT 1471 R G Single AA Change -1.75 Neutral 107 30 0.108 Tolerated 2.83 73 rs12118058 +1 1,100382265,C,G,user comment 1 ENSP00000355106 1532 1 TCC [C/G]GA CAT 1487 R G Single AA Change -1.75 Neutral 107 30 0.118 Tolerated 2.82 71 rs12118058 +1 1,100382265,C,G,user comment 1 ENSP00000359180 1516 1 TCC [C/G]GA CAT 1471 R G Single AA Change -1.75 Neutral 107 30 0.108 Tolerated 2.83 73 rs12118058 +1 1,100382265,C,G,user comment 1 ENSP00000359182 1532 1 TCC [C/G]GA CAT 1487 R G Single AA Change -1.75 Neutral 107 30 0.118 Tolerated 2.82 71 rs12118058 +1 1,100382265,C,G,user comment 1 ENSP00000359184 1532 1 TCC [C/G]GA CAT 1487 R G Single AA Change -1.75 Neutral 107 30 0.118 Tolerated 2.82 71 rs12118058 +2 1,100380997,A,G,user comment 2 ENSP00000294724 1532 1 GCA G[A/G]A AAA 1405 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.80 72 rs28730708 +2 1,100380997,A,G,user comment 2 ENSP00000354635 1515 1 GCA G[A/G]A AAA 1388 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.82 72 rs28730708 +2 1,100380997,A,G,user comment 2 ENSP00000354971 1516 1 GCA G[A/G]A AAA 1389 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.81 74 rs28730708 +2 1,100380997,A,G,user comment 2 ENSP00000355106 1532 1 GCA G[A/G]A AAA 1405 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.80 72 rs28730708 +2 1,100380997,A,G,user comment 2 ENSP00000359180 1516 1 GCA G[A/G]A AAA 1389 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.81 74 rs28730708 +2 1,100380997,A,G,user comment 2 ENSP00000359182 1532 1 GCA G[A/G]A AAA 1405 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.80 72 rs28730708 +2 1,100380997,A,G,user comment 2 ENSP00000359184 1532 1 GCA G[A/G]A AAA 1405 E G Single AA Change -4.58 Deleterious 107 30 0.004 Damaging 2.80 72 rs28730708 +3 22,30163533,A,C ENSP00000332887 63 1 AAC G[A/C]G GGG 49 E A Single AA Change -3.11 Deleterious 87 30 0.020 Damaging 2.79 85 +3 22,30163533,A,C ENSP00000333278 132 1 ACG [A/C]GG GGA 49 R R Synonymous 0.00 Neutral 3 1 NA NA NA NA +3 22,30163533,A,C ENSP00000384962 62 1 AAC G[A/C]G GGG 49 E A Single AA Change -3.05 Deleterious 85 30 0.057 Tolerated 3.02 57 +3 22,30163533,A,C ENSP00000385696 82 1 AAC G[A/C]G GGG 49 E A Single AA Change -3.00 Deleterious 88 30 0.036 Damaging 3.07 83 +4 X,12905093,A,T ENSP00000370034 1049 1 AAT G[A/T]A AGC 489 E V Single AA Change -1.99 Neutral 265 30 0.044 Damaging 2.87 154 +5 2,230633386,G,C ENSP00000283943 1992 -1 GAG [C/G]AG CAG 1910 Q E Single AA Change 0.46 Neutral 127 30 0.811 Tolerated 2.90 99 +5 2,230633386,G,C ENSP00000373696 2040 -1 GAG [C/G]AG CAG 1958 Q E Single AA Change 0.46 Neutral 127 30 0.602 Tolerated 2.89 136 +5 2,230633386,G,C ENSP00000373697 1722 -1 GAG [C/G]AG CAG 1640 Q E Single AA Change 0.33 Neutral 129 30 1.000 Tolerated 2.96 95 +5 2,230633386,G,C ENSP00000408330 187 -1 GAG [C/G]AG CAG 170 Q E Single AA Change 0.86 Neutral 120 30 1.000 Tolerated 2.82 62 +6 1,100382265,C,A ENSP00000294724 1532 1 TCC [C/A]GA CAT 1487 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.82 71 +6 1,100382265,C,A ENSP00000354635 1515 1 TCC [C/A]GA CAT 1470 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.84 71 +6 1,100382265,C,A ENSP00000354971 1516 1 TCC [C/A]GA CAT 1471 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.83 73 +6 1,100382265,C,A ENSP00000355106 1532 1 TCC [C/A]GA CAT 1487 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.82 71 +6 1,100382265,C,A ENSP00000359180 1516 1 TCC [C/A]GA CAT 1471 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.83 73 +6 1,100382265,C,A ENSP00000359182 1532 1 TCC [C/A]GA CAT 1487 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.82 71 +6 1,100382265,C,A ENSP00000359184 1532 1 TCC [C/A]GA CAT 1487 R R Synonymous 0.00 Neutral 107 30 1.000 Tolerated 2.82 71 +7 7,117199641,ATCA,. ENSP00000003084 1480 1 Frameshift NA NA 273 30 NA NA NA NA +7 7,117199641,ATCA,. ENSP00000389119 1438 1 Frameshift NA NA 273 30 NA NA NA NA +7 7,117199641,ATCA,. ENSP00000403677 1419 1 Frameshift NA NA 275 30 NA NA NA NA +8 7,117199647,TTT,. ENSP00000003084 1480 1 ATC [TTT/-] GGT 508 F . Deletion -13.79 Deleterious 273 30 NA NA NA NA rs121909001 +8 7,117199647,TTT,. ENSP00000389119 1438 1 ATC [TTT/-] GGT 478 F . Deletion -13.79 Deleterious 273 30 NA NA NA NA rs121909001 +8 7,117199647,TTT,. ENSP00000403677 1419 1 ATC [TTT/-] GGT 447 F . Deletion -14.12 Deleterious 275 30 NA NA NA NA rs121909001 +9 10,50184923,TGG,. ENSP00000265453 1271 1 GCC [TGG/-] GGC 1151 W . Deletion -8.05 Deleterious 103 30 NA NA NA NA +9 10,50184923,TGG,. ENSP00000309314 2267 1 GCC [TGG/-] GGC 2155 W . Deletion -7.09 Deleterious 103 30 NA NA NA NA +9 10,50184923,TGG,. ENSP00000320563 3184 1 GCC [TGG/-] GGC 3064 W . Deletion -6.39 Deleterious 101 30 NA NA NA NA +9 10,50184923,TGG,. ENSP00000409696 3176 1 GCC [TGG/-] GGC 3064 W . Deletion -6.39 Deleterious 101 30 NA NA NA NA +9 10,50184923,TGG,. ENSP00000443499 624 1 GCC [TGG/-] GGC 527 W . Deletion -8.09 Deleterious 110 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000257555 631 1 GAG [ACC/-] TTC 620 T . Deletion -3.40 Deleterious 159 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000339938 572 1 GAG [ACC/-] TTC 561 T . Deletion -3.47 Deleterious 163 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000342683 521 1 GAG [ACC/-] TTC 510 T . Deletion -2.88 Deleterious 164 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000438300 523 1 GAG [ACC/-] TTC 512 T . Deletion -3.05 Deleterious 164 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000438531 452 1 GAG [ACC/-] TTC 441 T . Deletion -3.49 Deleterious 165 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000438804 638 1 GAG [ACC/-] TTC 627 T . Deletion -3.40 Deleterious 159 30 NA NA NA NA +10 12,121438957,ACC,. ENSP00000443112 662 1 GAG [ACC/-] TTC 651 T . Deletion -3.66 Deleterious 158 30 NA NA NA NA +11 1,43217995,G,GCCA ENSP00000236040 804 -1 CCA [-/TGG] CAT 506 P PW Insertion -9.11 Deleterious 157 30 NA NA NA NA +11 1,43217995,G,GCCA ENSP00000296388 736 -1 CCA [-/TGG] CAT 506 P PW Insertion -9.11 Deleterious 158 30 NA NA NA NA +11 1,43217995,G,GCCA ENSP00000380245 697 -1 CCA [-/TGG] CAT 506 P PW Insertion -9.14 Deleterious 160 30 NA NA NA NA +11 1,43217995,G,GCCA ENSP00000394294 152 -1 CCA [-/TGG] CAT 98 P PW Insertion -9.13 Deleterious 153 30 NA NA NA NA +11 1,43217995,G,GCCA ENSP00000408056 222 -1 CCA [-/TGG] CAT 114 P PW Insertion -9.17 Deleterious 153 30 NA NA NA NA +11 1,43217995,G,GCCA ENSP00000437795 601 -1 CCA [-/TGG] CAT 371 P PW Insertion -8.74 Deleterious 162 30 NA NA NA NA +12 10,102762472,G,GGCG ENSP00000314437 631 1 CAG [-/GCG] GAT 59 Q QA Insertion -3.15 Deleterious 137 30 NA NA NA NA +12 10,102762472,G,GGCG ENSP00000359240 669 1 CAG [-/GCG] GAT 59 Q QA Insertion -3.26 Deleterious 136 30 NA NA NA NA +12 10,102762472,G,GGCG ENSP00000359243 669 1 CAG [-/GCG] GAT 59 Q QA Insertion -3.26 Deleterious 136 30 NA NA NA NA +12 10,102762472,G,GGCG ENSP00000402042 199 1 CAG [-/GCG] GAT 59 Q QA Insertion -3.83 Deleterious 101 27 NA NA NA NA +12 10,102762472,G,GGCG ENSP00000407713 240 1 CAG [-/GCG] GAT 59 Q QA Insertion -3.24 Deleterious 113 30 NA NA NA NA +12 10,102762472,G,GGCG ENSP00000416972 221 1 CAG [-/GCG] GAT 59 Q QA Insertion -4.09 Deleterious 97 26 NA NA NA NA +13 9,117856130,T,G record not found rs1513 +14 9,117856135,C,G record not found
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sift_web/test-data/sift_output_summary.tsv Tue Oct 07 19:19:11 2014 -0400 @@ -0,0 +1,13 @@ +#Type Total Found_in_dbSNP Not_found_in_dbSNP PROVEAN_neutral PROVEAN_deleterious PROVEAN_NA SIFT_tolerated SIFT_damaging SIFT_NA +Protein_coding 12 3 9 5 6 1 3 2 7 +Single_AA_Change 4 2 2 3 1 0 2 2 0 +Synonymous 2 0 2 2 0 0 1 0 1 +Deletion 3 1 2 0 3 0 0 0 3 +Insertion 2 0 2 0 2 0 0 0 2 +Multiple_AA_Change 0 0 0 0 0 0 0 0 0 +Frameshift 1 0 1 0 0 1 0 0 1 +Nonsense 0 0 0 0 0 0 0 0 0 +Unknown 0 0 0 0 0 0 0 0 0 +Input_error 0 0 0 0 0 0 0 0 0 +Non_protein_coding 2 1 1 0 0 2 0 0 2 +Input_format_error 0 0 0 0 0 0 0 0 0