annotate data_manager/data_manager_snpsift_dbnsfp.py @ 5:09f9bfb2b33b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpsift_dbnsfp commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
author iuc
date Tue, 30 Oct 2018 18:44:43 -0400
parents 47f276ffcbfc
children e57f0b0bc73b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
1 #!/usr/bin/env python
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
2
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
3 import gzip
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
4 import json
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
5 import optparse
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
6 import os
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
7 import os.path
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
8 import re
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
9 import shutil
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
10 import sys
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
11 import urllib
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
12 import zipfile
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
13
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
14 from pysam import ctabix
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
15
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
16 """
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
17 # Install dbNSFP databases
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
18 # from DbNsfp site
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
19 # Download dbNSFP database
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
20 $ wget ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/dbNSFPv2.4.zip
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
21 # Uncompress
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
22 $ unzip dbNSFP2.4.zip
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
23 # Create a single file version
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
24 $ (head -n 1 dbNSFP2.4_variant.chr1 ; cat dbNSFP2.4_variant.chr* | grep -v "^#") > dbNSFP2.4.txt
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
25 # Compress using block-gzip algorithm
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
26 bgzip dbNSFP2.4.txt
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
27 # Create tabix index
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
28 tabix -s 1 -b 2 -e 2 dbNSFP2.4.txt.gz
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
29
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
30 data_table:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
31
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
32 <table name="snpsift_dbnsfps" comment_char="#">
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
33 <columns>key, build, name, value, annotations</columns>
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
34 <file path="tool-data/snpsift_dbnsfps.loc" />
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
35 </table>
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
36
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
37 #id build description path annotations
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
38 #GRCh37_dbNSFP2.4 GRCh37 GRCh37 dbNSFP2.4 /depot/snpeff/dbNSFP2.4.gz SIFT_pred,Uniprot_acc
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
39 #GRCh38_dbNSFP2.7 GRCh38 GRCh38 dbNSFP2.7 /depot/snpeff/dbNSFP2.7.gz SIFT_pred,Uniprot_acc
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
40 """
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
41
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
42 data_table = 'snpsift_dbnsfps'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
43 softgenetics_url = 'ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
44 dbNSFP_file_pat = '(dbNSFP(.*)_variant|dbscSNV(.*)).chr(.*)'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
45 tokenize = re.compile(r'(\d+)|(\D+)').findall
5
09f9bfb2b33b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpsift_dbnsfp commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
iuc
parents: 4
diff changeset
46 dbNSFP_name_pat = r'dbNSFP(v|_light)?(\d*).*?'
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
47
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
48
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
49 def stop_err(msg):
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
50 sys.stderr.write(msg)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
51 sys.exit(1)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
52
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
53
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
54 def get_nsfp_genome_version(name):
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
55 genome_version = 'hg19'
5
09f9bfb2b33b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpsift_dbnsfp commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
iuc
parents: 4
diff changeset
56 dbNSFP_name_pat = r'(dbscSNV|dbNSFP(v|_light)?)(\d*).*?'
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
57 m = re.match(dbNSFP_name_pat, name)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
58 if m:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
59 (base, mid, ver) = m.groups()
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
60 if base == 'dbscSNV':
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
61 genome_version = 'hg19'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
62 else:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
63 genome_version = 'hg38' if ver == '3' else 'hg19' if ver == '2' else 'hg18'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
64 return genome_version
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
65
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
66
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
67 def get_annotations(gzip_path):
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
68 annotations = None
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
69 fh = None
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
70 try:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
71 fh = gzip.open(gzip_path, 'r')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
72 buf = fh.read(10000)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
73 lines = buf.splitlines()
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
74 headers = lines[0].split('\t')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
75 annotations = ','.join([x.strip() for x in headers[4:]])
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
76 except Exception as e:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
77 stop_err('Error Reading annotations %s : %s' % (gzip_path, e))
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
78 finally:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
79 if fh:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
80 fh.close()
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
81 return annotations
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
82
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
83
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
84 def tabix_file(input_fname, output_fname):
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
85 print >> sys.stdout, "tabix_file: %s -> %s" % (input_fname, output_fname)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
86 ctabix.tabix_compress(input_fname, output_fname, force=True)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
87 # Column indices are 0-based.
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
88 ctabix.tabix_index(output_fname, seq_col=0, start_col=1, end_col=1)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
89
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
90
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
91 def natural_sortkey(string):
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
92 return tuple(int(num) if num else alpha for num, alpha in tokenize(string))
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
93
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
94
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
95 def download_dbnsfp_database(url, output_file):
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
96 dbnsfp_tsv = None
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
97 file_path = 'downloaded_file'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
98 urllib.urlretrieve(url, file_path)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
99 with zipfile.ZipFile(file_path, 'r') as my_zip:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
100 dbnsfp_tsv = output_file if output_file else 'dbnsfp_tsv'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
101 wtr = open(dbnsfp_tsv, 'w')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
102 allfiles = [info.filename for info in my_zip.infolist()]
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
103 files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)]
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
104 files = sorted(files, key=natural_sortkey)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
105 for j, file in enumerate(files):
2
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
106 tempfiles = []
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
107 tempfiles.append(file + "_%d" % len(tempfiles))
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
108 tfh = open(tempfiles[-1], 'w')
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
109 lastpos = None
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
110 fh = my_zip.open(file, 'rU')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
111 for i, line in enumerate(fh):
2
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
112 if i == 0:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
113 if j == 0:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
114 wtr.write(line)
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
115 continue
2
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
116 else:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
117 pos = int(line.split('\t')[1])
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
118 if lastpos and pos < lastpos:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
119 tfh.close()
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
120 tempfiles.append(file + "_%d" % len(tempfiles))
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
121 tfh = open(tempfiles[-1], 'w')
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
122 print >> sys.stderr, "%s [%d] pos: %d < %d" % (file, i, pos, lastpos)
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
123 lastpos = pos
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
124 tfh.write(line)
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
125 tfh.close()
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
126 if len(tempfiles) == 1:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
127 with open(tempfiles[0], 'r') as tfh:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
128 wtr.writelines(tfh.readlines())
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
129 else:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
130 tfha = [open(temp, 'r') for temp in tempfiles]
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
131 lines = [tfh.readline() for tfh in tfha]
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
132 curpos = [int(line.split('\t')[1]) for line in lines]
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
133 while len(tfha) > 0:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
134 k = curpos.index(min(curpos))
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
135 wtr.write(lines[k])
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
136 line = tfha[k].readline()
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
137 if line:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
138 lines[k] = line
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
139 curpos[k] = int(line.split('\t')[1])
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
140 else:
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
141 tfha[k].close()
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
142 del tfha[k]
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
143 del lines[k]
3d4cd0e3891f planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
iuc
parents: 0
diff changeset
144 del curpos[k]
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
145 return dbnsfp_tsv
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
146
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
147
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
148 def main():
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
149 # Parse Command Line
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
150 parser = optparse.OptionParser()
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
151 parser.add_option('-g', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='dbkey genome version')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
152 parser.add_option('-n', '--db_name', dest='db_name', action='store', type="string", default=None, help='A name for a history snpsiftdbnsfp dataset')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
153 parser.add_option('-s', '--softgenetics', dest='softgenetics', action='store', type="string", default=None, help='A name for softgenetics dbNSFP file')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
154 parser.add_option('-H', '--snpsiftdbnsfp', dest='snpsiftdbnsfp', action='store', type="string", default=None, help='A history snpsiftdbnsfp dataset')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
155 parser.add_option('-T', '--dbnsfp_tabular', dest='dbnsfp_tabular', action='store', type="string", default=None, help='A history dbnsfp_tabular dataset')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
156 (options, args) = parser.parse_args()
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
157
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
158 filename = args[0]
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
159 params = json.loads(open(filename).read())
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
160 target_directory = params['output_data'][0]['extra_files_path']
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
161 if not os.path.exists(target_directory):
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
162 os.mkdir(target_directory)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
163 data_manager_dict = {}
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
164 genome_version = options.dbkey if options.dbkey else 'unknown'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
165 dbnsfp_tsv = None
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
166 db_name = None
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
167 bzip_path = None
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
168 if options.softgenetics:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
169 dbnsfp_url = softgenetics_url + options.softgenetics
5
09f9bfb2b33b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpsift_dbnsfp commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
iuc
parents: 4
diff changeset
170 db_name = options.db_name if options.db_name else re.sub(r'\.zip$', '', options.softgenetics)
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
171 genome_version = get_nsfp_genome_version(options.softgenetics)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
172 tsv = db_name + '.tsv'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
173 dbnsfp_tsv = download_dbnsfp_database(dbnsfp_url, tsv)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
174 elif options.dbnsfp_tabular:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
175 db_name = options.db_name
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
176 dbnsfp_tsv = options.dbnsfp_tabular
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
177 elif options.snpsiftdbnsfp:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
178 (dirpath, bgzip_name) = os.path.split(options.snpsiftdbnsfp)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
179 idxpath = options.snpsiftdbnsfp + '.tbi'
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
180 shutil.copy(options.snpsiftdbnsfp, target_directory)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
181 shutil.copy(idxpath, target_directory)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
182 bzip_path = os.path.join(target_directory, bgzip_name)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
183 db_name = re.sub('(.txt)?.gz$', '', bgzip_name)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
184 else:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
185 stop_err('Either --softgenetics or --dbnsfp_tabular required')
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
186 if dbnsfp_tsv:
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
187 bgzip_name = '%s.txt.gz' % db_name
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
188 bzip_path = os.path.join(target_directory, bgzip_name)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
189 tabix_file(dbnsfp_tsv, bzip_path)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
190 annotations = get_annotations(bzip_path)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
191 # Create the SnpSift dbNSFP Reference Data
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
192 data_table_entry = dict(key='%s_%s' % (genome_version, db_name), build=genome_version, name='%s %s' % (genome_version, db_name), value=bgzip_name, annotations=annotations)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
193 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
194 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
195 data_manager_dict['data_tables'][data_table].append(data_table_entry)
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
196
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
197 # save info to json file
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
198 open(filename, 'wb').write(json.dumps(data_manager_dict))
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
199
4
47f276ffcbfc planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 8652f36a3a3838dca989426961561e81432acf4f
iuc
parents: 2
diff changeset
200
0
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
201 if __name__ == "__main__":
0e9e3bb5776a planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 5316af00b4a71a7b526cbc9540d5158749cc38e4
iuc
parents:
diff changeset
202 main()