annotate data_manager/data_manager_diamond_database_builder.py @ 5:c1aaeaced1b6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 09b56ef3e09ad6c5923c88616fea5cbd77d87616
author iuc
date Mon, 18 Dec 2023 09:36:21 +0000
parents 5558f74bd296
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
2 import bz2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
3 import gzip
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
4 import json
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
5 import optparse
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
6 import os
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
7 import shutil
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
8 import subprocess
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
9 import sys
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
10 import tarfile
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
11 import tempfile
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
12 import urllib.error
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
13 import urllib.parse
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
14 import urllib.request
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
15 import zipfile
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
16 from ftplib import FTP
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
17
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
# 2**20 == 1048576, i.e. 1 MiB.
# NOTE(review): presumably the byte count used for chunked stream reads;
# the consumer is not visible in this part of the file - confirm.
CHUNK_SIZE = 2**20
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
19
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
20
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def cleanup_before_exit(tmp_dir):
    """Delete the temporary working directory tree, if there is one.

    Nothing happens when ``tmp_dir`` is falsy (``None`` or an empty string)
    or when the path does not exist on disk.
    """
    if not tmp_dir:
        return
    if not os.path.exists(tmp_dir):
        return
    shutil.rmtree(tmp_dir)
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
24
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
25
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def _get_files_in_ftp_path(ftp, path):
    """Return the entry names reported by an ``MLSD`` listing of ``path``.

    ``ftp`` must provide ``retrlines(command, callback)`` (as
    ``ftplib.FTP`` does). For every listing line, the text after the last
    ``;`` is kept and its leading whitespace is removed.
    """
    listing = []
    ftp.retrlines('MLSD %s' % (path), listing.append)

    names = []
    for entry in listing:
        # Everything up to the final ';' is discarded; a line without any
        # ';' is kept whole.
        names.append(entry.rpartition(';')[2].lstrip())
    return names
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
30
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
31
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def _get_stream_readers_for_tar(file_obj, tmp_dir):
    """Return readable streams for the file members of a tar archive.

    The archive is read from ``file_obj``; mode ``'r:*'`` lets ``tarfile``
    detect the compression transparently.

    ``tmp_dir`` is unused; it is accepted so that all ``_get_stream_readers_*``
    helpers share the same signature.

    Returns a list of file-like objects, one per member that
    ``TarFile.extractfile`` can open. Members for which it returns ``None``
    (e.g. directories) are skipped, so callers can safely read and close
    every element of the list.
    """
    fasta_tar = tarfile.open(fileobj=file_obj, mode='r:*')
    readers = []
    for member in fasta_tar.getmembers():
        reader = fasta_tar.extractfile(member)
        # extractfile() yields None for non-file members such as directories.
        if reader is not None:
            readers.append(reader)
    return readers
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
35
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
36
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def _get_stream_readers_for_zip(file_obj, tmp_dir):
    """Extract a zip archive into ``tmp_dir`` and open its files for reading.

    ``file_obj`` is the archive (a path or a seekable binary file object).
    Every member is extracted below ``tmp_dir``.

    Returns a list of binary file objects, one per regular file in the
    archive. Directory entries are extracted but not opened, since
    ``open()`` on a directory raises. The caller is responsible for closing
    the returned readers.
    """
    rval = []
    # Closing the ZipFile does not close a file object that was passed in.
    with zipfile.ZipFile(file_obj, 'r') as fasta_zip:
        for member in fasta_zip.infolist():
            # extract() returns the sanitized path it actually wrote to,
            # which may differ from a naive join of tmp_dir and the name.
            extracted_path = fasta_zip.extract(member, tmp_dir)
            if member.is_dir():
                continue
            rval.append(open(extracted_path, 'rb'))
    return rval
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
44
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
45
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def _get_stream_readers_for_gzip(file_obj, tmp_dir):
    """Wrap ``file_obj`` in a gzip decompressor.

    ``tmp_dir`` is unused; it is accepted so that all ``_get_stream_readers_*``
    helpers share the same signature.

    Returns a one-element list holding a ``gzip.GzipFile`` opened for
    binary reading on top of ``file_obj``.
    """
    reader = gzip.GzipFile(mode='rb', fileobj=file_obj)
    return [reader]
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
48
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
49
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def _get_stream_readers_for_bz2(file_obj, tmp_dir):
    """Open the bzip2 file behind ``file_obj`` for decompressed reading.

    The archive is reopened by path (``file_obj.name``) rather than read
    through ``file_obj`` itself, so ``file_obj`` must be backed by a named
    file. ``tmp_dir`` is unused; it is accepted so that all
    ``_get_stream_readers_*`` helpers share the same signature.

    Returns a one-element list holding a ``bz2.BZ2File`` in binary read mode.
    """
    archive_path = file_obj.name
    reader = bz2.BZ2File(archive_path, 'rb')
    return [reader]
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
52
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
53
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
def download_from_ncbi(data_manager_dict, params, target_directory,
                       database_id, database_name):
    """Download a FASTA archive from the NCBI FTP server and register it.

    The identifier in ``params['reference_source']['requested_identifier']``
    is looked up in ``/blast/db/FASTA/`` on ``ftp.ncbi.nlm.nih.gov``,
    trying each known compressed extension in order. The first match is
    downloaded into a temporary directory, opened with the matching
    ``_get_stream_readers_*`` helper and handed to ``_stream_fasta_to_file``.
    The resulting data table entry is added to ``data_manager_dict``.

    The FTP connection, the downloaded file, the stream readers and the
    temporary directory are released even when a step fails.

    Raises:
        Exception: if no file named ``<identifier><extension>`` exists in
            the NCBI download directory.
    """
    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    NCBI_DOWNLOAD_PATH = '/blast/db/FASTA/'
    # Order matters: the compound '.tar.*' extensions are tried before the
    # plain '.gz' / '.bz2' ones.
    COMPRESSED_EXTENSIONS = [('.tar.gz', _get_stream_readers_for_tar),
                             ('.tar.bz2', _get_stream_readers_for_tar),
                             ('.zip', _get_stream_readers_for_zip),
                             ('.gz', _get_stream_readers_for_gzip),
                             ('.bz2', _get_stream_readers_for_bz2)]

    ncbi_identifier = params['reference_source']['requested_identifier']

    # The FTP context manager quits (or closes) the connection on exit.
    with FTP(NCBI_FTP_SERVER) as ftp:
        ftp.login()

        path_contents = _get_files_in_ftp_path(ftp, NCBI_DOWNLOAD_PATH)

        ncbi_file_name = None
        get_stream_reader = None
        ext = None
        for ext, get_stream_reader in COMPRESSED_EXTENSIONS:
            if "%s%s" % (ncbi_identifier, ext) in path_contents:
                ncbi_file_name = "%s%s%s" % (NCBI_DOWNLOAD_PATH, ncbi_identifier, ext)
                break

        if not ncbi_file_name:
            raise Exception('Unable to determine filename for NCBI database for %s: %s' % (ncbi_identifier, path_contents))

        tmp_dir = tempfile.mkdtemp(prefix='tmp-data-manager-ncbi-')
        try:
            ncbi_fasta_filename = os.path.join(tmp_dir, "%s%s" % (ncbi_identifier, ext))

            tmp_extract_dir = os.path.join(tmp_dir, 'extracted_fasta')
            os.mkdir(tmp_extract_dir)

            with open(ncbi_fasta_filename, 'wb+') as tmp_fasta:
                ftp.retrbinary('RETR %s' % ncbi_file_name, tmp_fasta.write)

                # Rewind so the reader helpers see the archive from its start.
                tmp_fasta.flush()
                tmp_fasta.seek(0)

                fasta_readers = get_stream_reader(tmp_fasta, tmp_extract_dir)
                try:
                    data_table_entry = _stream_fasta_to_file(fasta_readers, target_directory, database_id, database_name, params)
                    _add_data_table_entry(data_manager_dict, data_table_entry)
                finally:
                    for fasta_reader in fasta_readers:
                        fasta_reader.close()
        finally:
            cleanup_before_exit(tmp_dir)
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
107
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
108
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def download_from_url(data_manager_dict, params, target_directory, database_id, database_name):
    """Stream FASTA data from user-supplied URLs and register the result.

    ``params['reference_source']['user_url']`` holds newline-separated URLs;
    surrounding whitespace is stripped and blank lines are ignored. Each
    URL is opened with ``urllib.request.urlopen`` and the list of responses
    is passed to ``_stream_fasta_to_file``. The data table entry it returns
    is added to ``data_manager_dict``.
    """
    # TODO: we should automatically do decompression here
    raw_lines = params['reference_source']['user_url'].split('\n')
    urls = [line.strip() for line in raw_lines if line.strip()]

    fasta_reader = []
    for url in urls:
        fasta_reader.append(urllib.request.urlopen(url))

    entry = _stream_fasta_to_file(fasta_reader, target_directory, database_id, database_name, params)
    _add_data_table_entry(data_manager_dict, entry)
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
116
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
117
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def download_from_history(data_manager_dict, params, target_directory, database_id, database_name):
    """Build the database from FASTA file(s) named in ``params``.

    ``params['reference_source']['input_fasta']`` is either one path or a
    list of paths. A single path yields one binary file object; a list
    yields a list of binary file objects. Either form is passed to
    ``_stream_fasta_to_file`` and the data table entry it returns is added
    to ``data_manager_dict``.
    """
    # TODO: allow multiple FASTA input files
    input_filename = params['reference_source']['input_fasta']
    if not isinstance(input_filename, list):
        fasta_reader = open(input_filename, 'rb')
    else:
        fasta_reader = [open(filename, 'rb') for filename in input_filename]

    entry = _stream_fasta_to_file(fasta_reader, target_directory, database_id, database_name, params)
    _add_data_table_entry(data_manager_dict, entry)
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
128
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
129
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def copy_from_directory(data_manager_dict, params, target_directory, database_id, database_name):
    """Register FASTA file(s) from a server directory by symlink or by copy.

    ``params['reference_source']['fasta_filename']`` is one path or a list
    of paths. When ``params['reference_source']['create_symlink']`` equals
    ``'create_symlink'``, the input is handed to ``_create_symlink``.
    Otherwise the file(s) are opened and streamed through
    ``_stream_fasta_to_file``. In both cases the resulting data table entry
    is added to ``data_manager_dict``.
    """
    input_filename = params['reference_source']['fasta_filename']
    create_symlink = params['reference_source']['create_symlink'] == 'create_symlink'
    if create_symlink:
        data_table_entry = _create_symlink(input_filename, target_directory, database_id, database_name)
    else:
        if isinstance(input_filename, list):
            fasta_reader = [open(filename, 'rb') for filename in input_filename]
        else:
            # Binary mode, matching the list branch above and the binary
            # writer used downstream; text mode would produce str chunks.
            fasta_reader = open(input_filename, 'rb')
        data_table_entry = _stream_fasta_to_file(fasta_reader, target_directory, database_id, database_name, params)
    _add_data_table_entry(data_manager_dict, data_table_entry)
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
142
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
143
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def _add_data_table_entry(data_manager_dict, data_table_entry):
    """Append ``data_table_entry`` to the ``diamond_database`` data table.

    The ``'data_tables'`` mapping and its ``'diamond_database'`` list are
    created inside ``data_manager_dict`` when missing. The dictionary is
    modified in place and also returned.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault('diamond_database', []).append(data_table_entry)
    return data_manager_dict
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
149
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
150
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
def _taxonomy_args(tax_cond):
    """Return the ``diamond makedb`` taxonomy options selected by *tax_cond*."""
    tax_select = tax_cond['tax_select']
    if tax_select == "history":
        args = []
        for option in ("taxonmap", "taxonnodes", "taxonnames"):
            args.extend(['--' + option, tax_cond[option]])
        return args
    if tax_select != "ncbi":
        # Any other selection builds the database without taxonomy data.
        return []

    ncbi_tax = tax_cond['ncbi_tax']
    # Probed in this order; the first file that exists is used.
    candidates = ('prot.accession2taxid.FULL.gz',
                  'prot.accession2taxid.FULL',
                  'prot.accession2taxid.gz',
                  'prot.accession2taxid')
    for candidate in candidates:
        taxonmap = os.path.join(ncbi_tax, candidate)
        if os.path.isfile(taxonmap):
            break
    else:
        raise Exception('Unable to find prot.accession2taxid file in %s' % (ncbi_tax))

    return ['--taxonmap', taxonmap,
            '--taxonnodes', os.path.join(ncbi_tax, 'nodes.dmp'),
            '--taxonnames', os.path.join(ncbi_tax, 'names.dmp')]


def _concatenate_streams(streams, fasta_writer, close_stream):
    """Copy every stream into *fasta_writer*, newline-separating the inputs."""
    last_char = None
    for fh in streams:
        try:
            # The writer is binary, so chunks are bytes: compare and write
            # bytes. ``data[-1]`` would be an int on Python 3, hence the
            # one-byte slice below.
            if last_char not in (None, b'\n', b'\r'):
                fasta_writer.write(b'\n')
            while True:
                data = fh.read(CHUNK_SIZE)
                if not data:
                    break
                fasta_writer.write(data)
                last_char = data[-1:]
        finally:
            if close_stream:
                fh.close()


def _stream_fasta_to_file(fasta_stream, target_directory, database_id,
                          database_name, params, close_stream=True):
    """Concatenate FASTA stream(s) and build a DIAMOND database from them.

    The streams are copied into a temporary FASTA file which is handed to
    ``diamond makedb``; the temporary file is removed afterwards, whether or
    not the build succeeded.

    :param fasta_stream: a readable object, or a list of them. The data is
        written to a file opened in binary mode, so ``read()`` must return
        bytes.
    :param target_directory: directory the database is written to; also used
        as working directory of the ``diamond`` process.
    :param database_id: identifier used for the database base name
        (``<database_id>.fa``) and as the ``value`` of the returned entry.
    :param database_name: used as the ``name`` of the returned entry.
    :param params: mapping whose ``tax_cond`` entry selects the taxonomy
        options passed to ``diamond makedb``.
    :param close_stream: close each input stream once it has been consumed.
    :returns: dict with the keys ``value``, ``name`` and ``db_path``.
    :raises Exception: if NCBI taxonomy was selected but no
        ``prot.accession2taxid`` file exists in the given directory.

    If ``diamond`` exits with a non-zero status, its stderr output is echoed
    to ``sys.stderr`` and the process exits with that status.
    """
    fasta_base_filename = "%s.fa" % database_id
    fasta_filename = os.path.join(target_directory, fasta_base_filename)

    if not isinstance(fasta_stream, list):
        fasta_stream = [fasta_stream]

    # delete=False: the file must outlive the writer so diamond can read it.
    temp_fasta = tempfile.NamedTemporaryFile(delete=False, suffix=".fasta")
    try:
        with temp_fasta:
            _concatenate_streams(fasta_stream, temp_fasta, close_stream)

        args = ['diamond', 'makedb',
                '--in', temp_fasta.name,
                '--db', fasta_filename]
        args.extend(_taxonomy_args(params['tax_cond']))

        with tempfile.NamedTemporaryFile(prefix="tmp-data-manager-diamond-database-builder-stderr") as tmp_stderr:
            proc = subprocess.Popen(args=args, shell=False, cwd=target_directory,
                                    stderr=tmp_stderr.fileno())
            return_code = proc.wait()
            if return_code:
                tmp_stderr.flush()
                tmp_stderr.seek(0)
                print("Error building diamond database:", file=sys.stderr)
                while True:
                    chunk = tmp_stderr.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    # errors='replace': a decoding problem must not hide the
                    # real failure reported by diamond.
                    sys.stderr.write(chunk.decode('utf-8', errors='replace'))
                sys.exit(return_code)
    finally:
        # Runs on success, on exceptions and on sys.exit() alike.
        os.remove(temp_fasta.name)

    return dict(value=database_id, name=database_name,
                db_path="%s.dmnd" % fasta_base_filename)
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
224
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
225
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
def _create_symlink(input_filename, target_directory, database_id, database_name):
    """Symlink *input_filename* into *target_directory* as ``<database_id>.fa``.

    :returns: dict with the keys ``value`` (the database id), ``name`` (the
        database name) and ``db_path`` (base name of the created link).
    """
    link_basename = "%s.fa" % database_id
    os.symlink(input_filename, os.path.join(target_directory, link_basename))
    return {
        'value': database_id,
        'name': database_name,
        'db_path': link_basename,
    }
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
231
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
232
2
5558f74bd296 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_diamond_database_builder commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
iuc
parents: 1
diff changeset
# Dispatch table keyed by the selected reference source; each value is the
# function that is called to handle that source.
REFERENCE_SOURCE_TO_DOWNLOAD = {
    'ncbi': download_from_ncbi,
    'url': download_from_url,
    'history': download_from_history,
    'directory': copy_from_directory,
}
1
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
237
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
238
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
def main():
    """Command-line entry point: build the database described by a JSON file.

    Positional arguments:

    1. path to the JSON parameter file; it is read first and overwritten at
       the end with the resulting data table information;
    2. NCBI taxonomy directory, required only when the parameters select
       ``ncbi`` as taxonomy source.

    Missing arguments are reported through the option parser (usage message
    and exit status 2).
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-d', '--dbkey_description', dest='dbkey_description',
                      action='store', type="string", default=None,
                      help='dbkey_description')
    # The option is still accepted on the command line, but its value is not used.
    (_options, args) = parser.parse_args()

    if not args:
        parser.error('missing path to the JSON parameter file')
    filename = args[0]

    with open(filename) as fp:
        params = json.load(fp)
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an existing directory and create missing parents.
    os.makedirs(target_directory, exist_ok=True)
    data_manager_dict = {}

    param_dict = params['param_dict']
    database_id = param_dict['database_id']
    database_name = param_dict['database_name']
    if param_dict['tax_cond']['tax_select'] == "ncbi":
        if len(args) < 2:
            parser.error('missing NCBI taxonomy directory (second argument)')
        param_dict['tax_cond']['ncbi_tax'] = args[1]

    # Fetch the FASTA
    source = param_dict['reference_source']['reference_source_selector']
    REFERENCE_SOURCE_TO_DOWNLOAD[source](data_manager_dict, param_dict,
                                         target_directory, database_id,
                                         database_name)

    # save info to json file
    with open(filename, 'w') as fp:
        json.dump(data_manager_dict, fp, sort_keys=True)
5a0d0bee4f8d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_diamond_database_builder commit b2d290a8b609ebbc7f4b93716370143c41062ad4"
bgruening
parents: 0
diff changeset
266
0
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
267
ce62d0912b10 Imported from capsule None
bgruening
parents:
diff changeset
# Run the data manager only when executed as a script, not when imported.
if __name__ == '__main__':
    main()