annotate data_manager/data_manager_fetch_and_index_maf.py @ 0:aed50ca398bb draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
author iuc
date Thu, 25 Jun 2020 14:02:25 -0400
parents
children 3483c363dc6b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
2 import bz2
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
3 import ftplib
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
4 import gzip
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
5 import json
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
6 import optparse
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
7 import os
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
8 import re
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
9 import shutil
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
10 import subprocess
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
11 import sys
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
12 import tempfile
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
13 import urllib.parse
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
14 import urllib.request
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
15 import zipfile
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
16 from binascii import hexlify
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
17
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
# 2**20 bytes, i.e. 1 MiB.
CHUNK_SIZE = 1 << 20

# Data table name used by default (going by the constant's name).
DEFAULT_DATA_TABLE_NAME = "indexed_maf_files"
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
21
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
22 # Nice solution to opening compressed files (zip/bz2/gz) transparently
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
23 # https://stackoverflow.com/a/13045892/638445
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
24
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
25
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
class CompressedFile:
    """Base wrapper pairing a compressed-format signature with an opener.

    Subclasses set ``magic`` (the leading signature bytes), ``file_type``
    and ``mime_type``, and override :meth:`open` to return a reader for
    ``self.f``.
    """

    # Leading bytes that identify the format; None means "no signature".
    magic = None
    file_type = None
    mime_type = None
    proper_extension = None

    def __init__(self, f):
        """Store *f* and eagerly build the reader through :meth:`open`.

        :param f: an open file or file-like object, or any other value
            the subclass opener accepts
        """
        self.f = f
        self.accessor = self.open()

    @classmethod
    def is_magic(cls, data):
        """Tell whether *data* begins with this class's signature.

        :param data: leading bytes of the candidate file
        :type data: bytes-like
        :returns: True on a signature match; False otherwise, including
            when the class defines no signature at all
        :rtype: bool
        """
        if cls.magic is None:
            # Nothing to compare against, so nothing can match.
            return False
        return bytes(data).startswith(cls.magic)

    def open(self):
        """Return the reader for ``self.f``; the base class provides none."""
        return None
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
43
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
44
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
class ZIPFile(CompressedFile):
    """ZIP archive whose accessor streams its first file member."""

    magic = b'\x50\x4b\x03\x04'
    file_type = 'zip'
    mime_type = 'compressed/zip'

    def open(self):
        """Return a readable binary stream for the first file in the archive.

        ``zipfile.ZipFile`` is an archive handle rather than a byte
        stream: its ``read`` expects a member name. Handing it to
        ``shutil.copyfileobj`` (which is what ``url_download`` does with
        ``accessor``) therefore fails, so a member stream is returned
        instead, in line with the bz2 and gzip wrappers.

        :returns: a ``zipfile.ZipExtFile`` for the first non-directory
            member
        :raises zipfile.BadZipFile: if the archive holds no file members
        """
        # Keep the archive referenced for as long as this wrapper lives.
        self.archive = zipfile.ZipFile(self.f)
        members = [info for info in self.archive.infolist()
                   if not info.is_dir()]
        if not members:
            self.archive.close()
            raise zipfile.BadZipFile('ZIP archive contains no file members')
        return self.archive.open(members[0])
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
52
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
53
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
class BZ2File(CompressedFile):
    """bzip2 payload, recognised by its three-byte signature."""

    file_type = 'bz2'
    mime_type = 'compressed/bz2'
    magic = b'\x42\x5a\x68'

    def open(self):
        """Return a ``bz2.BZ2File`` reader over ``self.f``."""
        reader = bz2.BZ2File(filename=self.f)
        return reader
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
61
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
62
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
class GZFile(CompressedFile):
    """gzip payload, recognised by its three-byte signature."""

    file_type = 'gz'
    mime_type = 'compressed/gz'
    magic = b'\x1f\x8b\x08'

    def open(self):
        """Return a ``gzip.GzipFile`` reader over ``self.f``."""
        reader = gzip.GzipFile(filename=self.f)
        return reader
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
70
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
71
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
72 # Factory function to create a suitable instance for accessing files
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
def get_compressed_file(filename):
    """Build the wrapper matching the compression format of *filename*.

    The first 16 bytes of the file are compared against the signatures of
    ZIPFile, BZ2File and GZFile, in that order.

    :param filename: path of the file to inspect
    :returns: an instance of the first matching wrapper class, built from
        *filename*, or None when no signature matches
    """
    with open(filename, 'rb') as handle:
        header = handle.read(16)

    # The probe handle is closed before the wrapper re-opens the path.
    candidates = (kind for kind in (ZIPFile, BZ2File, GZFile)
                  if kind.is_magic(header))
    wrapper_cls = next(candidates, None)
    if wrapper_cls is None:
        return None
    return wrapper_cls(filename)
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
83
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
84
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
def url_download(url, tmp=False, localpath=None):
    """Download *url* and transparently decompress zip/bz2/gz payloads.

    :param url: full url to file
    :type url: str
    :param tmp: when true, save into a fresh ``tempfile.mkdtemp()``
        directory (takes precedence over *localpath*)
    :param localpath: directory to save into when *tmp* is false; when
        None the file name is used as given, relative to the working
        directory
    :returns: name of the downloaded file, or of the decompressed file
        when the payload was compressed. If an IOError occurs it is
        reported on stderr, the download is removed if it exists, and the
        name is still returned even though the file is gone.
    """
    # Generate file_name from the last path segment of the URL.
    file_name = url.split('/')[-1]
    if tmp:
        file_name = os.path.join(tempfile.mkdtemp(), file_name)
    elif localpath is not None:
        file_name = os.path.join(localpath, file_name)

    try:
        # download URL (FTP and HTTP work, probably local and data too)
        urllib.request.urlretrieve(url, file_name)

        # uncompress file if needed
        cf = get_compressed_file(file_name)
        if cf is not None:
            uncompressed_file_name = os.path.splitext(file_name)[0]
            # A payload without an extension would decompress onto itself
            # and be truncated while still being read; stage it elsewhere.
            staging_name = uncompressed_file_name
            if staging_name == file_name:
                staging_name = file_name + '.uncompressed'
            try:
                with open(staging_name, 'wb') as uncompressed_file:
                    shutil.copyfileobj(cf.accessor, uncompressed_file)
            finally:
                # Release the reader even when the copy fails.
                cf.accessor.close()
            os.remove(file_name)
            if staging_name != uncompressed_file_name:
                os.replace(staging_name, uncompressed_file_name)
            file_name = uncompressed_file_name
    except IOError as e:
        sys.stderr.write(
            'Error occurred downloading reference file: %s\n' % e)
        # The download may never have been created; do not let cleanup
        # raise and mask the failure that was just reported.
        if os.path.exists(file_name):
            os.remove(file_name)
    return file_name
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
116
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
117
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
def generate_metadata(params, options):
    """Derive the display name, unique id and species list for a MAF set.

    :param params: not used by this function; kept so the signature
        stays compatible with existing callers
    :param options: object exposing ``name`` (display name) and ``nexus``
        (URL of the species listing to download)
    :returns: tuple ``(name, uid, species)`` where ``uid`` is ``name``
        with each run of non-word characters collapsed to ``_``, edge
        underscores stripped, then upper-cased, and ``species`` holds one
        entry per line of the downloaded listing
    """
    name = options.name
    # Found to be the fastest way to strip non-alphanumeric characters
    # from a string in some post on StackOverflow
    uid = re.compile(r'[\W]+').sub('_', name).strip('_').upper()

    listing_path = url_download(options.nexus, True)
    try:
        with open(listing_path, 'r') as fh:
            # NOTE(review): strip(' (),') does not remove the trailing
            # newline, so a line without ':' keeps its '\n' and a blank
            # line yields a '\n' entry -- confirm this is acceptable.
            species = [line.strip(' (),').split(':')[0] for line in fh]
    finally:
        # Best-effort removal of the download and of the temporary
        # directory url_download created for it. os.rmdir only removes an
        # empty directory, so nothing else can be deleted by accident.
        for cleanup, target in ((os.remove, listing_path),
                                (os.rmdir, os.path.dirname(listing_path))):
            try:
                cleanup(target)
            except OSError:
                pass
    return name, uid, species
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
130
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
131
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
def get_maf_listing(maf_path):
    """Return the URLs of the compressed MAF files listed under *maf_path*.

    The host part of *maf_path* is contacted over FTP with an anonymous
    login and the directory given by its path component is listed with
    MLSD.  An entry is kept only when all of the following hold:

    * it is a regular file (``type`` fact equal to ``'file'``);
    * its last extension is one of ``gz``, ``bz2`` or ``zip``;
    * its name does not start with ``chrUn``;
    * its name does not end with ``_alt.maf.<ext>`` or
      ``_random.maf.<ext>`` for any of the extensions above.

    :param maf_path: FTP URL of the directory holding the MAF files.
    :returns: list of URLs, each built by joining *maf_path* with the
        name of a kept entry, in the order the server listed them.
    :raises: whatever ``ftplib`` raises on connection, login or listing
        failure; the connection is closed before the error propagates.
    """
    compressions = ('gz', 'bz2', 'zip')
    # str.endswith accepts a tuple, so build every excluded suffix once
    # instead of looping over the combinations for each directory entry.
    excluded_suffixes = tuple(
        '%s.maf.%s' % (exclusion, compression)
        for compression in compressions
        for exclusion in ('_alt', '_random')
    )
    maf_url = urllib.parse.urlparse(maf_path)
    maf_files = []
    ftp = ftplib.FTP()
    try:
        ftp.connect(maf_url.netloc)
        ftp.login()
        # mlsd() is lazy: it must be consumed while the connection is open.
        for name, facts in ftp.mlsd(maf_url.path):
            # .get(): an entry without a 'type' fact is skipped, not fatal.
            if facts.get('type') != 'file':
                continue
            if os.path.splitext(name)[-1].lstrip('.') not in compressions:
                continue
            if name.startswith('chrUn') or name.endswith(excluded_suffixes):
                continue
            maf_files.append(urllib.parse.urljoin(maf_path, name))
    finally:
        # Release the socket even when connect/login/listing fails.
        ftp.close()
    return maf_files
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
158
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
159
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
def index_maf_files(maf_files, maf_path, options, params, target_directory):
    """Download each MAF file and build an index next to it.

    For every entry of *maf_files* the URL is resolved against
    *maf_path*, handed to ``url_download`` (defined elsewhere in this
    file) with ``localpath=target_directory``, and the value it returns
    is used as the local file path passed to ``maf_build_index.py``.
    The index is written to ``<local file>.index``.

    :param maf_files: iterable of MAF file URLs (absolute, or relative
        to *maf_path*).
    :param maf_path: base URL used to resolve the entries of *maf_files*.
    :param options: unused here; kept for interface compatibility.
    :param params: unused here; kept for interface compatibility.
    :param target_directory: forwarded to ``url_download`` as ``localpath``.
    :raises subprocess.CalledProcessError: if ``maf_build_index.py``
        exits with a non-zero status.
    """
    for maf_file in maf_files:
        maf_url = urllib.parse.urljoin(maf_path, maf_file)
        local_maf = url_download(maf_url, localpath=target_directory)
        index_command = ['maf_build_index.py', local_maf, local_maf + '.index']
        # check_call instead of Popen/communicate: a failed indexer run must
        # stop the job rather than leave a MAF registered without its index.
        subprocess.check_call(index_command)
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
167
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
168
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
def main():
    """Command-line entry point: fetch, index and register a MAF set.

    Reads the JSON document at ``--output`` to find the extra-files
    directory, gathers the metadata and the MAF listing derived from the
    ``--nexus`` URL, downloads and indexes the MAF files into that
    directory, and finally overwrites ``--output`` with the
    ``indexed_maf_files`` data table entry.
    """
    cli = optparse.OptionParser()
    option_specs = (
        ('-x', '--nexus', 'nexus', 'URL for .nh'),
        ('-a', '--alignments', 'alignments', 'URL for alignments'),
        ('-n', '--name', 'name', 'Name'),
        ('-o', '--output', 'output', 'Output'),
        ('-d', '--dbkey', 'dbkey', 'dbkey'),
    )
    for short_flag, long_flag, dest, help_text in option_specs:
        cli.add_option(short_flag, long_flag, dest=dest, action='store', type='string', help=help_text)
    options, _ = cli.parse_args()

    # The output file initially carries the job parameters as JSON.
    with open(options.output) as params_handle:
        params = json.load(params_handle)
    target_directory = params['output_data'][0]['extra_files_path']
    os.makedirs(target_directory, exist_ok=True)

    display_name, uid, species_list = generate_metadata(params, options)
    maf_path = urllib.parse.urljoin(options.nexus, 'maf/')
    maf_files = get_maf_listing(maf_path)

    species_csv = ','.join(species_list)
    maf_basenames = [maf_file.split('/')[-1] for maf_file in maf_files]
    table_row = {
        'name': display_name,
        'dbkey': options.dbkey,  # This is needed for the output path
        'value': uid,
        'indexed_for': species_csv,
        'exists_in_maf': species_csv,
        'path': ','.join(maf_basenames),
    }
    data_manager_entry = {'data_tables': {'indexed_maf_files': table_row}}

    # Fetch and index the MAFs
    index_maf_files(maf_files, maf_path, options, params, target_directory)
    with open(options.output, 'w') as out_handle:
        json.dump(data_manager_entry, out_handle)
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
206
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
207
aed50ca398bb "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 9c8e61ce03ebd67c2c852f5db4a62d19200c77fe"
iuc
parents:
diff changeset
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()