Mercurial > repos > dave > data_manager_fetch_index_maf
annotate data_manager/data_manager_fetch_and_index_maf.py @ 0:de73b258a601 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
author | dave |
---|---|
date | Wed, 15 Jul 2020 14:30:00 -0400 |
parents | |
children | edf39ed96bc3 |
rev | line source |
---|---|
0
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
1 #!/usr/bin/env python |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
2 import bz2 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
3 import ftplib |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
4 import gzip |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
5 import json |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
6 import optparse |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
7 import os |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
8 import re |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
9 import shutil |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
10 import subprocess |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
11 import sys |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
12 import tempfile |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
13 import urllib.parse |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
14 import urllib.request |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
15 import zipfile |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
16 from binascii import hexlify |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
17 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
# 2**20 bytes = 1 MiB. Its use is not visible in this part of the file;
# presumably the buffer size for chunked reads/writes -- confirm at call sites.
CHUNK_SIZE = 2**20

# Default data table identifier (presumably the Galaxy tool data table this
# data manager populates -- confirm against the data manager configuration).
DEFAULT_DATA_TABLE_NAME = "indexed_maf_files"
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
21 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
22 # Nice solution to opening compressed files (zip/bz2/gz) transparently |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
23 # https://stackoverflow.com/a/13045892/638445 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
24 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
25 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
class CompressedFile(object):
    """Base class describing one compressed-file format.

    Subclasses set ``magic`` to the leading signature bytes of their format
    and override :meth:`open` to return an object that reads the compressed
    data. The base class itself matches nothing and opens nothing.
    """

    # Leading signature bytes identifying the format; None on the base class.
    magic = None
    file_type = None
    mime_type = None
    proper_extension = None

    def __init__(self, f):
        """Store ``f`` and eagerly open it.

        :param f: the source handed to :meth:`open` through ``self.f``; what
            is acceptable (path or file-like object) depends on the
            subclass's ``open`` implementation.
        """
        self.f = f
        self.accessor = self.open()

    @classmethod
    def is_magic(cls, data):
        """Tell whether ``data`` begins with this class's magic bytes.

        :param data: the first bytes of a file (bytes-like object)
        :returns: True if ``data`` starts with ``cls.magic``; always False
            when the class defines no magic (as the base class does)
        """
        if cls.magic is None:
            # Nothing to compare against: report "no match" instead of
            # failing with a TypeError.
            return False
        return bytes(data).startswith(cls.magic)

    def open(self):
        """Return the accessor for ``self.f``; the base class has none."""
        return None
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
43 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
44 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
class ZIPFile(CompressedFile):
    """ZIP flavour of CompressedFile."""

    magic = b'\x50\x4b\x03\x04'  # the bytes "PK\x03\x04"
    file_type = 'zip'
    mime_type = 'compressed/zip'

    def open(self):
        """Return a ``zipfile.ZipFile`` opened on ``self.f``.

        Note that a ``ZipFile`` is an archive handle rather than a byte
        stream: its ``read`` takes a member name, so individual members must
        be opened before their contents can be copied.
        """
        return zipfile.ZipFile(self.f)
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
52 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
53 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
class BZ2File(CompressedFile):
    """bzip2 flavour of CompressedFile."""

    magic = b'\x42\x5a\x68'  # the bytes "BZh"
    file_type = 'bz2'
    mime_type = 'compressed/bz2'

    def open(self):
        """Return a ``bz2.BZ2File`` reading decompressed bytes from ``self.f``."""
        return bz2.BZ2File(self.f)
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
61 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
62 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
class GZFile(CompressedFile):
    """gzip flavour of CompressedFile."""

    magic = b'\x1f\x8b\x08'
    file_type = 'gz'
    mime_type = 'compressed/gz'

    def open(self):
        """Return a ``gzip.GzipFile`` reading decompressed bytes from ``self.f``.

        ``self.f`` is passed as ``GzipFile``'s first positional argument,
        which is the file name, so it must be a path rather than a file object.
        """
        return gzip.GzipFile(self.f)
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
70 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
71 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
# Factory function to create a suitable instance for accessing files
def get_compressed_file(filename):
    """Return a CompressedFile subclass instance matching ``filename``.

    The first 16 bytes of the file are compared against the magic bytes of
    each known format (zip, bz2, gz, in that order).

    :param filename: path of the file to inspect
    :type filename: str.
    :returns: an instance of the first matching class, constructed from
        ``filename``, or None when no format matches
    """
    # Only the leading bytes are needed; the probe handle is released before
    # the matching class opens the file by name itself.
    with open(filename, 'rb') as probe:
        start_of_file = probe.read(16)

    for cls in (ZIPFile, BZ2File, GZFile):
        if cls.is_magic(start_of_file):
            return cls(filename)

    return None
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
83 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
84 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
def url_download(url, tmp=False, localpath=None):
    """Attempt to download file from a given url, decompressing it if needed.

    The local file is named after the last ``/``-separated component of
    ``url``. If the downloaded file is recognised as zip/bz2/gz, it is
    decompressed next to the download, the compressed file is deleted and
    the decompressed file's name is returned instead.

    :param url: full url to file
    :type url: str.
    :param tmp: when true, download into a freshly created temporary
        directory (which is not removed by this function)
    :param localpath: directory to download into when ``tmp`` is false;
        with neither set, the bare file name (current directory) is used
    :returns: name of downloaded (and possibly decompressed) file. If an
        IOError occurred, it is reported on stderr, leftover files are
        removed and the returned name refers to a file that does not exist.
    """

    # Generate file_name
    file_name = url.split('/')[-1]
    if tmp:
        file_name = os.path.join(tempfile.mkdtemp(), file_name)
    elif localpath is not None:
        file_name = os.path.join(localpath, file_name)

    uncompressed_file_name = None
    try:
        # download URL (FTP and HTTP work, probably local and data too)
        urllib.request.urlretrieve(url, file_name)

        # uncompress file if needed
        cf = get_compressed_file(file_name)
        if cf is not None:
            uncompressed_file_name = os.path.splitext(file_name)[0]
            if uncompressed_file_name == file_name:
                # No extension to drop: writing to the same path would
                # truncate the compressed input while it is being read.
                uncompressed_file_name = file_name + '.uncompressed'
            _copy_decompressed(cf.accessor, uncompressed_file_name)
            os.remove(file_name)
            file_name = uncompressed_file_name
    except IOError as e:
        sys.stderr.write('Error occured downloading reference file: %s' % e)
        # The download may have failed before anything was written, so only
        # remove what actually exists (including a partial decompression).
        for leftover in (file_name, uncompressed_file_name):
            if leftover is not None and os.path.isfile(leftover):
                os.remove(leftover)
    return file_name


def _copy_decompressed(accessor, destination):
    """Write the decompressed data behind ``accessor`` to ``destination``.

    ``accessor`` is closed before returning, whether or not the copy succeeds.
    """
    if isinstance(accessor, zipfile.ZipFile):
        # A ZipFile is an archive, not a stream: copy its first regular
        # member (directory entries end with '/').
        members = [m for m in accessor.namelist() if not m.endswith('/')]
        if not members:
            accessor.close()
            raise IOError('zip archive contains no files')
        stream = accessor.open(members[0])
    else:
        stream = accessor
    try:
        with open(destination, 'wb') as uncompressed_file:
            shutil.copyfileobj(stream, uncompressed_file)
    finally:
        stream.close()
        accessor.close()
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
116 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
117 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
def generate_metadata(params, options):
    """Derive the display name, unique id and species list for an entry.

    :param params: not used by this function
    :param options: object providing ``name`` (display name) and ``nexus``
        (URL of the tree file listing the species)
    :returns: tuple ``(name, uid, species)`` where ``uid`` is ``name`` with
        runs of non-word characters replaced by ``_``, outer underscores
        removed and upper-cased, and ``species`` is the list of labels found
        in the downloaded tree file
    """
    name = options.name
    # Found to be the fastest way to strip non-alphanumeric characters
    # from a string in some post on StackOverflow
    pattern = re.compile(r'[\W]+')
    uid = pattern.sub('_', name).strip('_')

    tree_path = url_download(options.nexus, True)
    species = []
    try:
        with open(tree_path, 'r') as fh:
            for line in fh:
                # Each line carries at most one "label:branch_length" pair.
                # Surrounding whitespace is dropped first so the trailing
                # newline cannot shield tree punctuation from being stripped.
                label = line.strip().strip(' (),;').split(':')[0].strip()
                if label:
                    # Lines holding only punctuation or an unlabeled internal
                    # node have no species name.
                    species.append(label)
    finally:
        # url_download(..., True) placed the file in its own temporary
        # directory; nothing else refers to it once the tree has been read.
        shutil.rmtree(os.path.dirname(tree_path), ignore_errors=True)
    return name, uid.upper(), species
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
130 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
131 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
def get_maf_listing(maf_path):
    """List the URLs of the compressed MAF files in an FTP directory.

    Connects to the network location of ``maf_path``, logs in with the
    ``ftplib`` defaults, and walks the MLSD listing of the URL's path. An
    entry is kept only when all of the following hold:

    * its ``type`` fact is ``file``;
    * its last extension is ``gz``, ``bz2`` or ``zip``;
    * its name does not end in ``_alt.maf.<ext>`` or ``_random.maf.<ext>``;
    * its name does not start with ``chrUn``.

    Returns a list of URLs, each built by joining a kept name onto
    ``maf_path``. The connection is closed before returning, including when
    connecting, logging in or listing raises.
    """
    compressions = ('gz', 'bz2', 'zip')
    # Every name ending that marks a MAF as excluded, for one endswith call.
    excluded_suffixes = tuple(
        '%s.maf.%s' % (exclusion, compression)
        for compression in compressions
        for exclusion in ('_alt', '_random')
    )
    maf_url = urllib.parse.urlparse(maf_path)
    maf_files = []
    f = ftplib.FTP()
    try:
        f.connect(maf_url.netloc)
        f.login()
        for name, facts in f.mlsd(maf_url.path):
            if facts['type'] != 'file':
                continue
            if os.path.splitext(name)[-1].lstrip('.') not in compressions:
                continue
            if name.endswith(excluded_suffixes) or name.startswith('chrUn'):
                continue
            maf_files.append(urllib.parse.urljoin(maf_path, name))
    finally:
        # Always release the connection; previously an error anywhere above
        # skipped the close and leaked the socket.
        f.close()
    return maf_files
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
158 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
159 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
def index_maf_files(maf_files, maf_path, options, params, target_directory):
    """Download each MAF file and build an index file alongside it.

    For every entry of ``maf_files`` the URL is resolved against
    ``maf_path``, fetched with ``url_download`` into ``target_directory``,
    and ``maf_build_index.py`` is run to write ``<local file>.index``.

    ``options`` and ``params`` are accepted for interface compatibility and
    are not used.

    Raises:
        subprocess.CalledProcessError: if ``maf_build_index.py`` exits with
            a non-zero status, so a failed index build is not silently
            treated as success.
    """
    for maf_file in maf_files:
        maf_url = urllib.parse.urljoin(maf_path, maf_file)
        local_maf = url_download(maf_url, localpath=target_directory)
        index_command = ['maf_build_index.py', local_maf, local_maf + '.index']
        # check_call waits for the indexer and raises on a non-zero exit;
        # the child inherits stdout/stderr exactly as before.
        subprocess.check_call(index_command)
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
167 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
168 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
def main():
    """Command-line entry point: fetch, index and register a MAF set.

    Parses the command-line options, loads the JSON found at
    ``options.output``, and creates the ``extra_files_path`` directory of
    its first ``output_data`` entry. It then gathers the metadata and the
    remote MAF listing, downloads and indexes the files into that
    directory, and finally overwrites ``options.output`` with the
    ``indexed_maf_files`` data table entry serialised as JSON.

    The ``--alignments`` option is parsed but not read here.
    """
    parser = optparse.OptionParser()
    # (short flag, long flag, destination attribute, help text)
    option_specs = (
        ('-x', '--nexus', 'nexus', 'URL for .nh'),
        ('-a', '--alignments', 'alignments', 'URL for alignments'),
        ('-n', '--name', 'name', 'Name'),
        ('-o', '--output', 'output', 'Output'),
        ('-d', '--dbkey', 'dbkey', 'dbkey'),
    )
    for short_flag, long_flag, dest, help_text in option_specs:
        parser.add_option(short_flag, long_flag, dest=dest, action='store', type='string', help=help_text)
    options, _ = parser.parse_args()

    # The output file initially holds the job parameters as JSON.
    with open(options.output) as fh:
        params = json.load(fh)
    target_directory = params['output_data'][0]['extra_files_path']
    os.makedirs(target_directory, exist_ok=True)

    display_name, uid, species_list = generate_metadata(params, options)
    maf_path = urllib.parse.urljoin(options.nexus, 'maf/')
    maf_files = get_maf_listing(maf_path)

    species_csv = ','.join(species_list)
    # Only the file names are recorded, not the full remote URLs.
    file_names = ','.join([maf_file.split('/')[-1] for maf_file in maf_files])
    table_row = {
        'name': display_name,
        'dbkey': options.dbkey,  # This is needed for the output path
        'value': uid,
        'indexed_for': species_csv,
        'exists_in_maf': species_csv,
        'path': file_names,
    }
    data_manager_entry = {'data_tables': {'indexed_maf_files': table_row}}

    # Fetch and index the MAFs
    index_maf_files(maf_files, maf_path, options, params, target_directory)
    with open(options.output, 'w') as fh:
        fh.write(json.dumps(data_manager_entry))
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
206 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
207 |
de73b258a601
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_index_maf commit 21852ee28cf191d12b1ffe5583efaa5deeb1d80d-dirty"
dave
parents:
diff
changeset
|
# Run the data manager only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()