diff data_manager/fetch_mothur_reference_data.py @ 4:0db22932bc39 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit f845716f6ac93500f143a30abef97eaba406344e"
author:   iuc
date:     Fri, 25 Jun 2021 09:37:05 +0000
parents:  9d09724f2bf1
children: (none)
--- a/data_manager/fetch_mothur_reference_data.py	Sun Nov 22 12:51:44 2020 +0000
+++ b/data_manager/fetch_mothur_reference_data.py	Fri Jun 25 09:37:05 2021 +0000
@@ -1,6 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Data manager for reference data for the 'mothur_toolsuite' Galaxy tools
+import io
 import json
 import optparse
 import os
@@ -8,7 +9,9 @@
 import sys
 import tarfile
 import tempfile
-import urllib2
+import urllib.error
+import urllib.parse
+import urllib.request
 import zipfile
 
 from functools import reduce
@@ -38,6 +41,14 @@
     },
     # RDP reference files
     # http://www.mothur.org/wiki/RDP_reference_files
+    "RDP_v18": {
+        "16S rRNA RDP training set 18":
+        [
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.rdp.tgz", ],
+        "16S rRNA PDS training set 18":
+        [
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.pds.tgz", ],
+    },
     "RDP_v16": {
         "16S rRNA RDP training set 16":
         ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.rdp.tgz", ],
@@ -76,6 +87,12 @@
     },
     # Silva reference files
     # http://www.mothur.org/wiki/Silva_reference_files
+    "silva_release_138.1": {
+        "SILVA release 138.1":
+        [
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v138_1.tgz",
+            "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v138_1.tgz", ],
+    },
     "silva_release_128": {
         "SILVA release 128":
         ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v128.tgz",
@@ -230,14 +247,19 @@
     Returns the name that the file is saved with.
 
     """
-    print("Downloading %s" % url)
+    print(f"Downloading {url}")
     if not target:
         target = os.path.basename(url)
     if wd:
         target = os.path.join(wd, target)
-    print("Saving to %s" % target)
+    print(f"Saving to {target}")
     with open(target, 'wb') as fh:
-        fh.write(urllib2.urlopen(url).read())
+        url_h = urllib.request.urlopen(url)
+        while True:
+            buffer = url_h.read(io.DEFAULT_BUFFER_SIZE)
+            if buffer == b"":
+                break
+            fh.write(buffer)
     return target
 
 
@@ -257,13 +279,13 @@
 
     """
     if not zipfile.is_zipfile(filen):
-        print("%s: not ZIP formatted file")
+        print(f"{filen}: not ZIP formatted file")
         return [filen]
     file_list = []
     with zipfile.ZipFile(filen) as z:
         for name in z.namelist():
             if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False):
-                print("Ignoring %s" % name)
+                print(f"Ignoring {name}")
                 continue
             if wd:
                 target = os.path.join(wd, name)
@@ -271,14 +293,14 @@
                 target = name
             if name.endswith('/'):
                 # Make directory
-                print("Creating dir %s" % target)
+                print(f"Creating dir {target}")
                 try:
                     os.makedirs(target)
                 except OSError:
                     pass
             else:
                 # Extract file
-                print("Extracting %s" % name)
+                print(f"Extracting {target}")
                 try:
                     os.makedirs(os.path.dirname(target))
                 except OSError:
@@ -286,7 +308,7 @@
                 with open(target, 'wb') as fh:
                     fh.write(z.read(name))
                 file_list.append(target)
-    print("Removing %s" % filen)
+    print(f"Removing {filen}")
     os.remove(filen)
     return file_list
 
@@ -309,23 +331,23 @@
     """
     file_list = []
     if not tarfile.is_tarfile(filen):
-        print("%s: not TAR file")
+        print(f"{filen}: not TAR file")
         return [filen]
     with tarfile.open(filen) as t:
         for name in t.getnames():
             # Check for unwanted files
             if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False):
-                print("Ignoring %s" % name)
+                print(f"Ignoring {name}")
                 continue
             # Extract file
-            print("Extracting %s" % name)
+            print(f"Extracting {name}")
             t.extract(name, wd)
             if wd:
                 target = os.path.join(wd, name)
             else:
                 target = name
             file_list.append(target)
-    print("Removing %s" % filen)
+    print(f"Removing {filen}")
     os.remove(filen)
     return file_list
 
@@ -343,9 +365,9 @@
     current working directory.
 
     """
-    print("Unpack %s" % filen)
+    print(f"Unpack {filen}")
     ext = os.path.splitext(filen)[1]
-    print("Extension: %s" % ext)
+    print(f"Extension: {ext}")
     if ext == ".zip":
         return unpack_zip_archive(filen, wd=wd)
     elif ext == ".tgz":
@@ -386,7 +408,7 @@
     try:
         return MOTHUR_FILE_TYPES[ext]
     except KeyError:
-        print("WARNING: unknown file type for " + filen + ", skipping")
+        print(f"WARNING: unknown file type for {filen}, skipping")
         return None
 
 
@@ -419,26 +441,27 @@
     """
     # Make working dir
    wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd())
-    print("Working dir %s" % wd)
+    print(f"Working dir {wd}")
     # Iterate over all requested reference data URLs
     for dataset in datasets:
-        print("Handling dataset '%s'" % dataset)
+        print(f"Handling dataset '{dataset}'")
         for name in MOTHUR_REFERENCE_DATA[dataset]:
             for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd):
                 type_ = identify_type(f)
-                entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name)
-                print("%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f)))
+                name_from_file = os.path.splitext(os.path.basename(f))[0]
+                entry_name = f"{name_from_file} ({name})"
+                print(f"{type_}\t\'{entry_name}'\t.../{os.path.basename(f)}")
                 if type_ is not None:
                     # Move to target dir
                     ref_data_file = os.path.basename(f)
                     f1 = os.path.join(target_dir, ref_data_file)
-                    print("Moving %s to %s" % (f, f1))
-                    os.rename(f, f1)
+                    print(f"Moving {f} to {f1}")
+                    shutil.move(f, f1)
                     # Add entry to data table
-                    table_name = "mothur_%s" % type_
+                    table_name = f"mothur_{type_}"
                     add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file))
     # Remove working dir
-    print("Removing %s" % wd)
+    print(f"Removing {wd}")
     shutil.rmtree(wd)
 
 
@@ -454,7 +477,7 @@
     files = []
     for path in paths:
         path = os.path.abspath(path)
-        print("Examining '%s'..." % path)
+        print(f"Examining '{path}'...")
         if os.path.isfile(path):
             # Store full path for file
             files.append(path)
@@ -493,21 +516,21 @@
     for f in files:
         type_ = identify_type(f)
         if type_ is None:
-            print("%s: unrecognised type, skipped" % f)
+            print(f"{f}: unrecognised type, skipped")
             continue
         ref_data_file = os.path.basename(f)
         target_file = os.path.join(target_dir, ref_data_file)
         entry_name = "%s" % os.path.splitext(ref_data_file)[0]
         if description:
             entry_name += " (%s)" % description
-        print("%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file))
+        print(f"{type_}\t\'{entry_name}'\t.../{ref_data_file}")
         # Link to or copy the data
         if link_to_data:
             os.symlink(f, target_file)
         else:
             shutil.copyfile(f, target_file)
         # Add entry to data table
-        table_name = "mothur_%s" % type_
+        table_name = f"mothur_{type_}"
         add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file))
 
 
@@ -522,8 +545,8 @@
     parser.add_option('--description', action='store', dest='description', default='')
     parser.add_option('--link', action='store_true', dest='link_to_data')
     options, args = parser.parse_args()
-    print("options: %s" % options)
-    print("args : %s" % args)
+    print(f"options: {options}")
+    print(f"args : {args}")
 
     # Check for JSON file
     if len(args) != 1:
@@ -536,7 +559,7 @@
     params, target_dir = read_input_json(jsonfile)
 
     # Make the target directory
-    print("Making %s" % target_dir)
+    print(f"Making {target_dir}")
     os.mkdir(target_dir)
 
     # Set up data tables dictionary
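
The most substantive change in this revision, beyond the Python 3 port, is in download_file(): the single urllib2.urlopen(url).read() call is replaced by a buffered read loop, so large reference archives (for example the SILVA sets) are streamed to disk in chunks instead of being held in memory in one piece. A minimal standalone sketch of that pattern is shown below for illustration only; the fetch_url() helper name and the __main__ example are assumptions, not part of the data manager itself.

#!/usr/bin/env python3
# Sketch: stream a remote file to disk in fixed-size chunks, the same
# pattern used by the updated download_file() loop in this changeset.
# fetch_url() is a hypothetical helper name used only for this example.
import io
import os
import urllib.request


def fetch_url(url, target=None):
    """Download 'url' to 'target' (defaults to the URL's basename)."""
    if not target:
        target = os.path.basename(url)
    with urllib.request.urlopen(url) as response, open(target, "wb") as fh:
        while True:
            buffer = response.read(io.DEFAULT_BUFFER_SIZE)
            if buffer == b"":
                # An empty bytes object signals end of stream
                break
            fh.write(buffer)
    return target


if __name__ == "__main__":
    # Example: fetch one of the reference archives listed in the diff above
    print(fetch_url("https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset18_062020.rdp.tgz"))

Reading io.DEFAULT_BUFFER_SIZE bytes at a time keeps peak memory usage constant regardless of archive size, at the cost of a few extra read() calls per download.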