Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/planemo/shed/__init__.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/planemo/shed/__init__.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,1428 @@ +"""Abstractions for shed related interactions used by the rest of planemo.""" +import contextlib +import copy +import fnmatch +import hashlib +import json +import os +import re +import shutil +import sys +import tarfile +from tempfile import ( + mkstemp, +) +from typing import NamedTuple + +import bioblend +import six +import yaml +from galaxy.util import ( + odict, + unicodify, +) + +from planemo import git +from planemo import glob +from planemo import templates +from planemo.bioblend import toolshed +from planemo.io import ( + can_write_to_path, + coalesce_return_codes, + error, + find_matching_directories, + info, + shell, + temp_directory, + warn, +) +from planemo.shed2tap.base import BasePackage +from planemo.tools import yield_tool_sources +from .diff import diff_and_remove +from .interface import ( + api_exception_to_message, + download_tar, + find_category_ids, + find_repository, + latest_installable_revision, + tool_shed_instance, + username, +) + +SHED_CONFIG_NAME = '.shed.yml' +DOCKSTORE_REGISTRY_CONF = ".dockstore.yml" +REPO_METADATA_FILES = (SHED_CONFIG_NAME, DOCKSTORE_REGISTRY_CONF) +REPO_DEPENDENCIES_CONFIG_NAME = "repository_dependencies.xml" +TOOL_DEPENDENCIES_CONFIG_NAME = "tool_dependencies.xml" + +NO_REPOSITORIES_MESSAGE = ("Could not find any .shed.yml files or a --name to " + "describe the target repository.") +NAME_INVALID_MESSAGE = ("Cannot use --name argument when multiple directories " + "in target contain .shed.yml files.") +NAME_REQUIRED_MESSAGE = ("No repository name discovered but one is required.") +CONFLICTING_NAMES_MESSAGE = ("The supplied name argument --name conflicts " + "with value discovered in .shed.yml.") +PARSING_PROBLEM = ("Problem parsing file .shed.yml in directory %s, skipping " + "repository. 
Message: [%s].") +AUTO_REPO_CONFLICT_MESSAGE = ("Cannot specify both auto_tool_repositories and " + "repositories in .shed.yml at this time.") +AUTO_NAME_CONFLICT_MESSAGE = ("Cannot specify both auto_tool_repositories and " + "in .shed.yml and --name on the command-line.") +REALIZAION_PROBLEMS_MESSAGE = ("Problem encountered executing action for one or more " + "repositories.") +INCORRECT_OWNER_MESSAGE = ("Attempting to create a repository with configured " + "owner [%s] that does not match API user [%s].") +PROBLEM_PROCESSING_REPOSITORY_MESSAGE = "Problem processing repositories, exiting." + +# Planemo generated or consumed files that do not need to be uploaded to the +# tool shed. +PLANEMO_FILES = [ + "shed_upload*.tar.gz", + "shed_download*.tar.gz", + "tool_test_output.*", + ".travis", + ".travis.yml", + ".shed.yml", + "*~", + "#*#", +] +SHED_SHORT_NAMES = { + "toolshed": "https://toolshed.g2.bx.psu.edu/", + "testtoolshed": "https://testtoolshed.g2.bx.psu.edu/", + "local": "http://localhost:9009/", +} +SHED_LABELS = { + "toolshed": "main Tool Shed", + "testtoolshed": "test Tool Shed", + "local": "local Tool Shed", +} +REPO_TYPE_UNRESTRICTED = "unrestricted" +REPO_TYPE_TOOL_DEP = "tool_dependency_definition" +REPO_TYPE_SUITE = "repository_suite_definition" + +# TODO: sync this with tool shed impl someday +VALID_REPOSITORYNAME_RE = re.compile(r"^[a-z0-9\_]+$") +VALID_PUBLICNAME_RE = re.compile(r"^[a-z0-9._\-]+$") + + +# Generate with python scripts/categories.py +CURRENT_CATEGORIES = [ + "Assembly", + "ChIP-seq", + "Combinatorial Selections", + "Computational chemistry", + "Constructive Solid Geometry", + "Convert Formats", + "Data Export", + "Data Managers", + "Data Source", + "Entomology", + "Epigenetics", + "Fasta Manipulation", + "Fastq Manipulation", + "Flow Cytometry Analysis", + "Genome annotation", + "Genome editing", + "Genome-Wide Association Study", + "Genomic Interval Operations", + "Graphics", + "Imaging", + "Machine Learning", + "Metabolomics", + 
"Metagenomics", + "Micro-array Analysis", + "Molecular Dynamics", + "Next Gen Mappers", + "NLP", + "Ontology Manipulation", + "Phylogenetics", + "Proteomics", + "RNA", + "SAM", + "Sequence Analysis", + "Statistics", + "Systems Biology", + "Text Manipulation", + "Tool Dependency Packages", + "Tool Generators", + "Transcriptomics", + "Variant Analysis", + "Visualization", + "Web Services", +] +# http://stackoverflow.com/questions/7676255/find-and-replace-urls-in-a-block-of-te +HTTP_REGEX_PATTERN = re.compile( + r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>\[\]]+|\(([^\s()<>\[\]]+|(\([^\s()<>\[\]]+\)))*\))+(?:\(([^\s()<>\[\]]+|(\([^\s()<>\[\]]+\)))*\)|[^\s`!(){};:'".,<>?\[\]]))""" # noqa +) + + +def _is_url(url): + return '://' in url and \ + ( + url.startswith('http') or + url.startswith('ftp') + ) + + +def _find_urls_in_text(text): + return [url for url in HTTP_REGEX_PATTERN.findall(text) if _is_url(url[0])] + + +def construct_yaml_str(self, node): + # Override the default string handling function + # to always return unicode objects + return self.construct_scalar(node) + + +yaml.Loader.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str) +yaml.SafeLoader.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str) + + +class ShedContext(NamedTuple): + tsi: toolshed.ToolShedInstance + shed_config: dict + config_owner: str + + def owner(self): + owner = self.config_owner + if owner is None: + owner = username(self.tsi) + return owner + + @property + def label(self): + return self.shed_config.get("label") or "tool shed" + + +def shed_init(ctx, path, **kwds): + """Initialize a new shed repository.""" + if not os.path.exists(path): + os.makedirs(path) + shed_config_path = os.path.join(path, SHED_CONFIG_NAME) + if not can_write_to_path(shed_config_path, **kwds): + # .shed.yml exists and no --force sent. 
+ return 1 + + create_failed = _create_shed_config(ctx, shed_config_path, **kwds) + if create_failed: + return 1 + + repo_dependencies_path = os.path.join(path, REPO_DEPENDENCIES_CONFIG_NAME) + from_workflow = kwds.get("from_workflow") + + if from_workflow: + workflow_name = os.path.basename(from_workflow) + workflow_target = os.path.join(path, workflow_name) + if not os.path.exists(workflow_target): + shutil.copyfile(from_workflow, workflow_target) + + if not can_write_to_path(repo_dependencies_path, **kwds): + return 1 + + repo_pairs = _parse_repos_from_workflow(from_workflow) + repository_dependencies = RepositoryDependencies(repo_pairs) + repository_dependencies.write_to_path(repo_dependencies_path) + + return 0 + + +def install_arg_lists(ctx, paths, **kwds): + """Build a list of install args for resolved repositories.""" + shed_context = get_shed_context(ctx, **kwds) + install_args_list = [] + + def process_repo(realized_repository): + install_args_list.append(realized_repository.install_args(ctx, shed_context)) + return 0 + + exit_code = for_each_repository(ctx, process_repo, paths, **kwds) + if exit_code: + raise RuntimeError(PROBLEM_PROCESSING_REPOSITORY_MESSAGE) + + return install_args_list + + +def find_urls_for_xml(root): + """Returns two lists: explicit package URLs, and help text URLs. + + For validating the user-facing URLs is it sensible to mimic + a web browser user agent. 
def handle_force_create(realized_repository, ctx, shed_context, **kwds):
    """Return the repository id, creating the repository when requested.

    Looks up the realized repository in the targeted Tool Shed; when it
    does not exist and ``force_repository_creation`` was supplied in
    ``kwds``, attempts to create it.
    """
    repo_id = realized_repository.find_repository_id(ctx, shed_context)
    if repo_id is None and kwds.get("force_repository_creation"):
        repo_id = realized_repository.create(ctx, shed_context)
    # May still be None here (creation not requested, or it failed);
    # callers treat a None id as "give up" and report the error.
    return repo_id
% name) + return 2 + + +def upload_repository(ctx, realized_repository, **kwds): + """Upload a tool directory as a tarball to a tool shed.""" + path = realized_repository.path + tar_path = kwds.get("tar") + if not tar_path: + tar_path = build_tarball(path, **kwds) + if kwds.get("tar_only", False): + name = realized_repository.pattern_to_file_name("shed_upload.tar.gz") + shutil.copy(tar_path, name) + return 0 + shed_context = get_shed_context(ctx, **kwds) + update_kwds = {} + _update_commit_message(ctx, realized_repository, update_kwds, **kwds) + + repo_id = handle_force_create(realized_repository, ctx, shed_context, **kwds) + # failing to create the repo, give up + if repo_id is None: + return report_non_existent_repository(realized_repository) + + if kwds.get("check_diff", False): + is_diff = diff_repo(ctx, realized_repository, **kwds) != 0 + if not is_diff: + name = realized_repository.name + info("Repository [%s] not different, skipping upload." % name) + return 0 + + # TODO: support updating repo information if it changes in the config file + try: + shed_context.tsi.repositories.update_repository( + str(repo_id), tar_path, **update_kwds + ) + except Exception as e: + if isinstance(e, bioblend.ConnectionError) and e.status_code == 400 and \ + '"No changes to repository."' in e.body: + warn("Repository %s was not updated because there were no changes" % realized_repository.name) + return 0 + message = api_exception_to_message(e) + error("Could not update %s" % realized_repository.name) + error(message) + return -1 + info("Repository %s updated successfully." 
% realized_repository.name) + return 0 + + +def _update_commit_message(ctx, realized_repository, update_kwds, **kwds): + message = kwds.get("message") + git_rev = realized_repository.git_rev(ctx) + git_repo = realized_repository.git_repo(ctx) + if message is None: + message = "planemo upload" + if git_repo: + message += " for repository %s" % git_repo + if git_rev: + message += " commit %s" % git_rev + update_kwds["commit_message"] = message + + +def diff_repo(ctx, realized_repository, **kwds): + """Compare two repositories (local or remote) and check for differences. + + Returns 0 if and only the repositories are effectively the same + given supplied kwds for comparison description. + """ + with temp_directory("tool_shed_diff_") as working: + return _diff_in(ctx, working, realized_repository, **kwds) + + +def _diff_in(ctx, working, realized_repository, **kwds): + path = realized_repository.path + shed_target_source = kwds.get("shed_target_source") + + label_a = "_%s_" % (shed_target_source if shed_target_source else "workingdir") + shed_target = kwds.get("shed_target", "B") + if "/" in shed_target: + shed_target = "custom_shed" + label_b = "_%s_" % shed_target + + mine = os.path.join(working, label_a) + other = os.path.join(working, label_b) + + shed_context = get_shed_context(ctx, read_only=True, **kwds) + # In order to download the tarball, require repository ID... + repo_id = realized_repository.find_repository_id(ctx, shed_context) + if repo_id is None: + error("shed_diff: Repository [%s] does not exist in the targeted Tool Shed." + % realized_repository.name) + # $ diff README.rst not_a_file 2&>1 /dev/null; echo $? 
+ # 2 + return 2 + info("Diffing repository [%s]" % realized_repository.name) + download_tarball( + ctx, + shed_context, + realized_repository, + destination=other, + clean=True, + destination_is_pattern=False, + **kwds + ) + if shed_target_source: + new_kwds = kwds.copy() + new_kwds["shed_target"] = shed_target_source + shed_context = get_shed_context(ctx, read_only=True, **new_kwds) + download_tarball( + ctx, + shed_context, + realized_repository, + destination=mine, + clean=True, + destination_is_pattern=False, + **new_kwds + ) + else: + tar_path = build_tarball(path) + os.mkdir(mine) + shell(['tar', '-xzf', tar_path, '-C', mine]) + shutil.rmtree(tar_path, ignore_errors=True) + + output = kwds.get("output") + raw = kwds.get("raw", False) + xml_diff = 0 + if not raw: + if output: + with open(output, "w") as f: + xml_diff = diff_and_remove(working, label_a, label_b, f) + else: + xml_diff = diff_and_remove(working, label_a, label_b, sys.stdout) + + cmd = ['diff', '-r', label_a, label_b] + if output: + with open(output, 'ab') as fh: + raw_diff = shell(cmd, cwd=working, stdout=fh) + else: + raw_diff = shell(cmd, cwd=working) + exit = raw_diff or xml_diff + if not raw: + if xml_diff: + ctx.vlog("One or more shed XML file(s) different!") + if raw_diff: + ctx.vlog("One or more non-shed XML file(s) different.") + if not xml_diff and not raw_diff: + ctx.vlog("No differences.") + return exit + + +def shed_repo_config(ctx, path, name=None): + shed_yaml_path = os.path.join(path, SHED_CONFIG_NAME) + config = {} + if os.path.exists(shed_yaml_path): + with open(shed_yaml_path, "r") as f: + config = yaml.safe_load(f) + + if config is None: # yaml may yield None + config = {} + _expand_raw_config(ctx, config, path, name=name) + return config + + +def tool_shed_client(ctx=None, **kwds): + return get_shed_context(ctx, **kwds).tsi + + +def get_shed_context(ctx=None, **kwds): + read_only = kwds.get("read_only", False) + shed_config, username = _shed_config_and_username(ctx, **kwds) + 
+ def prop(key): + return kwds.get("shed_%s" % key) or shed_config.get(key) + + url = _shed_config_to_url(shed_config) + if read_only: + key = None + email = None + password = None + else: + key = _find_shed_key(kwds, shed_config) + email = prop("email") + password = prop("password") + + tsi = tool_shed_instance(url, key, email, password) + owner = username + return ShedContext(tsi, shed_config, owner) + + +def tool_shed_url(ctx, **kwds): + shed_config, _ = _shed_config_and_username(ctx, **kwds) + return _shed_config_to_url(shed_config) + + +def _shed_config_and_username(ctx, **kwds): + shed_target = kwds.get("shed_target") + global_config = getattr(ctx, "global_config", {}) + if global_config and "sheds" in global_config: + sheds_config = global_config["sheds"] + shed_config = sheds_config.get(shed_target, {}) or {} + else: + shed_config = {} + + if "url" not in shed_config: + if shed_target and shed_target in SHED_SHORT_NAMES: + shed_config["url"] = SHED_SHORT_NAMES[shed_target] + else: + shed_config["url"] = shed_target + + if "label" not in shed_config: + if shed_target and shed_target in SHED_LABELS: + shed_config["label"] = SHED_LABELS[shed_target] + else: + shed_config["label"] = "custom tool shed at %s" % shed_target + + default_shed_username = global_config.get("shed_username") + username = shed_config.get("username", default_shed_username) + + return shed_config, username + + +def _find_shed_key(kwds, shed_config): + shed_key = kwds.get("shed_key") + if shed_key is None: + shed_key_from_env = kwds.get("shed_key_from_env") + if shed_key_from_env is not None: + shed_key = os.environ[shed_key_from_env] + if shed_key is None: + shed_key = shed_config.get("key") + return shed_key + + +def find_repository_id(ctx, shed_context, path, **kwds): + repo_config = kwds.get("config") + if repo_config is None: + name = kwds.get("name") + repo_config = shed_repo_config(ctx, path, name=name) + name = repo_config["name"] + find_kwds = kwds.copy() + if "name" in find_kwds: 
+ del find_kwds["name"] + return _find_repository_id(ctx, shed_context, name, repo_config, **find_kwds) + + +def _find_repository_id(ctx, shed_context, name, repo_config, **kwds): + # TODO: modify to consume shed_context + owner = _owner(ctx, repo_config, shed_context, **kwds) + matching_repository = find_repository(shed_context.tsi, owner, name) + if matching_repository is None: + if not kwds.get("allow_none", False): + message = "Failed to find repository for owner/name %s/%s" + raise Exception(message % (owner, name)) + else: + return None + else: + repo_id = matching_repository["id"] + return repo_id + + +def _owner(ctx, repo_config, shed_context=None, **kwds): + owner = kwds.get("owner") or repo_config.get("owner") + if owner is None: + if shed_context is None and "shed_target" in kwds: + shed_context = get_shed_context(ctx, **kwds) + if shed_context is not None: + owner = shed_context.owner() + return owner + + +def _expand_raw_config(ctx, config, path, name=None): + name_input = name + if "name" not in config: + config["name"] = name + if config["name"] is None: + config["name"] = path_to_repo_name(path) + + default_include = config.get("include", ["**"]) + repos = config.get("repositories") + auto_tool_repos = config.get("auto_tool_repositories", False) + suite_config = config.get("suite", False) + + if repos and auto_tool_repos: + raise Exception(AUTO_REPO_CONFLICT_MESSAGE) + if auto_tool_repos and name_input: + raise Exception(AUTO_NAME_CONFLICT_MESSAGE) + if auto_tool_repos: + repos = _build_auto_tool_repos(ctx, path, config, auto_tool_repos) + if suite_config: + if repos is None: + repos = odict.odict() + _build_suite_repo(config, repos, suite_config) + # If repositories aren't defined, just define a single + # one based on calculated name and including everything + # by default. 
+ if repos is None: + repos = { + config["name"]: { + "include": default_include + } + } + config["repositories"] = repos + + +def _build_auto_tool_repos(ctx, path, config, auto_tool_repos): + default_include = config.get("include", ["**"]) + tool_source_pairs = list(yield_tool_sources(ctx, path, recursive=True)) + paths = [_[0] for _ in tool_source_pairs] + excludes = _shed_config_excludes(config) + + def _build_repository(tool_path, tool_source): + tool_id = tool_source.parse_id().lower() + tool_name = tool_source.parse_name() + description = tool_source.parse_description() + template_vars = dict( + tool_id=tool_id, + tool_name=tool_name, + description=description, + ) + other_paths = paths[:] + other_paths.remove(tool_path) + tool_excludes = excludes + list(other_paths) + repo_dict = { + "include": default_include, + "exclude": tool_excludes, + } + for key in ["name", "description", "long_description"]: + template_key = "%s_template" % key + template = auto_tool_repos.get(template_key) + if template: + value = templates.render(template, **template_vars) + repo_dict[key] = value + return repo_dict + + repos = odict.odict() + for tool_path, tool_source in tool_source_pairs: + repository_config = _build_repository(tool_path, tool_source) + repository_name = repository_config["name"] + repos[repository_name] = repository_config + return repos + + +def _build_suite_repo(config, repos, suite_config): + name = suite_config.get("name") + if not name: + raise Exception("suite requires a 'name'.") + description = suite_config.get("description", "") + long_description = suite_config.get("long_description") + owner = config["owner"] + repo_type = suite_config.get('type', REPO_TYPE_SUITE) + + repo_pairs = [(repo_dict.get('owner') or owner, repo_name) for repo_name, repo_dict in repos.items()] + extra_repos = suite_config.get("include_repositories", {}) + repo_pairs += [(_["owner"], _["name"]) for _ in extra_repos] + + repository_dependencies = 
RepositoryDependencies(repo_pairs, description) + + repo = { + "_files": { + REPO_DEPENDENCIES_CONFIG_NAME: str(repository_dependencies) + }, + "include": [], + "name": name, + "description": description, + "type": repo_type, + } + if long_description: + repo["long_description"] = long_description + repos[name] = repo + + +def update_repository_for(ctx, tsi, id, repo_config): + name = repo_config["name"] + description = repo_config.get("description") + long_description = repo_config.get("long_description") + repo_type = shed_repo_type(repo_config, name) + remote_repository_url = repo_config.get("remote_repository_url") + homepage_url = repo_config.get("homepage_url") + categories = repo_config.get("categories", []) + category_ids = find_category_ids(tsi, categories) + + _ensure_shed_description(description) + + kwds = dict( + name=name, + synopsis=description, + type=repo_type, + ) + if long_description is not None: + kwds["description"] = long_description + if remote_repository_url is not None: + kwds["remote_repository_url"] = remote_repository_url + if homepage_url is not None: + kwds["homepage_url"] = homepage_url + if category_ids is not None: + kwds['category_ids[]'] = category_ids + return bioblend.galaxy.client.Client._put(tsi.repositories, id=id, payload=kwds) + + +def create_repository_for(ctx, tsi, name, repo_config): + description = repo_config.get("description") + long_description = repo_config.get("long_description") + repo_type = shed_repo_type(repo_config, name) + remote_repository_url = repo_config.get("remote_repository_url") + homepage_url = repo_config.get("homepage_url") + categories = repo_config.get("categories", []) + category_ids = find_category_ids(tsi, categories) + + _ensure_shed_description(description) + + repo = tsi.repositories.create_repository( + name=name, + synopsis=description, + description=long_description, + type=repo_type, + remote_repository_url=remote_repository_url, + homepage_url=homepage_url, + category_ids=category_ids 
+ ) + return repo + + +def download_tarball(ctx, shed_context, realized_repository, **kwds): + repo_id = realized_repository.find_repository_id(ctx, shed_context) + if repo_id is None: + message = "Unable to find repository id, cannot download." + error(message) + raise Exception(message) + destination_pattern = kwds.get('destination', 'shed_download.tar.gz') + if kwds.get("destination_is_pattern", True): + destination = realized_repository.pattern_to_file_name(destination_pattern) + else: + destination = destination_pattern + to_directory = not destination.endswith("gz") + download_tar(shed_context.tsi, repo_id, destination, to_directory=to_directory) + if to_directory: + clean = kwds.get("clean", False) + if clean: + archival_file = os.path.join(destination, ".hg_archival.txt") + if os.path.exists(archival_file): + os.remove(archival_file) + + +def build_tarball(realized_path, **kwds): + """Build a tool-shed tar ball for the specified path, caller is + responsible for deleting this file. 
def path_to_repo_name(path):
    """Derive a repository name from ``path`` (its final component).

    The path is first made absolute so that relative paths and paths
    with trailing separators still yield the directory's own name.
    """
    absolute = os.path.abspath(path)
    return os.path.basename(absolute)
Please specify a valid " + "HTTP address or one of {1}" + ).format(url, list(SHED_SHORT_NAMES.keys())) + raise ValueError(message) + return url + + +def _realize_effective_repositories(ctx, path, **kwds): + """ Expands folders in a source code repository into tool shed + repositories. + + Each folder may have nested repositories and each folder may corresponding + to many repositories (for instance if a folder has n tools in the source + code repository but are published to the tool shed as one repository per + tool). + """ + raw_repo_objects = _find_raw_repositories(ctx, path, **kwds) + failed = False + with temp_directory() as base_dir: + for raw_repo_object in raw_repo_objects: + if isinstance(raw_repo_object, Exception): + _handle_realization_error(raw_repo_object, **kwds) + failed = True + continue + + realized_repos = raw_repo_object.realizations( + ctx, + base_dir, + **kwds + ) + for realized_repo in realized_repos: + if isinstance(realized_repo, Exception): + _handle_realization_error(realized_repo, **kwds) + failed = True + continue + yield realized_repo + if failed: + raise RealizationException() + + +def _create_shed_config(ctx, path, **kwds): + name = kwds.get("name") or path_to_repo_name(os.path.dirname(path)) + name_invalid = validate_repo_name(name) + if name_invalid: + error(name_invalid) + return 1 + + owner = kwds.get("owner") + if owner is None: + owner = ctx.global_config.get("shed_username") + owner_invalid = validate_repo_owner(owner) + if owner_invalid: + error(owner_invalid) + return 1 + description = kwds.get("description") or name + long_description = kwds.get("long_description") + remote_repository_url = kwds.get("remote_repository_url") + homepage_url = kwds.get("homepage_url") + categories = kwds.get("category", []) + config = dict( + name=name, + owner=owner, + description=description, + long_description=long_description, + remote_repository_url=remote_repository_url, + homepage_url=homepage_url, + categories=categories, + ) + # Remove 
empty entries... + for k in list(config.keys()): + if config[k] is None: + del config[k] + + with open(path, "w") as f: + yaml.safe_dump(config, f) + + +def _parse_repos_from_workflow(path): + with open(path, "r") as f: + workflow_json = json.load(f) + steps = workflow_json["steps"] + tool_ids = set() + for value in steps.values(): + step_type = value["type"] + if step_type != "tool": + continue + tool_id = value["tool_id"] + if "/repos/" in tool_id: + tool_ids.add(tool_id) + + repo_pairs = set() + for tool_id in tool_ids: + tool_repo_info = tool_id.split("/repos/", 1)[1] + tool_repo_parts = tool_repo_info.split("/") + owner = tool_repo_parts[0] + name = tool_repo_parts[1] + repo_pairs.add((owner, name)) + + return repo_pairs + + +@contextlib.contextmanager +def _path_on_disk(ctx, path): + git_path = None + if path.startswith("git:"): + git_path = path + elif path.startswith("git+"): + git_path = path[len("git+"):] + if git_path is None: + yield path + else: + with temp_directory() as git_repo: + git.clone(ctx, git_path, git_repo) + yield git_repo + + +def _find_raw_repositories(ctx, path, **kwds): + name = kwds.get("name") + recursive = kwds.get("recursive", False) + + shed_file_dirs = [] + for pattern in REPO_METADATA_FILES: + shed_file_dirs.extend(find_matching_directories( + path, pattern, recursive=recursive + )) + config_name = None + if len(shed_file_dirs) == 1: + shed_file_dir = shed_file_dirs[0] + try: + config = shed_repo_config(ctx, shed_file_dir, name=name) + except Exception as e: + error_message = PARSING_PROBLEM % (shed_file_dir, e) + exception = RuntimeError(error_message) + _handle_realization_error(exception, **kwds) + return [exception] + config_name = config.get("name") + + if len(shed_file_dirs) > 1 and name is not None: + raise Exception(NAME_INVALID_MESSAGE) + if config_name is not None and name is not None: + if config_name != name: + raise Exception(CONFLICTING_NAMES_MESSAGE) + raw_dirs = shed_file_dirs or [path] + kwds_copy = kwds.copy() + 
@six.python_2_unicode_compatible
class RepositoryDependencies(object):
    """Abstraction for shed ``repository_dependencies.xml`` files.

    Renders a list of ``(owner, name)`` repository pairs as the XML
    document the Tool Shed expects for suite/dependency repositories.
    """

    def __init__(self, repo_pairs, description=None):
        # repo_pairs: iterable of (owner, name) tuples; description is
        # used for the <repositories description="..."> attribute.
        self.repo_pairs = repo_pairs
        self.description = description or ""

    def __str__(self):
        # Escape attribute values so descriptions/owners/names containing
        # XML-special characters (&, <, >, ") cannot produce a malformed
        # document.  quoteattr supplies the surrounding quotes, so output
        # is byte-identical to the old format for ordinary values.
        from xml.sax.saxutils import quoteattr
        contents = '<repositories description=%s>' % quoteattr(self.description)
        line_template = '    <repository owner=%s name=%s />\n'
        for (owner, name) in self.repo_pairs:
            contents += line_template % (quoteattr(owner), quoteattr(name))
        contents += "</repositories>"
        return contents

    def write_to_path(self, path):
        """Serialize this document to ``path`` (overwriting it)."""
        with open(path, "w") as f:
            f.write(six.text_type(self))

    def _hash(self, name):
        # Hash the repository name to build a unique per-repository parent
        # directory for realization (not security-sensitive, hence md5).
        return hashlib.md5(name.encode('utf-8')).hexdigest()

    def realizations(self, ctx, parent_directory, **kwds):
        """Yield a realization result for each configured repository name.

        Each repository is staged into its own subdirectory of
        ``parent_directory`` (keyed by a hash of the name plus the name
        itself). Yields whatever ``_realize_to`` returns — either a
        ``RealizedRepositry`` on success or a ``RuntimeError`` describing
        missing includes.
        """
        names = self._repo_names()

        for name in names:
            directory = os.path.join(parent_directory, self._hash(name), name)
            # Treat as a multi-repository realization if this directory was
            # flagged as such or more than one repository is configured.
            multiple = self.multiple or len(names) > 1
            if not os.path.exists(directory):
                os.makedirs(directory)
            # Drop any caller-supplied "name" keyword so it cannot conflict
            # with the per-repository name passed positionally below.
            r_kwds = kwds.copy()
            if "name" in r_kwds:
                del r_kwds["name"]
            yield self._realize_to(ctx, directory, name, multiple, **r_kwds)

    def _realize_to(self, ctx, directory, name, multiple, **kwds):
        """Stage repository ``name``'s files into ``directory``.

        Returns a ``RealizedRepositry`` on success. If any include entry
        matched no files and ``fail_on_missing`` is set (the default), a
        ``RuntimeError`` is *returned* rather than raised — presumably so
        the ``realizations`` generator can keep yielding for the remaining
        repositories (callers not visible in this chunk; confirm).
        """
        fail_on_missing = kwds.get("fail_on_missing", True)
        ignore_list = []
        config = self._realize_config(name)
        config["owner"] = _owner(ctx, config, **kwds)

        # Expand the .shed.yml ignore/exclude globs into concrete paths.
        excludes = _shed_config_excludes(config)
        for exclude in excludes:
            ignore_list.extend(_glob(self.path, exclude))

        realized_files = self._realized_files(name)
        missing = realized_files.include_failures
        if missing and fail_on_missing:
            msg = "Failed to include files for %s" % missing
            # NOTE(review): returned, not raised — see docstring.
            return RuntimeError(msg)

        for realized_file in realized_files.files:
            relative_dest = realized_file.dest
            implicit_ignore = self._implicit_ignores(relative_dest)
            explicit_ignore = (realized_file.absolute_src in ignore_list)
            if implicit_ignore or explicit_ignore:
                continue
            realized_file.realize_to(directory)

        # Write any inline "_files" entries (filename -> contents) from the
        # config. NOTE(review): the loop variable shadows the ``name``
        # parameter; harmless today since ``name`` is not used afterwards.
        for (name, contents) in six.iteritems(config.get("_files", {})):
            path = os.path.join(directory, name)
            with open(path, "w") as f:
                f.write(contents)

        return RealizedRepositry(
            realized_path=directory,
            real_path=self.path,
            config=config,
            multiple=multiple,
            missing=missing,
        )

    def _repo_names(self):
        # NOTE(review): raises AttributeError if the config lacks a
        # "repositories" key — unlike _realize_config below, which defaults
        # the same lookup to {}. Confirm whether config always has the key.
        return self.config.get("repositories").keys()

    def _realized_files(self, name):
        """Resolve the config's "include" entries into ``RealizedFile``s.

        Returns a ``RealizedFiles`` named tuple; ``include_failures`` lists
        the include entries that matched no files at all.
        """
        config = self._realize_config(name)
        realized_files = []
        missing = []
        for include_info in config["include"]:
            # A plain string is shorthand for {"source": <string>}.
            if not isinstance(include_info, dict):
                include_info = {"source": include_info}
            source_list = include_info.get("source")
            if not isinstance(source_list, list):
                source_list = [source_list]
            # Preprocess any entries with a source list into copies
            # with a single source entry:
            for source in source_list:
                include = include_info.copy()
                include["source"] = source
                included = RealizedFile.realized_files_for(self.path, include)
                if not included:
                    missing.append(include)
                else:
                    realized_files.extend(included)
        return RealizedFiles(realized_files, missing)

    def _realize_config(self, name):
        """Build the effective config for repository ``name``.

        Starts from a deep copy of the directory-level config, sets "name",
        overlays the per-repository section (so per-repo keys win), then
        drops the "repositories" mapping itself.
        """
        config = copy.deepcopy(self.config)
        config["name"] = name
        repo_config = config.get("repositories", {}).get(name, {})
        config.update(repo_config)
        if "repositories" in config:
            del config["repositories"]
        return config

    def _implicit_ignores(self, relative_path):
        # Filter out "unwanted files" :) like READMEs for special
        # repository types.
        if self.type == REPO_TYPE_TOOL_DEP:
            # Package repositories ship only tool_dependencies.xml.
            if relative_path != TOOL_DEPENDENCIES_CONFIG_NAME:
                return True

        if self.type == REPO_TYPE_SUITE:
            # Suite repositories ship only repository_dependencies.xml.
            if relative_path != REPO_DEPENDENCIES_CONFIG_NAME:
                return True

        # Skip version-control metadata: .git/.hg at the repository root,
        # .svn directories anywhere (they nest in every folder).
        name = os.path.basename(relative_path)
        for dvcs_prefix in [".git", ".hg"]:
            if relative_path.startswith(dvcs_prefix):
                return True

        if name.startswith(".svn"):
            return True

        # Skip planemo's own files (patterns in PLANEMO_FILES, defined
        # elsewhere in this module).
        for pattern in PLANEMO_FILES:
            if fnmatch.fnmatch(name, pattern):
                return True
        return False


class RealizedFiles(NamedTuple):
    """Result of include resolution: realized files plus failures."""

    # RealizedFile instances ready to be staged to disk.
    files: list
    # Include config entries (dicts) that matched no files.
    include_failures: list


class RealizedFile(object):

    def __init__(self, src_root, src, dest):
        """Create object mapping from file system to tar-ball.

        * src_root - source root (i.e. folder with .shed.yml file)
        * src - location of source file, relative to src_root
        * dest - destination path, relative to root of tar-ball.
        """
        if dest == ".":
            raise ValueError("Destination for %r should be a full filename!" % src)
        self.src_root = src_root
        self.src = src
        self.dest = dest

    @property
    def absolute_src(self):
        """Absolute path of the source file on disk."""
        return os.path.abspath(os.path.join(self.src_root, self.src))

    def realize_to(self, directory):
        """Materialize this file under ``directory`` at its dest path.

        Source symlinks are resolved to their real path first. Missing
        intermediate directories are created; a directory source becomes a
        real directory while a file source becomes a symlink. An existing
        target is left untouched.
        """
        source_path = self.absolute_src
        if os.path.islink(source_path):
            source_path = os.path.realpath(source_path)
        relative_dest = self.dest
        assert relative_dest != "."
        target_path = os.path.join(directory, relative_dest)
        target_exists = os.path.exists(target_path)
        # info("realize_to %r --> %r" % (source_path, target_path))
        if not target_exists:
            target_dir = os.path.dirname(target_path)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            if os.path.isdir(source_path):
                os.makedirs(target_path)
            else:
                os.symlink(source_path, target_path)

    @staticmethod
    def realized_files_for(path, include_info):
        """Expand one include entry into a list of ``RealizedFile``s.

        ``include_info`` is either a plain source string or a dict with:
        "source" (path or glob, relative to ``path``), optional
        "destination" (a directory if it ends with "/", otherwise an exact
        filename; defaults to "./") and optional "strip_components"
        (number of leading path parts to drop — only valid when the
        destination is a directory).

        Raises ValueError for inconsistent combinations (filename
        destination with a wildcard/directory source, or with
        strip_components).
        """
        if not isinstance(include_info, dict):
            include_info = {"source": include_info}
        source = include_info.get("source")
        abs_source = os.path.join(path, source)
        destination = include_info.get("destination")
        strip_components = include_info.get("strip_components", 0)
        if destination is None:
            destination = "./"
        if not destination.endswith("/"):
            # Check if source using wildcards (directory gets implicit wildcard)
            # Should we use a regular expression to catch [A-Z] style patterns?
            if "*" in source or "?" in source or os.path.isdir(abs_source):
                raise ValueError("destination must be a directory (with trailing slash) if source is a folder or uses wildcards")
        realized_files = []
        for globbed_file in _glob(path, source):
            src = os.path.relpath(globbed_file, path)
            if not destination.endswith("/"):
                # Given a filename, just use it!
                dest = destination
                if strip_components:
                    raise ValueError("strip_components should not be used if destination is a filename")
            else:
                # Destination is a directory...
                if not strip_components:
                    dest = src
                elif "/../" in globbed_file:
                    # Can't work from src=os.path.relpath(globbed_file, path) as lost any '..'
                    assert globbed_file.startswith(path + "/")
                    dest = "/".join(globbed_file[len(path) + 1:].split("/")[strip_components:])
                else:
                    dest = "/".join(src.split("/")[strip_components:])
                # Now apply the specified output directory:
                dest = os.path.join(destination, dest)
            realized_files.append(
                RealizedFile(path, src, os.path.normpath(dest))
            )
        return realized_files

    def __str__(self):
        return "RealizedFile[src={},dest={},src_root={}]".format(
            self.src, self.dest, self.src_root
        )


class RealizedRepositry(object):
    """A shed repository staged on disk, ready for tool shed operations.

    NOTE(review): the class name misspells "Repository"; preserved as-is
    since other code (including ``_realize_to`` above) references it.
    """

    def __init__(self, realized_path, real_path, config, multiple, missing):
        # realized_path: staging directory the files were realized into.
        # real_path: original source directory (with .shed.yml).
        # config: effective per-repository config (see _realize_config).
        # multiple: True when realized alongside sibling repositories.
        # missing: include entries that failed to match any files.
        self.path = realized_path
        self.real_path = real_path
        self.config = config
        self.name = config["name"]
        self.multiple = multiple
        self.missing = missing

    @property
    def owner(self):
        """Tool shed owner recorded in the effective config."""
        return self.config["owner"]

    @property
    def repository_type(self):
        """Shed repository type derived from config and name."""
        return shed_repo_type(self.config, self.name)

    @property
    def is_package(self):
        return self.repository_type == REPO_TYPE_TOOL_DEP

    @property
    def is_suite(self):
        return self.repository_type == REPO_TYPE_SUITE

    @property
    def repo_dependencies_path(self):
        return os.path.join(self.path, REPO_DEPENDENCIES_CONFIG_NAME)

    @property
    def tool_dependencies_path(self):
        return os.path.join(self.path, TOOL_DEPENDENCIES_CONFIG_NAME)

    def git_rev(self, ctx):
        """Git revision of the source directory, if it is a git repo."""
        return git.rev_if_git(ctx, self.real_path)

    def git_repo(self, ctx):
        """Remote repository URL from config, if configured."""
        return self.config.get("remote_repository_url")

    def pattern_to_file_name(self, pattern):
        """Suffix ``pattern`` with the repository name when realizing
        multiple repositories, so per-repository files do not collide.

        The suffix is inserted before the first "." (if any); dashes in
        the name become underscores.
        """
        if not self.multiple:
            return pattern

        name = self.config["name"]
        suffix = "_%s" % name.replace("-", "_")

        if "." not in pattern:
            return pattern + suffix
        else:
            parts = pattern.split(".", 1)
            return parts[0] + suffix + "." + parts[1]

    def find_repository_id(self, ctx, shed_context):
        """Return the tool shed id for this repository, or None.

        Errors are reported via error() and swallowed (None returned).
        """
        try:
            repo_id = _find_repository_id(
                ctx,
                shed_context,
                name=self.name,
                repo_config=self.config,
                allow_none=True,
            )
            return repo_id
        except Exception as e:
            message = api_exception_to_message(e)
            error("Could not update %s" % self.name)
            error(message)
        return None

    def create(self, ctx, shed_context):
        """Wrapper for creating the endpoint if it doesn't exist.

        Returns the new repository's id, or None on API failure.
        """
        context_owner = shed_context.owner()
        config_owner = self.config.get("owner")
        if context_owner and config_owner and context_owner != config_owner:
            # This is broken because context_owner is incorrect if using an API key.
            # message = INCORRECT_OWNER_MESSAGE % (config_owner, context_owner)
            # raise Exception(message)
            pass

        def _create():
            repo = create_repository_for(
                ctx,
                shed_context.tsi,
                self.name,
                self.config,
            )
            return repo['id']

        return self._with_ts_exception_handling(_create)

    def update(self, ctx, shed_context, id):
        """Wrapper for update the repository metadata.

        Returns the updated repository dict, or None on API failure.
        """

        def _update():
            repo = update_repository_for(
                ctx,
                shed_context.tsi,
                id,
                self.config,
            )
            return repo

        return self._with_ts_exception_handling(_update)

    def _with_ts_exception_handling(self, f):
        # Run ``f`` and report any tool shed API failure, returning None.
        # Tries to extract the upstream "err_msg" from the HTTP error body
        # first, falling back to the exception's own text.
        try:
            return f()
        except Exception as e:
            # TODO: galaxyproject/bioblend#126
            try:
                upstream_error = json.loads(e.read())
                error(upstream_error['err_msg'])
            except Exception:
                error(unicodify(e))
            return None

    def latest_installable_revision(self, ctx, shed_context):
        """Latest installable changeset revision of this repository."""
        repository_id = self.find_repository_id(ctx, shed_context)
        return latest_installable_revision(shed_context.tsi, repository_id)

    def install_args(self, ctx, shed_context):
        """ Arguments for bioblend's install_repository_revision
        to install this repository against supplied tsi.
        """
        tool_shed_url = shed_context.tsi.base_url
        return dict(
            tool_shed_url=tool_shed_url,
            name=self.name,
            owner=self.owner,
            changeset_revision=self.latest_installable_revision(
                ctx, shed_context
            ),
        )


def _glob(path, pattern):
    # Glob ``pattern`` relative to ``path``; a pattern naming an existing
    # directory gets an implicit recursive wildcard so its contents match.
    pattern = os.path.join(path, pattern)
    if os.path.isdir(pattern):
        pattern = "%s/**" % pattern
    return glob.glob(pattern)


def _shed_config_excludes(config):
    # Both the "ignore" and "exclude" keys are honored (concatenated).
    return config.get('ignore', []) + config.get('exclude', [])


def _handle_realization_error(exception, **kwds):
    """Either re-raise ``exception`` (fail_fast) or just report it."""
    fail_fast = kwds.get("fail_fast", False)
    if fail_fast:
        raise exception
    else:
        error(unicodify(exception))


def _ensure_shed_description(description):
    # description is required, as is name.
    if description is None:
        message = ("description required for automatic creation or update of "
                   "shed metadata.")
        raise ValueError(message)


def validate_repo_name(name):
    """Return an error message if ``name`` is invalid, else None.

    Note: when several checks fail, only the last failure's message is
    returned (later assignments overwrite ``msg``).
    """
    def _build_error(descript):
        return "Repository name [%s] invalid. %s" % (name, descript)

    msg = None
    if len(name) < 2:
        msg = _build_error(
            "Repository names must be at least 2 characters in length."
        )
    if len(name) > 80:
        msg = _build_error(
            "Repository names cannot be more than 80 characters in length."
        )
    if not VALID_REPOSITORYNAME_RE.match(name):
        msg = _build_error(
            "Repository names must contain only lower-case letters, "
            "numbers and underscore."
        )
    return msg


def validate_repo_owner(owner):
    """Return an error message if ``owner`` is invalid, else None.

    Same last-check-wins behavior as validate_repo_name.
    """
    def _build_error(descript):
        return "Owner [%s] invalid. %s" % (owner, descript)
    msg = None
    if len(owner) < 3:
        msg = _build_error("Owner must be at least 3 characters in length")
    if len(owner) > 255:
        msg = _build_error(
            "Owner cannot be more than 255 characters in length"
        )
    if not(VALID_PUBLICNAME_RE.match(owner)):
        msg = _build_error(
            "Owner must contain only lower-case letters, numbers, dots, underscores, and '-'"
        )
    return msg


class RealizationException(Exception):
    """ This exception indicates there was a problem while
    realizing effective repositories for a shed command. As a
    precondition - the user has already been informed with error().
    """


__all__ = (
    'api_exception_to_message',
    'CURRENT_CATEGORIES',
    'diff_repo',
    'download_tarball',
    'find_raw_repositories',
    'for_each_repository',
    'get_shed_context',
    'path_to_repo_name',
    'REPO_TYPE_SUITE',
    'REPO_TYPE_TOOL_DEP',
    'REPO_TYPE_UNRESTRICTED',
    'shed_init',
    'tool_shed_client',  # Deprecated...
    'tool_shed_url',
)