diff env/lib/python3.9/site-packages/planemo/shed/__init__.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/planemo/shed/__init__.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,1428 @@
+"""Abstractions for shed related interactions used by the rest of planemo."""
+import contextlib
+import copy
+import fnmatch
+import hashlib
+import json
+import os
+import re
+import shutil
+import sys
+import tarfile
+from tempfile import (
+    mkstemp,
+)
+from typing import NamedTuple
+
+import bioblend
+import six
+import yaml
+from galaxy.util import (
+    odict,
+    unicodify,
+)
+
+from planemo import git
+from planemo import glob
+from planemo import templates
+from planemo.bioblend import toolshed
+from planemo.io import (
+    can_write_to_path,
+    coalesce_return_codes,
+    error,
+    find_matching_directories,
+    info,
+    shell,
+    temp_directory,
+    warn,
+)
+from planemo.shed2tap.base import BasePackage
+from planemo.tools import yield_tool_sources
+from .diff import diff_and_remove
+from .interface import (
+    api_exception_to_message,
+    download_tar,
+    find_category_ids,
+    find_repository,
+    latest_installable_revision,
+    tool_shed_instance,
+    username,
+)
+
+# File names of the repository metadata and dependency definition files.
+SHED_CONFIG_NAME = '.shed.yml'
+DOCKSTORE_REGISTRY_CONF = ".dockstore.yml"
+REPO_METADATA_FILES = (SHED_CONFIG_NAME, DOCKSTORE_REGISTRY_CONF)
+REPO_DEPENDENCIES_CONFIG_NAME = "repository_dependencies.xml"
+TOOL_DEPENDENCIES_CONFIG_NAME = "tool_dependencies.xml"
+
+# User-facing messages raised or logged while locating and processing
+# repositories.
+NO_REPOSITORIES_MESSAGE = ("Could not find any .shed.yml files or a --name to "
+                           "describe the target repository.")
+NAME_INVALID_MESSAGE = ("Cannot use --name argument when multiple directories "
+                        "in target contain .shed.yml files.")
+NAME_REQUIRED_MESSAGE = ("No repository name discovered but one is required.")
+CONFLICTING_NAMES_MESSAGE = ("The supplied name argument --name conflicts "
+                             "with value discovered in .shed.yml.")
+# Formatted with (directory, exception).
+PARSING_PROBLEM = ("Problem parsing file .shed.yml in directory %s, skipping "
+                   "repository. Message: [%s].")
+AUTO_REPO_CONFLICT_MESSAGE = ("Cannot specify both auto_tool_repositories and "
+                              "repositories in .shed.yml at this time.")
+# The original wording had a stray "and" ("auto_tool_repositories and in
+# .shed.yml"), which made the sentence unreadable.
+AUTO_NAME_CONFLICT_MESSAGE = ("Cannot specify both auto_tool_repositories "
+                              "in .shed.yml and --name on the command-line.")
+# NOTE: the constant name is misspelled ("REALIZAION") but is referenced
+# elsewhere in this module, so the name is kept as is.
+REALIZAION_PROBLEMS_MESSAGE = ("Problem encountered executing action for one or more "
+                               "repositories.")
+# Formatted with (configured owner, API user).
+INCORRECT_OWNER_MESSAGE = ("Attempting to create a repository with configured "
+                           "owner [%s] that does not match API user [%s].")
+PROBLEM_PROCESSING_REPOSITORY_MESSAGE = "Problem processing repositories, exiting."
+
+# Planemo generated or consumed files that do not need to be uploaded to the
+# tool shed.
+# Entries are glob-style patterns (note the ``*`` wildcards).
+PLANEMO_FILES = [
+    "shed_upload*.tar.gz",
+    "shed_download*.tar.gz",
+    "tool_test_output.*",
+    ".travis",
+    ".travis.yml",
+    ".shed.yml",
+    "*~",
+    "#*#",
+]
+# Short aliases that may be given as ``shed_target`` instead of a full URL.
+SHED_SHORT_NAMES = {
+    "toolshed": "https://toolshed.g2.bx.psu.edu/",
+    "testtoolshed": "https://testtoolshed.g2.bx.psu.edu/",
+    "local": "http://localhost:9009/",
+}
+# Human readable labels for the same aliases, used as the default shed label.
+SHED_LABELS = {
+    "toolshed": "main Tool Shed",
+    "testtoolshed": "test Tool Shed",
+    "local": "local Tool Shed",
+}
+# Repository type identifiers; see shed_repo_type() for how a default is
+# chosen from the repository name.
+REPO_TYPE_UNRESTRICTED = "unrestricted"
+REPO_TYPE_TOOL_DEP = "tool_dependency_definition"
+REPO_TYPE_SUITE = "repository_suite_definition"
+
+# TODO: sync this with tool shed impl someday
+# NOTE(review): presumably consumed by the repository name/owner validators
+# defined elsewhere in this module - confirm.
+VALID_REPOSITORYNAME_RE = re.compile(r"^[a-z0-9\_]+$")
+VALID_PUBLICNAME_RE = re.compile(r"^[a-z0-9._\-]+$")
+
+# Generate with python scripts/categories.py
+# NOTE(review): presumably a snapshot of the category names known to the
+# Tool Shed at generation time - confirm against the generating script.
+CURRENT_CATEGORIES = [
+    "Assembly",
+    "ChIP-seq",
+    "Combinatorial Selections",
+    "Computational chemistry",
+    "Constructive Solid Geometry",
+    "Convert Formats",
+    "Data Export",
+    "Data Managers",
+    "Data Source",
+    "Entomology",
+    "Epigenetics",
+    "Fasta Manipulation",
+    "Fastq Manipulation",
+    "Flow Cytometry Analysis",
+    "Genome annotation",
+    "Genome editing",
+    "Genome-Wide Association Study",
+    "Genomic Interval Operations",
+    "Graphics",
+    "Imaging",
+    "Machine Learning",
+    "Metabolomics",
+    "Metagenomics",
+    "Micro-array Analysis",
+    "Molecular Dynamics",
+    "Next Gen Mappers",
+    "NLP",
+    "Ontology Manipulation",
+    "Phylogenetics",
+    "Proteomics",
+    "RNA",
+    "SAM",
+    "Sequence Analysis",
+    "Statistics",
+    "Systems Biology",
+    "Text Manipulation",
+    "Tool Dependency Packages",
+    "Tool Generators",
+    "Transcriptomics",
+    "Variant Analysis",
+    "Visualization",
+    "Web Services",
+]
+# http://stackoverflow.com/questions/7676255/find-and-replace-urls-in-a-block-of-te
+# Case-insensitive pattern for URL-like substrings. It contains several
+# capture groups, so ``findall`` returns one tuple per match; group 0 (the
+# first tuple element) is the whole matched URL.
+HTTP_REGEX_PATTERN = re.compile(
+    r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>\[\]]+|\(([^\s()<>\[\]]+|(\([^\s()<>\[\]]+\)))*\))+(?:\(([^\s()<>\[\]]+|(\([^\s()<>\[\]]+\)))*\)|[^\s`!(){};:'".,<>?\[\]]))"""  # noqa
+)
+
+
+def _is_url(url):
+    """Return True if ``url`` contains ``://`` and starts with ``http`` or ``ftp``."""
+    if '://' not in url:
+        return False
+    # str.startswith accepts a tuple of alternative prefixes.
+    return url.startswith(('http', 'ftp'))
+
+
+def _find_urls_in_text(text):
+    """Return the HTTP_REGEX_PATTERN matches in ``text`` that look like URLs.
+
+    Each returned item is the tuple of regex groups for one match; the first
+    element of the tuple is the matched URL, which must pass ``_is_url``.
+    """
+    url_matches = []
+    for groups in HTTP_REGEX_PATTERN.findall(text):
+        if _is_url(groups[0]):
+            url_matches.append(groups)
+    return url_matches
+
+
+def construct_yaml_str(self, node):
+    """YAML constructor for string nodes: return the node's scalar value.
+
+    ``self`` is the loader instance the constructor is invoked on.
+    """
+    # Override the default string handling function
+    # to always return unicode objects
+    return self.construct_scalar(node)
+
+
+# Register the override for the standard YAML string tag on both loader
+# classes; this happens as a side effect of importing this module.
+yaml.Loader.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str)
+yaml.SafeLoader.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str)
+
+
+class ShedContext(NamedTuple):
+    """Bundle of a Tool Shed client, its configuration and the configured owner."""
+
+    # Client used to talk to the targeted Tool Shed.
+    tsi: toolshed.ToolShedInstance
+    # Configuration dictionary for the targeted shed (may hold a "label").
+    shed_config: dict
+    # Owner taken from configuration; owner() treats None as "not configured".
+    config_owner: str
+
+    def owner(self):
+        """Return the configured owner, else the username reported for ``tsi``."""
+        if self.config_owner is not None:
+            return self.config_owner
+        return username(self.tsi)
+
+    @property
+    def label(self):
+        """Label from ``shed_config``, or "tool shed" when none is set."""
+        configured_label = self.shed_config.get("label")
+        return configured_label if configured_label else "tool shed"
+
+
+def shed_init(ctx, path, **kwds):
+    """Initialize a new shed repository.
+
+    Creates ``path`` if needed and writes a ``.shed.yml`` into it. When
+    ``from_workflow`` is supplied, the workflow file is copied into ``path``
+    (unless a file of that name is already there) and a
+    ``repository_dependencies.xml`` is generated from the repositories the
+    workflow references.
+
+    Returns 0 on success and 1 when a target file may not be written or the
+    shed configuration could not be created.
+    """
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+    config_target = os.path.join(path, SHED_CONFIG_NAME)
+    # .shed.yml exists and no --force sent.
+    if not can_write_to_path(config_target, **kwds):
+        return 1
+    if _create_shed_config(ctx, config_target, **kwds):
+        return 1
+
+    workflow_source = kwds.get("from_workflow")
+    if not workflow_source:
+        return 0
+
+    workflow_copy = os.path.join(path, os.path.basename(workflow_source))
+    if not os.path.exists(workflow_copy):
+        shutil.copyfile(workflow_source, workflow_copy)
+
+    dependencies_target = os.path.join(path, REPO_DEPENDENCIES_CONFIG_NAME)
+    if not can_write_to_path(dependencies_target, **kwds):
+        return 1
+
+    workflow_repo_pairs = _parse_repos_from_workflow(workflow_source)
+    RepositoryDependencies(workflow_repo_pairs).write_to_path(dependencies_target)
+    return 0
+
+
+def install_arg_lists(ctx, paths, **kwds):
+    """Build a list of install args for resolved repositories.
+
+    Raises RuntimeError when processing the repositories reports a non-zero
+    exit code.
+    """
+    shed_context = get_shed_context(ctx, **kwds)
+    collected_args = []
+
+    def _collect(repository):
+        # Callback for for_each_repository; 0 signals success.
+        collected_args.append(repository.install_args(ctx, shed_context))
+        return 0
+
+    if for_each_repository(ctx, _collect, paths, **kwds):
+        raise RuntimeError(PROBLEM_PROCESSING_REPOSITORY_MESSAGE)
+    return collected_args
+
+
+def find_urls_for_xml(root):
+    """Return two lists: explicit package URLs, and help text URLs.
+
+    ``root`` is a parsed XML element. Package URLs are gathered from the
+    install actions of each ``package`` child; help URLs are extracted from
+    the text of each ``help`` child.
+
+    For validating the user-facing URLs it is sensible to mimic
+    a web browser user agent.
+    """
+    urls = []
+    for package_el in root.findall("package"):
+        install_els = package_el.findall("install")
+        assert len(install_els) in (0, 1)
+
+        if not install_els:
+            continue
+
+        package = BasePackage(None, package_el, install_els[0], readme=None)
+        for action in package.get_all_actions():
+            urls.extend(dl.url for dl in action.downloads())
+
+            for subaction in action.actions:
+                if hasattr(subaction, 'packages'):
+                    urls.extend(subaction.packages)
+
+    docs = []
+    for help_el in root.findall("help"):
+        help_text = help_el.text
+        # An empty element such as <help/> has ``text`` None, which the regex
+        # search cannot handle - there is nothing to scan in that case.
+        if not help_text:
+            continue
+        docs.extend(match[0] for match in _find_urls_in_text(help_text))
+
+    return urls, docs
+
+
+def handle_force_create(realized_repository, ctx, shed_context, **kwds):
+    """Return the repository id, creating the repository when requested.
+
+    The repository is only created when it cannot be found and the
+    ``force_repository_creation`` option is truthy; otherwise the lookup
+    result (possibly None) is returned unchanged.
+    """
+    existing_id = realized_repository.find_repository_id(ctx, shed_context)
+    if existing_id is not None or not kwds.get("force_repository_creation"):
+        return existing_id
+    return realized_repository.create(ctx, shed_context)
+
+
+def report_non_existent_repository(realized_repository):
+    """Log that the repository is missing from the Tool Shed and return 2."""
+    message = (
+        "Repository [%s] does not exist in the targeted Tool Shed."
+        % realized_repository.name
+    )
+    error(message)
+    return 2
+
+
+def upload_repository(ctx, realized_repository, **kwds):
+    """Upload a tool directory as a tarball to a tool shed.
+
+    Options read from ``kwds``:
+
+    * ``tar`` - path of an existing tarball to upload instead of building one.
+    * ``tar_only`` - only copy the tarball to the repository's
+      ``shed_upload.tar.gz`` file name; nothing is uploaded.
+    * ``check_diff`` - skip the upload when the repository does not differ
+      from the shed copy.
+    * ``force_repository_creation`` - create the repository if it is missing.
+
+    Returns 0 on success (including "nothing to upload"), the result of
+    ``report_non_existent_repository`` when the repository cannot be found
+    or created, and -1 when the Tool Shed rejects the update.
+    """
+    path = realized_repository.path
+    tar_path = kwds.get("tar")
+    # A tarball built here is a temporary file owned by this function (see
+    # build_tarball) and must be removed again; a user supplied one is not.
+    owns_tarball = not tar_path
+    if owns_tarball:
+        tar_path = build_tarball(path, **kwds)
+    try:
+        if kwds.get("tar_only", False):
+            name = realized_repository.pattern_to_file_name("shed_upload.tar.gz")
+            shutil.copy(tar_path, name)
+            return 0
+        shed_context = get_shed_context(ctx, **kwds)
+        update_kwds = {}
+        _update_commit_message(ctx, realized_repository, update_kwds, **kwds)
+
+        repo_id = handle_force_create(realized_repository, ctx, shed_context, **kwds)
+        # failing to create the repo, give up
+        if repo_id is None:
+            return report_non_existent_repository(realized_repository)
+
+        if kwds.get("check_diff", False):
+            is_diff = diff_repo(ctx, realized_repository, **kwds) != 0
+            if not is_diff:
+                name = realized_repository.name
+                info("Repository [%s] not different, skipping upload." % name)
+                return 0
+
+        # TODO: support updating repo information if it changes in the config file
+        try:
+            shed_context.tsi.repositories.update_repository(
+                str(repo_id), tar_path, **update_kwds
+            )
+        except Exception as e:
+            # The shed answers HTTP 400 when the upload contains no changes;
+            # that is reported as a warning rather than a failure.
+            if isinstance(e, bioblend.ConnectionError) and e.status_code == 400 and \
+                    '"No changes to repository."' in e.body:
+                warn("Repository %s was not updated because there were no changes" % realized_repository.name)
+                return 0
+            message = api_exception_to_message(e)
+            error("Could not update %s" % realized_repository.name)
+            error(message)
+            return -1
+        info("Repository %s updated successfully." % realized_repository.name)
+        return 0
+    finally:
+        if owns_tarball and os.path.exists(tar_path):
+            os.remove(tar_path)
+
+
+def _update_commit_message(ctx, realized_repository, update_kwds, **kwds):
+    """Store the commit message for an upload in ``update_kwds``.
+
+    An explicit ``message`` option wins; otherwise a default message is
+    assembled from "planemo upload" plus the git repository and revision of
+    ``realized_repository`` when those are available.
+    """
+    revision = realized_repository.git_rev(ctx)
+    repository_url = realized_repository.git_repo(ctx)
+    message = kwds.get("message")
+    if message is None:
+        pieces = ["planemo upload"]
+        if repository_url:
+            pieces.append(" for repository %s" % repository_url)
+        if revision:
+            pieces.append(" commit %s" % revision)
+        message = "".join(pieces)
+    update_kwds["commit_message"] = message
+
+
+def diff_repo(ctx, realized_repository, **kwds):
+    """Compare two repositories (local or remote) and check for differences.
+
+    The comparison runs inside a temporary working directory.
+
+    Returns 0 if and only if the repositories are effectively the same
+    given supplied kwds for comparison description.
+    """
+    with temp_directory("tool_shed_diff_") as scratch_dir:
+        outcome = _diff_in(ctx, scratch_dir, realized_repository, **kwds)
+    return outcome
+
+
+def _diff_in(ctx, working, realized_repository, **kwds):
+    """Diff ``realized_repository`` against its Tool Shed copy inside ``working``.
+
+    The copy in the shed selected by ``shed_target`` is downloaded into one
+    sub-directory of ``working``. The other side is a second download from
+    ``shed_target_source`` when that option is set, otherwise an extracted
+    tarball of the local repository path. Unless ``raw`` is set the two
+    trees are first compared with ``diff_and_remove``; ``diff -r`` is then
+    run over both directories. Output goes to the ``output`` file when
+    given, otherwise to stdout.
+
+    Returns 2 when the repository does not exist in the targeted shed,
+    otherwise the first non-zero of the raw and XML diff results (0 when
+    both are zero).
+    """
+    path = realized_repository.path
+    shed_target_source = kwds.get("shed_target_source")
+
+    label_a = "_%s_" % (shed_target_source if shed_target_source else "workingdir")
+    # ``or`` (not a .get default) so an explicit shed_target=None cannot
+    # break the "/" membership test below.
+    shed_target = kwds.get("shed_target") or "B"
+    if "/" in shed_target:
+        # A URL cannot be used as a directory name.
+        shed_target = "custom_shed"
+    label_b = "_%s_" % shed_target
+
+    mine = os.path.join(working, label_a)
+    other = os.path.join(working, label_b)
+
+    shed_context = get_shed_context(ctx, read_only=True, **kwds)
+    # In order to download the tarball, require repository ID...
+    repo_id = realized_repository.find_repository_id(ctx, shed_context)
+    if repo_id is None:
+        error("shed_diff: Repository [%s] does not exist in the targeted Tool Shed."
+              % realized_repository.name)
+        # $ diff README.rst not_a_file 2&>1 /dev/null; echo $?
+        # 2
+        return 2
+    info("Diffing repository [%s]" % realized_repository.name)
+    download_tarball(
+        ctx,
+        shed_context,
+        realized_repository,
+        destination=other,
+        clean=True,
+        destination_is_pattern=False,
+        **kwds
+    )
+    if shed_target_source:
+        new_kwds = kwds.copy()
+        new_kwds["shed_target"] = shed_target_source
+        shed_context = get_shed_context(ctx, read_only=True, **new_kwds)
+        download_tarball(
+            ctx,
+            shed_context,
+            realized_repository,
+            destination=mine,
+            clean=True,
+            destination_is_pattern=False,
+            **new_kwds
+        )
+    else:
+        tar_path = build_tarball(path)
+        try:
+            os.mkdir(mine)
+            shell(['tar', '-xzf', tar_path, '-C', mine])
+        finally:
+            # build_tarball returns a temporary *file*; shutil.rmtree only
+            # removes directories (and failed silently here), so the file
+            # has to be deleted with os.remove.
+            os.remove(tar_path)
+
+    output = kwds.get("output")
+    raw = kwds.get("raw", False)
+    xml_diff = 0
+    if not raw:
+        if output:
+            with open(output, "w") as f:
+                xml_diff = diff_and_remove(working, label_a, label_b, f)
+        else:
+            xml_diff = diff_and_remove(working, label_a, label_b, sys.stdout)
+
+    cmd = ['diff', '-r', label_a, label_b]
+    if output:
+        # Append so the XML diff written above is preserved.
+        with open(output, 'ab') as fh:
+            raw_diff = shell(cmd, cwd=working, stdout=fh)
+    else:
+        raw_diff = shell(cmd, cwd=working)
+    exit_code = raw_diff or xml_diff
+    if not raw:
+        if xml_diff:
+            ctx.vlog("One or more shed XML file(s) different!")
+        if raw_diff:
+            ctx.vlog("One or more non-shed XML file(s) different.")
+        if not xml_diff and not raw_diff:
+            ctx.vlog("No differences.")
+    return exit_code
+
+
+def shed_repo_config(ctx, path, name=None):
+    """Load and expand the ``.shed.yml`` configuration found in ``path``.
+
+    A missing or empty file yields an empty configuration, which is then
+    expanded via ``_expand_raw_config`` like any other.
+    """
+    config_file = os.path.join(path, SHED_CONFIG_NAME)
+    loaded = None
+    if os.path.exists(config_file):
+        with open(config_file, "r") as handle:
+            loaded = yaml.safe_load(handle)
+
+    # yaml may yield None
+    config = {} if loaded is None else loaded
+    _expand_raw_config(ctx, config, path, name=name)
+    return config
+
+
+def tool_shed_client(ctx=None, **kwds):
+    """Return the Tool Shed client of the shed context built from ``kwds``."""
+    shed_context = get_shed_context(ctx, **kwds)
+    return shed_context.tsi
+
+
+def get_shed_context(ctx=None, **kwds):
+    """Build a ShedContext for the shed targeted by ``kwds``.
+
+    With ``read_only`` set no credentials are looked up. Otherwise the API
+    key comes from ``_find_shed_key`` and the e-mail/password from the
+    ``shed_email``/``shed_password`` options, falling back to the shed
+    configuration.
+    """
+    shed_config, configured_username = _shed_config_and_username(ctx, **kwds)
+    url = _shed_config_to_url(shed_config)
+
+    key = email = password = None
+    if not kwds.get("read_only", False):
+        key = _find_shed_key(kwds, shed_config)
+        email = kwds.get("shed_email") or shed_config.get("email")
+        password = kwds.get("shed_password") or shed_config.get("password")
+
+    tsi = tool_shed_instance(url, key, email, password)
+    return ShedContext(tsi, shed_config, configured_username)
+
+
+def tool_shed_url(ctx, **kwds):
+    """Return the URL of the Tool Shed targeted by ``kwds``."""
+    config_and_username = _shed_config_and_username(ctx, **kwds)
+    return _shed_config_to_url(config_and_username[0])
+
+
+def _shed_config_and_username(ctx, **kwds):
+    """Return ``(shed_config, username)`` for the shed named by ``shed_target``.
+
+    The configuration starts from the matching entry under ``sheds`` in the
+    global config of ``ctx`` (if any) and is completed with a ``url`` and a
+    ``label``: known short names map to their public URL/label, anything
+    else is used as the URL itself. The username comes from the shed entry,
+    falling back to the global ``shed_username``.
+    """
+    shed_target = kwds.get("shed_target")
+    # ``ctx`` may be None or carry ``global_config = None``; normalise to a
+    # dict so the lookups below cannot fail on None.
+    global_config = getattr(ctx, "global_config", None) or {}
+    sheds_config = global_config.get("sheds") or {}
+    # Work on a copy: the defaults filled in below must not leak into the
+    # shared global configuration.
+    shed_config = dict(sheds_config.get(shed_target) or {})
+
+    if "url" not in shed_config:
+        if shed_target and shed_target in SHED_SHORT_NAMES:
+            shed_config["url"] = SHED_SHORT_NAMES[shed_target]
+        else:
+            shed_config["url"] = shed_target
+
+    if "label" not in shed_config:
+        if shed_target and shed_target in SHED_LABELS:
+            shed_config["label"] = SHED_LABELS[shed_target]
+        else:
+            shed_config["label"] = "custom tool shed at %s" % shed_target
+
+    default_shed_username = global_config.get("shed_username")
+    username = shed_config.get("username", default_shed_username)
+
+    return shed_config, username
+
+
+def _find_shed_key(kwds, shed_config):
+    """Resolve the Tool Shed API key.
+
+    Precedence: the ``shed_key`` option, then the environment variable named
+    by ``shed_key_from_env`` (a KeyError is raised if it is unset), then the
+    ``key`` entry of the shed configuration.
+    """
+    explicit_key = kwds.get("shed_key")
+    if explicit_key is not None:
+        return explicit_key
+
+    env_var_name = kwds.get("shed_key_from_env")
+    if env_var_name is not None:
+        return os.environ[env_var_name]
+
+    return shed_config.get("key")
+
+
+def find_repository_id(ctx, shed_context, path, **kwds):
+    """Look up the Tool Shed id of the repository described at ``path``.
+
+    A ready-made repository configuration may be passed as ``config``;
+    otherwise it is loaded from ``path`` (using the ``name`` option). The
+    ``name`` option itself is not forwarded to the lookup.
+    """
+    repo_config = kwds.get("config")
+    if repo_config is None:
+        repo_config = shed_repo_config(ctx, path, name=kwds.get("name"))
+    repo_name = repo_config["name"]
+    forwarded = {key: value for key, value in kwds.items() if key != "name"}
+    return _find_repository_id(ctx, shed_context, repo_name, repo_config, **forwarded)
+
+
+def _find_repository_id(ctx, shed_context, name, repo_config, **kwds):
+    """Return the id of repository ``name`` for the resolved owner.
+
+    When no repository matches, None is returned if ``allow_none`` is set;
+    otherwise an Exception is raised.
+    """
+    # TODO: modify to consume shed_context
+    owner = _owner(ctx, repo_config, shed_context, **kwds)
+    match = find_repository(shed_context.tsi, owner, name)
+    if match is not None:
+        return match["id"]
+    if kwds.get("allow_none", False):
+        return None
+    message = "Failed to find repository for owner/name %s/%s"
+    raise Exception(message % (owner, name))
+
+
+def _owner(ctx, repo_config, shed_context=None, **kwds):
+    """Resolve the repository owner.
+
+    Precedence: the ``owner`` option, the ``owner`` of ``repo_config``, then
+    the owner reported by the shed context (built from ``kwds`` when none
+    was passed and a ``shed_target`` is present). Returns None when no
+    source provides one.
+    """
+    configured = kwds.get("owner") or repo_config.get("owner")
+    if configured is not None:
+        return configured
+    if shed_context is None and "shed_target" in kwds:
+        shed_context = get_shed_context(ctx, **kwds)
+    if shed_context is None:
+        return None
+    return shed_context.owner()
+
+
+def _expand_raw_config(ctx, config, path, name=None):
+    """Fill in the derived parts of a raw ``.shed.yml`` configuration in place.
+
+    Ensures ``config["name"]`` is set (from ``name`` or the directory name of
+    ``path``) and that ``config["repositories"]`` exists - built from
+    ``auto_tool_repositories``, extended by a ``suite`` definition, or
+    defaulting to a single repository that includes everything.
+
+    Raises Exception when ``auto_tool_repositories`` is combined with
+    explicit ``repositories`` or with a ``name`` argument.
+    """
+    if "name" not in config:
+        config["name"] = name
+    if config["name"] is None:
+        config["name"] = path_to_repo_name(path)
+
+    repos = config.get("repositories")
+    auto_tool_repos = config.get("auto_tool_repositories", False)
+    suite_config = config.get("suite", False)
+
+    if auto_tool_repos:
+        if repos:
+            raise Exception(AUTO_REPO_CONFLICT_MESSAGE)
+        if name:
+            raise Exception(AUTO_NAME_CONFLICT_MESSAGE)
+        repos = _build_auto_tool_repos(ctx, path, config, auto_tool_repos)
+
+    if suite_config:
+        if repos is None:
+            repos = odict.odict()
+        _build_suite_repo(config, repos, suite_config)
+
+    # If repositories aren't defined, just define a single
+    # one based on calculated name and including everything
+    # by default.
+    if repos is None:
+        repos = {config["name"]: {"include": config.get("include", ["**"])}}
+    config["repositories"] = repos
+
+
+def _build_auto_tool_repos(ctx, path, config, auto_tool_repos):
+    """Build one repository definition per tool found under ``path``.
+
+    Every repository uses the configured include patterns and excludes the
+    configured excludes plus the paths of all *other* tools. The ``name``,
+    ``description`` and ``long_description`` entries are rendered from the
+    matching ``*_template`` values of ``auto_tool_repos`` with the tool's id
+    (lower-cased), name and description as template variables.
+
+    Returns an ordered mapping from repository name to its definition.
+    """
+    default_include = config.get("include", ["**"])
+    tool_source_pairs = list(yield_tool_sources(ctx, path, recursive=True))
+    all_tool_paths = [pair[0] for pair in tool_source_pairs]
+    base_excludes = _shed_config_excludes(config)
+    templated_keys = ("name", "description", "long_description")
+
+    repos = odict.odict()
+    for tool_path, tool_source in tool_source_pairs:
+        template_vars = {
+            "tool_id": tool_source.parse_id().lower(),
+            "tool_name": tool_source.parse_name(),
+            "description": tool_source.parse_description(),
+        }
+        # Every tool but this one is excluded from this tool's repository.
+        sibling_paths = list(all_tool_paths)
+        sibling_paths.remove(tool_path)
+        repo_dict = {
+            "include": default_include,
+            "exclude": base_excludes + sibling_paths,
+        }
+        for key in templated_keys:
+            template = auto_tool_repos.get("%s_template" % key)
+            if template:
+                repo_dict[key] = templates.render(template, **template_vars)
+        repos[repo_dict["name"]] = repo_dict
+    return repos
+
+
+def _build_suite_repo(config, repos, suite_config):
+    """Add a suite repository definition to ``repos``.
+
+    The suite depends on every repository already in ``repos`` (using the
+    repository's own owner or the configured ``owner``) plus any entries
+    listed under ``include_repositories``. Its only file is a generated
+    ``repository_dependencies.xml``.
+
+    Raises Exception when the suite has no ``name``.
+    """
+    name = suite_config.get("name")
+    if not name:
+        raise Exception("suite requires a 'name'.")
+    description = suite_config.get("description", "")
+    long_description = suite_config.get("long_description")
+    default_owner = config["owner"]
+    repo_type = suite_config.get('type', REPO_TYPE_SUITE)
+
+    repo_pairs = []
+    for repo_name, repo_dict in repos.items():
+        repo_pairs.append((repo_dict.get('owner') or default_owner, repo_name))
+    for extra_repo in suite_config.get("include_repositories", {}):
+        repo_pairs.append((extra_repo["owner"], extra_repo["name"]))
+
+    dependencies = RepositoryDependencies(repo_pairs, description)
+
+    suite_repo = {
+        "_files": {
+            REPO_DEPENDENCIES_CONFIG_NAME: str(dependencies)
+        },
+        "include": [],
+        "name": name,
+        "description": description,
+        "type": repo_type,
+    }
+    if long_description:
+        suite_repo["long_description"] = long_description
+    repos[name] = suite_repo
+
+
+def update_repository_for(ctx, tsi, id, repo_config):
+    """Update the metadata of repository ``id`` from ``repo_config``.
+
+    Name, synopsis and type are always sent; long description, remote
+    repository URL, homepage URL and category ids only when they are set.
+    Returns the result of the PUT request.
+    """
+    name = repo_config["name"]
+    description = repo_config.get("description")
+    repo_type = shed_repo_type(repo_config, name)
+    category_ids = find_category_ids(tsi, repo_config.get("categories", []))
+
+    _ensure_shed_description(description)
+
+    payload = {
+        "name": name,
+        "synopsis": description,
+        "type": repo_type,
+    }
+    optional_fields = (
+        ("description", repo_config.get("long_description")),
+        ("remote_repository_url", repo_config.get("remote_repository_url")),
+        ("homepage_url", repo_config.get("homepage_url")),
+        ('category_ids[]', category_ids),
+    )
+    for field, value in optional_fields:
+        if value is not None:
+            payload[field] = value
+    return bioblend.galaxy.client.Client._put(tsi.repositories, id=id, payload=payload)
+
+
+def create_repository_for(ctx, tsi, name, repo_config):
+    """Create repository ``name`` in the Tool Shed from ``repo_config``.
+
+    Returns whatever the Tool Shed client's ``create_repository`` returns.
+    """
+    description = repo_config.get("description")
+    repo_type = shed_repo_type(repo_config, name)
+    category_ids = find_category_ids(tsi, repo_config.get("categories", []))
+
+    _ensure_shed_description(description)
+
+    creation_args = {
+        "name": name,
+        "synopsis": description,
+        "description": repo_config.get("long_description"),
+        "type": repo_type,
+        "remote_repository_url": repo_config.get("remote_repository_url"),
+        "homepage_url": repo_config.get("homepage_url"),
+        "category_ids": category_ids,
+    }
+    return tsi.repositories.create_repository(**creation_args)
+
+
+def download_tarball(ctx, shed_context, realized_repository, **kwds):
+    """Download the repository from the Tool Shed to ``destination``.
+
+    ``destination`` (default ``shed_download.tar.gz``) is passed through the
+    repository's file-name pattern unless ``destination_is_pattern`` is
+    false. A destination not ending in ``gz`` is treated as a directory; in
+    that case ``clean`` removes the ``.hg_archival.txt`` file from it.
+
+    Raises Exception when the repository id cannot be found.
+    """
+    repo_id = realized_repository.find_repository_id(ctx, shed_context)
+    if repo_id is None:
+        message = "Unable to find repository id, cannot download."
+        error(message)
+        raise Exception(message)
+
+    destination = kwds.get('destination', 'shed_download.tar.gz')
+    if kwds.get("destination_is_pattern", True):
+        destination = realized_repository.pattern_to_file_name(destination)
+
+    to_directory = not destination.endswith("gz")
+    download_tar(shed_context.tsi, repo_id, destination, to_directory=to_directory)
+
+    if not (to_directory and kwds.get("clean", False)):
+        return
+    archival_file = os.path.join(destination, ".hg_archival.txt")
+    if os.path.exists(archival_file):
+        os.remove(archival_file)
+
+
+def build_tarball(realized_path, **kwds):
+    """Build a tool-shed tar ball for the specified path, caller is
+    responsible for deleting this file.
+
+    Files are added in sorted order under their path relative to
+    ``realized_path``; symbolic links are dereferenced. Returns the path of
+    the temporary gzip-compressed tar file.
+    """
+    # Sort the full list of files up front for a stable archive order.
+    members = sorted(
+        os.path.join(dirpath, filename)
+        for dirpath, _dirnames, filenames in os.walk(realized_path)
+        for filename in filenames
+    )
+
+    fd, temp_path = mkstemp()
+    try:
+        # closing() always calls tar.close(), also when adding a file fails.
+        with contextlib.closing(tarfile.open(temp_path, "w:gz", dereference=True)) as tar:
+            for member in members:
+                arcname = os.path.relpath(member, realized_path)
+                tar.add(os.path.join(realized_path, arcname), arcname=arcname)
+    finally:
+        os.close(fd)
+    return temp_path
+
+
+def find_raw_repositories(ctx, paths, **kwds):
+    """Return a list of "raw" repository objects for each repo on paths."""
+    return [
+        raw_repo
+        for path in paths
+        for raw_repo in _find_raw_repositories(ctx, path, **kwds)
+    ]
+
+
+def for_each_repository(ctx, function, paths, **kwds):
+    """Call ``function`` on every realized repository found under ``paths``.
+
+    Returns the coalesced return codes of all calls, or 254 as soon as
+    realizing the repositories of a path fails.
+    """
+    collected_codes = []
+    for path in paths:
+        with _path_on_disk(ctx, path) as local_path:
+            repositories = _realize_effective_repositories(
+                ctx, local_path, **kwds
+            )
+            try:
+                for repository in repositories:
+                    collected_codes.append(function(repository))
+            except RealizationException:
+                error(REALIZAION_PROBLEMS_MESSAGE)
+                return 254
+
+    return coalesce_return_codes(collected_codes)
+
+
+def path_to_repo_name(path):
+    """Return the last component of the absolute form of ``path``."""
+    absolute_path = os.path.abspath(path)
+    return os.path.split(absolute_path)[1]
+
+
+def shed_repo_type(config, name):
+    """Return the repository type for ``name``.
+
+    An explicit ``type`` in ``config`` wins; otherwise names starting with
+    ``package_`` are tool dependency definitions, names starting with
+    ``suite_`` are suite definitions and everything else is unrestricted.
+    """
+    explicit_type = config.get("type")
+    if explicit_type is not None:
+        return explicit_type
+    if name.startswith("package_"):
+        return REPO_TYPE_TOOL_DEP
+    if name.startswith("suite_"):
+        return REPO_TYPE_SUITE
+    return REPO_TYPE_UNRESTRICTED
+
+
+def _shed_config_to_url(shed_config):
+    """Return the validated URL stored in ``shed_config``.
+
+    Raises ValueError when the URL is missing or does not start with
+    ``http``.
+    """
+    url = shed_config["url"]
+    # The URL is None when no shed target was supplied; report that like any
+    # other invalid value instead of failing on ``None.startswith``.
+    if not url or not url.startswith("http"):
+        message = (
+            "Invalid shed url specified [{0}]. Please specify a valid "
+            "HTTP address or one of {1}"
+        ).format(url, list(SHED_SHORT_NAMES.keys()))
+        raise ValueError(message)
+    return url
+
+
+def _realize_effective_repositories(ctx, path, **kwds):
+    """ Expands folders in a source code repository into tool shed
+    repositories.
+
+    Each folder may have nested repositories and each folder may correspond
+    to many repositories (for instance if a folder has n tools in the source
+    code repository but are published to the tool shed as one repository per
+    tool).
+
+    This is a generator: realized repositories are yielded one by one while
+    a temporary base directory is kept open. Problems are reported through
+    ``_handle_realization_error`` and, once everything else has been
+    yielded, a RealizationException is raised if any occurred.
+    """
+    raw_repo_objects = _find_raw_repositories(ctx, path, **kwds)
+    failed = False
+    with temp_directory() as base_dir:
+        for raw_repo_object in raw_repo_objects:
+            if isinstance(raw_repo_object, Exception):
+                # A discovery failure goes through the same error handling
+                # as a failed realization below.
+                candidates = [raw_repo_object]
+            else:
+                candidates = raw_repo_object.realizations(
+                    ctx,
+                    base_dir,
+                    **kwds
+                )
+            for candidate in candidates:
+                if isinstance(candidate, Exception):
+                    _handle_realization_error(candidate, **kwds)
+                    failed = True
+                    continue
+                yield candidate
+    if failed:
+        raise RealizationException()
+
+
+def _create_shed_config(ctx, path, **kwds):
+    """Write a new ``.shed.yml`` to ``path`` from command-line options.
+
+    The name defaults to the directory name of ``path`` and the owner to the
+    global ``shed_username``; both are validated first. Options that are
+    None are left out of the file.
+
+    Returns 1 (after logging the problem) when the name or owner is invalid
+    and None on success.
+    """
+    name = kwds.get("name") or path_to_repo_name(os.path.dirname(path))
+    problem = validate_repo_name(name)
+    if problem:
+        error(problem)
+        return 1
+
+    owner = kwds.get("owner")
+    if owner is None:
+        owner = ctx.global_config.get("shed_username")
+    problem = validate_repo_owner(owner)
+    if problem:
+        error(problem)
+        return 1
+
+    candidate_entries = (
+        ("name", name),
+        ("owner", owner),
+        ("description", kwds.get("description") or name),
+        ("long_description", kwds.get("long_description")),
+        ("remote_repository_url", kwds.get("remote_repository_url")),
+        ("homepage_url", kwds.get("homepage_url")),
+        ("categories", kwds.get("category", [])),
+    )
+    # Remove empty entries...
+    config = {key: value for key, value in candidate_entries if value is not None}
+
+    with open(path, "w") as f:
+        yaml.safe_dump(config, f)
+
+
+def _parse_repos_from_workflow(path):
+    """Return the set of ``(owner, name)`` shed repositories used by a workflow.
+
+    ``path`` is a workflow JSON file. Only steps of type ``tool`` whose
+    ``tool_id`` contains ``/repos/`` are considered; owner and name are the
+    two path components following ``/repos/``.
+    """
+    with open(path, "r") as handle:
+        workflow = json.load(handle)
+
+    shed_tool_ids = {
+        step["tool_id"]
+        for step in workflow["steps"].values()
+        if step["type"] == "tool" and "/repos/" in step["tool_id"]
+    }
+
+    repo_pairs = set()
+    for tool_id in shed_tool_ids:
+        repo_parts = tool_id.split("/repos/", 1)[1].split("/")
+        repo_pairs.add((repo_parts[0], repo_parts[1]))
+    return repo_pairs
+
+
+@contextlib.contextmanager
+def _path_on_disk(ctx, path):
+    """Context manager yielding a local directory for ``path``.
+
+    Plain paths are yielded unchanged. Paths starting with ``git:`` (used as
+    is) or ``git+`` (prefix stripped) are cloned into a temporary directory,
+    which is yielded instead.
+    """
+    if path.startswith("git:"):
+        clone_source = path
+    elif path.startswith("git+"):
+        clone_source = path[len("git+"):]
+    else:
+        yield path
+        return
+
+    with temp_directory() as checkout_dir:
+        git.clone(ctx, clone_source, checkout_dir)
+        yield checkout_dir
+
+
+def _find_raw_repositories(ctx, path, **kwds):
+    """Locate the repository directories under ``path`` and describe them.
+
+    Directories containing a repository metadata file are used; when none
+    is found, ``path`` itself is. With exactly one such directory its
+    configuration is parsed up front so a conflicting ``name`` option can be
+    detected; a parsing failure is reported and returned as a one-element
+    list holding the error.
+
+    Raises Exception when ``name`` is combined with several repository
+    directories or conflicts with the configured name.
+    """
+    name = kwds.get("name")
+    recursive = kwds.get("recursive", False)
+
+    shed_file_dirs = [
+        directory
+        for pattern in REPO_METADATA_FILES
+        for directory in find_matching_directories(path, pattern, recursive=recursive)
+    ]
+
+    config_name = None
+    if len(shed_file_dirs) == 1:
+        only_dir = shed_file_dirs[0]
+        try:
+            config = shed_repo_config(ctx, only_dir, name=name)
+        except Exception as e:
+            failure = RuntimeError(PARSING_PROBLEM % (only_dir, e))
+            _handle_realization_error(failure, **kwds)
+            return [failure]
+        else:
+            config_name = config.get("name")
+
+    if name is not None:
+        if len(shed_file_dirs) > 1:
+            raise Exception(NAME_INVALID_MESSAGE)
+        if config_name is not None and config_name != name:
+            raise Exception(CONFLICTING_NAMES_MESSAGE)
+
+    raw_dirs = shed_file_dirs or [path]
+    return _build_raw_repo_objects(ctx, raw_dirs, **dict(kwds, name=name))
+
+
+def _build_raw_repo_objects(ctx, raw_dirs, **kwds):
+    """Build abstract descriptions of directories to expand into repositories.
+
+    ``raw_dirs`` are directories containing .shed.yml files or directories
+    given directly on the command line. The result holds one entry per
+    directory, in order: a ``RawRepositoryDirectory``, or the
+    ``RuntimeError`` describing a parsing failure when
+    ``_handle_realization_error`` did not re-raise it (fail_fast disabled).
+    """
+    name = kwds.get("name")
+    multiple = len(raw_dirs) > 1
+
+    results = []
+    for raw_dir in raw_dirs:
+        try:
+            config = shed_repo_config(ctx, raw_dir, name=name)
+        except Exception as e:
+            failure = RuntimeError(PARSING_PROBLEM % (raw_dir, e))
+            _handle_realization_error(failure, **kwds)
+            results.append(failure)
+        else:
+            results.append(RawRepositoryDirectory(raw_dir, config, multiple))
+    return results
+
+
+class RepositoryDependencies(object):
+    """Abstraction for shed repository_dependencies.xml files.
+
+    ``repo_pairs`` is an iterable of ``(owner, name)`` tuples and
+    ``description`` an optional free-text description (``None`` is stored
+    as the empty string).
+    """
+
+    def __init__(self, repo_pairs, description=None):
+        self.repo_pairs = repo_pairs
+        self.description = description or ""
+
+    def __str__(self):
+        """Render the dependencies as a ``<repositories>`` XML document.
+
+        Attribute values are XML-escaped so that descriptions, owners or
+        names containing ``&``, ``<``, ``>`` or ``"`` still yield
+        well-formed XML. For values without such characters the text is
+        unchanged from what was produced before escaping was added.
+        """
+        # Imported locally so the method carries its own dependency.
+        from xml.sax.saxutils import escape
+
+        def attr(value):
+            # The templates below use double-quoted attributes, so the
+            # quote character must be escaped in addition to &, < and >.
+            return escape(str(value), {'"': "&quot;"})
+
+        pieces = ['<repositories description="%s">' % attr(self.description)]
+        line_template = '  <repository owner="%s" name="%s" />\n'
+        pieces.extend(
+            line_template % (attr(owner), attr(name))
+            for (owner, name) in self.repo_pairs
+        )
+        pieces.append("</repositories>")
+        return "".join(pieces)
+
+    def write_to_path(self, path):
+        """Write the XML rendering of this object to ``path``."""
+        with open(path, "w") as f:
+            f.write(str(self))
+
+
+class RawRepositoryDirectory(object):
+    """A source directory together with the shed configuration describing it.
+
+    One raw directory can expand into several repositories, one per key of
+    the configuration's ``repositories`` mapping; ``realizations`` builds
+    each of them in a directory of its own.
+    """
+
+    def __init__(self, path, config, multiple):
+        """Record the source ``path`` and its parsed shed ``config``.
+
+        ``multiple`` flags that the current operation spans several
+        repositories.
+        """
+        self.path = path
+        self.config = config
+        self.name = config["name"]
+        self.type = shed_repo_type(config, self.name)
+        self.multiple = multiple  # operation over many repos?
+
+    def _hash(self, name):
+        """Return the hex MD5 digest of ``name`` (used as a directory name)."""
+        return hashlib.md5(name.encode('utf-8')).hexdigest()
+
+    def realizations(self, ctx, parent_directory, **kwds):
+        """Yield the outcome of realizing each configured repository.
+
+        Every repository is built in
+        ``parent_directory/<md5 of name>/<name>``, which is created when it
+        does not exist. Each yielded item is whatever ``_realize_to``
+        returns: a ``RealizedRepositry``, or a ``RuntimeError`` describing
+        why the repository could not be built.
+        """
+        names = self._repo_names()
+        multiple = self.multiple or len(names) > 1
+        # The repository name is passed positionally below, so a "name"
+        # option must not be forwarded a second time as a keyword.
+        r_kwds = {key: value for key, value in kwds.items() if key != "name"}
+
+        for name in names:
+            directory = os.path.join(parent_directory, self._hash(name), name)
+            if not os.path.exists(directory):
+                os.makedirs(directory)
+            yield self._realize_to(ctx, directory, name, multiple, **r_kwds)
+
+    def _realize_to(self, ctx, directory, name, multiple, **kwds):
+        """Populate ``directory`` with the files of repository ``name``.
+
+        Returns a ``RealizedRepositry`` on success. If some ``include``
+        entries matched nothing and ``fail_on_missing`` (default ``True``)
+        is set, a ``RuntimeError`` is *returned* (not raised) instead and
+        nothing is written.
+        """
+        fail_on_missing = kwds.get("fail_on_missing", True)
+        config = self._realize_config(name)
+        config["owner"] = _owner(ctx, config, **kwds)
+
+        # ``absolute_src`` is normalised with os.path.abspath, so normalise
+        # the glob results the same way; otherwise excludes would never
+        # match when ``self.path`` is relative or contains ``..``.
+        ignored_sources = set()
+        for exclude in _shed_config_excludes(config):
+            ignored_sources.update(
+                os.path.abspath(ignored) for ignored in _glob(self.path, exclude)
+            )
+
+        realized_files = self._realized_files(name)
+        missing = realized_files.include_failures
+        if missing and fail_on_missing:
+            msg = "Failed to include files for %s" % missing
+            return RuntimeError(msg)
+
+        for realized_file in realized_files.files:
+            if self._implicit_ignores(realized_file.dest):
+                continue
+            if realized_file.absolute_src in ignored_sources:
+                continue
+            realized_file.realize_to(directory)
+
+        # Extra files whose contents are carried in the config itself.
+        for (file_name, contents) in config.get("_files", {}).items():
+            with open(os.path.join(directory, file_name), "w") as f:
+                f.write(contents)
+
+        return RealizedRepositry(
+            realized_path=directory,
+            real_path=self.path,
+            config=config,
+            multiple=multiple,
+            missing=missing,
+        )
+
+    def _repo_names(self):
+        """Return the keys of the config's ``repositories`` mapping."""
+        return self.config.get("repositories").keys()
+
+    def _realized_files(self, name):
+        """Resolve the ``include`` entries of repository ``name``.
+
+        Returns a ``RealizedFiles`` holding the matched ``RealizedFile``
+        objects and the include entries that matched nothing.
+        """
+        config = self._realize_config(name)
+        realized_files = []
+        missing = []
+        for include_info in config["include"]:
+            if not isinstance(include_info, dict):
+                include_info = {"source": include_info}
+            source_list = include_info.get("source")
+            if not isinstance(source_list, list):
+                source_list = [source_list]
+            # Preprocess any entries with a source list into copies
+            # with a single source entry:
+            for source in source_list:
+                include = include_info.copy()
+                include["source"] = source
+                included = RealizedFile.realized_files_for(self.path, include)
+                if included:
+                    realized_files.extend(included)
+                else:
+                    missing.append(include)
+        return RealizedFiles(realized_files, missing)
+
+    def _realize_config(self, name):
+        """Return a copy of the config specialised for repository ``name``.
+
+        The entry for ``name`` in ``repositories`` is merged over the
+        top-level settings and the ``repositories`` key is removed;
+        ``self.config`` is left untouched.
+        """
+        config = copy.deepcopy(self.config)
+        config["name"] = name
+        repo_config = config.get("repositories", {}).get(name, {})
+        config.update(repo_config)
+        config.pop("repositories", None)
+        return config
+
+    def _implicit_ignores(self, relative_path):
+        """Tell whether ``relative_path`` is skipped without being excluded.
+
+        Returns ``True`` for anything but the dependency file in tool
+        dependency and suite repositories, for paths starting with
+        ``.git`` or ``.hg``, for base names starting with ``.svn``, and for
+        base names matching a ``PLANEMO_FILES`` pattern.
+        """
+        # Filter out "unwanted files" :) like READMEs for special
+        # repository types.
+        if self.type == REPO_TYPE_TOOL_DEP:
+            if relative_path != TOOL_DEPENDENCIES_CONFIG_NAME:
+                return True
+
+        if self.type == REPO_TYPE_SUITE:
+            if relative_path != REPO_DEPENDENCIES_CONFIG_NAME:
+                return True
+
+        if relative_path.startswith((".git", ".hg")):
+            return True
+
+        name = os.path.basename(relative_path)
+        if name.startswith(".svn"):
+            return True
+
+        return any(fnmatch.fnmatch(name, pattern) for pattern in PLANEMO_FILES)
+
+
+class RealizedFiles(NamedTuple):
+    """Outcome of resolving a repository's ``include`` entries."""
+    # RealizedFile objects for the include sources that matched files.
+    files: list
+    # Include entries (dicts) for which no file was found.
+    include_failures: list
+
+
+class RealizedFile(object):
+    """Mapping of one source file (or directory) to its place in a tar-ball."""
+
+    def __init__(self, src_root, src, dest):
+        """Create object mapping from file system to tar-ball.
+
+        * src_root - source root (i.e. folder with .shed.yml file)
+        * src - location of source file, relative to src_root
+        * dest - destination path, relative to root of tar-ball.
+
+        Raises ``ValueError`` when ``dest`` is ``"."``.
+        """
+        if dest == ".":
+            raise ValueError("Destination for %r should be a full filename!" % src)
+        self.src_root = src_root
+        self.src = src
+        self.dest = dest
+
+    @property
+    def absolute_src(self):
+        """Absolute, normalised path of the source on disk."""
+        joined = os.path.join(self.src_root, self.src)
+        return os.path.abspath(joined)
+
+    def realize_to(self, directory):
+        """Materialise this entry below ``directory``.
+
+        Nothing happens when the target already exists. Otherwise missing
+        parent directories are created, then a source directory becomes a
+        new (empty) directory and any other source becomes a symlink to
+        it. A source that is itself a symlink is resolved first.
+        """
+        assert self.dest != "."
+        source = self.absolute_src
+        if os.path.islink(source):
+            source = os.path.realpath(source)
+
+        target = os.path.join(directory, self.dest)
+        # info("realize_to %r --> %r" % (source, target))
+        if os.path.exists(target):
+            return
+
+        parent = os.path.dirname(target)
+        if not os.path.exists(parent):
+            os.makedirs(parent)
+        if os.path.isdir(source):
+            os.makedirs(target)
+        else:
+            os.symlink(source, target)
+
+    @staticmethod
+    def realized_files_for(path, include_info):
+        """Expand one include entry into a list of ``RealizedFile`` objects.
+
+        ``include_info`` is either a source pattern or a dict with
+        ``source`` and optional ``destination`` / ``strip_components``
+        keys. A destination ending in ``/`` (default ``./``) is a directory
+        that matched files are placed under, after dropping
+        ``strip_components`` leading path components; any other
+        destination is the exact file name to use.
+
+        Raises ``ValueError`` when a file-name destination is combined with
+        a wildcard or directory source, or with ``strip_components``.
+        """
+        if not isinstance(include_info, dict):
+            include_info = {"source": include_info}
+        source = include_info.get("source")
+        abs_source = os.path.join(path, source)
+        strip_components = include_info.get("strip_components", 0)
+        destination = include_info.get("destination")
+        if destination is None:
+            destination = "./"
+        dest_is_dir = destination.endswith("/")
+
+        if not dest_is_dir:
+            # Check if source using wildcards (directory gets implicit wildcard)
+            # Should we use a regular expression to catch [A-Z] style patterns?
+            uses_wildcard = "*" in source or "?" in source
+            if uses_wildcard or os.path.isdir(abs_source):
+                raise ValueError("destination must be a directory (with trailing slash) if source is a folder or uses wildcards")
+
+        results = []
+        for matched in _glob(path, source):
+            src = os.path.relpath(matched, path)
+            if dest_is_dir:
+                if not strip_components:
+                    tail = src
+                elif "/../" in matched:
+                    # Can't work from src=os.path.relpath(matched, path) as lost any '..'
+                    assert matched.startswith(path + "/")
+                    tail = "/".join(matched[len(path) + 1:].split("/")[strip_components:])
+                else:
+                    tail = "/".join(src.split("/")[strip_components:])
+                # Now apply the specified output directory:
+                dest = os.path.join(destination, tail)
+            else:
+                if strip_components:
+                    raise ValueError("strip_components should not be used if destination is a filename")
+                # Given a filename, just use it!
+                dest = destination
+            results.append(RealizedFile(path, src, os.path.normpath(dest)))
+        return results
+
+    def __str__(self):
+        template = "RealizedFile[src={},dest={},src_root={}]"
+        return template.format(self.src, self.dest, self.src_root)
+
+
+class RealizedRepositry(object):
+    """A repository realized into its own directory, plus its shed config.
+
+    ``path`` is the realized directory, ``real_path`` the source directory
+    it was built from, ``missing`` the include entries that matched no
+    files and ``multiple`` whether the operation covers several
+    repositories.
+    """
+
+    def __init__(self, realized_path, real_path, config, multiple, missing):
+        self.config = config
+        self.name = config["name"]
+        self.path = realized_path
+        self.real_path = real_path
+        self.multiple = multiple
+        self.missing = missing
+
+    @property
+    def owner(self):
+        """Owner recorded in the config."""
+        return self.config["owner"]
+
+    @property
+    def repository_type(self):
+        """Repository type as computed by ``shed_repo_type``."""
+        return shed_repo_type(self.config, self.name)
+
+    @property
+    def is_package(self):
+        """Whether the repository type is ``REPO_TYPE_TOOL_DEP``."""
+        return REPO_TYPE_TOOL_DEP == self.repository_type
+
+    @property
+    def is_suite(self):
+        """Whether the repository type is ``REPO_TYPE_SUITE``."""
+        return REPO_TYPE_SUITE == self.repository_type
+
+    @property
+    def repo_dependencies_path(self):
+        """Path of the repository dependencies file in the realized directory."""
+        return os.path.join(self.path, REPO_DEPENDENCIES_CONFIG_NAME)
+
+    @property
+    def tool_dependencies_path(self):
+        """Path of the tool dependencies file in the realized directory."""
+        return os.path.join(self.path, TOOL_DEPENDENCIES_CONFIG_NAME)
+
+    def git_rev(self, ctx):
+        """Return ``git.rev_if_git`` for the source directory."""
+        return git.rev_if_git(ctx, self.real_path)
+
+    def git_repo(self, ctx):
+        """Return the configured ``remote_repository_url`` (``None`` if unset)."""
+        return self.config.get("remote_repository_url")
+
+    def pattern_to_file_name(self, pattern):
+        """Derive a per-repository file name from ``pattern``.
+
+        With a single repository the pattern is returned as-is. Otherwise
+        ``_<name>`` (dashes turned into underscores) is inserted before the
+        first ``.`` of the pattern, or appended if it has none.
+        """
+        if not self.multiple:
+            return pattern
+
+        suffix = "_%s" % self.config["name"].replace("-", "_")
+        stem, dot, extension = pattern.partition(".")
+        if not dot:
+            return pattern + suffix
+        return stem + suffix + "." + extension
+
+    def find_repository_id(self, ctx, shed_context):
+        """Look up this repository's id, or ``None`` if the lookup fails.
+
+        Lookup errors are reported with ``error()`` rather than raised.
+        """
+        try:
+            return _find_repository_id(
+                ctx,
+                shed_context,
+                name=self.name,
+                repo_config=self.config,
+                allow_none=True,
+            )
+        except Exception as e:
+            message = api_exception_to_message(e)
+            error("Could not update %s" % self.name)
+            error(message)
+            return None
+
+    def create(self, ctx, shed_context):
+        """Wrapper for creating the endpoint if it doesn't exist
+
+        Returns the id of the created repository, or ``None`` when the
+        request failed (the failure is reported with ``error()``).
+        """
+        context_owner = shed_context.owner()
+        config_owner = self.config.get("owner")
+        owners_conflict = (
+            context_owner and config_owner and context_owner != config_owner
+        )
+        if owners_conflict:
+            # This is broken because context_owner is incorrect if using an API key.
+            # message = INCORRECT_OWNER_MESSAGE % (config_owner, context_owner)
+            # raise Exception(message)
+            pass
+
+        def create_and_get_id():
+            created = create_repository_for(
+                ctx,
+                shed_context.tsi,
+                self.name,
+                self.config,
+            )
+            return created['id']
+
+        return self._with_ts_exception_handling(create_and_get_id)
+
+    def update(self, ctx, shed_context, id):
+        """Wrapper for update the repository metadata.
+
+        Returns the result of ``update_repository_for``, or ``None`` when
+        the request failed (the failure is reported with ``error()``).
+        """
+
+        def push_metadata():
+            return update_repository_for(
+                ctx,
+                shed_context.tsi,
+                id,
+                self.config,
+            )
+
+        return self._with_ts_exception_handling(push_metadata)
+
+    def _with_ts_exception_handling(self, f):
+        """Call ``f()``; on any exception report it and return ``None``.
+
+        The exception's ``read()`` payload is first tried as JSON with an
+        ``err_msg`` entry; failing that, the exception text is reported.
+        """
+        try:
+            return f()
+        except Exception as e:
+            # TODO: galaxyproject/bioblend#126
+            try:
+                payload = json.loads(e.read())
+                error(payload['err_msg'])
+            except Exception:
+                error(unicodify(e))
+            return None
+
+    def latest_installable_revision(self, ctx, shed_context):
+        """Return the latest installable revision of this repository."""
+        repo_id = self.find_repository_id(ctx, shed_context)
+        return latest_installable_revision(shed_context.tsi, repo_id)
+
+    def install_args(self, ctx, shed_context):
+        """ Arguments for bioblend's install_repository_revision
+        to install this repository against supplied tsi.
+        """
+        return {
+            "tool_shed_url": shed_context.tsi.base_url,
+            "name": self.name,
+            "owner": self.owner,
+            "changeset_revision": self.latest_installable_revision(
+                ctx, shed_context
+            ),
+        }
+
+
+def _glob(path, pattern):
+    """Glob ``pattern`` relative to ``path``.
+
+    When the joined path is an existing directory, ``/**`` is appended
+    before globbing.
+    """
+    joined = os.path.join(path, pattern)
+    search = "%s/**" % joined if os.path.isdir(joined) else joined
+    return glob.glob(search)
+
+
+def _shed_config_excludes(config):
+    """Return the combined ``ignore`` and ``exclude`` patterns of ``config``.
+
+    Patterns from ``ignore`` come first, followed by those from
+    ``exclude``. A key that is missing or set to ``None`` (e.g. an empty
+    YAML entry) contributes nothing, and a bare string is treated as a
+    single pattern. A new list is always returned.
+    """
+    patterns = []
+    for key in ('ignore', 'exclude'):
+        value = config.get(key)
+        if value is None:
+            continue
+        if isinstance(value, str):
+            # Avoid iterating a lone pattern character by character.
+            value = [value]
+        patterns.extend(value)
+    return patterns
+
+
+def _handle_realization_error(exception, **kwds):
+    """Raise ``exception`` if ``fail_fast`` is set, else report it with ``error()``."""
+    if kwds.get("fail_fast", False):
+        raise exception
+    error(unicodify(exception))
+
+
+def _ensure_shed_description(description):
+    """Raise ``ValueError`` when ``description`` is ``None``."""
+    # description is required, as is name.
+    if description is not None:
+        return
+    raise ValueError(
+        "description required for automatic creation or update of "
+        "shed metadata."
+    )
+
+
+def validate_repo_name(name):
+    """Return an error message if ``name`` is an invalid repository name.
+
+    Returns ``None`` for a valid name. When several checks fail, the
+    message of the last failing check (length < 2, length > 80, pattern)
+    is the one returned.
+    """
+    problems = []
+    if len(name) < 2:
+        problems.append(
+            "Repository names must be at least 2 characters in length."
+        )
+    if len(name) > 80:
+        problems.append(
+            "Repository names cannot be more than 80 characters in length."
+        )
+    if not VALID_REPOSITORYNAME_RE.match(name):
+        problems.append(
+            "Repository names must contain only lower-case letters, "
+            "numbers and underscore."
+        )
+
+    if not problems:
+        return None
+    return "Repository name [%s] invalid. %s" % (name, problems[-1])
+
+
+def validate_repo_owner(owner):
+    """Return an error message if ``owner`` is an invalid owner name.
+
+    Returns ``None`` for a valid owner. When several checks fail, the
+    message of the last failing check (length < 3, length > 255, pattern)
+    is the one returned.
+    """
+    problems = []
+    if len(owner) < 3:
+        problems.append("Owner must be at least 3 characters in length")
+    if len(owner) > 255:
+        problems.append(
+            "Owner cannot be more than 255 characters in length"
+        )
+    if VALID_PUBLICNAME_RE.match(owner) is None:
+        problems.append(
+            "Owner must contain only lower-case letters, numbers, dots, underscores, and '-'"
+        )
+
+    if not problems:
+        return None
+    return "Owner [%s] invalid. %s" % (owner, problems[-1])
+
+
+class RealizationException(Exception):
+    """Signals a problem while realizing the effective repositories for a
+    shed command.
+
+    Precondition: the user has already been informed of the problem with
+    error() by the time this exception is raised.
+    """
+
+
+# Public names of this module, i.e. what ``from ... import *`` exposes.
+__all__ = (
+    'api_exception_to_message',
+    'CURRENT_CATEGORIES',
+    'diff_repo',
+    'download_tarball',
+    'find_raw_repositories',
+    'for_each_repository',
+    'get_shed_context',
+    'path_to_repo_name',
+    'REPO_TYPE_SUITE',
+    'REPO_TYPE_TOOL_DEP',
+    'REPO_TYPE_UNRESTRICTED',
+    'shed_init',
+    'tool_shed_client',  # Deprecated...
+    'tool_shed_url',
+)