comparison env/lib/python3.9/site-packages/planemo/shed/__init__.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Abstractions for shed related interactions used by the rest of planemo."""
2 import contextlib
3 import copy
4 import fnmatch
5 import hashlib
6 import json
7 import os
8 import re
9 import shutil
10 import sys
11 import tarfile
12 from tempfile import (
13 mkstemp,
14 )
15 from typing import NamedTuple
16
17 import bioblend
18 import six
19 import yaml
20 from galaxy.util import (
21 odict,
22 unicodify,
23 )
24
25 from planemo import git
26 from planemo import glob
27 from planemo import templates
28 from planemo.bioblend import toolshed
29 from planemo.io import (
30 can_write_to_path,
31 coalesce_return_codes,
32 error,
33 find_matching_directories,
34 info,
35 shell,
36 temp_directory,
37 warn,
38 )
39 from planemo.shed2tap.base import BasePackage
40 from planemo.tools import yield_tool_sources
41 from .diff import diff_and_remove
42 from .interface import (
43 api_exception_to_message,
44 download_tar,
45 find_category_ids,
46 find_repository,
47 latest_installable_revision,
48 tool_shed_instance,
49 username,
50 )
51
# Names of the metadata files planemo looks for inside a repository directory.
SHED_CONFIG_NAME = '.shed.yml'
DOCKSTORE_REGISTRY_CONF = ".dockstore.yml"
REPO_METADATA_FILES = (SHED_CONFIG_NAME, DOCKSTORE_REGISTRY_CONF)
REPO_DEPENDENCIES_CONFIG_NAME = "repository_dependencies.xml"
TOOL_DEPENDENCIES_CONFIG_NAME = "tool_dependencies.xml"

# User-facing error messages raised or logged by the functions in this module.
NO_REPOSITORIES_MESSAGE = ("Could not find any .shed.yml files or a --name to "
                           "describe the target repository.")
NAME_INVALID_MESSAGE = ("Cannot use --name argument when multiple directories "
                        "in target contain .shed.yml files.")
NAME_REQUIRED_MESSAGE = ("No repository name discovered but one is required.")
CONFLICTING_NAMES_MESSAGE = ("The supplied name argument --name conflicts "
                             "with value discovered in .shed.yml.")
# Formatted with (directory, exception message).
PARSING_PROBLEM = ("Problem parsing file .shed.yml in directory %s, skipping "
                   "repository. Message: [%s].")
AUTO_REPO_CONFLICT_MESSAGE = ("Cannot specify both auto_tool_repositories and "
                              "repositories in .shed.yml at this time.")
# The original text read "auto_tool_repositories and in .shed.yml and --name";
# the stray "and" has been removed so the sentence parses.
AUTO_NAME_CONFLICT_MESSAGE = ("Cannot specify both auto_tool_repositories "
                              "in .shed.yml and --name on the command-line.")
# NOTE: the identifier is misspelled ("REALIZAION") but is kept as-is because
# other code refers to it by this name.
REALIZAION_PROBLEMS_MESSAGE = ("Problem encountered executing action for one or more "
                               "repositories.")
# Formatted with (configured owner, API user).
INCORRECT_OWNER_MESSAGE = ("Attempting to create a repository with configured "
                           "owner [%s] that does not match API user [%s].")
PROBLEM_PROCESSING_REPOSITORY_MESSAGE = "Problem processing repositories, exiting."
76
# Planemo generated or consumed files that do not need to be uploaded to the
# tool shed. Entries are glob-style patterns.
PLANEMO_FILES = [
    "shed_upload*.tar.gz",
    "shed_download*.tar.gz",
    "tool_test_output.*",
    ".travis",
    ".travis.yml",
    ".shed.yml",
    "*~",  # presumably editor backup files
    "#*#",  # presumably editor autosave files
]
# Short shed target names mapped to the URL they stand for.
SHED_SHORT_NAMES = {
    "toolshed": "https://toolshed.g2.bx.psu.edu/",
    "testtoolshed": "https://testtoolshed.g2.bx.psu.edu/",
    "local": "http://localhost:9009/",
}
# Human-readable labels for the same short shed target names.
SHED_LABELS = {
    "toolshed": "main Tool Shed",
    "testtoolshed": "test Tool Shed",
    "local": "local Tool Shed",
}
# Repository type identifiers; see shed_repo_type() for how a default is
# chosen from the repository name.
REPO_TYPE_UNRESTRICTED = "unrestricted"
REPO_TYPE_TOOL_DEP = "tool_dependency_definition"
REPO_TYPE_SUITE = "repository_suite_definition"

# TODO: sync this with tool shed impl someday
# Repository names: lowercase letters, digits and underscores only.
VALID_REPOSITORYNAME_RE = re.compile(r"^[a-z0-9\_]+$")
# Public (owner) names: lowercase letters, digits, '.', '_' and '-'.
VALID_PUBLICNAME_RE = re.compile(r"^[a-z0-9._\-]+$")
106
107
# Generate with python scripts/categories.py
# Category names as produced by that script when this list was last
# regenerated; update by re-running the script rather than editing by hand.
CURRENT_CATEGORIES = [
    "Assembly",
    "ChIP-seq",
    "Combinatorial Selections",
    "Computational chemistry",
    "Constructive Solid Geometry",
    "Convert Formats",
    "Data Export",
    "Data Managers",
    "Data Source",
    "Entomology",
    "Epigenetics",
    "Fasta Manipulation",
    "Fastq Manipulation",
    "Flow Cytometry Analysis",
    "Genome annotation",
    "Genome editing",
    "Genome-Wide Association Study",
    "Genomic Interval Operations",
    "Graphics",
    "Imaging",
    "Machine Learning",
    "Metabolomics",
    "Metagenomics",
    "Micro-array Analysis",
    "Molecular Dynamics",
    "Next Gen Mappers",
    "NLP",
    "Ontology Manipulation",
    "Phylogenetics",
    "Proteomics",
    "RNA",
    "SAM",
    "Sequence Analysis",
    "Statistics",
    "Systems Biology",
    "Text Manipulation",
    "Tool Dependency Packages",
    "Tool Generators",
    "Transcriptomics",
    "Variant Analysis",
    "Visualization",
    "Web Services",
]
# http://stackoverflow.com/questions/7676255/find-and-replace-urls-in-a-block-of-te
# Case-insensitive URL matcher. The pattern contains several capture groups,
# so ``findall`` yields tuples; the outermost group (index 0) is the whole
# matched URL, which is what _find_urls_in_text() inspects.
HTTP_REGEX_PATTERN = re.compile(
    r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>\[\]]+|\(([^\s()<>\[\]]+|(\([^\s()<>\[\]]+\)))*\))+(?:\(([^\s()<>\[\]]+|(\([^\s()<>\[\]]+\)))*\)|[^\s`!(){};:'".,<>?\[\]]))"""  # noqa
)
157
158
159 def _is_url(url):
160 return '://' in url and \
161 (
162 url.startswith('http') or
163 url.startswith('ftp')
164 )
165
166
def _find_urls_in_text(text):
    """Return the HTTP_REGEX_PATTERN matches in ``text`` that look like URLs.

    Each returned item is the tuple of groups produced by ``findall``; the
    first group holds the full matched URL and is what ``_is_url`` checks.
    """
    url_matches = []
    for groups in HTTP_REGEX_PATTERN.findall(text):
        if _is_url(groups[0]):
            url_matches.append(groups)
    return url_matches
169
170
def construct_yaml_str(self, node):
    """YAML string constructor that returns the node's scalar value as-is.

    Installed in place of the default string handling so that string nodes
    always come back as unicode objects.
    """
    scalar_value = self.construct_scalar(node)
    return scalar_value
175
176
# Register construct_yaml_str as the handler for the standard YAML string tag
# on both the full and the safe loader (module-level side effect at import).
yaml.Loader.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str)
yaml.SafeLoader.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str)
179
180
class ShedContext(NamedTuple):
    """Bundle of a tool shed client, its configuration and configured owner."""

    # Client used to talk to the targeted tool shed.
    tsi: toolshed.ToolShedInstance
    # Configuration dictionary for the targeted shed (read for "label").
    shed_config: dict
    # Owner taken from configuration; owner() treats None as "not configured".
    config_owner: str

    def owner(self):
        """Return the configured owner, or ask the shed for the API user."""
        if self.config_owner is not None:
            return self.config_owner
        return username(self.tsi)

    @property
    def label(self):
        """Label of the shed from its config, defaulting to "tool shed"."""
        configured_label = self.shed_config.get("label")
        return configured_label if configured_label else "tool shed"
195
196
def shed_init(ctx, path, **kwds):
    """Initialize a new shed repository.

    Creates ``path`` if needed and writes a ``.shed.yml`` into it. When the
    ``from_workflow`` keyword is given, the workflow is copied into ``path``
    (unless already present) and a ``repository_dependencies.xml`` is
    generated from the tool shed tools it references.

    Returns 0 on success and 1 if a target file may not be written or the
    shed config could not be created.
    """
    if not os.path.exists(path):
        os.makedirs(path)

    config_target = os.path.join(path, SHED_CONFIG_NAME)
    # can_write_to_path is False when .shed.yml exists and no --force sent.
    if not can_write_to_path(config_target, **kwds):
        return 1
    if _create_shed_config(ctx, config_target, **kwds):
        return 1

    dependencies_target = os.path.join(path, REPO_DEPENDENCIES_CONFIG_NAME)
    workflow_source = kwds.get("from_workflow")
    if not workflow_source:
        return 0

    workflow_copy = os.path.join(path, os.path.basename(workflow_source))
    if not os.path.exists(workflow_copy):
        shutil.copyfile(workflow_source, workflow_copy)

    if not can_write_to_path(dependencies_target, **kwds):
        return 1

    dependencies = RepositoryDependencies(
        _parse_repos_from_workflow(workflow_source)
    )
    dependencies.write_to_path(dependencies_target)
    return 0
227
228
def install_arg_lists(ctx, paths, **kwds):
    """Build a list of install args for resolved repositories.

    Raises RuntimeError if processing any repository under ``paths`` fails.
    """
    shed_context = get_shed_context(ctx, **kwds)
    collected_args = []

    def _collect(realized_repository):
        # Callback for for_each_repository; always reports success (0).
        collected_args.append(
            realized_repository.install_args(ctx, shed_context)
        )
        return 0

    if for_each_repository(ctx, _collect, paths, **kwds):
        raise RuntimeError(PROBLEM_PROCESSING_REPOSITORY_MESSAGE)
    return collected_args
243
244
def find_urls_for_xml(root):
    """Return two lists: explicit package URLs, and help text URLs.

    For validating the user-facing URLs it is sensible to mimic
    a web browser user agent.

    ``root`` is an XML element. Each ``package`` child with an ``install``
    element contributes the download URLs and sub-action packages of its
    actions; each ``help`` child contributes the URLs found in its text.
    ``help`` elements without text (e.g. ``<help/>``) are skipped instead of
    raising a TypeError from the URL regex.
    """
    urls = []
    for package_el in root.findall("package"):
        install_els = package_el.findall("install")
        assert len(install_els) in (0, 1)

        if not install_els:
            continue

        package = BasePackage(None, package_el, install_els[0], readme=None)
        for action in package.get_all_actions():
            urls.extend(dl.url for dl in action.downloads())

            for subaction in action.actions:
                if hasattr(subaction, 'packages'):
                    urls.extend(subaction.packages)

    docs = []
    for help_el in root.findall("help"):
        help_text = help_el.text
        if not help_text:
            # Element.text is None for an empty element; nothing to scan.
            continue
        docs.extend(match[0] for match in _find_urls_in_text(help_text))

    return urls, docs
274
275
def handle_force_create(realized_repository, ctx, shed_context, **kwds):
    """Return the repository id, creating the repository if asked to.

    The repository is created only when it cannot be found and the
    ``force_repository_creation`` keyword is truthy. The result may still be
    None, either because creation was not requested or because ``create``
    returned None.
    """
    existing_id = realized_repository.find_repository_id(ctx, shed_context)
    if existing_id is not None or not kwds.get("force_repository_creation"):
        return existing_id
    return realized_repository.create(ctx, shed_context)
282
283
def report_non_existent_repository(realized_repository):
    """Log that the repository is missing from the shed and return 2."""
    error(
        "Repository [%s] does not exist in the targeted Tool Shed."
        % realized_repository.name
    )
    return 2
288
289
def upload_repository(ctx, realized_repository, **kwds):
    """Upload a tool directory as a tarball to a tool shed.

    Uses the tarball given by the ``tar`` keyword, or builds a temporary one
    from the realized repository. A tarball built here is removed before
    returning (previously it was left behind in the temp directory); a
    caller-supplied ``tar`` is never deleted.

    Returns 0 on success (including "nothing to upload"), the result of
    report_non_existent_repository() if the repository cannot be found or
    created, and -1 if the shed rejects the update.
    """
    path = realized_repository.path
    tar_path = kwds.get("tar")
    built_here = not tar_path
    if built_here:
        tar_path = build_tarball(path, **kwds)
    try:
        if kwds.get("tar_only", False):
            name = realized_repository.pattern_to_file_name("shed_upload.tar.gz")
            shutil.copy(tar_path, name)
            return 0
        shed_context = get_shed_context(ctx, **kwds)
        update_kwds = {}
        _update_commit_message(ctx, realized_repository, update_kwds, **kwds)

        repo_id = handle_force_create(realized_repository, ctx, shed_context, **kwds)
        # failing to create the repo, give up
        if repo_id is None:
            return report_non_existent_repository(realized_repository)

        if kwds.get("check_diff", False):
            is_diff = diff_repo(ctx, realized_repository, **kwds) != 0
            if not is_diff:
                name = realized_repository.name
                info("Repository [%s] not different, skipping upload." % name)
                return 0

        # TODO: support updating repo information if it changes in the config file
        try:
            shed_context.tsi.repositories.update_repository(
                str(repo_id), tar_path, **update_kwds
            )
        except Exception as e:
            # The shed answers HTTP 400 when the upload changes nothing;
            # that is reported as a warning rather than a failure.
            if isinstance(e, bioblend.ConnectionError) and e.status_code == 400 and \
                    '"No changes to repository."' in e.body:
                warn("Repository %s was not updated because there were no changes" % realized_repository.name)
                return 0
            message = api_exception_to_message(e)
            error("Could not update %s" % realized_repository.name)
            error(message)
            return -1
        info("Repository %s updated successfully." % realized_repository.name)
        return 0
    finally:
        if built_here:
            # build_tarball() leaves deletion of its temp file to the caller.
            try:
                os.remove(tar_path)
            except OSError:
                pass
332
333
334 def _update_commit_message(ctx, realized_repository, update_kwds, **kwds):
335 message = kwds.get("message")
336 git_rev = realized_repository.git_rev(ctx)
337 git_repo = realized_repository.git_repo(ctx)
338 if message is None:
339 message = "planemo upload"
340 if git_repo:
341 message += " for repository %s" % git_repo
342 if git_rev:
343 message += " commit %s" % git_rev
344 update_kwds["commit_message"] = message
345
346
def diff_repo(ctx, realized_repository, **kwds):
    """Compare two repositories (local or remote) and check for differences.

    Returns 0 if and only if the repositories are effectively the same
    given the supplied kwds describing the comparison.
    """
    with temp_directory("tool_shed_diff_") as scratch_dir:
        diff_result = _diff_in(ctx, scratch_dir, realized_repository, **kwds)
    return diff_result
355
356
def _diff_in(ctx, working, realized_repository, **kwds):
    """Diff a repository against its tool shed copy inside ``working``.

    The "other" side is always downloaded from the shed named by
    ``shed_target``. The "mine" side is downloaded from
    ``shed_target_source`` when given, otherwise it is the local repository
    packed and unpacked through a temporary tarball.

    Unless ``raw`` is set, shed XML files are compared first with
    diff_and_remove(); the remaining files are compared with ``diff -r``.
    Output goes to the file named by ``output`` or to stdout.

    Returns 2 if the repository does not exist in the target shed, otherwise
    a non-zero value if either comparison found differences and 0 if not.
    """
    path = realized_repository.path
    shed_target_source = kwds.get("shed_target_source")

    label_a = "_%s_" % (shed_target_source if shed_target_source else "workingdir")
    shed_target = kwds.get("shed_target", "B")
    if "/" in shed_target:
        # A URL cannot be used as a directory name.
        shed_target = "custom_shed"
    label_b = "_%s_" % shed_target

    mine = os.path.join(working, label_a)
    other = os.path.join(working, label_b)

    shed_context = get_shed_context(ctx, read_only=True, **kwds)
    # In order to download the tarball, require repository ID...
    repo_id = realized_repository.find_repository_id(ctx, shed_context)
    if repo_id is None:
        error("shed_diff: Repository [%s] does not exist in the targeted Tool Shed."
              % realized_repository.name)
        # $ diff README.rst not_a_file 2&>1 /dev/null; echo $?
        # 2
        return 2
    info("Diffing repository [%s]" % realized_repository.name)
    download_tarball(
        ctx,
        shed_context,
        realized_repository,
        destination=other,
        clean=True,
        destination_is_pattern=False,
        **kwds
    )
    if shed_target_source:
        new_kwds = kwds.copy()
        new_kwds["shed_target"] = shed_target_source
        shed_context = get_shed_context(ctx, read_only=True, **new_kwds)
        download_tarball(
            ctx,
            shed_context,
            realized_repository,
            destination=mine,
            clean=True,
            destination_is_pattern=False,
            **new_kwds
        )
    else:
        tar_path = build_tarball(path)
        try:
            os.mkdir(mine)
            shell(['tar', '-xzf', tar_path, '-C', mine])
        finally:
            # tar_path is a regular file; the previous shutil.rmtree() call
            # silently failed on it and leaked the temporary tarball.
            try:
                os.remove(tar_path)
            except OSError:
                pass

    output = kwds.get("output")
    raw = kwds.get("raw", False)
    xml_diff = 0
    if not raw:
        if output:
            with open(output, "w") as f:
                xml_diff = diff_and_remove(working, label_a, label_b, f)
        else:
            xml_diff = diff_and_remove(working, label_a, label_b, sys.stdout)

    cmd = ['diff', '-r', label_a, label_b]
    if output:
        # Append so the XML diff written above is preserved.
        with open(output, 'ab') as fh:
            raw_diff = shell(cmd, cwd=working, stdout=fh)
    else:
        raw_diff = shell(cmd, cwd=working)
    exit_code = raw_diff or xml_diff
    if not raw:
        if xml_diff:
            ctx.vlog("One or more shed XML file(s) different!")
        if raw_diff:
            ctx.vlog("One or more non-shed XML file(s) different.")
        if not xml_diff and not raw_diff:
            ctx.vlog("No differences.")
    return exit_code
433
434
def shed_repo_config(ctx, path, name=None):
    """Load and expand the ``.shed.yml`` configuration found in ``path``.

    A missing or empty file yields an empty configuration, which is then
    filled in by _expand_raw_config() before being returned.
    """
    shed_yaml_path = os.path.join(path, SHED_CONFIG_NAME)
    loaded = None
    if os.path.exists(shed_yaml_path):
        with open(shed_yaml_path, "r") as handle:
            loaded = yaml.safe_load(handle)

    # yaml may yield None (e.g. for an empty file)
    config = {} if loaded is None else loaded
    _expand_raw_config(ctx, config, path, name=name)
    return config
446
447
def tool_shed_client(ctx=None, **kwds):
    """Return the tool shed client of the shed context built from ``kwds``."""
    shed_context = get_shed_context(ctx, **kwds)
    return shed_context.tsi
450
451
def get_shed_context(ctx=None, **kwds):
    """Build a ShedContext for the shed selected by ``kwds``.

    With ``read_only=True`` no key, email or password is passed to the
    client. Otherwise the key comes from _find_shed_key() and the email and
    password from the ``shed_email`` / ``shed_password`` keywords, falling
    back to the shed configuration.
    """
    shed_config, config_owner = _shed_config_and_username(ctx, **kwds)
    url = _shed_config_to_url(shed_config)

    if kwds.get("read_only", False):
        key = email = password = None
    else:
        key = _find_shed_key(kwds, shed_config)
        email = kwds.get("shed_email") or shed_config.get("email")
        password = kwds.get("shed_password") or shed_config.get("password")

    tsi = tool_shed_instance(url, key, email, password)
    return ShedContext(tsi, shed_config, config_owner)
472
473
def tool_shed_url(ctx, **kwds):
    """Return the validated URL of the shed selected by ``kwds``."""
    shed_config = _shed_config_and_username(ctx, **kwds)[0]
    return _shed_config_to_url(shed_config)
477
478
479 def _shed_config_and_username(ctx, **kwds):
480 shed_target = kwds.get("shed_target")
481 global_config = getattr(ctx, "global_config", {})
482 if global_config and "sheds" in global_config:
483 sheds_config = global_config["sheds"]
484 shed_config = sheds_config.get(shed_target, {}) or {}
485 else:
486 shed_config = {}
487
488 if "url" not in shed_config:
489 if shed_target and shed_target in SHED_SHORT_NAMES:
490 shed_config["url"] = SHED_SHORT_NAMES[shed_target]
491 else:
492 shed_config["url"] = shed_target
493
494 if "label" not in shed_config:
495 if shed_target and shed_target in SHED_LABELS:
496 shed_config["label"] = SHED_LABELS[shed_target]
497 else:
498 shed_config["label"] = "custom tool shed at %s" % shed_target
499
500 default_shed_username = global_config.get("shed_username")
501 username = shed_config.get("username", default_shed_username)
502
503 return shed_config, username
504
505
506 def _find_shed_key(kwds, shed_config):
507 shed_key = kwds.get("shed_key")
508 if shed_key is None:
509 shed_key_from_env = kwds.get("shed_key_from_env")
510 if shed_key_from_env is not None:
511 shed_key = os.environ[shed_key_from_env]
512 if shed_key is None:
513 shed_key = shed_config.get("key")
514 return shed_key
515
516
def find_repository_id(ctx, shed_context, path, **kwds):
    """Look up the shed id of the repository described at ``path``.

    The repository configuration is taken from the ``config`` keyword when
    given, otherwise it is loaded from ``path``. The ``name`` keyword is
    only used for loading the configuration and is not forwarded.
    """
    repo_config = kwds.get("config")
    if repo_config is None:
        repo_config = shed_repo_config(ctx, path, name=kwds.get("name"))
    repo_name = repo_config["name"]

    forwarded = kwds.copy()
    forwarded.pop("name", None)
    return _find_repository_id(ctx, shed_context, repo_name, repo_config, **forwarded)
527
528
def _find_repository_id(ctx, shed_context, name, repo_config, **kwds):
    """Return the id of repository ``name`` for the resolved owner.

    If no repository matches, returns None when the ``allow_none`` keyword
    is truthy and raises an Exception otherwise.
    """
    # TODO: modify to consume shed_context
    owner = _owner(ctx, repo_config, shed_context, **kwds)
    match = find_repository(shed_context.tsi, owner, name)
    if match is not None:
        return match["id"]
    if kwds.get("allow_none", False):
        return None
    raise Exception(
        "Failed to find repository for owner/name %s/%s" % (owner, name)
    )
542
543
544 def _owner(ctx, repo_config, shed_context=None, **kwds):
545 owner = kwds.get("owner") or repo_config.get("owner")
546 if owner is None:
547 if shed_context is None and "shed_target" in kwds:
548 shed_context = get_shed_context(ctx, **kwds)
549 if shed_context is not None:
550 owner = shed_context.owner()
551 return owner
552
553
554 def _expand_raw_config(ctx, config, path, name=None):
555 name_input = name
556 if "name" not in config:
557 config["name"] = name
558 if config["name"] is None:
559 config["name"] = path_to_repo_name(path)
560
561 default_include = config.get("include", ["**"])
562 repos = config.get("repositories")
563 auto_tool_repos = config.get("auto_tool_repositories", False)
564 suite_config = config.get("suite", False)
565
566 if repos and auto_tool_repos:
567 raise Exception(AUTO_REPO_CONFLICT_MESSAGE)
568 if auto_tool_repos and name_input:
569 raise Exception(AUTO_NAME_CONFLICT_MESSAGE)
570 if auto_tool_repos:
571 repos = _build_auto_tool_repos(ctx, path, config, auto_tool_repos)
572 if suite_config:
573 if repos is None:
574 repos = odict.odict()
575 _build_suite_repo(config, repos, suite_config)
576 # If repositories aren't defined, just define a single
577 # one based on calculated name and including everything
578 # by default.
579 if repos is None:
580 repos = {
581 config["name"]: {
582 "include": default_include
583 }
584 }
585 config["repositories"] = repos
586
587
def _build_auto_tool_repos(ctx, path, config, auto_tool_repos):
    """Build one repository definition per tool found under ``path``.

    Each repository includes the default include patterns and excludes the
    configured excludes plus every other tool's path. ``name``,
    ``description`` and ``long_description`` are rendered from the matching
    ``*_template`` entries of ``auto_tool_repos`` when present. The result
    is an ordered mapping keyed by rendered repository name.
    """
    default_include = config.get("include", ["**"])
    tool_source_pairs = list(yield_tool_sources(ctx, path, recursive=True))
    all_tool_paths = [pair[0] for pair in tool_source_pairs]
    base_excludes = _shed_config_excludes(config)

    repos = odict.odict()
    for tool_path, tool_source in tool_source_pairs:
        template_vars = dict(
            tool_id=tool_source.parse_id().lower(),
            tool_name=tool_source.parse_name(),
            description=tool_source.parse_description(),
        )
        # Every tool except this one is excluded from this repository.
        sibling_paths = list(all_tool_paths)
        sibling_paths.remove(tool_path)

        repo_dict = {
            "include": default_include,
            "exclude": base_excludes + sibling_paths,
        }
        for key in ["name", "description", "long_description"]:
            template = auto_tool_repos.get("%s_template" % key)
            if template:
                repo_dict[key] = templates.render(template, **template_vars)

        repos[repo_dict["name"]] = repo_dict
    return repos
624
625
def _build_suite_repo(config, repos, suite_config):
    """Add a suite repository to ``repos`` that depends on all the others.

    The suite's ``repository_dependencies.xml`` lists every repository
    already in ``repos`` (using its own owner or the config owner) plus the
    entries of ``include_repositories``. Raises an Exception if the suite
    has no name.
    """
    suite_name = suite_config.get("name")
    if not suite_name:
        raise Exception("suite requires a 'name'.")
    description = suite_config.get("description", "")
    long_description = suite_config.get("long_description")
    default_owner = config["owner"]
    repo_type = suite_config.get('type', REPO_TYPE_SUITE)

    repo_pairs = []
    for repo_name, repo_dict in repos.items():
        repo_pairs.append((repo_dict.get('owner') or default_owner, repo_name))
    for extra in suite_config.get("include_repositories", {}):
        repo_pairs.append((extra["owner"], extra["name"]))

    repository_dependencies = RepositoryDependencies(repo_pairs, description)

    suite_repo = {
        "_files": {
            REPO_DEPENDENCIES_CONFIG_NAME: str(repository_dependencies)
        },
        "include": [],
        "name": suite_name,
        "description": description,
        "type": repo_type,
    }
    if long_description:
        suite_repo["long_description"] = long_description
    repos[suite_name] = suite_repo
653
654
def update_repository_for(ctx, tsi, id, repo_config):
    """Update the shed metadata of repository ``id`` from ``repo_config``.

    Always sends name, synopsis and type; long description, URLs and
    category ids are sent only when they are not None.
    """
    name = repo_config["name"]
    description = repo_config.get("description")
    long_description = repo_config.get("long_description")
    repo_type = shed_repo_type(repo_config, name)
    remote_repository_url = repo_config.get("remote_repository_url")
    homepage_url = repo_config.get("homepage_url")
    category_ids = find_category_ids(tsi, repo_config.get("categories", []))

    _ensure_shed_description(description)

    payload = {
        "name": name,
        "synopsis": description,
        "type": repo_type,
    }
    optional_fields = (
        ("description", long_description),
        ("remote_repository_url", remote_repository_url),
        ("homepage_url", homepage_url),
        ('category_ids[]', category_ids),
    )
    for field, value in optional_fields:
        if value is not None:
            payload[field] = value
    return bioblend.galaxy.client.Client._put(tsi.repositories, id=id, payload=payload)
681
682
def create_repository_for(ctx, tsi, name, repo_config):
    """Create repository ``name`` in the shed from ``repo_config``.

    Returns whatever the shed client's ``create_repository`` returns.
    """
    description = repo_config.get("description")
    long_description = repo_config.get("long_description")
    repo_type = shed_repo_type(repo_config, name)
    remote_repository_url = repo_config.get("remote_repository_url")
    homepage_url = repo_config.get("homepage_url")
    category_ids = find_category_ids(tsi, repo_config.get("categories", []))

    _ensure_shed_description(description)

    create_args = dict(
        name=name,
        synopsis=description,
        description=long_description,
        type=repo_type,
        remote_repository_url=remote_repository_url,
        homepage_url=homepage_url,
        category_ids=category_ids,
    )
    return tsi.repositories.create_repository(**create_args)
704
705
def download_tarball(ctx, shed_context, realized_repository, **kwds):
    """Download the shed copy of a repository.

    ``destination`` (default ``shed_download.tar.gz``) is expanded through
    the repository's file name pattern unless ``destination_is_pattern`` is
    False. A destination not ending in "gz" is treated as a directory to
    extract into; with ``clean=True`` the ``.hg_archival.txt`` file is then
    removed from it. Raises an Exception if the repository id is unknown.
    """
    repo_id = realized_repository.find_repository_id(ctx, shed_context)
    if repo_id is None:
        message = "Unable to find repository id, cannot download."
        error(message)
        raise Exception(message)

    destination = kwds.get('destination', 'shed_download.tar.gz')
    if kwds.get("destination_is_pattern", True):
        destination = realized_repository.pattern_to_file_name(destination)

    to_directory = not destination.endswith("gz")
    download_tar(shed_context.tsi, repo_id, destination, to_directory=to_directory)

    if not (to_directory and kwds.get("clean", False)):
        return
    archival_file = os.path.join(destination, ".hg_archival.txt")
    if os.path.exists(archival_file):
        os.remove(archival_file)
725
726
def build_tarball(realized_path, **kwds):
    """Build a tool shed tarball for the specified path.

    Every file under ``realized_path`` is added, in sorted path order, under
    its path relative to ``realized_path``; symlinks are dereferenced.

    Returns the path of a temporary ``.tar.gz`` file. The caller is
    responsible for deleting this file. If archiving fails, the temporary
    file is removed before the exception propagates.
    """
    # Sort so the archive member order is deterministic.
    files = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(realized_path)
        for filename in filenames
    )

    fd, temp_path = mkstemp()
    # tarfile reopens the file by name; the descriptor itself is not needed.
    os.close(fd)
    try:
        with tarfile.open(temp_path, "w:gz", dereference=True) as tar:
            for file_path in files:
                arcname = os.path.relpath(file_path, realized_path)
                tar.add(file_path, arcname=arcname)
    except Exception:
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise
    return temp_path
751
752
def find_raw_repositories(ctx, paths, **kwds):
    """Return a list of "raw" repository objects for each repo on paths."""
    collected = []
    for path in paths:
        collected += _find_raw_repositories(ctx, path, **kwds)
    return collected
759
760
def for_each_repository(ctx, function, paths, **kwds):
    """Call ``function`` on every realized repository found under ``paths``.

    Returns the coalesced return codes of all calls, or 254 as soon as
    realizing the repositories of a path fails.
    """
    return_codes = []
    for path in paths:
        with _path_on_disk(ctx, path) as raw_path:
            try:
                return_codes.extend(
                    function(realized_repository)
                    for realized_repository in _realize_effective_repositories(
                        ctx, raw_path, **kwds
                    )
                )
            except RealizationException:
                error(REALIZAION_PROBLEMS_MESSAGE)
                return 254

    return coalesce_return_codes(return_codes)
777
778
def path_to_repo_name(path):
    """Return the final component of the absolute form of ``path``."""
    absolute_path = os.path.abspath(path)
    return os.path.split(absolute_path)[1]
781
782
def shed_repo_type(config, name):
    """Return the repository type for ``config``.

    An explicit ``type`` entry wins. Otherwise names starting with
    ``package_`` are tool dependency definitions, names starting with
    ``suite_`` are suite definitions, and everything else is unrestricted.
    """
    explicit_type = config.get("type")
    if explicit_type is not None:
        return explicit_type
    if name.startswith("package_"):
        return REPO_TYPE_TOOL_DEP
    if name.startswith("suite_"):
        return REPO_TYPE_SUITE
    return REPO_TYPE_UNRESTRICTED
793
794
795 def _shed_config_to_url(shed_config):
796 url = shed_config["url"]
797 if not url.startswith("http"):
798 message = (
799 "Invalid shed url specified [{0}]. Please specify a valid "
800 "HTTP address or one of {1}"
801 ).format(url, list(SHED_SHORT_NAMES.keys()))
802 raise ValueError(message)
803 return url
804
805
def _realize_effective_repositories(ctx, path, **kwds):
    """Expand folders in a source code repository into tool shed
    repositories.

    Each folder may hold nested repositories, and each folder may correspond
    to many repositories (for instance if a folder has n tools in the source
    code repository that are published to the tool shed as one repository
    per tool).

    This is a generator of realized repositories. Failures, whether in
    finding a raw repository or in realizing one, are passed to
    _handle_realization_error() and skipped; once everything has been
    yielded a RealizationException is raised if any failure occurred.
    """
    raw_repo_objects = _find_raw_repositories(ctx, path, **kwds)
    any_failure = False
    with temp_directory() as base_dir:
        for raw_repo in raw_repo_objects:
            if isinstance(raw_repo, Exception):
                # A discovery failure is reported exactly like a failed
                # realization, so route it through the same loop below.
                candidates = [raw_repo]
            else:
                candidates = raw_repo.realizations(ctx, base_dir, **kwds)

            for candidate in candidates:
                if isinstance(candidate, Exception):
                    _handle_realization_error(candidate, **kwds)
                    any_failure = True
                else:
                    yield candidate
    if any_failure:
        raise RealizationException()
837
838
def _create_shed_config(ctx, path, **kwds):
    """Write a new ``.shed.yml`` to ``path`` from command-line keywords.

    The repository name defaults to the name of the directory containing
    ``path`` and the owner to the global ``shed_username``. Returns 1 (after
    logging the problem) if the name or owner is invalid; returns None once
    the file has been written.
    """
    name = kwds.get("name") or path_to_repo_name(os.path.dirname(path))
    name_problem = validate_repo_name(name)
    if name_problem:
        error(name_problem)
        return 1

    owner = kwds.get("owner")
    if owner is None:
        owner = ctx.global_config.get("shed_username")
    owner_problem = validate_repo_owner(owner)
    if owner_problem:
        error(owner_problem)
        return 1

    candidate_entries = (
        ("name", name),
        ("owner", owner),
        ("description", kwds.get("description") or name),
        ("long_description", kwds.get("long_description")),
        ("remote_repository_url", kwds.get("remote_repository_url")),
        ("homepage_url", kwds.get("homepage_url")),
        ("categories", kwds.get("category", [])),
    )
    # Leave out entries that were not supplied.
    config = {
        key: value for key, value in candidate_entries if value is not None
    }

    with open(path, "w") as handle:
        yaml.safe_dump(config, handle)
874
875
876 def _parse_repos_from_workflow(path):
877 with open(path, "r") as f:
878 workflow_json = json.load(f)
879 steps = workflow_json["steps"]
880 tool_ids = set()
881 for value in steps.values():
882 step_type = value["type"]
883 if step_type != "tool":
884 continue
885 tool_id = value["tool_id"]
886 if "/repos/" in tool_id:
887 tool_ids.add(tool_id)
888
889 repo_pairs = set()
890 for tool_id in tool_ids:
891 tool_repo_info = tool_id.split("/repos/", 1)[1]
892 tool_repo_parts = tool_repo_info.split("/")
893 owner = tool_repo_parts[0]
894 name = tool_repo_parts[1]
895 repo_pairs.add((owner, name))
896
897 return repo_pairs
898
899
900 @contextlib.contextmanager
901 def _path_on_disk(ctx, path):
902 git_path = None
903 if path.startswith("git:"):
904 git_path = path
905 elif path.startswith("git+"):
906 git_path = path[len("git+"):]
907 if git_path is None:
908 yield path
909 else:
910 with temp_directory() as git_repo:
911 git.clone(ctx, git_path, git_repo)
912 yield git_repo
913
914
def _find_raw_repositories(ctx, path, **kwds):
    """Find the raw repository objects located at or under ``path``.

    Directories holding a repository metadata file are used when any exist,
    otherwise ``path`` itself. If exactly one such directory is found and
    its config cannot be parsed, a single-element list holding the error is
    returned. Raises an Exception if ``--name`` is combined with multiple
    directories or conflicts with the name found in the single config.
    """
    name = kwds.get("name")
    recursive = kwds.get("recursive", False)

    shed_file_dirs = []
    for metadata_file in REPO_METADATA_FILES:
        shed_file_dirs += find_matching_directories(
            path, metadata_file, recursive=recursive
        )

    config_name = None
    if len(shed_file_dirs) == 1:
        only_dir = shed_file_dirs[0]
        try:
            config = shed_repo_config(ctx, only_dir, name=name)
        except Exception as e:
            failure = RuntimeError(PARSING_PROBLEM % (only_dir, e))
            _handle_realization_error(failure, **kwds)
            return [failure]
        config_name = config.get("name")

    name_given = name is not None
    if name_given and len(shed_file_dirs) > 1:
        raise Exception(NAME_INVALID_MESSAGE)
    if name_given and config_name is not None and config_name != name:
        raise Exception(CONFLICTING_NAMES_MESSAGE)

    raw_dirs = shed_file_dirs or [path]
    build_kwds = kwds.copy()
    build_kwds["name"] = name
    return _build_raw_repo_objects(ctx, raw_dirs, **build_kwds)
945
946
947 def _build_raw_repo_objects(ctx, raw_dirs, **kwds):
948 """
949 From specific directories with .shed.yml files or specified directly from
950 the command-line build abstract description of directories that should be
951 expanded out into shed repositories.
952 """
953 multiple = len(raw_dirs) > 1
954 name = kwds.get("name")
955
956 # List of RawRepositoryDirectories or parsing failures if
957 # fail_fast is not enabled.
958 raw_repo_objects = []
959 for raw_dir in raw_dirs:
960 try:
961 config = shed_repo_config(ctx, raw_dir, name=name)
962 except Exception as e:
963 error_message = PARSING_PROBLEM % (raw_dir, e)
964 exception = RuntimeError(error_message)
965 _handle_realization_error(exception, **kwds)
966 raw_repo_objects.append(exception)
967 continue
968 raw_repo_object = RawRepositoryDirectory(raw_dir, config, multiple)
969 raw_repo_objects.append(raw_repo_object)
970 return raw_repo_objects
971
972
@six.python_2_unicode_compatible
class RepositoryDependencies(object):
    """Abstraction for shed repository_dependencies.xml files.

    ``repo_pairs`` is an iterable of ``(owner, name)`` pairs and
    ``description`` an optional description (None is treated as "").
    """

    def __init__(self, repo_pairs, description=None):
        self.repo_pairs = repo_pairs
        self.description = description or ""

    def __str__(self):
        """Render the XML document.

        Attribute values are escaped and quoted with ``quoteattr`` so that
        descriptions, owners or names containing quotes, ``&`` or ``<`` still
        produce well-formed XML. Plain values render exactly as before.
        """
        # Imported locally so the block does not depend on a new file-level
        # import.
        from xml.sax.saxutils import quoteattr

        def attr(value):
            # "%s" stringifies non-string values the way the old template did.
            return quoteattr("%s" % value)

        contents = '<repositories description=%s>' % attr(self.description)
        line_template = ' <repository owner=%s name=%s />\n'
        for (owner, name) in self.repo_pairs:
            contents += line_template % (attr(owner), attr(name))
        contents += "</repositories>"
        return contents

    def write_to_path(self, path):
        """Write the rendered XML to ``path``, replacing any existing file."""
        with open(path, "w") as f:
            f.write(six.text_type(self))
993
994
995 class RawRepositoryDirectory(object):
996
def __init__(self, path, config, multiple):
    """Record the directory, its expanded config and derived name/type.

    ``multiple`` tells whether the current operation spans many
    repositories.
    """
    self.path = path
    self.config = config
    self.multiple = multiple  # operation over many repos?
    self.name = config["name"]
    self.type = shed_repo_type(config, self.name)
1003
1004 def _hash(self, name):
1005 return hashlib.md5(name.encode('utf-8')).hexdigest()
1006
1007 def realizations(self, ctx, parent_directory, **kwds):
1008 names = self._repo_names()
1009
1010 for name in names:
1011 directory = os.path.join(parent_directory, self._hash(name), name)
1012 multiple = self.multiple or len(names) > 1
1013 if not os.path.exists(directory):
1014 os.makedirs(directory)
1015 r_kwds = kwds.copy()
1016 if "name" in r_kwds:
1017 del r_kwds["name"]
1018 yield self._realize_to(ctx, directory, name, multiple, **r_kwds)
1019
1020 def _realize_to(self, ctx, directory, name, multiple, **kwds):
1021 fail_on_missing = kwds.get("fail_on_missing", True)
1022 ignore_list = []
1023 config = self._realize_config(name)
1024 config["owner"] = _owner(ctx, config, **kwds)
1025
1026 excludes = _shed_config_excludes(config)
1027 for exclude in excludes:
1028 ignore_list.extend(_glob(self.path, exclude))
1029
1030 realized_files = self._realized_files(name)
1031 missing = realized_files.include_failures
1032 if missing and fail_on_missing:
1033 msg = "Failed to include files for %s" % missing
1034 return RuntimeError(msg)
1035
1036 for realized_file in realized_files.files:
1037 relative_dest = realized_file.dest
1038 implicit_ignore = self._implicit_ignores(relative_dest)
1039 explicit_ignore = (realized_file.absolute_src in ignore_list)
1040 if implicit_ignore or explicit_ignore:
1041 continue
1042 realized_file.realize_to(directory)
1043
1044 for (name, contents) in six.iteritems(config.get("_files", {})):
1045 path = os.path.join(directory, name)
1046 with open(path, "w") as f:
1047 f.write(contents)
1048
1049 return RealizedRepositry(
1050 realized_path=directory,
1051 real_path=self.path,
1052 config=config,
1053 multiple=multiple,
1054 missing=missing,
1055 )
1056
1057 def _repo_names(self):
1058 return self.config.get("repositories").keys()
1059
1060 def _realized_files(self, name):
1061 config = self._realize_config(name)
1062 realized_files = []
1063 missing = []
1064 for include_info in config["include"]:
1065 if not isinstance(include_info, dict):
1066 include_info = {"source": include_info}
1067 source_list = include_info.get("source")
1068 if not isinstance(source_list, list):
1069 source_list = [source_list]
1070 # Preprocess any entries with a source list into copies
1071 # with a single source entry:
1072 for source in source_list:
1073 include = include_info.copy()
1074 include["source"] = source
1075 included = RealizedFile.realized_files_for(self.path, include)
1076 if not included:
1077 missing.append(include)
1078 else:
1079 realized_files.extend(included)
1080 return RealizedFiles(realized_files, missing)
1081
1082 def _realize_config(self, name):
1083 config = copy.deepcopy(self.config)
1084 config["name"] = name
1085 repo_config = config.get("repositories", {}).get(name, {})
1086 config.update(repo_config)
1087 if "repositories" in config:
1088 del config["repositories"]
1089 return config
1090
1091 def _implicit_ignores(self, relative_path):
1092 # Filter out "unwanted files" :) like READMEs for special
1093 # repository types.
1094 if self.type == REPO_TYPE_TOOL_DEP:
1095 if relative_path != TOOL_DEPENDENCIES_CONFIG_NAME:
1096 return True
1097
1098 if self.type == REPO_TYPE_SUITE:
1099 if relative_path != REPO_DEPENDENCIES_CONFIG_NAME:
1100 return True
1101
1102 name = os.path.basename(relative_path)
1103 for dvcs_prefix in [".git", ".hg"]:
1104 if relative_path.startswith(dvcs_prefix):
1105 return True
1106
1107 if name.startswith(".svn"):
1108 return True
1109
1110 for pattern in PLANEMO_FILES:
1111 if fnmatch.fnmatch(name, pattern):
1112 return True
1113 return False
1114
1115
class RealizedFiles(NamedTuple):
    """Result of resolving the ``include`` entries of a repository."""

    # Files that were resolved from the include entries.
    files: list
    # Include entries that did not resolve to any file.
    include_failures: list
1119
1120
class RealizedFile(object):
    """One source file together with its destination in the tar-ball."""

    def __init__(self, src_root, src, dest):
        """Create object mapping from file system to tar-ball.

        * src_root - source root (i.e. folder with .shed.yml file)
        * src - location of source file, relative to src_root
        * dest - destination path, relative to root of tar-ball.

        Raises ValueError when dest is ".", i.e. not a filename.
        """
        if dest == ".":
            raise ValueError("Destination for %r should be a full filename!" % src)
        self.src_root = src_root
        self.src = src
        self.dest = dest

    @property
    def absolute_src(self):
        """Absolute path of the source file."""
        joined = os.path.join(self.src_root, self.src)
        return os.path.abspath(joined)

    def realize_to(self, directory):
        """Materialise this file under ``directory``.

        Regular files are symlinked (links in the source are resolved
        first), directories are created.  An existing target is left alone.
        """
        source_path = self.absolute_src
        if os.path.islink(source_path):
            source_path = os.path.realpath(source_path)
        relative_dest = self.dest
        assert relative_dest != "."
        target_path = os.path.join(directory, relative_dest)
        if os.path.exists(target_path):
            # Never overwrite something that is already in place.
            return
        parent = os.path.dirname(target_path)
        if not os.path.exists(parent):
            os.makedirs(parent)
        if os.path.isdir(source_path):
            os.makedirs(target_path)
        else:
            os.symlink(source_path, target_path)

    @staticmethod
    def realized_files_for(path, include_info):
        """Expand one include entry into a list of ``RealizedFile`` objects.

        ``include_info`` is either a source pattern or a dict with a
        ``source`` and optional ``destination`` and ``strip_components``.
        A destination ending in "/" is treated as a directory, anything else
        as the exact target filename.  Raises ValueError for combinations
        that only make sense with a directory destination.
        """
        if not isinstance(include_info, dict):
            include_info = {"source": include_info}
        source = include_info.get("source")
        abs_source = os.path.join(path, source)
        destination = include_info.get("destination")
        strip_components = include_info.get("strip_components", 0)
        if destination is None:
            destination = "./"
        into_directory = destination.endswith("/")
        if not into_directory:
            # A filename destination cannot receive several files: reject
            # wildcard sources and directories (which get an implicit
            # wildcard).  Character classes such as [A-Z] are not detected.
            if "*" in source or "?" in source or os.path.isdir(abs_source):
                raise ValueError(
                    "destination must be a directory (with trailing slash) "
                    "if source is a folder or uses wildcards"
                )

        collected = []
        for globbed_file in _glob(path, source):
            src = os.path.relpath(globbed_file, path)
            if into_directory:
                if not strip_components:
                    dest = src
                else:
                    if "/../" in globbed_file:
                        # relpath() has already collapsed any '..', so the
                        # components are taken from the globbed path itself.
                        assert globbed_file.startswith(path + "/")
                        components = globbed_file[len(path) + 1:].split("/")
                    else:
                        components = src.split("/")
                    dest = "/".join(components[strip_components:])
                # Now apply the specified output directory:
                dest = os.path.join(destination, dest)
            else:
                # Given a filename, just use it!
                dest = destination
                if strip_components:
                    raise ValueError(
                        "strip_components should not be used if destination "
                        "is a filename"
                    )
            collected.append(RealizedFile(path, src, os.path.normpath(dest)))
        return collected

    def __str__(self):
        template = "RealizedFile[src={},dest={},src_root={}]"
        return template.format(self.src, self.dest, self.src_root)
1202
1203
class RealizedRepositry(object):
    """A single repository realized into a directory of its own.

    ``path`` is the realized directory, ``real_path`` the directory it was
    realized from, and ``config`` the effective configuration for this
    repository (it must contain ``name``).
    """

    def __init__(self, realized_path, real_path, config, multiple, missing):
        self.path = realized_path
        self.real_path = real_path
        self.config = config
        self.name = config["name"]
        self.multiple = multiple
        self.missing = missing

    @property
    def owner(self):
        return self.config["owner"]

    @property
    def repository_type(self):
        return shed_repo_type(self.config, self.name)

    @property
    def is_package(self):
        return REPO_TYPE_TOOL_DEP == self.repository_type

    @property
    def is_suite(self):
        return REPO_TYPE_SUITE == self.repository_type

    @property
    def repo_dependencies_path(self):
        return os.path.join(self.path, REPO_DEPENDENCIES_CONFIG_NAME)

    @property
    def tool_dependencies_path(self):
        return os.path.join(self.path, TOOL_DEPENDENCIES_CONFIG_NAME)

    def git_rev(self, ctx):
        return git.rev_if_git(ctx, self.real_path)

    def git_repo(self, ctx):
        return self.config.get("remote_repository_url")

    def pattern_to_file_name(self, pattern):
        """Make ``pattern`` unique per repository when handling several.

        With a single repository the pattern is returned unchanged.
        Otherwise ``_<name>`` (dashes turned into underscores) is inserted
        before the first "." of the pattern, or appended when there is none.
        """
        if not self.multiple:
            return pattern

        suffix = "_%s" % self.config["name"].replace("-", "_")
        stem, dot, remainder = pattern.partition(".")
        if not dot:
            return pattern + suffix
        return stem + suffix + "." + remainder

    def find_repository_id(self, ctx, shed_context):
        """Look up this repository's id; report and return None on error."""
        try:
            return _find_repository_id(
                ctx,
                shed_context,
                name=self.name,
                repo_config=self.config,
                allow_none=True,
            )
        except Exception as e:
            message = api_exception_to_message(e)
            error("Could not update %s" % self.name)
            error(message)
        return None

    def create(self, ctx, shed_context):
        """Wrapper for creating the endpoint if it doesn't exist
        """
        context_owner = shed_context.owner()
        config_owner = self.config.get("owner")
        if context_owner and config_owner and context_owner != config_owner:
            # This is broken because context_owner is incorrect if using an API key.
            # message = INCORRECT_OWNER_MESSAGE % (config_owner, context_owner)
            # raise Exception(message)
            pass

        def _create():
            created = create_repository_for(
                ctx, shed_context.tsi, self.name, self.config
            )
            return created['id']

        return self._with_ts_exception_handling(_create)

    def update(self, ctx, shed_context, id):
        """Wrapper for update the repository metadata.
        """
        return self._with_ts_exception_handling(
            lambda: update_repository_for(
                ctx, shed_context.tsi, id, self.config
            )
        )

    def _with_ts_exception_handling(self, f):
        """Call ``f``; on any exception report it and return None."""
        try:
            return f()
        except Exception as e:
            # TODO: galaxyproject/bioblend#126
            try:
                # Prefer the error message carried in a JSON response body.
                upstream_error = json.loads(e.read())
                error(upstream_error['err_msg'])
            except Exception:
                error(unicodify(e))
            return None

    def latest_installable_revision(self, ctx, shed_context):
        # Inside the method body this name resolves to the module-level
        # function of the same name, not to this method.
        repository_id = self.find_repository_id(ctx, shed_context)
        return latest_installable_revision(shed_context.tsi, repository_id)

    def install_args(self, ctx, shed_context):
        """ Arguments for bioblend's install_repository_revision
        to install this repository against supplied tsi.
        """
        tool_shed_url = shed_context.tsi.base_url
        return {
            "tool_shed_url": tool_shed_url,
            "name": self.name,
            "owner": self.owner,
            "changeset_revision": self.latest_installable_revision(
                ctx, shed_context
            ),
        }
1339
1340
def _glob(path, pattern):
    """Glob ``pattern`` relative to ``path``.

    A pattern that names an existing directory is widened with ``/**``
    before being handed to ``glob.glob``.
    """
    target = os.path.join(path, pattern)
    if not os.path.isdir(target):
        return glob.glob(target)
    return glob.glob("%s/**" % target)
1346
1347
1348 def _shed_config_excludes(config):
1349 return config.get('ignore', []) + config.get('exclude', [])
1350
1351
1352 def _handle_realization_error(exception, **kwds):
1353 fail_fast = kwds.get("fail_fast", False)
1354 if fail_fast:
1355 raise exception
1356 else:
1357 error(unicodify(exception))
1358
1359
1360 def _ensure_shed_description(description):
1361 # description is required, as is name.
1362 if description is None:
1363 message = ("description required for automatic creation or update of "
1364 "shed metadata.")
1365 raise ValueError(message)
1366
1367
def validate_repo_name(name):
    """Return an error message if ``name`` is not a valid repository name.

    Returns None for a valid name.  Every rule is evaluated; when several
    are violated the message of the last violated rule is returned.
    """
    def _build_error(descript):
        return "Repository name [%s] invalid. %s" % (name, descript)

    checks = (
        (
            len(name) < 2,
            "Repository names must be at least 2 characters in length.",
        ),
        (
            len(name) > 80,
            "Repository names cannot be more than 80 characters in length.",
        ),
        (
            not VALID_REPOSITORYNAME_RE.match(name),
            "Repository names must contain only lower-case letters, "
            "numbers and underscore.",
        ),
    )

    msg = None
    for violated, description in checks:
        if violated:
            msg = _build_error(description)
    return msg
1387
1388
def validate_repo_owner(owner):
    """Return an error message if ``owner`` is not a valid owner name.

    Returns None for a valid owner.  Every rule is evaluated; when several
    are violated the message of the last violated rule is returned.
    """
    def _build_error(descript):
        return "Owner [%s] invalid. %s" % (owner, descript)

    checks = (
        (
            len(owner) < 3,
            "Owner must be at least 3 characters in length",
        ),
        (
            len(owner) > 255,
            "Owner cannot be more than 255 characters in length",
        ),
        (
            not VALID_PUBLICNAME_RE.match(owner),
            "Owner must contain only lower-case letters, numbers, dots, "
            "underscores, and '-'",
        ),
    )

    msg = None
    for violated, description in checks:
        if violated:
            msg = _build_error(description)
    return msg
1404
1405
class RealizationException(Exception):
    """Problem while realizing effective repositories for a shed command.

    As a precondition, the user has already been informed of the problem
    with error() before this exception is raised.
    """
1411
1412
# Names exported by ``from planemo.shed import *``.
__all__ = (
    "api_exception_to_message",
    "CURRENT_CATEGORIES",
    "diff_repo",
    "download_tarball",
    "find_raw_repositories",
    "for_each_repository",
    "get_shed_context",
    "path_to_repo_name",
    "REPO_TYPE_SUITE",
    "REPO_TYPE_TOOL_DEP",
    "REPO_TYPE_UNRESTRICTED",
    "shed_init",
    "tool_shed_client",  # Deprecated...
    "tool_shed_url",
)