# pylint: disable=W0622,E1101

"""
A basic object-oriented interface for Galaxy entities.
"""

import abc
import json
from collections.abc import (
    Iterable,
    Mapping,
    Sequence,
)

import bioblend


__all__ = (
    'Wrapper',
    'Step',
    'Workflow',
    'ContentInfo',
    'LibraryContentInfo',
    'HistoryContentInfo',
    'DatasetContainer',
    'History',
    'Library',
    'Folder',
    'Dataset',
    'HistoryDatasetAssociation',
    'DatasetCollection',
    'HistoryDatasetCollectionAssociation',
    'LibraryDatasetDatasetAssociation',
    'LibraryDataset',
    'Tool',
    'Job',
    'Preview',
    'LibraryPreview',
    'HistoryPreview',
    'WorkflowPreview',
    'JobPreview',
)


class Wrapper(metaclass=abc.ABCMeta):
    """
    Abstract base class for Galaxy entity wrappers.

    Wrapper instances wrap deserialized JSON dictionaries such as the
    ones obtained from the Galaxy web API, converting key-based access to
    attribute-based access (e.g., ``library['name'] -> library.name``).

    Dict keys that are converted to attributes are listed in the
    ``BASE_ATTRS`` class variable: this is the 'stable' interface.
    Note that the wrapped dictionary is accessible via the ``wrapped``
    attribute.
    """
    BASE_ATTRS = ('id', 'name')

    @abc.abstractmethod
    def __init__(self, wrapped, parent=None, gi=None):
        """
        :type wrapped: dict
        :param wrapped: JSON-serializable dictionary

        :type parent: :class:`Wrapper`
        :param parent: the parent of this wrapper

        :type gi: :class:`GalaxyInstance`
        :param gi: the GalaxyInstance through which we can access this wrapper
        """
        if not isinstance(wrapped, Mapping):
            raise TypeError('wrapped object must be a mapping type')
        # loads(dumps(x)) is a bit faster than deepcopy and allows type checks
        try:
            dumped = json.dumps(wrapped)
        except (TypeError, ValueError):
            raise ValueError('wrapped object must be JSON-serializable')
        object.__setattr__(self, 'wrapped', json.loads(dumped))
        for k in self.BASE_ATTRS:
            object.__setattr__(self, k, self.wrapped.get(k))
        object.__setattr__(self, '_cached_parent', parent)
        object.__setattr__(self, 'is_modified', False)
        object.__setattr__(self, 'gi', gi)

    @property
    @abc.abstractmethod
    def gi_module(self):
        """
        The GalaxyInstance module that deals with objects of this type.
        """

    @property
    def parent(self):
        """
        The parent of this wrapper.
        """
        return self._cached_parent

    @property
    def is_mapped(self):
        """
        ``True`` if this wrapper is mapped to an actual Galaxy entity.
        """
        return self.id is not None

    def unmap(self):
        """
        Disconnect this wrapper from Galaxy.
        """
        object.__setattr__(self, 'id', None)

    def clone(self):
        """
        Return an independent copy of this wrapper.
        """
        return self.__class__(self.wrapped)

    def touch(self):
        """
        Mark this wrapper as having been modified since its creation.
        """
        object.__setattr__(self, 'is_modified', True)
        if self.parent:
            self.parent.touch()

    def to_json(self):
        """
        Return a JSON dump of this wrapper.
        """
        return json.dumps(self.wrapped)

    @classmethod
    def from_json(cls, jdef):
        """
        Build a new wrapper from a JSON dump.
        """
        return cls(json.loads(jdef))

    # FIXME: things like self.x[0] = 'y' do NOT call self.__setattr__
    def __setattr__(self, name, value):
        if name not in self.wrapped:
            raise AttributeError("can't set attribute")
        else:
            self.wrapped[name] = value
            object.__setattr__(self, name, value)
            self.touch()

    def __repr__(self):
        return f"{self.__class__.__name__}({self.wrapped!r})"
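

# Usage sketch (illustrative, not part of the original module): concrete
# subclasses expose the dict keys listed in their BASE_ATTRS as attributes.
# Assuming an authenticated ``bioblend.galaxy.objects.GalaxyInstance`` ``gi``:
#
#     hist = History({'id': 'abc123', 'name': 'test'}, gi=gi)
#     hist.name             # 'test', same as hist.wrapped['name']
#     hist.name = 'new'     # updates hist.wrapped and sets hist.is_modified
#     copy = hist.clone()   # independent copy backed by a deep-copied dict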


class Step(Wrapper):
    """
    Workflow step.

    Steps are the main building blocks of a Galaxy workflow. A step can be: an
    input (type ``data_collection_input``, ``data_input`` or
    ``parameter_input``), a computational tool (type ``tool``), a subworkflow
    (type ``subworkflow``) or a pause (type ``pause``).
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + (
        'input_steps', 'tool_id', 'tool_inputs', 'tool_version', 'type'
    )

    def __init__(self, step_dict, parent):
        super().__init__(step_dict, parent=parent, gi=parent.gi)
        try:
            stype = step_dict['type']
        except KeyError:
            raise ValueError('not a step dict')
        if stype not in {'data_collection_input', 'data_input', 'parameter_input', 'pause', 'subworkflow', 'tool'}:
            raise ValueError('Unknown step type: %r' % stype)

    @property
    def gi_module(self):
        return self.gi.workflows


class Workflow(Wrapper):
    """
    Workflows represent ordered sequences of computations on Galaxy.

    A workflow defines a sequence of steps that produce one or more
    results from an input dataset.
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + (
        'deleted', 'inputs', 'owner', 'published', 'steps', 'tags'
    )
    POLLING_INTERVAL = 10  # for output state monitoring

    def __init__(self, wf_dict, gi=None):
        super().__init__(wf_dict, gi=gi)
        missing_ids = []
        if gi:
            tools_list_by_id = [t.id for t in gi.tools.get_previews()]
        else:
            tools_list_by_id = []
        tool_labels_to_ids = {}
        for k, v in self.steps.items():
            # convert step ids to str for consistency with outer keys
            v['id'] = str(v['id'])
            for i in v['input_steps'].values():
                i['source_step'] = str(i['source_step'])
            step = Step(v, self)
            self.steps[k] = step
            if step.type == 'tool':
                if not step.tool_inputs or step.tool_id not in tools_list_by_id:
                    missing_ids.append(k)
                tool_labels_to_ids.setdefault(step.tool_id, set()).add(step.id)
        input_labels_to_ids = {}
        for id_, d in self.inputs.items():
            input_labels_to_ids.setdefault(d['label'], set()).add(id_)
        object.__setattr__(self, 'input_labels_to_ids', input_labels_to_ids)
        object.__setattr__(self, 'tool_labels_to_ids', tool_labels_to_ids)
        dag, inv_dag = self._get_dag()
        heads, tails = set(dag), set(inv_dag)
        object.__setattr__(self, 'dag', dag)
        object.__setattr__(self, 'inv_dag', inv_dag)
        object.__setattr__(self, 'source_ids', heads - tails)
        assert set(self.inputs) == self.data_collection_input_ids | self.data_input_ids | self.parameter_input_ids, \
            "inputs is {!r}, while data_collection_input_ids is {!r}, data_input_ids is {!r} and parameter_input_ids is {!r}".format(
                self.inputs, self.data_collection_input_ids, self.data_input_ids, self.parameter_input_ids)
        object.__setattr__(self, 'sink_ids', tails - heads)
        object.__setattr__(self, 'missing_ids', missing_ids)

    @property
    def gi_module(self):
        return self.gi.workflows

    def _get_dag(self):
        """
        Return the workflow's DAG.

        For convenience, this method computes a 'direct' (step =>
        successors) and an 'inverse' (step => predecessors)
        representation of the same DAG.

        For instance, a workflow with a single tool *c*, two inputs
        *a, b* and three outputs *d, e, f* is represented by (direct)::

            {'a': {'c'}, 'b': {'c'}, 'c': {'d', 'e', 'f'}}

        and by (inverse)::

            {'c': {'a', 'b'}, 'd': {'c'}, 'e': {'c'}, 'f': {'c'}}
        """
        dag, inv_dag = {}, {}
        for s in self.steps.values():
            for i in s.input_steps.values():
                head, tail = i['source_step'], s.id
                dag.setdefault(head, set()).add(tail)
                inv_dag.setdefault(tail, set()).add(head)
        return dag, inv_dag

    def sorted_step_ids(self):
        """
        Return a topological sort of the workflow's DAG.
        """
        ids = []
        source_ids = self.source_ids.copy()
        inv_dag = {k: v.copy() for k, v in self.inv_dag.items()}
        while source_ids:
            head = source_ids.pop()
            ids.append(head)
            for tail in self.dag.get(head, []):
                incoming = inv_dag[tail]
                incoming.remove(head)
                if not incoming:
                    source_ids.add(tail)
        return ids
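
    # Worked example (the DAG from the _get_dag docstring above): with
    # dag = {'a': {'c'}, 'b': {'c'}, 'c': {'d', 'e', 'f'}} and the matching
    # inv_dag, source_ids is {'a', 'b'}. sorted_step_ids() implements Kahn's
    # algorithm: it repeatedly pops a source step, appends it to the result
    # and removes its outgoing edges, turning successors with no remaining
    # predecessors into new sources. The result here is 'a' and 'b' (in
    # either order), then 'c', then 'd', 'e' and 'f'.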

    @property
    def data_input_ids(self):
        """
        Return the ids of data input steps for this workflow.
        """
        return {id_ for id_, s in self.steps.items() if s.type == 'data_input'}

    @property
    def data_collection_input_ids(self):
        """
        Return the ids of data collection input steps for this workflow.
        """
        return {id_ for id_, s in self.steps.items() if s.type == 'data_collection_input'}

    @property
    def parameter_input_ids(self):
        """
        Return the ids of parameter input steps for this workflow.
        """
        return {id_ for id_, s in self.steps.items() if s.type == 'parameter_input'}

    @property
    def tool_ids(self):
        """
        Return the ids of tool steps for this workflow.
        """
        return {id_ for id_, s in self.steps.items() if s.type == 'tool'}

    @property
    def input_labels(self):
        """
        Return the labels of this workflow's input steps.
        """
        return set(self.input_labels_to_ids)

    @property
    def is_runnable(self):
        """
        Return True if the workflow can be run on Galaxy.

        A workflow is considered runnable on a Galaxy instance if all
        of the tools it uses are installed in that instance.
        """
        return not self.missing_ids

    def convert_input_map(self, input_map):
        """
        Convert ``input_map`` to the format required by the Galaxy web API.

        :type input_map: dict
        :param input_map: a mapping from input labels to datasets

        :rtype: dict
        :return: a mapping from input slot ids to dataset ids in the
          format required by the Galaxy web API.
        """
        m = {}
        for label, slot_ids in self.input_labels_to_ids.items():
            datasets = input_map.get(label, [])
            if not isinstance(datasets, Iterable):
                datasets = [datasets]
            if len(datasets) < len(slot_ids):
                raise RuntimeError('not enough datasets for "%s"' % label)
            for id_, ds in zip(slot_ids, datasets):
                m[id_] = {'id': ds.id, 'src': ds.SRC}
        return m
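
    # Sketch (illustrative): for a workflow with a single input labelled
    # 'reads' mapped to slot id '0', convert_input_map({'reads': hda})
    # returns {'0': {'id': hda.id, 'src': 'hda'}}, which is the payload
    # format expected by the workflow invocation API.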

    def preview(self):
        getf = self.gi.workflows.get_previews
        try:
            p = [_ for _ in getf(published=True) if _.id == self.id][0]
        except IndexError:
            raise ValueError('no object for id %s' % self.id)
        return p

    def run(self, input_map=None, history='', params=None, import_inputs=False,
            replacement_params=None, wait=False,
            polling_interval=POLLING_INTERVAL, break_on_error=True):
        """
        Run the workflow in the current Galaxy instance.

        :type input_map: dict
        :param input_map: a mapping from workflow input labels to
          datasets, e.g.: ``dict(zip(workflow.input_labels,
          library.get_datasets()))``

        :type history: :class:`History` or str
        :param history: either a valid history object (results will be
          stored there) or a string (a new history will be created with
          the given name).

        :type params: dict
        :param params: a mapping of non-dataset tool parameters (see below)

        :type import_inputs: bool
        :param import_inputs: If ``True``, workflow inputs will be imported
          into the history; if ``False``, only workflow outputs will be
          visible in the history.

        :type replacement_params: dict
        :param replacement_params: pattern-based replacements for
          post-job actions (see the docs for
          :meth:`~bioblend.galaxy.workflows.WorkflowClient.invoke_workflow`)

        :type wait: bool
        :param wait: whether to wait while the returned datasets are
          in a pending state

        :type polling_interval: float
        :param polling_interval: polling interval in seconds

        :type break_on_error: bool
        :param break_on_error: whether to break as soon as at least one
          of the returned datasets is in the 'error' state

        :rtype: tuple
        :return: list of output datasets, output history

        The ``params`` dict should be specified as follows::

          {STEP_ID: PARAM_DICT, ...}

        where PARAM_DICT is::

          {PARAM_NAME: VALUE, ...}

        For backwards compatibility, the following (deprecated) format is
        also supported for ``params``::

          {TOOL_ID: PARAM_DICT, ...}

        in which case PARAM_DICT affects all steps with the given tool id.
        If both by-tool-id and by-step-id specifications are used, the
        latter takes precedence.

        Finally (again, for backwards compatibility), PARAM_DICT can also
        be specified as::

          {'param': PARAM_NAME, 'value': VALUE}

        Note that this format allows only one parameter to be set per step.

        Example: set 'a' to 1 for the third workflow step::

          params = {workflow.steps[2].id: {'a': 1}}

        .. warning::

            This is a blocking operation that can take a very long time. If
            ``wait`` is set to ``False``, the method will return as soon as the
            workflow has been *scheduled*, otherwise it will wait until the
            workflow has been *run*. With a large number of steps, however, the
            delay may not be negligible even in the former case (e.g. minutes
            for 100 steps).
        """
        if not self.is_mapped:
            raise RuntimeError('workflow is not mapped to a Galaxy object')
        if not self.is_runnable:
            raise RuntimeError('workflow has missing tools: %s' % ', '.join(
                '{}[{}]'.format(self.steps[_].tool_id, _)
                for _ in self.missing_ids))
        kwargs = {
            'dataset_map': self.convert_input_map(input_map or {}),
            'params': params,
            'import_inputs_to_history': import_inputs,
            'replacement_params': replacement_params,
        }
        if isinstance(history, History):
            try:
                kwargs['history_id'] = history.id
            except AttributeError:
                raise RuntimeError('history does not have an id')
        elif isinstance(history, str):
            kwargs['history_name'] = history
        else:
            raise TypeError(
                'history must be either a history wrapper or a string')
        res = self.gi.gi.workflows.run_workflow(self.id, **kwargs)
        # res structure: {'history': HIST_ID, 'outputs': [CI_ID, CI_ID, ...]}
        out_hist = self.gi.histories.get(res['history'])
        content_infos_dict = {ci.id: ci for ci in out_hist.content_infos}
        outputs = []
        for output_id in res['outputs']:
            if content_infos_dict[output_id].type == 'file':
                outputs.append(out_hist.get_dataset(output_id))
            elif content_infos_dict[output_id].type == 'collection':
                outputs.append(out_hist.get_dataset_collection(output_id))
        if wait:
            self.gi._wait_datasets(outputs, polling_interval=polling_interval,
                                   break_on_error=break_on_error)
        return outputs, out_hist

    def export(self):
        """
        Export a re-importable representation of the workflow.

        :rtype: dict
        :return: a JSON-serializable dump of the workflow
        """
        return self.gi.gi.workflows.export_workflow_dict(self.id)

    def delete(self):
        """
        Delete this workflow.

        .. warning::
            Deleting a workflow is irreversible - all of the data from
            the workflow will be permanently deleted.
        """
        self.gi.workflows.delete(id_=self.id)
        self.unmap()
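

# Usage sketch (illustrative, assuming an authenticated ``GalaxyInstance``
# ``gi`` and a library ``lib`` with one dataset per workflow input)::
#
#     wf = gi.workflows.list(name='my workflow')[0]
#     input_map = dict(zip(wf.input_labels, lib.get_datasets()))
#     outputs, out_hist = wf.run(input_map, 'output history', wait=True)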


class Dataset(Wrapper, metaclass=abc.ABCMeta):
    """
    Abstract base class for Galaxy datasets.
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + (
        'data_type', 'file_ext', 'file_name', 'file_size', 'genome_build', 'misc_info', 'state'
    )
    POLLING_INTERVAL = 1  # for state monitoring

    @abc.abstractmethod
    def __init__(self, ds_dict, container, gi=None):
        super().__init__(ds_dict, gi=gi)
        object.__setattr__(self, 'container', container)

    @property
    def container_id(self):
        """
        Deprecated property.

        Id of the dataset container. Use :attr:`.container.id` instead.
        """
        return self.container.id

    @property
    @abc.abstractmethod
    def _stream_url(self):
        """
        Return the URL to stream this dataset.
        """

    def get_stream(self, chunk_size=bioblend.CHUNK_SIZE):
        """
        Open dataset for reading and return an iterator over its contents.

        :type chunk_size: int
        :param chunk_size: read this amount of bytes at a time
        """
        kwargs = {'stream': True}
        if isinstance(self, LibraryDataset):
            # 'ld_ids%5B%5D' is the URL-encoded form of 'ld_ids[]'
            kwargs['params'] = {'ld_ids%5B%5D': self.id}
        r = self.gi.gi.make_get_request(self._stream_url, **kwargs)
        if isinstance(self, LibraryDataset) and r.status_code == 500:
            # compatibility with older Galaxy releases
            kwargs['params'] = {'ldda_ids%5B%5D': self.id}
            r = self.gi.gi.make_get_request(self._stream_url, **kwargs)
        r.raise_for_status()
        return r.iter_content(chunk_size)  # FIXME: client can't close r

    def peek(self, chunk_size=bioblend.CHUNK_SIZE):
        """
        Open dataset for reading and return the first chunk.

        See :meth:`.get_stream` for param info.
        """
        try:
            return next(self.get_stream(chunk_size=chunk_size))
        except StopIteration:
            return b''

    def download(self, file_object, chunk_size=bioblend.CHUNK_SIZE):
        """
        Open dataset for reading and save its contents to ``file_object``.

        :type file_object: file
        :param file_object: output file object

        See :meth:`.get_stream` for info on other params.
        """
        for chunk in self.get_stream(chunk_size=chunk_size):
            file_object.write(chunk)

    def get_contents(self, chunk_size=bioblend.CHUNK_SIZE):
        """
        Open dataset for reading and return its **full** contents.

        See :meth:`.get_stream` for param info.
        """
        return b''.join(self.get_stream(chunk_size=chunk_size))

    def refresh(self):
        """
        Re-fetch the attributes pertaining to this object.

        Returns: self
        """
        gi_client = getattr(self.gi.gi, self.container.API_MODULE)
        ds_dict = gi_client.show_dataset(self.container.id, self.id)
        self.__init__(ds_dict, self.container, self.gi)
        return self

    def wait(self, polling_interval=POLLING_INTERVAL, break_on_error=True):
        """
        Wait for this dataset to come out of the pending states.

        :type polling_interval: float
        :param polling_interval: polling interval in seconds

        :type break_on_error: bool
        :param break_on_error: if ``True``, raise a RuntimeError exception if
          the dataset ends in the 'error' state.

        .. warning::

            This is a blocking operation that can take a very long time. Also,
            note that this method does not return anything; however, this
            dataset is refreshed (possibly multiple times) during the
            execution.
        """
        self.gi._wait_datasets([self], polling_interval=polling_interval,
                               break_on_error=break_on_error)
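

# Usage sketch (illustrative): streaming a history dataset ``hda`` to a
# local file once it has left the pending states::
#
#     hda.wait()
#     with open('reads.fastq', 'wb') as f:
#         hda.download(f)
#     first_chunk = hda.peek()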


class HistoryDatasetAssociation(Dataset):
    """
    Maps to a Galaxy ``HistoryDatasetAssociation``.
    """
    BASE_ATTRS = Dataset.BASE_ATTRS + ('annotation', 'deleted', 'purged', 'tags', 'visible')
    SRC = 'hda'

    def __init__(self, ds_dict, container, gi=None):
        super().__init__(ds_dict, container, gi=gi)

    @property
    def gi_module(self):
        return self.gi.histories

    @property
    def _stream_url(self):
        base_url = self.gi.gi.histories._make_url(module_id=self.container.id, contents=True)
        return f"{base_url}/{self.id}/display"

    def update(self, **kwds):
        """
        Update this history dataset's metadata. Some of the attributes that
        can be modified are documented below.

        :type name: str
        :param name: Replace history dataset name with the given string

        :type genome_build: str
        :param genome_build: Replace history dataset genome build (dbkey)

        :type annotation: str
        :param annotation: Replace history dataset annotation with given string

        :type deleted: bool
        :param deleted: Mark or unmark history dataset as deleted

        :type visible: bool
        :param visible: Mark or unmark history dataset as visible
        """
        res = self.gi.gi.histories.update_dataset(self.container.id, self.id, **kwds)
        # Also refresh the containing history, since the dataset may have
        # been (un)deleted
        self.container.refresh()
        self.__init__(res, self.container, gi=self.gi)
        return self

    def delete(self, purge=False):
        """
        Delete this history dataset.

        :type purge: bool
        :param purge: if ``True``, also purge (permanently delete) the dataset

        .. note::
            For the purge option to work, the Galaxy instance must have the
            ``allow_user_dataset_purge`` option set to ``true`` in the
            ``config/galaxy.yml`` configuration file.
        """
        self.gi.gi.histories.delete_dataset(self.container.id, self.id, purge=purge)
        self.container.refresh()
        self.refresh()
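

# Usage sketch (illustrative)::
#
#     hda.update(name='trimmed reads', visible=False)
#     hda.delete(purge=True)  # needs allow_user_dataset_purge on the server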


class DatasetCollection(Wrapper, metaclass=abc.ABCMeta):
    """
    Abstract base class for Galaxy dataset collections.
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + (
        'state', 'deleted', 'collection_type'
    )

    @abc.abstractmethod
    def __init__(self, dsc_dict, container, gi=None):
        super().__init__(dsc_dict, gi=gi)
        object.__setattr__(self, 'container', container)

    def refresh(self):
        """
        Re-fetch the attributes pertaining to this object.

        Returns: self
        """
        gi_client = getattr(self.gi.gi, self.container.API_MODULE)
        dsc_dict = gi_client.show_dataset_collection(self.container.id, self.id)
        self.__init__(dsc_dict, self.container, self.gi)
        return self


class HistoryDatasetCollectionAssociation(DatasetCollection):
    """
    Maps to a Galaxy ``HistoryDatasetCollectionAssociation``.
    """
    BASE_ATTRS = DatasetCollection.BASE_ATTRS + ('tags', 'visible', 'elements')
    SRC = 'hdca'

    def __init__(self, dsc_dict, container, gi=None):
        super().__init__(dsc_dict, container, gi=gi)

    @property
    def gi_module(self):
        return self.gi.histories

    def delete(self):
        """
        Delete this dataset collection.
        """
        self.gi.gi.histories.delete_dataset_collection(self.container.id, self.id)
        self.container.refresh()
        self.refresh()
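

# Usage sketch (illustrative, assuming the helper classes provided by
# ``bioblend.galaxy.dataset_collections``)::
#
#     from bioblend.galaxy.dataset_collections import (
#         CollectionDescription, HistoryDatasetElement)
#     desc = CollectionDescription('samples', elements=[
#         HistoryDatasetElement(name='sample1', id=hda.id)])
#     hdca = hist.create_dataset_collection(desc)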


class LibRelatedDataset(Dataset):
    """
    Base class for LibraryDatasetDatasetAssociation and LibraryDataset classes.
    """

    def __init__(self, ds_dict, container, gi=None):
        super().__init__(ds_dict, container, gi=gi)

    @property
    def gi_module(self):
        return self.gi.libraries

    @property
    def _stream_url(self):
        base_url = self.gi.gi.libraries._make_url()
        return "%s/datasets/download/uncompressed" % base_url


class LibraryDatasetDatasetAssociation(LibRelatedDataset):
    """
    Maps to a Galaxy ``LibraryDatasetDatasetAssociation``.
    """
    BASE_ATTRS = LibRelatedDataset.BASE_ATTRS + ('deleted',)
    SRC = 'ldda'


class LibraryDataset(LibRelatedDataset):
    """
    Maps to a Galaxy ``LibraryDataset``.
    """
    SRC = 'ld'

    def delete(self, purged=False):
        """
        Delete this library dataset.

        :type purged: bool
        :param purged: if ``True``, also purge (permanently delete) the dataset
        """
        self.gi.gi.libraries.delete_library_dataset(
            self.container.id, self.id, purged=purged)
        self.container.refresh()
        self.refresh()

    def update(self, **kwds):
        """
        Update this library dataset's metadata. Some of the attributes that
        can be modified are documented below.

        :type name: str
        :param name: Replace library dataset name with the given string

        :type genome_build: str
        :param genome_build: Replace library dataset genome build (dbkey)
        """
        res = self.gi.gi.libraries.update_library_dataset(self.id, **kwds)
        self.container.refresh()
        self.__init__(res, self.container, gi=self.gi)
        return self


class ContentInfo(Wrapper, metaclass=abc.ABCMeta):
    """
    Instances of this class wrap dictionaries obtained by getting
    ``/api/{histories,libraries}/<ID>/contents`` from Galaxy.
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + ('type',)

    @abc.abstractmethod
    def __init__(self, info_dict, gi=None):
        super().__init__(info_dict, gi=gi)


class LibraryContentInfo(ContentInfo):
    """
    Instances of this class wrap dictionaries obtained by getting
    ``/api/libraries/<ID>/contents`` from Galaxy.
    """
    def __init__(self, info_dict, gi=None):
        super().__init__(info_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.libraries


class HistoryContentInfo(ContentInfo):
    """
    Instances of this class wrap dictionaries obtained by getting
    ``/api/histories/<ID>/contents`` from Galaxy.
    """
    BASE_ATTRS = ContentInfo.BASE_ATTRS + ('deleted', 'state', 'visible')

    def __init__(self, info_dict, gi=None):
        super().__init__(info_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.histories


class DatasetContainer(Wrapper, metaclass=abc.ABCMeta):
    """
    Abstract base class for dataset containers (histories and libraries).
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + ('deleted',)

    @abc.abstractmethod
    def __init__(self, c_dict, content_infos=None, gi=None):
        """
        :type content_infos: list of :class:`ContentInfo`
        :param content_infos: info objects for the container's contents
        """
        super().__init__(c_dict, gi=gi)
        if content_infos is None:
            content_infos = []
        object.__setattr__(self, 'content_infos', content_infos)

    @property
    def dataset_ids(self):
        """
        Return the ids of the contained datasets.
        """
        return [_.id for _ in self.content_infos if _.type == 'file']

    def preview(self):
        getf = self.gi_module.get_previews
        # self.state could be stale: check both regular and deleted containers
        try:
            p = [_ for _ in getf() if _.id == self.id][0]
        except IndexError:
            try:
                p = [_ for _ in getf(deleted=True) if _.id == self.id][0]
            except IndexError:
                raise ValueError('no object for id %s' % self.id)
        return p

    def refresh(self):
        """
        Re-fetch the attributes pertaining to this object.

        Returns: self
        """
        fresh = self.gi_module.get(self.id)
        self.__init__(
            fresh.wrapped, content_infos=fresh.content_infos, gi=self.gi)
        return self

    def get_dataset(self, ds_id):
        """
        Retrieve the dataset corresponding to the given id.

        :type ds_id: str
        :param ds_id: dataset id

        :rtype: :class:`~.HistoryDatasetAssociation` or
          :class:`~.LibraryDataset`
        :return: the dataset corresponding to ``ds_id``
        """
        gi_client = getattr(self.gi.gi, self.API_MODULE)
        ds_dict = gi_client.show_dataset(self.id, ds_id)
        return self.DS_TYPE(ds_dict, self, gi=self.gi)

    def get_datasets(self, name=None):
        """
        Get all datasets contained inside this dataset container.

        :type name: str
        :param name: return only datasets with this name

        :rtype: list of :class:`~.HistoryDatasetAssociation` or list of
          :class:`~.LibraryDataset`
        :return: datasets with the given name contained inside this
          container

        .. note::

            when filtering library datasets by name, specify their full
            paths starting from the library's root folder, e.g.,
            ``/seqdata/reads.fastq``. Full paths are available through
            the ``content_infos`` attribute of :class:`~.Library` objects.
        """
        if name is None:
            ds_ids = self.dataset_ids
        else:
            ds_ids = [_.id for _ in self.content_infos if _.name == name]
        return [self.get_dataset(_) for _ in ds_ids]
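

# Usage sketch (illustrative): history datasets are filtered by plain name,
# while library datasets are filtered by their full path from the root
# folder::
#
#     hdas = hist.get_datasets(name='reads.fastq')
#     lds = lib.get_datasets(name='/seqdata/reads.fastq')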


class History(DatasetContainer):
    """
    Maps to a Galaxy history.
    """
    BASE_ATTRS = DatasetContainer.BASE_ATTRS + ('annotation', 'published', 'state', 'state_ids', 'state_details', 'tags')
    DS_TYPE = HistoryDatasetAssociation
    DSC_TYPE = HistoryDatasetCollectionAssociation
    CONTENT_INFO_TYPE = HistoryContentInfo
    API_MODULE = 'histories'

    def __init__(self, hist_dict, content_infos=None, gi=None):
        super().__init__(hist_dict, content_infos=content_infos, gi=gi)

    @property
    def gi_module(self):
        return self.gi.histories

    def update(self, **kwds):
        """
        Update history metadata information. Some of the attributes that can be
        modified are documented below.

        :type name: str
        :param name: Replace history name with the given string

        :type annotation: str
        :param annotation: Replace history annotation with the given string

        :type deleted: bool
        :param deleted: Mark or unmark history as deleted

        :type purged: bool
        :param purged: If ``True``, mark history as purged (permanently
          deleted).

        :type published: bool
        :param published: Mark or unmark history as published

        :type importable: bool
        :param importable: Mark or unmark history as importable

        :type tags: list
        :param tags: Replace history tags with the given list
        """
        # TODO: wouldn't it be better if name and annotation were attributes?
        self.gi.gi.histories.update_history(self.id, **kwds)
        self.refresh()
        return self

    def delete(self, purge=False):
        """
        Delete this history.

        :type purge: bool
        :param purge: if ``True``, also purge (permanently delete) the history

        .. note::
            For the purge option to work, the Galaxy instance must have the
            ``allow_user_dataset_purge`` option set to ``true`` in the
            ``config/galaxy.yml`` configuration file.
        """
        self.gi.histories.delete(id_=self.id, purge=purge)
        self.refresh()
        self.unmap()

    def import_dataset(self, lds):
        """
        Import a dataset into the history from a library.

        :type lds: :class:`~.LibraryDataset`
        :param lds: the library dataset to import

        :rtype: :class:`~.HistoryDatasetAssociation`
        :return: the imported history dataset
        """
        if not self.is_mapped:
            raise RuntimeError('history is not mapped to a Galaxy object')
        if not isinstance(lds, LibraryDataset):
            raise TypeError('lds is not a LibraryDataset')
        res = self.gi.gi.histories.upload_dataset_from_library(self.id, lds.id)
        if not isinstance(res, Mapping):
            raise RuntimeError(
                'upload_dataset_from_library: unexpected reply: %r' % res)
        self.refresh()
        return self.get_dataset(res['id'])

    def upload_file(self, path, **kwargs):
        """
        Upload the file specified by ``path`` to this history.

        :type path: str
        :param path: path of the file to upload

        See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for
        the optional parameters.

        :rtype: :class:`~.HistoryDatasetAssociation`
        :return: the uploaded dataset
        """
        out_dict = self.gi.gi.tools.upload_file(path, self.id, **kwargs)
        self.refresh()
        return self.get_dataset(out_dict['outputs'][0]['id'])

    upload_dataset = upload_file

    def upload_from_ftp(self, path, **kwargs):
        """
        Upload the file specified by ``path`` from the user's FTP directory to
        this history.

        :type path: str
        :param path: path of the file in the user's FTP directory

        See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for
        the optional parameters.

        :rtype: :class:`~.HistoryDatasetAssociation`
        :return: the uploaded dataset
        """
        out_dict = self.gi.gi.tools.upload_from_ftp(path, self.id, **kwargs)
        self.refresh()
        return self.get_dataset(out_dict['outputs'][0]['id'])

    def paste_content(self, content, **kwargs):
        """
        Upload a string to a new dataset in this history.

        :type content: str
        :param content: content of the new dataset to upload

        See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for
        the optional parameters (except file_name).

        :rtype: :class:`~.HistoryDatasetAssociation`
        :return: the uploaded dataset
        """
        out_dict = self.gi.gi.tools.paste_content(content, self.id, **kwargs)
        self.refresh()
        return self.get_dataset(out_dict['outputs'][0]['id'])

    def export(self, gzip=True, include_hidden=False, include_deleted=False,
               wait=False, maxwait=None):
        """
        Start a job to create an export archive for this history. See
        :meth:`~bioblend.galaxy.histories.HistoryClient.export_history`
        for parameter and return value info.
        """
        return self.gi.gi.histories.export_history(
            self.id, gzip=gzip, include_hidden=include_hidden,
            include_deleted=include_deleted, wait=wait, maxwait=maxwait)

    def download(self, jeha_id, outf, chunk_size=bioblend.CHUNK_SIZE):
        """
        Download an export archive for this history. Use :meth:`export`
        to create an export and get the required ``jeha_id``. See
        :meth:`~bioblend.galaxy.histories.HistoryClient.download_history`
        for parameter and return value info.
        """
        return self.gi.gi.histories.download_history(
            self.id, jeha_id, outf, chunk_size=chunk_size)

    def create_dataset_collection(self, collection_description):
        """
        Create a new dataset collection in the history by providing a
        collection description.

        :type collection_description: bioblend.galaxy.dataset_collections.CollectionDescription
        :param collection_description: a description of the dataset collection

        :rtype: :class:`~.HistoryDatasetCollectionAssociation`
        :return: the new dataset collection
        """
        dataset_collection = self.gi.gi.histories.create_dataset_collection(self.id, collection_description)
        self.refresh()
        return self.get_dataset_collection(dataset_collection['id'])

    def get_dataset_collection(self, dsc_id):
        """
        Retrieve the dataset collection corresponding to the given id.

        :type dsc_id: str
        :param dsc_id: dataset collection id

        :rtype: :class:`~.HistoryDatasetCollectionAssociation`
        :return: the dataset collection corresponding to ``dsc_id``
        """
        dsc_dict = self.gi.gi.histories.show_dataset_collection(self.id, dsc_id)
        return self.DSC_TYPE(dsc_dict, self, gi=self.gi)
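

# Usage sketch (illustrative)::
#
#     hist = gi.histories.create('qc run')
#     hda = hist.upload_file('/tmp/reads.fastq', file_type='fastqsanger')
#     jeha_id = hist.export(wait=True)
#     with open('qc_run.tar.gz', 'wb') as f:
#         hist.download(jeha_id, f)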


class Library(DatasetContainer):
    """
    Maps to a Galaxy library.
    """
    BASE_ATTRS = DatasetContainer.BASE_ATTRS + ('description', 'synopsis')
    DS_TYPE = LibraryDataset
    CONTENT_INFO_TYPE = LibraryContentInfo
    API_MODULE = 'libraries'

    def __init__(self, lib_dict, content_infos=None, gi=None):
        super().__init__(lib_dict, content_infos=content_infos, gi=gi)

    @property
    def gi_module(self):
        return self.gi.libraries

    @property
    def folder_ids(self):
        """
        Return the ids of the contained folders.
        """
        return [_.id for _ in self.content_infos if _.type == 'folder']

    def delete(self):
        """
        Delete this library.
        """
        self.gi.libraries.delete(id_=self.id)
        self.refresh()
        self.unmap()

    def _pre_upload(self, folder):
        """
        Return the id of the given folder, after sanity checking.
        """
        if not self.is_mapped:
            raise RuntimeError('library is not mapped to a Galaxy object')
        return None if folder is None else folder.id

    def upload_data(self, data, folder=None, **kwargs):
        """
        Upload data to this library.

        :type data: str
        :param data: dataset contents

        :type folder: :class:`~.Folder`
        :param folder: a folder object, or ``None`` to upload to the root
          folder

        :rtype: :class:`~.LibraryDataset`
        :return: the dataset object that represents the uploaded content

        Optional keyword arguments: ``file_type``, ``dbkey``.
        """
        fid = self._pre_upload(folder)
        res = self.gi.gi.libraries.upload_file_contents(
            self.id, data, folder_id=fid, **kwargs)
        self.refresh()
        return self.get_dataset(res[0]['id'])

    def upload_from_url(self, url, folder=None, **kwargs):
        """
        Upload data to this library from the given URL.

        :type url: str
        :param url: URL from which data should be read

        See :meth:`.upload_data` for info on other params.
        """
        fid = self._pre_upload(folder)
        res = self.gi.gi.libraries.upload_file_from_url(
            self.id, url, folder_id=fid, **kwargs)
        self.refresh()
        return self.get_dataset(res[0]['id'])

    def upload_from_local(self, path, folder=None, **kwargs):
        """
        Upload data to this library from a local file.

        :type path: str
        :param path: local file path from which data should be read

        See :meth:`.upload_data` for info on other params.
        """
        fid = self._pre_upload(folder)
        res = self.gi.gi.libraries.upload_file_from_local_path(
            self.id, path, folder_id=fid, **kwargs)
        self.refresh()
        return self.get_dataset(res[0]['id'])

    def upload_from_galaxy_fs(self, paths, folder=None, link_data_only=None, **kwargs):
        """
        Upload data to this library from filesystem paths on the server.

        .. note::
            For this method to work, the Galaxy instance must have the
            ``allow_path_paste`` option set to ``true`` in the
            ``config/galaxy.yml`` configuration file.

        :type paths: str or :class:`~collections.abc.Iterable` of str
        :param paths: server-side file paths from which data should be read

        :type link_data_only: str
        :param link_data_only: either 'copy_files' (default) or
          'link_to_files'. Setting to 'link_to_files' symlinks instead of
          copying the files

        :rtype: list of :class:`~.LibraryDataset`
        :return: the dataset objects that represent the uploaded content

        See :meth:`.upload_data` for info on other params.
        """
        fid = self._pre_upload(folder)
        if isinstance(paths, str):
            paths = (paths,)
        paths = '\n'.join(paths)
        res = self.gi.gi.libraries.upload_from_galaxy_filesystem(
            self.id, paths, folder_id=fid, link_data_only=link_data_only,
            **kwargs)
        if res is None:
            raise RuntimeError('upload_from_galaxy_filesystem: no reply')
        if not isinstance(res, Sequence):
            raise RuntimeError(
                'upload_from_galaxy_filesystem: unexpected reply: %r' % res)
        new_datasets = [
            self.get_dataset(ds_info['id']) for ds_info in res
        ]
        self.refresh()
        return new_datasets

    def copy_from_dataset(self, hda, folder=None, message=''):
        """
        Copy a history dataset into this library.

        :type hda: :class:`~.HistoryDatasetAssociation`
        :param hda: history dataset to copy into the library

        See :meth:`.upload_data` for info on other params.
        """
        fid = self._pre_upload(folder)
        res = self.gi.gi.libraries.copy_from_dataset(
            self.id, hda.id, folder_id=fid, message=message)
        self.refresh()
        return self.get_dataset(res['library_dataset_id'])

    def create_folder(self, name, description=None, base_folder=None):
        """
        Create a folder in this library.

        :type name: str
        :param name: folder name

        :type description: str
        :param description: optional folder description

        :type base_folder: :class:`~.Folder`
        :param base_folder: parent folder, or ``None`` to create in the root
          folder

        :rtype: :class:`~.Folder`
        :return: the folder just created
        """
        bfid = None if base_folder is None else base_folder.id
        res = self.gi.gi.libraries.create_folder(
            self.id, name, description=description, base_folder_id=bfid)
        self.refresh()
        return self.get_folder(res[0]['id'])

    def get_folder(self, f_id):
        """
        Retrieve the folder corresponding to the given id.

        :rtype: :class:`~.Folder`
        :return: the folder corresponding to ``f_id``
        """
        f_dict = self.gi.gi.libraries.show_folder(self.id, f_id)
        return Folder(f_dict, self, gi=self.gi)

    @property
    def root_folder(self):
        """
        The root folder of this library.

        :rtype: :class:`~.Folder`
        :return: the root folder of this library
        """
        return self.get_folder(self.gi.gi.libraries._get_root_folder_id(self.id))
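

# Usage sketch (illustrative)::
#
#     lib = gi.libraries.create('my library')
#     folder = lib.create_folder('raw')
#     ld = lib.upload_from_url('https://example.org/data.txt', folder=folder)
#     ld2 = lib.copy_from_dataset(hda, folder=folder, message='from history')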


class Folder(Wrapper):
    """
    Maps to a folder in a Galaxy library.
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + ('description', 'deleted', 'item_count')

    def __init__(self, f_dict, container, gi=None):
        super().__init__(f_dict, gi=gi)
        object.__setattr__(self, 'container', container)

    @property
    def parent(self):
        """
        The parent folder of this folder. The parent of the root folder is
        ``None``.

        :rtype: :class:`~.Folder`
        :return: the parent of this folder
        """
        if self._cached_parent is None:
            object.__setattr__(self, '_cached_parent', self._get_parent())
        return self._cached_parent

    def _get_parent(self):
        """
        Return the parent folder of this folder.
        """
        parent_id = self.wrapped['parent_id']
        if parent_id is None:
            return None
        return self.container.get_folder(parent_id)

    @property
    def gi_module(self):
        return self.gi.libraries

    @property
    def container_id(self):
        """
        Deprecated property.

        Id of the folder container. Use :attr:`.container.id` instead.
        """
        return self.container.id

    def refresh(self):
        """
        Re-fetch the attributes pertaining to this object.

        Returns: self
        """
        f_dict = self.gi.gi.libraries.show_folder(self.container.id, self.id)
        self.__init__(f_dict, self.container, gi=self.gi)
        return self


class Tool(Wrapper):
    """
    Maps to a Galaxy tool.
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + ('version',)
    POLLING_INTERVAL = 10  # for output state monitoring

    def __init__(self, t_dict, gi=None):
        super().__init__(t_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.tools

    def run(self, inputs, history, wait=False,
            polling_interval=POLLING_INTERVAL):
        """
        Execute this tool in the given history with inputs from dict
        ``inputs``.

        :type inputs: dict
        :param inputs: dictionary of input datasets and parameters for
          the tool (see below)

        :type history: :class:`History`
        :param history: the history where to execute the tool

        :type wait: bool
        :param wait: whether to wait while the returned datasets are
          in a pending state

        :type polling_interval: float
        :param polling_interval: polling interval in seconds

        :rtype: list of :class:`HistoryDatasetAssociation`
        :return: list of output datasets

        The ``inputs`` dict should contain input datasets and parameters
        in the (largely undocumented) format used by the Galaxy API.
        Some examples can be found in `Galaxy's API test suite
        <https://github.com/galaxyproject/galaxy/blob/dev/lib/galaxy_test/api/test_tools.py>`_.
        The value of an input dataset can also be a :class:`Dataset`
        object, which will be automatically converted to the needed
        format.
        """
        for k, v in inputs.items():
            if isinstance(v, Dataset):
                inputs[k] = {'src': v.SRC, 'id': v.id}
        out_dict = self.gi.gi.tools.run_tool(history.id, self.id, inputs)
        outputs = [history.get_dataset(_['id']) for _ in out_dict['outputs']]
        if wait:
            self.gi._wait_datasets(outputs, polling_interval=polling_interval)
        return outputs
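

# Usage sketch (illustrative; tool ids and input names vary per instance,
# 'Cut1' is the standard text-column Cut tool)::
#
#     tool = gi.tools.get('Cut1')
#     inputs = {'input': hda, 'columnList': 'c1,c2', 'delimiter': 'T'}
#     outputs = tool.run(inputs, hist, wait=True)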


class Job(Wrapper):
    """
    Maps to a Galaxy job.
    """
    BASE_ATTRS = ('id', 'state')

    def __init__(self, j_dict, gi=None):
        super().__init__(j_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.jobs


class Preview(Wrapper, metaclass=abc.ABCMeta):
    """
    Abstract base class for Galaxy entity 'previews'.

    Classes derived from this one model the short summaries returned
    by global getters such as ``/api/libraries``.
    """
    BASE_ATTRS = Wrapper.BASE_ATTRS + ('deleted',)

    @abc.abstractmethod
    def __init__(self, pw_dict, gi=None):
        super().__init__(pw_dict, gi=gi)


class LibraryPreview(Preview):
    """
    Models Galaxy library 'previews'.

    Instances of this class wrap dictionaries obtained by getting
    ``/api/libraries`` from Galaxy.
    """
    def __init__(self, pw_dict, gi=None):
        super().__init__(pw_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.libraries


class HistoryPreview(Preview):
    """
    Models Galaxy history 'previews'.

    Instances of this class wrap dictionaries obtained by getting
    ``/api/histories`` from Galaxy.
    """
    BASE_ATTRS = Preview.BASE_ATTRS + ('annotation', 'published', 'purged', 'tags')

    def __init__(self, pw_dict, gi=None):
        super().__init__(pw_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.histories


class WorkflowPreview(Preview):
    """
    Models Galaxy workflow 'previews'.

    Instances of this class wrap dictionaries obtained by getting
    ``/api/workflows`` from Galaxy.
    """
    BASE_ATTRS = Preview.BASE_ATTRS + ('published', 'tags')

    def __init__(self, pw_dict, gi=None):
        super().__init__(pw_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.workflows


class JobPreview(Preview):
    """
    Models Galaxy job 'previews'.

    Instances of this class wrap dictionaries obtained by getting
    ``/api/jobs`` from Galaxy.
    """
    BASE_ATTRS = ('id', 'state')

    def __init__(self, pw_dict, gi=None):
        super().__init__(pw_dict, gi=gi)

    @property
    def gi_module(self):
        return self.gi.jobs