comparison env/lib/python3.9/site-packages/bioblend/galaxy/histories/__init__.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
"""
Contains possible interactions with the Galaxy Histories
"""
import logging
import re
import sys
import time
import webbrowser
from urllib.parse import urljoin

import bioblend
from bioblend import ConnectionError
from bioblend.galaxy.client import Client
from bioblend.util import attach_file

# Module-level logger used by the polling helpers below (e.g. export_history).
log = logging.getLogger(__name__)
18
class HistoryClient(Client):
    """Client for Galaxy's ``histories`` API module."""

    def __init__(self, galaxy_instance):
        # 'histories' selects the API route this client talks to.
        # NOTE(review): self.module is assigned before super().__init__ —
        # presumably the base Client initializer reads it; keep this order.
        self.module = 'histories'
        super().__init__(galaxy_instance)
24
25 def create_history(self, name=None):
26 """
27 Create a new history, optionally setting the ``name``.
28
29 :type name: str
30 :param name: Optional name for new history
31
32 :rtype: dict
33 :return: Dictionary containing information about newly created history
34 """
35 payload = {}
36 if name is not None:
37 payload['name'] = name
38 return self._post(payload)
39
40 def import_history(self, file_path=None, url=None):
41 """
42 Import a history from an archive on disk or a URL.
43
44 :type file_path: str
45 :param file_path: Path to exported history archive on disk.
46 :type url: str
47 :param url: URL for an exported history archive
48 """
49 if file_path:
50 archive_file = attach_file(file_path)
51 payload = dict(archive_source='', archive_file=archive_file, archive_type="file")
52 else:
53 payload = dict(archive_source=url, archive_type='url')
54
55 return self._post(payload=payload, files_attached=file_path is not None)
56
57 def _get_histories(self, history_id=None, name=None, deleted=False, filter_user_published=None, get_all_published=False, slug=None):
58 """
59 Hidden method to be used by both get_histories() and get_published_histories()
60 """
61 if history_id is not None and name is not None:
62 raise ValueError('Provide only one argument between name or history_id, but not both')
63 assert not (filter_user_published is not None and get_all_published)
64
65 params = {}
66 if deleted:
67 params.setdefault('q', []).append('deleted')
68 params.setdefault('qv', []).append(deleted)
69 if filter_user_published is not None:
70 params.setdefault('q', []).append('published')
71 params.setdefault('qv', []).append(filter_user_published)
72 if slug is not None:
73 params.setdefault('q', []).append('slug')
74 params.setdefault('qv', []).append(slug)
75
76 url = '/'.join((self._make_url(), 'published')) if get_all_published else None
77 histories = self._get(url=url, params=params)
78
79 if history_id is not None:
80 history = next((_ for _ in histories if _['id'] == history_id), None)
81 histories = [history] if history is not None else []
82 elif name is not None:
83 histories = [_ for _ in histories if _['name'] == name]
84 return histories
85
86 def get_histories(self, history_id=None, name=None, deleted=False, published=None, slug=None):
87 """
88 Get all histories or filter the specific one(s) by ``name`` or other
89 arguments.
90
91 :type history_id: str
92 :param history_id: Encoded history ID to filter on
93
94 .. deprecated:: 0.15.0
95 To get details of a history for which you know the ID, use the much
96 more efficient :meth:`show_history` instead.
97
98 :type name: str
99 :param name: Name of history to filter on
100
101 :type deleted: bool
102 :param deleted: whether to filter for the deleted histories (``True``)
103 or for the non-deleted ones (``False``)
104
105 :type published: bool or None
106 :param published: whether to filter for the published histories
107 (``True``) or for the non-published ones (``False``). If not set, no
108 filtering is applied. Note the filtering is only applied to the user's
109 own histories; to access all histories published by any user, use the
110 ``get_published_histories`` method.
111
112 :type slug: str
113 :param slug: History slug to filter on
114
115 :rtype: list
116 :return: List of history dicts.
117 """
118 return self._get_histories(history_id=history_id, name=name, deleted=deleted, filter_user_published=published, get_all_published=False, slug=slug)
119
120 def get_published_histories(self, name=None, deleted=False, slug=None):
121 """
122 Get all published histories (by any user) or filter the specific one(s)
123 by ``name`` or other arguments.
124
125 :type name: str
126 :param name: Name of history to filter on
127
128 :type deleted: bool
129 :param deleted: whether to filter for the deleted histories (``True``)
130 or for the non-deleted ones (``False``)
131
132 :type slug: str
133 :param slug: History slug to filter on
134
135 :rtype: list
136 :return: List of history dicts.
137 """
138 return self._get_histories(name=name, deleted=deleted, filter_user_published=None, get_all_published=True, slug=slug)
139
140 def show_history(self, history_id, contents=False, deleted=None, visible=None, details=None, types=None):
141 """
142 Get details of a given history. By default, just get the history meta
143 information.
144
145 :type history_id: str
146 :param history_id: Encoded history ID to filter on
147
148 :type contents: bool
149 :param contents: When ``True``, instead of the history details, return
150 a list with info for all datasets in the given history.
151 Note that inside each dataset info dict, the id which should be used
152 for further requests about this history dataset is given by the value
153 of the `id` (not `dataset_id`) key.
154
155 :type deleted: bool or None
156 :param deleted: When ``contents=True``, whether to filter for the
157 deleted datasets (``True``) or for the non-deleted ones (``False``).
158 If not set, no filtering is applied.
159
160 :type visible: bool or None
161 :param visible: When ``contents=True``, whether to filter for the
162 visible datasets (``True``) or for the hidden ones (``False``). If not
163 set, no filtering is applied.
164
165 :type details: str
166 :param details: When ``contents=True``, include dataset details. Set to
167 'all' for the most information.
168
169 :type types: list
170 :param types: When ``contents=True``, filter for history content types.
171 If set to ``['dataset']``, return only datasets. If set to
172 ``['dataset_collection']``, return only dataset collections. If not
173 set, no filtering is applied.
174
175 :rtype: dict or list of dicts
176 :return: details of the given history or list of dataset info
177 """
178 params = {}
179 if contents:
180 if details:
181 params['details'] = details
182 if deleted is not None:
183 params['deleted'] = deleted
184 if visible is not None:
185 params['visible'] = visible
186 if types is not None:
187 params['types'] = types
188 return self._get(id=history_id, contents=contents, params=params)
189
190 def delete_dataset(self, history_id, dataset_id, purge=False):
191 """
192 Mark corresponding dataset as deleted.
193
194 :type history_id: str
195 :param history_id: Encoded history ID
196
197 :type dataset_id: str
198 :param dataset_id: Encoded dataset ID
199
200 :type purge: bool
201 :param purge: if ``True``, also purge (permanently delete) the dataset
202
203 :rtype: None
204 :return: None
205
206 .. note::
207 For the purge option to work, the Galaxy instance must have the
208 ``allow_user_dataset_purge`` option set to ``true`` in the
209 ``config/galaxy.yml`` configuration file.
210 """
211 url = '/'.join((self._make_url(history_id, contents=True), dataset_id))
212 payload = {}
213 if purge is True:
214 payload['purge'] = purge
215 self._delete(payload=payload, url=url)
216
217 def delete_dataset_collection(self, history_id, dataset_collection_id):
218 """
219 Mark corresponding dataset collection as deleted.
220
221 :type history_id: str
222 :param history_id: Encoded history ID
223
224 :type dataset_collection_id: str
225 :param dataset_collection_id: Encoded dataset collection ID
226
227 :rtype: None
228 :return: None
229 """
230 url = '/'.join((self._make_url(history_id, contents=True), 'dataset_collections', dataset_collection_id))
231 self._delete(url=url)
232
233 def show_dataset(self, history_id, dataset_id):
234 """
235 Get details about a given history dataset.
236
237 :type history_id: str
238 :param history_id: Encoded history ID
239
240 :type dataset_id: str
241 :param dataset_id: Encoded dataset ID
242
243 :rtype: dict
244 :return: Information about the dataset
245 """
246 url = '/'.join((self._make_url(history_id, contents=True), dataset_id))
247 return self._get(url=url)
248
249 def show_dataset_collection(self, history_id, dataset_collection_id):
250 """
251 Get details about a given history dataset collection.
252
253 :type history_id: str
254 :param history_id: Encoded history ID
255
256 :type dataset_collection_id: str
257 :param dataset_collection_id: Encoded dataset collection ID
258
259 :rtype: dict
260 :return: Information about the dataset collection
261 """
262 url = '/'.join((self._make_url(history_id, contents=True), 'dataset_collections', dataset_collection_id))
263 return self._get(url=url)
264
265 def show_matching_datasets(self, history_id, name_filter=None):
266 """
267 Get dataset details for matching datasets within a history.
268
269 :type history_id: str
270 :param history_id: Encoded history ID
271
272 :type name_filter: str
273 :param name_filter: Only datasets whose name matches the
274 ``name_filter`` regular expression will be
275 returned; use plain strings for exact matches and
276 None to match all datasets in the history
277
278 :rtype: list
279 :return: List of dictionaries
280 """
281 if isinstance(name_filter, str):
282 name_filter = re.compile(name_filter + '$')
283 return [self.show_dataset(history_id, h['id'])
284 for h in self.show_history(history_id, contents=True)
285 if name_filter is None or name_filter.match(h['name'])]
286
287 def show_dataset_provenance(self, history_id, dataset_id, follow=False):
288 """
289 Get details related to how dataset was created (``id``, ``job_id``,
290 ``tool_id``, ``stdout``, ``stderr``, ``parameters``, ``inputs``,
291 etc...).
292
293 :type history_id: str
294 :param history_id: Encoded history ID
295
296 :type dataset_id: str
297 :param dataset_id: Encoded dataset ID
298
299 :type follow: bool
300 :param follow: If ``True``, recursively fetch dataset provenance
301 information for all inputs and their inputs, etc.
302
303 :rtype: dict
304 :return: Dataset provenance information
305 For example::
306
307 {'id': '6fbd9b2274c62ebe',
308 'job_id': '5471ba76f274f929',
309 'parameters': {'chromInfo': '"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/mm9.len"',
310 'dbkey': '"mm9"',
311 'experiment_name': '"H3K4me3_TAC_MACS2"',
312 'input_chipseq_file1': {'id': '6f0a311a444290f2',
313 'uuid': 'null'},
314 'input_control_file1': {'id': 'c21816a91f5dc24e',
315 'uuid': '16f8ee5e-228f-41e2-921e-a07866edce06'},
316 'major_command': '{"gsize": "2716965481.0", "bdg": "False", "__current_case__": 0, "advanced_options": {"advanced_options_selector": "off", "__current_case__": 1}, "input_chipseq_file1": 104715, "xls_to_interval": "False", "major_command_selector": "callpeak", "input_control_file1": 104721, "pq_options": {"pq_options_selector": "qvalue", "qvalue": "0.05", "__current_case__": 1}, "bw": "300", "nomodel_type": {"nomodel_type_selector": "create_model", "__current_case__": 1}}'},
317 'stderr': '',
318 'stdout': '',
319 'tool_id': 'toolshed.g2.bx.psu.edu/repos/ziru-zhou/macs2/modencode_peakcalling_macs2/2.0.10.2',
320 'uuid': '5c0c43f5-8d93-44bd-939d-305e82f213c6'}
321 """
322 url = '/'.join((self._make_url(history_id, contents=True), dataset_id, 'provenance'))
323 return self._get(url=url)
324
325 def update_history(self, history_id, **kwds):
326 """
327 Update history metadata information. Some of the attributes that can be
328 modified are documented below.
329
330 :type history_id: str
331 :param history_id: Encoded history ID
332
333 :type name: str
334 :param name: Replace history name with the given string
335
336 :type annotation: str
337 :param annotation: Replace history annotation with given string
338
339 :type deleted: bool
340 :param deleted: Mark or unmark history as deleted
341
342 :type purged: bool
343 :param purged: If ``True``, mark history as purged (permanently deleted).
344
345 :type published: bool
346 :param published: Mark or unmark history as published
347
348 :type importable: bool
349 :param importable: Mark or unmark history as importable
350
351 :type tags: list
352 :param tags: Replace history tags with the given list
353
354 :rtype: dict
355 :return: details of the updated history
356
357 .. versionchanged:: 0.8.0
358 Changed the return value from the status code (type int) to a dict.
359 """
360 return self._put(payload=kwds, id=history_id)
361
362 def update_dataset(self, history_id, dataset_id, **kwds):
363 """
364 Update history dataset metadata. Some of the attributes that can be
365 modified are documented below.
366
367 :type history_id: str
368 :param history_id: Encoded history ID
369
370 :type dataset_id: str
371 :param dataset_id: ID of the dataset
372
373 :type name: str
374 :param name: Replace history dataset name with the given string
375
376 :type datatype: str
377 :param datatype: Replace the datatype of the history dataset with the
378 given string. The string must be a valid Galaxy datatype, both the
379 current and the target datatypes must allow datatype changes, and the
380 dataset must not be in use as input or output of a running job
381 (including uploads), otherwise an error will be raised.
382
383 :type genome_build: str
384 :param genome_build: Replace history dataset genome build (dbkey)
385
386 :type annotation: str
387 :param annotation: Replace history dataset annotation with given string
388
389 :type deleted: bool
390 :param deleted: Mark or unmark history dataset as deleted
391
392 :type visible: bool
393 :param visible: Mark or unmark history dataset as visible
394
395 :rtype: dict
396 :return: details of the updated dataset
397
398 .. versionchanged:: 0.8.0
399 Changed the return value from the status code (type int) to a dict.
400 """
401 url = '/'.join((self._make_url(history_id, contents=True), dataset_id))
402 return self._put(payload=kwds, url=url)
403
404 def update_dataset_collection(self, history_id, dataset_collection_id, **kwds):
405 """
406 Update history dataset collection metadata. Some of the attributes that
407 can be modified are documented below.
408
409 :type history_id: str
410 :param history_id: Encoded history ID
411
412 :type dataset_collection_id: str
413 :param dataset_collection_id: Encoded dataset_collection ID
414
415 :type name: str
416 :param name: Replace history dataset collection name with the given
417 string
418
419 :type deleted: bool
420 :param deleted: Mark or unmark history dataset collection as deleted
421
422 :type visible: bool
423 :param visible: Mark or unmark history dataset collection as visible
424
425 :rtype: dict
426 :return: the updated dataset collection attributes
427
428 .. versionchanged:: 0.8.0
429 Changed the return value from the status code (type int) to a dict.
430 """
431 url = '/'.join((self._make_url(history_id, contents=True), 'dataset_collections', dataset_collection_id))
432 return self._put(payload=kwds, url=url)
433
434 def create_history_tag(self, history_id, tag):
435 """
436 Create history tag
437
438 :type history_id: str
439 :param history_id: Encoded history ID
440
441 :type tag: str
442 :param tag: Add tag to history
443
444 :rtype: dict
445 :return: A dictionary with information regarding the tag.
446 For example::
447
448 {'id': 'f792763bee8d277a',
449 'model_class': 'HistoryTagAssociation',
450 'user_tname': 'NGS_PE_RUN',
451 'user_value': None}
452 """
453 # empty payload since we are adding the new tag using the url
454 payload = {}
455 url = '/'.join((self._make_url(history_id), 'tags', tag))
456 return self._post(payload, url=url)
457
458 def upload_dataset_from_library(self, history_id, lib_dataset_id):
459 """
460 Upload a dataset into the history from a library. Requires the
461 library dataset ID, which can be obtained from the library
462 contents.
463
464 :type history_id: str
465 :param history_id: Encoded history ID
466
467 :type lib_dataset_id: str
468 :param lib_dataset_id: Encoded library dataset ID
469
470 :rtype: dict
471 :return: Information about the newly created HDA
472 """
473 payload = {
474 'content': lib_dataset_id,
475 'source': 'library',
476 'from_ld_id': lib_dataset_id, # compatibility with old API
477 }
478 return self._post(payload, id=history_id, contents=True)
479
480 def create_dataset_collection(self, history_id, collection_description):
481 """
482 Create a new dataset collection
483
484 :type history_id: str
485 :param history_id: Encoded history ID
486
487 :type collection_description: bioblend.galaxy.dataset_collections.CollectionDescription
488 :param collection_description: a description of the dataset collection
489 For example::
490
491 {'collection_type': 'list',
492 'element_identifiers': [{'id': 'f792763bee8d277a',
493 'name': 'element 1',
494 'src': 'hda'},
495 {'id': 'f792763bee8d277a',
496 'name': 'element 2',
497 'src': 'hda'}],
498 'name': 'My collection list'}
499
500 :rtype: dict
501 :return: Information about the new HDCA
502 """
503 try:
504 collection_description = collection_description.to_dict()
505 except AttributeError:
506 pass
507 payload = dict(
508 name=collection_description["name"],
509 type="dataset_collection",
510 collection_type=collection_description["collection_type"],
511 element_identifiers=collection_description["element_identifiers"],
512 )
513 return self._post(payload, id=history_id, contents=True)
514
515 def delete_history(self, history_id, purge=False):
516 """
517 Delete a history.
518
519 :type history_id: str
520 :param history_id: Encoded history ID
521
522 :type purge: bool
523 :param purge: if ``True``, also purge (permanently delete) the history
524
525 :rtype: dict
526 :return: An error object if an error occurred or a dictionary
527 containing: ``id`` (the encoded id of the history), ``deleted`` (if the
528 history was marked as deleted), ``purged`` (if the history was
529 purged).
530
531 .. note::
532 For the purge option to work, the Galaxy instance must have the
533 ``allow_user_dataset_purge`` option set to ``true`` in the
534 ``config/galaxy.yml`` configuration file.
535 """
536 payload = {}
537 if purge is True:
538 payload['purge'] = purge
539 return self._delete(payload=payload, id=history_id)
540
541 def undelete_history(self, history_id):
542 """
543 Undelete a history
544
545 :type history_id: str
546 :param history_id: Encoded history ID
547
548 :rtype: str
549 :return: 'OK' if it was deleted
550 """
551 url = self._make_url(history_id, deleted=True) + '/undelete'
552 return self._post(payload={}, url=url)
553
554 def get_status(self, history_id):
555 """
556 Returns the state of this history
557
558 :type history_id: str
559 :param history_id: Encoded history ID
560
561 :rtype: dict
562 :return: A dict documenting the current state of the history. Has the following keys:
563 'state' = This is the current state of the history, such as ok, error, new etc.
564 'state_details' = Contains individual statistics for various dataset states.
565 'percent_complete' = The overall number of datasets processed to completion.
566 """
567 state = {}
568 history = self.show_history(history_id)
569 state['state'] = history['state']
570 if history.get('state_details') is not None:
571 state['state_details'] = history['state_details']
572 total_complete = sum(history['state_details'].values())
573 if total_complete > 0:
574 state['percent_complete'] = 100 * history['state_details']['ok'] / total_complete
575 else:
576 state['percent_complete'] = 0
577 return state
578
579 def get_most_recently_used_history(self):
580 """
581 Returns the current user's most recently used history (not deleted).
582
583 :rtype: dict
584 :return: History representation
585 """
586 url = self._make_url() + '/most_recently_used'
587 return self._get(url=url)
588
    def export_history(self, history_id, gzip=True, include_hidden=False,
                       include_deleted=False, wait=False, maxwait=None):
        """
        Start a job to create an export archive for the given history.

        :type history_id: str
        :param history_id: history ID

        :type gzip: bool
        :param gzip: create .tar.gz archive if ``True``, else .tar

        :type include_hidden: bool
        :param include_hidden: whether to include hidden datasets
          in the export

        :type include_deleted: bool
        :param include_deleted: whether to include deleted datasets
          in the export

        :type wait: bool
        :param wait: if ``True``, block until the export is ready; else, return
          immediately

        :type maxwait: float
        :param maxwait: Total time (in seconds) to wait for the export to become
          ready. When set, implies that ``wait`` is ``True``.

        :rtype: str
        :return: ``jeha_id`` of the export, or empty if ``wait`` is ``False``
          and the export is not ready.
        """
        # Normalize the wait budget: explicit maxwait wins; otherwise wait
        # forever (sys.maxsize) when wait=True, or don't wait at all.
        if maxwait is not None:
            assert maxwait >= 0
        else:
            if wait:
                maxwait = sys.maxsize
            else:
                maxwait = 0
        params = {
            'gzip': gzip,
            'include_hidden': include_hidden,
            'include_deleted': include_deleted,
        }
        url = '%s/exports' % self._make_url(history_id)
        time_left = maxwait
        # Poll once per second: the server answers 202 (surfaced here as a
        # ConnectionError) while the export job is still running, and a
        # normal response once the archive is ready.
        while True:
            try:
                r = self._put(payload={}, url=url, params=params)
            except ConnectionError as e:
                if e.status_code == 202:  # export is not ready
                    if time_left > 0:
                        log.warning("Waiting for the export of history %s to complete. Will wait %i more s", history_id, time_left)
                        time.sleep(1)
                        time_left -= 1
                    else:
                        # Wait budget exhausted (or wait=False): give up.
                        return ''
                else:
                    # Any other error is a real failure — propagate it.
                    raise
            else:
                break
        # The jeha_id is the last path component of the download URL.
        jeha_id = r['download_url'].rsplit('/', 1)[-1]
        return jeha_id
651
652 def download_history(self, history_id, jeha_id, outf,
653 chunk_size=bioblend.CHUNK_SIZE):
654 """
655 Download a history export archive. Use :meth:`export_history`
656 to create an export.
657
658 :type history_id: str
659 :param history_id: history ID
660
661 :type jeha_id: str
662 :param jeha_id: jeha ID (this should be obtained via
663 :meth:`export_history`)
664
665 :type outf: file
666 :param outf: output file object, open for writing in binary mode
667
668 :type chunk_size: int
669 :param chunk_size: how many bytes at a time should be read into memory
670
671 :rtype: None
672 :return: None
673 """
674 url = '{}/exports/{}'.format(
675 self._make_url(module_id=history_id), jeha_id)
676 r = self.gi.make_get_request(url, stream=True)
677 r.raise_for_status()
678 for chunk in r.iter_content(chunk_size):
679 outf.write(chunk)
680
681 def copy_dataset(self, history_id, dataset_id, source='hda'):
682 """
683 Copy a dataset to a history.
684
685 :type history_id: str
686 :param history_id: history ID to which the dataset should be copied
687
688 :type dataset_id: str
689 :param dataset_id: dataset ID
690
691 :type source: str
692 :param source: Source of the dataset to be copied: 'hda' (the default), 'library' or 'library_folder'
693
694 :rtype: dict
695 :return: Information about the copied dataset
696 """
697 return self.copy_content(history_id, dataset_id, source)
698
699 def copy_content(self, history_id, content_id, source='hda'):
700 """
701 Copy existing content (e.g. a dataset) to a history.
702
703 :type history_id: str
704 :param history_id: ID of the history to which the content should be copied
705
706 :type content_id: str
707 :param content_id: ID of the content to copy
708
709 :type source: str
710 :param source: Source of the content to be copied: 'hda' (for a history
711 dataset, the default), 'hdca' (for a dataset collection), 'library'
712 (for a library dataset) or 'library_folder' (for all datasets in a
713 library folder).
714
715 :rtype: dict
716 :return: Information about the copied content
717 """
718
719 payload = {
720 'content': content_id,
721 'source': source,
722 'type': 'dataset' if source != 'hdca' else 'dataset_collection',
723 }
724
725 url = self._make_url(history_id, contents=True)
726 return self._post(payload=payload, url=url)
727
728 def open_history(self, history_id):
729 """
730 Open Galaxy in a new tab of the default web browser and switch to the
731 specified history.
732
733 :type history_id: str
734 :param history_id: ID of the history to switch to
735
736 :rtype: NoneType
737 :return: ``None``
738
739 .. warning::
740 After opening the specified history, all previously opened Galaxy tabs
741 in the browser session will have the current history changed to this
742 one, even if the interface still shows another history. Refreshing
743 any such tab is recommended.
744 """
745
746 url = urljoin(self.gi.base_url, f"history/switch_to_history?hist_id={history_id}")
747 webbrowser.open_new_tab(url)