Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/planemo/training/tutorial.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac |
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children |
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/planemo/training/tutorial.py Thu May 14 16:47:39 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,665 +0,0 @@ -"""Module contains code for the Tutorial class, dealing with the creation of a training tutorial.""" - -import collections -import json -import os -import re -import shutil - -import oyaml as yaml -import requests -import six - -from planemo import templates -from planemo.bioblend import galaxy -from planemo.engine import ( - engine_context, - is_galaxy_engine, -) -from planemo.io import info -from planemo.runnable import for_path -from .tool_input import ( - get_empty_input, - get_empty_param, - ToolInput -) -from .utils import ( - load_yaml, - save_to_yaml -) - -TUTO_HAND_ON_TEMPLATE = """--- -layout: tutorial_hands_on - -{{ metadata }} ---- - -{{ body }} -""" - -TUTO_SLIDES_TEMPLATE = """--- -layout: tutorial_slides -logo: "GTN" - -{{ metadata }} ---- - -### How to fill the slide decks? - -Please follow our -[tutorial to learn how to fill the slides]({{ '{{' }} site.baseurl {{ '}}' }}/topics/contributing/tutorials/create-new-tutorial-slides/slides.html) -""" - - -HANDS_ON_TOOL_BOX_TEMPLATE = """ -## Sub-step with **{{tool_name}}** - -> ### {{ '{%' }} icon hands_on {{ '%}' }} Hands-on: Task description -> -> 1. **{{tool_name}}** {{ '{%' }} icon tool {{ '%}' }} with the following parameters:{{inputlist}}{{paramlist}} -> -> ***TODO***: *Check parameter descriptions* -> -> ***TODO***: *Consider adding a comment or tip box* -> -> > ### {{ '{%' }} icon comment {{ '%}' }} Comment -> > -> > A comment about the tool or something else. This box can also be in the main text -> {: .comment} -> -{: .hands_on} - -***TODO***: *Consider adding a question to test the learners understanding of the previous exercise* - -> ### {{ '{%' }} icon question {{ '%}' }} Questions -> -> 1. Question1? -> 2. Question2? -> -> > ### {{ '{%' }} icon solution {{ '%}' }} Solution -> > -> > 1. Answer for question1 -> > 2. Answer for question2 -> > -> {: .solution} -> -{: .question} - -""" - -TUTO_BIBLIOGRAPHY_TEMPLATE = """ -# This is the bibliography file for your tutorial. -# -# To add bibliography (bibtex) entries here, follow these steps: -# 1) Find the DOI for the article you want to cite -# 2) Go to https://doi2bib.org and fill in the DOI -# 3) Copy the resulting bibtex entry into this file -# -# To cite the example below, in your tutorial.md file -# use {{ '{%' }} Batut2018 {{ '%}' }} - -@article{Batut2018, - doi = {10.1016/j.cels.2018.05.012}, - url = {https://doi.org/10.1016/j.cels.2018.05.012}, - year = {2018}, - month = jun, - publisher = {Elsevier {BV}}, - volume = {6}, - number = {6}, - pages = {752--758.e1}, - author = {B{\\'{e}}r{\\'{e}}nice Batut and Saskia Hiltemann and Andrea Bagnacani and Dannon Baker and Vivek Bhardwaj and - Clemens Blank and Anthony Bretaudeau and Loraine Brillet-Gu{\\'{e}}guen and Martin {\\v{C}}ech and John Chilton - and Dave Clements and Olivia Doppelt-Azeroual and Anika Erxleben and Mallory Ann Freeberg and Simon Gladman and - Youri Hoogstrate and Hans-Rudolf Hotz and Torsten Houwaart and Pratik Jagtap and Delphine Larivi{\\`{e}}re and - Gildas Le Corguill{\\'{e}} and Thomas Manke and Fabien Mareuil and Fidel Ram{\\'{i}}rez and Devon Ryan and - Florian Christoph Sigloch and Nicola Soranzo and Joachim Wolff and Pavankumar Videm and Markus Wolfien and - Aisanjiang Wubuli and Dilmurat Yusuf and James Taylor and Rolf Backofen and Anton Nekrutenko and Bj\\"{o}rn Gr\\"{u}ning}, - title = {Community-Driven Data Analysis Training for Biology}, - journal = {Cell Systems} -} -""" - -TUTO_HAND_ON_BODY_TEMPLATE = """ -# Introduction -{:.no_toc} - -<!-- This is a comment. --> - -General introduction about the topic and then an introduction of the -tutorial (the questions and the objectives). It is nice also to have a -scheme to sum up the pipeline used during the tutorial. The idea is to -give to trainees insight into the content of the tutorial and the (theoretical -and technical) key concepts they will learn. - -You may want to cite some publications; this can be done by adding citations to the -bibliography file (`tutorial.bib` file next to your `tutorial.md` file). These citations -must be in bibtex format. If you have the DOI for the paper you wish to cite, you can -get the corresponding bibtex entry using [doi2bib.org](https://doi2bib.org). - -With the example you will find in the `tutorial.bib` file, you can add a citation to -this article here in your tutorial like this: -{{ '{%' }} raw {{ '%}' }} `{{ '{%' }} cite Batut2018 {{ '%}' }}`{{ '{%' }} endraw {{ '%}' }}. -This will be rendered like this: {{ '{%' }} cite Batut2018 {{ '%}' }}, and links to a -[bibliography section](#bibliography) which will automatically be created at the end of the -tutorial. - - -**Please follow our -[tutorial to learn how to fill the Markdown]({{ '{{' }} site.baseurl {{ '}}' }}/topics/contributing/tutorials/\ -create-new-tutorial-content/tutorial.html)** - -> ### Agenda -> -> In this tutorial, we will cover: -> -> 1. TOC -> {:toc} -> -{: .agenda} - -# Title for your first section - -Give some background about what the trainees will be doing in the section. -Remember that many people reading your materials will likely be novices, -so make sure to explain all the relevant concepts. - -## Title for a subsection -Section and subsection titles will be displayed in the tutorial index on the left side of -the page, so try to make them informative and concise! - -# Hands-on Sections -Below are a series of hand-on boxes, one for each tool in your workflow file. -Often you may wish to combine several boxes into one or make other adjustments such -as breaking the tutorial into sections, we encourage you to make such changes as you -see fit, this is just a starting point :) - -Anywhere you find the word "***TODO***", there is something that needs to be changed -depending on the specifics of your tutorial. - -have fun! - -## Get data - -> ### {{ '{%' }} icon hands_on {{ '%}' }} Hands-on: Data upload -> -> 1. Create a new history for this tutorial -> 2. Import the files from [Zenodo]({{ zenodo_link }}) or from the shared data library -> -> ``` -> {{ z_file_links }} -> ``` -> ***TODO***: *Add the files by the ones on Zenodo here (if not added)* -> -> ***TODO***: *Remove the useless files (if added)* -> -> {{ '{%' }} include snippets/import_via_link.md {{ '%}' }} -> {{ '{%' }} include snippets/import_from_data_library.md {{ '%}' }} -> -> 3. Rename the datasets -> 4. Check that the datatype -> -> {{ '{%' }} include snippets/change_datatype.md datatype="datatypes" {{ '%}' }} -> -> 5. Add to each database a tag corresponding to ... -> -> {{ '{%' }} include snippets/add_tag.md {{ '%}' }} -> -{: .hands_on} - -# Title of the section usually corresponding to a big step in the analysis - -It comes first a description of the step: some background and some theory. -Some image can be added there to support the theory explanation: - - - -The idea is to keep the theory description before quite simple to focus more on the practical part. - -***TODO***: *Consider adding a detail box to expand the theory* - -> ### {{ '{%' }} icon details {{ '%}' }} More details about the theory -> -> But to describe more details, it is possible to use the detail boxes which are expandable -> -{: .details} - -A big step can have several subsections or sub steps: - -{{ body }} - -## Re-arrange - -To create the template, each step of the workflow had its own subsection. - -***TODO***: *Re-arrange the generated subsections into sections or other subsections. -Consider merging some hands-on boxes to have a meaningful flow of the analyses* - -# Conclusion -{:.no_toc} - -Sum up the tutorial and the key takeaways here. We encourage adding an overview image of the -pipeline used. -""" - - -class Tutorial(object): - """Class to describe a training tutorial.""" - - def __init__(self, training, topic, name="new_tuto", title="The new tutorial", zenodo_link=""): - """Init a tutorial instance.""" - self.training = training - self.topic = topic - self.name = name - self.title = title - self.zenodo_link = zenodo_link - self.zenodo_file_links = [] - self.questions = [] - self.objectives = [] - self.time = "" - self.key_points = [] - self.contributors = [] - self.body = "" - self.init_wf_fp = None - self.init_wf_id = None - self.hands_on = True - self.slides = False - self.datatype_fp = "" - self.set_dir_name() - self.init_data_lib() - self.body = templates.render(HANDS_ON_TOOL_BOX_TEMPLATE, **{ - 'tool_name': "My Tool", - 'inputlist': get_empty_input(), - 'paramlist': get_empty_param() - }) - - def init_from_kwds(self, kwds): - """Init a tutorial instance from a kwds dictionary.""" - self.name = kwds["tutorial_name"] - self.title = kwds["tutorial_title"] - self.zenodo_link = kwds["zenodo_link"] if kwds["zenodo_link"] else '' - self.questions = [ - "Which biological questions are addressed by the tutorial?", - "Which bioinformatics techniques are important to know for this type of data?"] - self.objectives = [ - "The learning objectives are the goals of the tutorial", - "They will be informed by your audience and will communicate to them and to yourself what you should focus on during the course", - "They are single sentences describing what a learner should be able to do once they have completed the tutorial", - "You can use Bloom's Taxonomy to write effective learning objectives"] - self.time = "3H" - self.key_points = [ - "The take-home messages", - "They will appear at the end of the tutorial"] - self.contributors = ["contributor1", "contributor2"] - self.init_wf_fp = kwds['workflow'] - self.init_wf_id = kwds['workflow_id'] - self.hands_on = kwds['hands_on'] - self.slides = kwds['slides'] - self.datatype_fp = kwds['datatypes'] - self.set_dir_name() - self.init_data_lib() - - def init_from_existing_tutorial(self, tuto_name): - """Init a tutorial instance from an existing tutorial (data library and tutorial.md).""" - self.name = tuto_name - self.set_dir_name() - - if not self.exists(): - raise Exception("The tutorial %s does not exists. It should be created" % self.name) - - # get the metadata information of the tutorial (from the top of the tutorial.md) - with open(self.tuto_fp, "r") as tuto_f: - tuto_content = tuto_f.read() - regex = r'^---\n(?P<metadata>[\s\S]*)\n---(?P<body>[\s\S]*)' - tuto_split_regex = re.search(regex, tuto_content) - if not tuto_split_regex: - raise Exception("No metadata found at the top of the tutorial") - metadata = yaml.safe_load(tuto_split_regex.group("metadata")) - self.title = metadata["title"] - self.zenodo_link = metadata["zenodo_link"] - self.questions = metadata["questions"] - self.objectives = metadata["objectives"] - self.time_estimation = metadata["time_estimation"] - self.key_points = metadata["key_points"] - self.contributors = metadata["contributors"] - - # the tutorial content - self.body = tuto_split_regex.group("body") - - # get the data library - self.init_data_lib() - - def init_data_lib(self): - """Init the data library dictionary.""" - if os.path.exists(self.data_lib_fp): - self.data_lib = load_yaml(self.data_lib_fp) - else: - self.data_lib = collections.OrderedDict() - # set default information - self.data_lib.setdefault('destination', collections.OrderedDict()) - self.data_lib['destination']['type'] = 'library' - self.data_lib['destination']['name'] = 'GTN - Material' - self.data_lib['destination']['description'] = 'Galaxy Training Network Material' - self.data_lib['destination']['synopsis'] = 'Galaxy Training Network Material. See https://training.galaxyproject.org' - self.data_lib.setdefault('items', []) - self.data_lib.pop('libraries', None) - # get topic or create new one - topic = collections.OrderedDict() - for item in self.data_lib['items']: - if item['name'] == self.topic.title: - topic = item - if not topic: - self.data_lib['items'].append(topic) - topic['name'] = self.topic.title - topic['description'] = self.topic.summary - topic['items'] = [] - # get tutorial or create new one - self.tuto_data_lib = collections.OrderedDict() - for item in topic['items']: - if item['name'] == self.title: - self.tuto_data_lib = item - if not self.tuto_data_lib: - topic['items'].append(self.tuto_data_lib) - self.tuto_data_lib['name'] = self.title - self.tuto_data_lib['items'] = [] - - # GETTERS - def get_tuto_metata(self): - """Return the string corresponding to the tutorial metadata.""" - metadata = collections.OrderedDict() - metadata['title'] = self.title - metadata['zenodo_link'] = self.zenodo_link - metadata['questions'] = self.questions - metadata['objectives'] = self.objectives - metadata['time_estimation'] = self.time - metadata['key_points'] = self.key_points - metadata['contributors'] = self.contributors - return yaml.safe_dump( - metadata, - indent=2, - default_flow_style=False, - default_style='', - explicit_start=False) - - # SETTERS - def set_dir_name(self): - """Set the path to dir and files of a tutorial.""" - self.dir = os.path.join(self.topic.dir, "tutorials", self.name) - self.tuto_fp = os.path.join(self.dir, "tutorial.md") - self.bib_fp = os.path.join(self.dir, "tutorial.bib") - self.slide_fp = os.path.join(self.dir, 'slides.html') - self.data_lib_fp = os.path.join(self.dir, "data-library.yaml") - self.wf_dir = os.path.join(self.dir, "workflows") - self.wf_fp = os.path.join(self.wf_dir, "main_workflow.ga") - self.tour_dir = os.path.join(self.dir, "tours") - # remove empty workflow file if there - empty_wf_filepath = os.path.join(self.wf_dir, "empty_workflow.ga") - if os.path.exists(empty_wf_filepath): - os.remove(empty_wf_filepath) - - # TEST METHODS - def exists(self): - """Test if the tutorial exists.""" - return os.path.isdir(self.dir) - - def has_workflow(self): - """Test if a workflow is provided for the tutorial.""" - return self.init_wf_fp or self.init_wf_id - - # EXPORT METHODS - def export_workflow_file(self): - """Copy or extract workflow file and add it to the tutorial directory.""" - if not os.path.exists(self.wf_dir): - os.makedirs(self.wf_dir) - if not os.path.exists(os.path.join(self.wf_dir, 'index.md')): - with open(os.path.join(self.wf_dir, 'index.md'), 'w') as handle: - handle.write('---\nlayout: workflow-list\n---\n') - if self.init_wf_fp: - shutil.copy(self.init_wf_fp, self.wf_fp) - elif self.init_wf_id: - gi = galaxy.GalaxyInstance(self.training.galaxy_url, key=self.training.galaxy_api_key) - gi.workflows.export_workflow_to_local_path( - self.init_wf_id, - self.wf_fp, - use_default_filename=False) - - # OTHER METHODS - def get_files_from_zenodo(self): - """Extract a list of URLs and dictionary describing the files from the JSON output of the Zenodo API.""" - z_record, req_res = get_zenodo_record(self.zenodo_link) - - self.zenodo_file_links = [] - if 'files' not in req_res: - raise ValueError("No files in the Zenodo record") - - files = [] - for f in req_res['files']: - file_dict = {'url': '', 'src': 'url', 'ext': '', 'info': self.zenodo_link} - if 'type' in f: - file_dict['ext'] = get_galaxy_datatype(f['type'], self.datatype_fp) - if 'links' not in f and 'self' not in f['links']: - raise ValueError("No link for file %s" % f) - file_dict['url'] = f['links']['self'] - self.zenodo_file_links.append(f['links']['self']) - files.append(file_dict) - - return (files, z_record) - - def prepare_data_library_from_zenodo(self): - """Get the list of URLs of the files on Zenodo, fill the data library, save it into the file.""" - self.zenodo_file_links = [] - if self.zenodo_link != '': - files, z_record = self.get_files_from_zenodo() - if z_record: - # get current data library and/or previous data library for the tutorial - # remove the latest tag of any existing library - # remove the any other existing library - current_data_lib = collections.OrderedDict() - previous_data_lib = collections.OrderedDict() - for item in self.tuto_data_lib['items']: - if item['name'] == "DOI: 10.5281/zenodo.%s" % z_record: - current_data_lib = item - elif item['description'] == 'latest': - previous_data_lib = item - previous_data_lib['description'] = '' - if not current_data_lib: - current_data_lib['name'] = "DOI: 10.5281/zenodo.%s" % z_record - current_data_lib['description'] = 'latest' - current_data_lib['items'] = [] - current_data_lib['items'] = files - - self.tuto_data_lib['items'] = [current_data_lib] - if previous_data_lib: - self.tuto_data_lib['items'].append(previous_data_lib) - save_to_yaml(self.data_lib, self.data_lib_fp) - - def write_hands_on_tutorial(self, add_z_file_links=True): - """Write the content of the hands-on tutorial in the corresponding file.""" - if add_z_file_links: - self.body = templates.render(TUTO_HAND_ON_BODY_TEMPLATE, **{ - "z_file_links": "\n> ".join(self.zenodo_file_links), - "body": self.body - }) - # write in the tutorial file with the metadata on the top - metadata = self.get_tuto_metata() - with open(self.tuto_fp, 'w') as md: - md.write(templates.render(TUTO_HAND_ON_TEMPLATE, **{ - "metadata": metadata, - "body": self.body - })) - - # create the bibliography file - self.write_bibliography() - - def write_bibliography(self): - """Write the content of the bibliography file for the tutorial.""" - with open(self.bib_fp, 'w') as bib: - bib.write(templates.render(TUTO_BIBLIOGRAPHY_TEMPLATE, **{ - "body": self.body - })) - - def create_hands_on_tutorial(self, ctx): - """Create tutorial structure from the workflow file (if it is provided).""" - # load workflow and get hands-on body from the workflow - if self.init_wf_id: - if not self.training.galaxy_url: - raise ValueError("No Galaxy URL given") - if not self.training.galaxy_api_key: - raise ValueError("No API key to access the given Galaxy instance") - self.body = get_hands_on_boxes_from_running_galaxy(self.init_wf_id, self.training.galaxy_url, self.training.galaxy_api_key) - elif self.init_wf_fp: - self.body = get_hands_on_boxes_from_local_galaxy(self.training.kwds, self.init_wf_fp, ctx) - # write tutorial body - self.write_hands_on_tutorial() - - def create_tutorial(self, ctx): - """Create the skeleton of a new tutorial.""" - # create tuto folder and empty files - os.makedirs(self.dir) - os.makedirs(self.tour_dir) - os.makedirs(self.wf_dir) - - # extract the data library from Zenodo and the links for the tutorial - if self.zenodo_link != '': - info("Create the data library from Zenodo") - self.prepare_data_library_from_zenodo() - - # create tutorial skeleton from workflow and copy workflow file - if self.hands_on: - info("Create tutorial skeleton from workflow (if it is provided)") - self.create_hands_on_tutorial(ctx) - self.export_workflow_file() - - # create slide skeleton - if self.slides: - with open(self.slide_fp, 'w') as slide_f: - slide_f.write( - templates.render(TUTO_SLIDES_TEMPLATE, **{"metadata": self.get_tuto_metata()})) - - -def get_galaxy_datatype(z_ext, datatype_fp): - """Get the Galaxy datatype corresponding to a Zenodo file type.""" - g_datatype = '' - datatypes = load_yaml(datatype_fp) - if z_ext in datatypes: - g_datatype = datatypes[z_ext] - if g_datatype == '': - g_datatype = '# Please add a Galaxy datatype or update the shared/datatypes.yaml file' - info("Get Galaxy datatypes: %s --> %s" % (z_ext, g_datatype)) - return g_datatype - - -def get_zenodo_record(zenodo_link): - """Get the content of a Zenodo record.""" - # get the record in the Zenodo link - if 'doi' in zenodo_link: - z_record = zenodo_link.split('.')[-1] - else: - z_record = zenodo_link.split('/')[-1] - # get JSON corresponding to the record from Zenodo API - req = "https://zenodo.org/api/records/%s" % (z_record) - r = requests.get(req) - if r: - req_res = r.json() - else: - info("The Zenodo link (%s) seems invalid" % (zenodo_link)) - req_res = {'files': []} - z_record = None - return(z_record, req_res) - - -def get_wf_inputs(step_inp): - """Get the inputs from a workflow step and format them into a hierarchical dictionary.""" - inputs = {} - for inp_n, inp in step_inp.items(): - if '|' in inp_n: - repeat_regex = r'(?P<prefix>[^\|]*)_(?P<nb>\d+)\|(?P<suffix>.+).+' - repeat_search = re.search(repeat_regex, inp_n) - hier_regex = r'(?P<prefix>[^\|]*)\|(?P<suffix>.+)' - hier_regex = re.search(hier_regex, inp_n) - if repeat_search and repeat_search.start(0) <= hier_regex.start(0): - inputs.setdefault(repeat_search.group('prefix'), {}) - inputs[repeat_search.group('prefix')].setdefault( - repeat_search.group('nb'), - get_wf_inputs({hier_regex.group('suffix'): inp})) - else: - inputs.setdefault(hier_regex.group('prefix'), {}) - inputs[hier_regex.group('prefix')].update( - get_wf_inputs({hier_regex.group('suffix'): inp})) - else: - inputs.setdefault(inp_n, inp) - return inputs - - -def get_wf_param_values(init_params, inp_connections): - """Get the param values from a workflow step and format them into a hierarchical dictionary.""" - if not isinstance(init_params, six.string_types) or '": ' not in init_params: - form_params = init_params - else: - form_params = json.loads(init_params) - if isinstance(form_params, dict): - if '__class__' in form_params and (form_params['__class__'] == 'RuntimeValue' or form_params['__class__'] == 'ConnectedValue'): - form_params = inp_connections - else: - for p in form_params: - inp = inp_connections[p] if p in inp_connections else {} - form_params[p] = get_wf_param_values(form_params[p], inp) - elif isinstance(form_params, list): - json_params = form_params - form_params = [] - for i, p in enumerate(json_params): - inp = inp_connections[str(i)] if str(i) in inp_connections else {} - form_params.append(get_wf_param_values(p, inp)) - elif isinstance(form_params, six.string_types) and '"' in form_params: - form_params = form_params.replace('"', '') - return form_params - - -def format_wf_steps(wf, gi): - """Get a string with the hands-on boxes describing the different steps of the worklow.""" - body = '' - steps = wf['steps'] - - for s in range(len(steps)): - wf_step = steps[str(s)] - # get params in workflow - wf_param_values = {} - if wf_step['tool_state'] and wf_step['input_connections']: - wf_param_values = get_wf_param_values(wf_step['tool_state'], get_wf_inputs(wf_step['input_connections'])) - if not wf_param_values: - continue - # get tool description - try: - tool_desc = gi.tools.show_tool(wf_step['tool_id'], io_details=True) - except Exception: - tool_desc = {'inputs': []} - # get formatted param description - paramlist = '' - for inp in tool_desc["inputs"]: - tool_inp = ToolInput(inp, wf_param_values, steps, 1, should_be_there=True) - paramlist += tool_inp.get_formatted_desc() - # format the hands-on box - body += templates.render(HANDS_ON_TOOL_BOX_TEMPLATE, **{ - "tool_name": wf_step['name'], - "paramlist": paramlist}) - return body - - -def get_hands_on_boxes_from_local_galaxy(kwds, wf_filepath, ctx): - """Server local Galaxy and get the workflow dictionary.""" - assert is_galaxy_engine(**kwds) - runnable = for_path(wf_filepath) - tuto_body = '' - with engine_context(ctx, **kwds) as galaxy_engine: - with galaxy_engine.ensure_runnables_served([runnable]) as config: - workflow_id = config.workflow_id(wf_filepath) - wf = config.gi.workflows.export_workflow_dict(workflow_id) - tuto_body = format_wf_steps(wf, config.gi) - return tuto_body - - -def get_hands_on_boxes_from_running_galaxy(wf_id, galaxy_url, galaxy_api_key): - """Get the workflow dictionary from a running Galaxy instance with the workflow installed on it.""" - gi = galaxy.GalaxyInstance(galaxy_url, key=galaxy_api_key) - wf = gi.workflows.export_workflow_dict(wf_id) - tuto_body = format_wf_steps(wf, gi) - return tuto_body
