comparison planemo/lib/python3.7/site-packages/gxformat2/markdown_parse.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 """Utilities for parsing "Galaxy Flavored Markdown".
2
3 See markdown_util.py for loading objects and interacting with the rest of Galaxy.
4 This file is meant to be relatively self contained and just used to "parse" and validate
5 Galaxy Markdown. Keeping things isolated to allow re-use of these utilities in other
6 projects (e.g. gxformat2).
7 """
8 import re
9
# --- Generic Markdown fences ------------------------------------------------
# Neither pattern is anchored at the end of the line, so both accept any text
# that merely begins with three backticks.
BLOCK_FENCE_START = re.compile(r"```.*")
BLOCK_FENCE_END = re.compile(r"```[\s]*")

# Opening fence of a Galaxy directive block (three backticks, then "galaxy").
GALAXY_FLAVORED_MARKDOWN_CONTAINER_LINE_PATTERN = re.compile(r"```\s*galaxy\s*")
# Strict closing fence: three backticks followed by nothing but whitespace.
VALID_CONTAINER_END_PATTERN = re.compile(r"^```\s*$")

# Directive name -> argument names accepted by that directive.
# Insertion order matters: it is the order of the regex alternation built below.
VALID_ARGUMENTS = {
    "history_dataset_display": [
        "input", "output", "history_dataset_id",
    ],
    "history_dataset_embedded": [
        "input", "output", "history_dataset_id",
    ],
    "history_dataset_as_image": [
        "input", "output", "history_dataset_id", "path",
    ],
    "history_dataset_peek": [
        "input", "output", "history_dataset_id",
    ],
    "history_dataset_info": [
        "input", "output", "history_dataset_id",
    ],
    "history_dataset_link": [
        "input", "output", "history_dataset_id", "path", "label",
    ],
    "history_dataset_index": [
        "input", "output", "history_dataset_id", "path",
    ],
    "history_dataset_name": [
        "input", "output", "history_dataset_id",
    ],
    "history_dataset_type": [
        "input", "output", "history_dataset_id",
    ],
    "history_dataset_collection_display": [
        "input", "output", "history_dataset_collection_id",
    ],
    "workflow_display": ["workflow_id"],
    "job_metrics": ["step", "job_id"],
    "job_parameters": ["step", "job_id"],
    "tool_stderr": ["step", "job_id"],
    "tool_stdout": ["step", "job_id"],
    "generate_galaxy_version": [],
    "generate_time": [],
    "invocation_time": ["invocation_id"],
    # Invocation Flavored Markdown
    "invocation_outputs": [],
    "invocation_inputs": [],
}
GALAXY_FLAVORED_MARKDOWN_CONTAINERS = list(VALID_ARGUMENTS)
GALAXY_FLAVORED_MARKDOWN_CONTAINER_REGEX = r'(?P<container>%s)' % (
    "|".join(GALAXY_FLAVORED_MARKDOWN_CONTAINERS),
)

# --- Directive call syntax: name(arg=value, arg="value", ...) ---------------
# An argument value is a bare word or a single-/double-quoted string.
ARG_VAL_REGEX = r'''[\w_\-]+|\"[^\"]+\"|\'[^\']+\''''
# One "name = value" pair, with optional surrounding whitespace.
FUNCTION_ARG = r'\s*\w+\s*=\s*(?:%s)\s*' % (ARG_VAL_REGEX,)
# First argument, then zero or more further arguments each preceded by a comma.
FUNCTION_MULTIPLE_ARGS = (
    r'(?P<firstargcall>%s)'
    r'(?P<restargcalls>(?:,%s)*)'
) % (FUNCTION_ARG, FUNCTION_ARG)
FUNCTION_MULTIPLE_ARGS_PATTERN = re.compile(FUNCTION_MULTIPLE_ARGS)
# The remaining "%s" placeholder is filled with the directive-name alternation.
FUNCTION_CALL_LINE_TEMPLATE = "".join([
    r'\s*%s\s*\((?:',
    FUNCTION_MULTIPLE_ARGS,
    r')?\)\s*',
])
GALAXY_MARKDOWN_FUNCTION_CALL_LINE = re.compile(
    FUNCTION_CALL_LINE_TEMPLATE % GALAXY_FLAVORED_MARKDOWN_CONTAINER_REGEX
)
WHITE_SPACE_ONLY_PATTERN = re.compile(r"^[\s]+$")
50
51
def _raise_invalid_line(line_no, raw_line, template, **kwd):
    """Raise ValueError for 0-based *line_no*, formatting *template* with *kwd*.

    If a ``line`` keyword is supplied it is replaced by *raw_line* with its
    trailing line terminator removed, so messages never embed a newline.
    """
    if "line" in kwd:
        kwd["line"] = raw_line.rstrip("\r\n")
    raise ValueError("Invalid line %d: %s" % (line_no + 1, template.format(**kwd)))


def _directive_argument_names(func_call_match):
    """Yield the name of each argument in a matched Galaxy directive call."""
    first_arg_call = func_call_match.group("firstargcall")
    if first_arg_call is None:
        # Directive was called with an empty argument list.
        return
    yield first_arg_call.split("=", 1)[0].strip()
    rest = func_call_match.group("restargcalls")
    while rest:
        # ``rest`` always starts with the comma separating it from the
        # previous argument; drop it and re-match to peel off one argument.
        # Re-matching (rather than splitting on every comma) keeps commas
        # inside quoted values intact.
        rest = rest.strip().split(",", 1)[1]
        arg_match = FUNCTION_MULTIPLE_ARGS_PATTERN.match(rest)
        if not arg_match:
            break
        yield arg_match.group("firstargcall").split("=", 1)[0].strip()
        rest = arg_match.group("restargcalls")


def validate_galaxy_markdown(galaxy_markdown, internal=True):
    """Validate the supplied markdown and raise a ValueError with reason if invalid.

    Only fenced ``galaxy`` blocks are inspected: each must contain exactly one
    known directive call whose argument names are listed in VALID_ARGUMENTS,
    and must be closed before the document ends. All other Markdown is ignored.

    :param galaxy_markdown: the Markdown document as a single string.
    :param internal: accepted for interface compatibility; not used here.
    :raises ValueError: with a message of the form ``Invalid line N: reason``
        where N is the 1-based line number.
    """
    expecting_container_close_for = None
    last_line_no = 0
    function_calls = 0
    for line, fenced, open_fence, line_no in _split_markdown_lines(galaxy_markdown):
        last_line_no = line_no
        expecting_container_close = expecting_container_close_for is not None

        if not fenced:
            if expecting_container_close:
                _raise_invalid_line(
                    line_no, line,
                    "[{line}] is not expected close line for [{expected_for}]",
                    line=line, expected_for=expecting_container_close_for,
                )
            # Plain Markdown outside of any fence: nothing to validate.
            continue

        if expecting_container_close and BLOCK_FENCE_END.match(line):
            # Closing fence of the Galaxy block: reset per-block state.
            expecting_container_close_for = None
            function_calls = 0
        elif open_fence and GALAXY_FLAVORED_MARKDOWN_CONTAINER_LINE_PATTERN.match(line):
            if expecting_container_close:
                # A line opening a Galaxy block can never be a valid closing
                # fence, so this is always an error.
                _raise_invalid_line(
                    line_no, line,
                    "Invalid command close line [{line}] for [{expected_for}]",
                    line=line, expected_for=expecting_container_close_for,
                )
            expecting_container_close_for = line
        elif line and expecting_container_close_for:
            func_call_match = GALAXY_MARKDOWN_FUNCTION_CALL_LINE.match(line)
            if not func_call_match:
                _raise_invalid_line(
                    line_no, line,
                    "Invalid embedded Galaxy markup line [{line}]",
                    line=line,
                )
            function_calls += 1
            if function_calls > 1:
                _raise_invalid_line(
                    line_no, line,
                    "Only one Galaxy directive is allowed per fenced Galaxy block (```galaxy)",
                )
            valid_args = VALID_ARGUMENTS[func_call_match.group("container")]
            for arg_name in _directive_argument_names(func_call_match):
                if arg_name not in valid_args:
                    _raise_invalid_line(
                        line_no, line,
                        "Invalid argument to Galaxy directive [{argument}]",
                        argument=arg_name,
                    )
        # Anything else is fenced content unrelated to Galaxy object containers.

    if expecting_container_close_for:
        # Report the last line 1-based, consistent with every other error.
        reason = "close of block for [{expected_for}] expected".format(
            expected_for=expecting_container_close_for
        )
        raise ValueError("Invalid line %d: %s" % (last_line_no + 1, reason))
124
125
def _split_markdown_lines(markdown):
    """Yield lines of a markdown document line-by-line keeping track of fencing.

    'Fenced' lines are code-like block (e.g. between ```) that shouldn't contain
    Markdown markup.

    Each yielded item is a tuple ``(line, fenced, open_fence, line_number)``:

    * ``line`` - the raw line, including its line terminator.
    * ``fenced`` - bool, True if the line belongs to a ``` block (the opening
      and closing fence lines included) or to an indented code block.
    * ``open_fence`` - True only for the line that opens a ``` block.
    * ``line_number`` - 0-based index of the line in the document.
    """
    block_fenced = False
    indent_fenced = False
    for line_number, line in enumerate(markdown.splitlines(True)):
        open_fence_this_iteration = False
        # An indented code block needs four leading spaces; whitespace-only
        # lines continue an indented block that is already open.
        indent_fenced = line.startswith("    ") or bool(
            indent_fenced and WHITE_SPACE_ONLY_PATTERN.match(line)
        )
        if not block_fenced and BLOCK_FENCE_START.match(line):
            open_fence_this_iteration = True
            block_fenced = True
        yield (line, block_fenced or indent_fenced, open_fence_this_iteration, line_number)
        # Close the block only after yielding so the closing fence itself is
        # still reported as fenced; the opening line never closes its own block.
        if not open_fence_this_iteration and BLOCK_FENCE_END.match(line):
            block_fenced = False
144
145
# Names exported by ``from <this module> import *``.
__all__ = (
    'validate_galaxy_markdown',
    'GALAXY_MARKDOWN_FUNCTION_CALL_LINE',
)