comparison planemo/lib/python3.7/site-packages/schema_salad/schema.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"

| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children | |
Comparison of 0:d30785e31577 (file absent) with 1:56ad4e20f292 (file added); the entire file is inserted in this revision.
| 1 """Functions to process Schema Salad schemas.""" | |
| 2 from __future__ import absolute_import | |
| 3 | |
| 4 import copy | |
| 5 import hashlib | |
| 6 from typing import ( | |
| 7 IO, | |
| 8 Any, | |
| 9 Dict, | |
| 10 List, | |
| 11 Mapping, | |
| 12 MutableMapping, | |
| 13 MutableSequence, | |
| 14 Optional, | |
| 15 Set, | |
| 16 Tuple, | |
| 17 TypeVar, | |
| 18 Union, | |
| 19 cast, | |
| 20 ) | |
| 21 | |
| 22 from future.utils import raise_from | |
| 23 from pkg_resources import resource_stream | |
| 24 from six import iteritems, string_types | |
| 25 from six.moves import urllib | |
| 26 from typing_extensions import Text # pylint: disable=unused-import | |
| 27 | |
| 28 from ruamel import yaml | |
| 29 from ruamel.yaml.comments import CommentedMap, CommentedSeq | |
| 30 from schema_salad.utils import ( | |
| 31 add_dictlist, | |
| 32 aslist, | |
| 33 convert_to_dict, | |
| 34 flatten, | |
| 35 json_dumps, | |
| 36 ) | |
| 37 | |
| 38 from . import _logger, jsonld_context, ref_resolver, validate | |
| 39 from .exceptions import ( | |
| 40 ClassValidationException, | |
| 41 ValidationException, | |
| 42 SchemaSaladException, | |
| 43 ) | |
| 44 from .avro.schema import Names, SchemaParseException, make_avsc_object | |
| 45 from .ref_resolver import Loader | |
| 46 from .sourceline import SourceLine, add_lc_filename, relname | |
| 47 | |
| 48 SALAD_FILES = ( | |
| 49 "metaschema.yml", | |
| 50 "metaschema_base.yml", | |
| 51 "salad.md", | |
| 52 "field_name.yml", | |
| 53 "import_include.md", | |
| 54 "link_res.yml", | |
| 55 "ident_res.yml", | |
| 56 "vocab_res.yml", | |
| 57 "vocab_res.yml", | |
| 58 "field_name_schema.yml", | |
| 59 "field_name_src.yml", | |
| 60 "field_name_proc.yml", | |
| 61 "ident_res_schema.yml", | |
| 62 "ident_res_src.yml", | |
| 63 "ident_res_proc.yml", | |
| 64 "link_res_schema.yml", | |
| 65 "link_res_src.yml", | |
| 66 "link_res_proc.yml", | |
| 67 "vocab_res_schema.yml", | |
| 68 "vocab_res_src.yml", | |
| 69 "vocab_res_proc.yml", | |
| 70 "map_res.yml", | |
| 71 "map_res_schema.yml", | |
| 72 "map_res_src.yml", | |
| 73 "map_res_proc.yml", | |
| 74 "typedsl_res.yml", | |
| 75 "typedsl_res_schema.yml", | |
| 76 "typedsl_res_src.yml", | |
| 77 "typedsl_res_proc.yml", | |
| 78 "sfdsl_res.yml", | |
| 79 "sfdsl_res_schema.yml", | |
| 80 "sfdsl_res_src.yml", | |
| 81 "sfdsl_res_proc.yml", | |
| 82 ) | |
| 83 | |
| 84 saladp = "https://w3id.org/cwl/salad#" | |
| 85 | |
| 86 | |
def get_metaschema():  # type: () -> Tuple[Names, List[Dict[Text, Any]], Loader]
    """Instantiate the metaschema."""
    loader = ref_resolver.Loader(
        {
            "Any": saladp + "Any",
            "ArraySchema": saladp + "ArraySchema",
            "Array_symbol": saladp + "ArraySchema/type/Array_symbol",
            "DocType": saladp + "DocType",
            "Documentation": saladp + "Documentation",
            "Documentation_symbol": saladp + "Documentation/type/Documentation_symbol",
            "Documented": saladp + "Documented",
            "EnumSchema": saladp + "EnumSchema",
            "Enum_symbol": saladp + "EnumSchema/type/Enum_symbol",
            "JsonldPredicate": saladp + "JsonldPredicate",
            "NamedType": saladp + "NamedType",
            "PrimitiveType": saladp + "PrimitiveType",
            "RecordField": saladp + "RecordField",
            "RecordSchema": saladp + "RecordSchema",
            "Record_symbol": saladp + "RecordSchema/type/Record_symbol",
            "SaladEnumSchema": saladp + "SaladEnumSchema",
            "SaladRecordField": saladp + "SaladRecordField",
            "SaladRecordSchema": saladp + "SaladRecordSchema",
            "SchemaDefinedType": saladp + "SchemaDefinedType",
            "SpecializeDef": saladp + "SpecializeDef",
            "_container": saladp + "JsonldPredicate/_container",
            "_id": {"@id": saladp + "_id", "@type": "@id", "identity": True},
            "_type": saladp + "JsonldPredicate/_type",
            "abstract": saladp + "SaladRecordSchema/abstract",
            "array": saladp + "array",
            "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
            "dct": "http://purl.org/dc/terms/",
            "default": {"@id": saladp + "default", "noLinkCheck": True},
            "doc": "rdfs:comment",
            "docAfter": {"@id": saladp + "docAfter", "@type": "@id"},
            "docChild": {"@id": saladp + "docChild", "@type": "@id"},
            "docParent": {"@id": saladp + "docParent", "@type": "@id"},
            "documentRoot": saladp + "SchemaDefinedType/documentRoot",
            "documentation": saladp + "documentation",
            "double": "http://www.w3.org/2001/XMLSchema#double",
            "enum": saladp + "enum",
            "extends": {"@id": saladp + "extends", "@type": "@id", "refScope": 1},
            "fields": {
                "@id": saladp + "fields",
                "mapPredicate": "type",
                "mapSubject": "name",
            },
            "float": "http://www.w3.org/2001/XMLSchema#float",
            "identity": saladp + "JsonldPredicate/identity",
            "inVocab": saladp + "NamedType/inVocab",
            "int": "http://www.w3.org/2001/XMLSchema#int",
            "items": {"@id": saladp + "items", "@type": "@vocab", "refScope": 2},
            "jsonldPredicate": "sld:jsonldPredicate",
            "long": "http://www.w3.org/2001/XMLSchema#long",
            "mapPredicate": saladp + "JsonldPredicate/mapPredicate",
            "mapSubject": saladp + "JsonldPredicate/mapSubject",
            "name": "@id",
            "noLinkCheck": saladp + "JsonldPredicate/noLinkCheck",
            "null": saladp + "null",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "record": saladp + "record",
            "refScope": saladp + "JsonldPredicate/refScope",
            "sld": saladp,
            "specialize": {
                "@id": saladp + "specialize",
                "mapPredicate": "specializeTo",
                "mapSubject": "specializeFrom",
            },
            "specializeFrom": {
                "@id": saladp + "specializeFrom",
                "@type": "@id",
                "refScope": 1,
            },
            "specializeTo": {
                "@id": saladp + "specializeTo",
                "@type": "@id",
                "refScope": 1,
            },
            "string": "http://www.w3.org/2001/XMLSchema#string",
            "subscope": saladp + "JsonldPredicate/subscope",
            "symbols": {"@id": saladp + "symbols", "@type": "@id", "identity": True},
            "type": {
                "@id": saladp + "type",
                "@type": "@vocab",
                "refScope": 2,
                "typeDSL": True,
            },
            "typeDSL": saladp + "JsonldPredicate/typeDSL",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
        }
    )

    for salad in SALAD_FILES:
        with resource_stream(__name__, "metaschema/" + salad) as stream:
            loader.cache["https://w3id.org/cwl/" + salad] = stream.read()

    with resource_stream(__name__, "metaschema/metaschema.yml") as stream:
        loader.cache["https://w3id.org/cwl/salad"] = stream.read()

    j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"])
    add_lc_filename(j, "metaschema.yml")
    j, _ = loader.resolve_all(j, saladp)

    sch_obj = make_avro(j, loader)
    try:
        sch_names = make_avro_schema_from_avro(sch_obj)
    except SchemaParseException:
        _logger.error("Metaschema error, avro was:\n%s", json_dumps(sch_obj, indent=4))
        raise
    validate_doc(sch_names, j, loader, strict=True)
    return (sch_names, j, loader)

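# Usage sketch (not part of the module): get_metaschema() returns the
# metaschema's avro Names, the resolved metaschema document, and a Loader
# pre-seeded with the bundled SALAD_FILES. Variable names here are
# illustrative only:
#
#     sch_names, sch_doc, metaschema_loader = get_metaschema()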

def add_namespaces(metadata, namespaces):
    # type: (Mapping[Text, Any], MutableMapping[Text, Text]) -> None
    """Collect the provided namespaces, checking for conflicts."""
    for key, value in metadata.items():
        if key not in namespaces:
            namespaces[key] = value
        elif namespaces[key] != value:
            raise ValidationException(
                "Namespace prefix '{}' has conflicting definitions '{}'"
                " and '{}'.".format(key, namespaces[key], value)
            )


def collect_namespaces(metadata):
    # type: (Mapping[Text, Any]) -> Dict[Text, Text]
    """Walk through the metadata object, collecting namespace declarations."""
    namespaces = {}  # type: Dict[Text, Text]
    if "$import_metadata" in metadata:
        for value in metadata["$import_metadata"].values():
            add_namespaces(collect_namespaces(value), namespaces)
    if "$namespaces" in metadata:
        add_namespaces(metadata["$namespaces"], namespaces)
    return namespaces

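# Illustration of the two helpers above (hypothetical metadata): given
#     {"$namespaces": {"edam": "http://edamontology.org/"},
#      "$import_metadata": {"doc1": {"$namespaces": {"dct": "http://purl.org/dc/terms/"}}}}
# collect_namespaces() yields {"dct": "http://purl.org/dc/terms/",
# "edam": "http://edamontology.org/"}; add_namespaces() raises
# ValidationException only when one prefix is bound to two different URIs.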

schema_type = Tuple[Loader, Union[Names, SchemaParseException], Dict[Text, Any], Loader]


def load_schema(
    schema_ref,  # type: Union[CommentedMap, CommentedSeq, Text]
    cache=None,  # type: Optional[Dict[Text, Text]]
):
    # type: (...) -> schema_type
    """
    Load a schema that can be used to validate documents using load_and_validate.

    return: document_loader, avsc_names, schema_metadata, metaschema_loader
    """
    metaschema_names, _metaschema_doc, metaschema_loader = get_metaschema()
    if cache is not None:
        metaschema_loader.cache.update(cache)
    schema_doc, schema_metadata = metaschema_loader.resolve_ref(schema_ref, "")

    if not isinstance(schema_doc, MutableSequence):
        raise ValidationException("Schema reference must resolve to a list.")

    validate_doc(metaschema_names, schema_doc, metaschema_loader, True)
    metactx = schema_metadata.get("@context", {})
    metactx.update(collect_namespaces(schema_metadata))
    schema_ctx = jsonld_context.salad_to_jsonld_context(schema_doc, metactx)[0]

    # Create the loader that will be used to load the target document.
    document_loader = Loader(schema_ctx, cache=cache)

    # Make the Avro schema that will be used to validate the target document.
    avsc_names = make_avro_schema(schema_doc, document_loader)

    return document_loader, avsc_names, schema_metadata, metaschema_loader

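# Sketch of the returned tuple (descriptions are illustrative):
#     document_loader    -- Loader configured with the schema's JSON-LD context
#     avsc_names         -- avro Names consumed by validate_doc/load_and_validate
#     schema_metadata    -- the schema document's metadata block
#     metaschema_loader  -- the Loader that resolved the schema itself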

def load_and_validate(
    document_loader,  # type: Loader
    avsc_names,  # type: Names
    document,  # type: Union[CommentedMap, Text]
    strict,  # type: bool
    strict_foreign_properties=False,  # type: bool
):
    # type: (...) -> Tuple[Any, Dict[Text, Any]]
    """Load a document and validate it with the provided schema.

    return data, metadata
    """
    try:
        if isinstance(document, CommentedMap):
            data, metadata = document_loader.resolve_all(
                document,
                document["id"],
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )
        else:
            data, metadata = document_loader.resolve_ref(
                document,
                checklinks=True,
                strict_foreign_properties=strict_foreign_properties,
            )

        validate_doc(
            avsc_names,
            data,
            document_loader,
            strict,
            strict_foreign_properties=strict_foreign_properties,
        )
    except ValidationException as exc:
        raise_from(ValidationException("", None, [exc]), exc)
    return data, metadata


def validate_doc(
    schema_names,  # type: Names
    doc,  # type: Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None]
    loader,  # type: Loader
    strict,  # type: bool
    strict_foreign_properties=False,  # type: bool
):
    # type: (...) -> None
    """Validate a document using the provided schema."""
    has_root = False
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop(u"documentRoot")) or (
            u"documentRoot" in root.props
        ):
            has_root = True
            break

    if not has_root:
        raise ValidationException("No document roots defined in the schema")

    if isinstance(doc, MutableSequence):
        vdoc = doc
    elif isinstance(doc, CommentedMap):
        vdoc = CommentedSeq([doc])
        vdoc.lc.add_kv_line_col(0, [doc.lc.line, doc.lc.col])
        vdoc.lc.filename = doc.lc.filename
    else:
        raise ValidationException("Document must be dict or list")

    roots = []
    for root in schema_names.names.values():
        if (hasattr(root, "get_prop") and root.get_prop(u"documentRoot")) or (
            root.props.get(u"documentRoot")
        ):
            roots.append(root)

    anyerrors = []
    for pos, item in enumerate(vdoc):
        sourceline = SourceLine(vdoc, pos, Text)
        success = False
        for root in roots:
            success = validate.validate_ex(
                root,
                item,
                loader.identifiers,
                strict,
                foreign_properties=loader.foreign_properties,
                raise_ex=False,
                skip_foreign_properties=loader.skip_schemas,
                strict_foreign_properties=strict_foreign_properties,
            )
            if success:
                break

        if not success:
            errors = []  # type: List[SchemaSaladException]
            for root in roots:
                if hasattr(root, "get_prop"):
                    name = root.get_prop(u"name")
                elif hasattr(root, "name"):
                    name = root.name

                try:
                    validate.validate_ex(
                        root,
                        item,
                        loader.identifiers,
                        strict,
                        foreign_properties=loader.foreign_properties,
                        raise_ex=True,
                        skip_foreign_properties=loader.skip_schemas,
                        strict_foreign_properties=strict_foreign_properties,
                    )
                except ClassValidationException as exc:
                    errors = [
                        ClassValidationException(
                            "tried `{}` but".format(name), sourceline, [exc]
                        )
                    ]
                    break
                except ValidationException as exc:
                    errors.append(
                        ValidationException(
                            "tried `{}` but".format(name), sourceline, [exc]
                        )
                    )

            objerr = u"Invalid"
            for ident in loader.identifiers:
                if ident in item:
                    objerr = u"Object `{}` is not valid because".format(
                        relname(item[ident])
                    )
                    break
            anyerrors.append(ValidationException(objerr, sourceline, errors, "-"))
    if anyerrors:
        raise ValidationException("", None, anyerrors, "*")

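# Note on validate_doc above: each top-level item is first validated against
# every documentRoot type with raise_ex=False, and the first success wins.
# Only if all roots fail is validation re-run with raise_ex=True so that the
# per-root error trees can be collected and reported together.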

def get_anon_name(rec):
    # type: (MutableMapping[Text, Union[Text, Dict[Text, Text]]]) -> Text
    """Calculate a reproducible name for anonymous types."""
    if "name" in rec:
        name = rec["name"]
        if isinstance(name, Text):
            return name
        raise ValidationException(
            "Expected name field to be a string, was {}".format(name)
        )
    anon_name = u""
    if rec["type"] in ("enum", saladp + "enum"):
        for sym in rec["symbols"]:
            anon_name += sym
        return "enum_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()
    if rec["type"] in ("record", saladp + "record"):
        for field in rec["fields"]:
            if isinstance(field, Mapping):
                anon_name += field[u"name"]
            else:
                raise ValidationException(
                    "Expected entries in 'fields' to also be maps, was {}.".format(
                        field
                    )
                )
        return u"record_" + hashlib.sha1(anon_name.encode("UTF-8")).hexdigest()
    if rec["type"] in ("array", saladp + "array"):
        return u""
    raise ValidationException("Expected enum or record, was {}".format(rec["type"]))

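# The name is reproducible because it depends only on the symbol or field
# names, e.g. (hypothetical input):
#     get_anon_name({"type": "record", "fields": [{"name": "a"}, {"name": "b"}]})
# always yields "record_" + hashlib.sha1(b"ab").hexdigest().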

def replace_type(items, spec, loader, found, find_embeds=True, deepen=True):
    # type: (Any, Dict[Text, Any], Loader, Set[Text], bool, bool) -> Any
    """Go through and replace types in the 'spec' mapping."""
    if isinstance(items, MutableMapping):
        # recursively check these fields for types to replace
        if items.get("type") in ("record", "enum") and items.get("name"):
            if items["name"] in found:
                return items["name"]
            found.add(items["name"])

        if not deepen:
            return items

        items = copy.copy(items)
        if not items.get("name"):
            items["name"] = get_anon_name(items)
        for name in ("type", "items", "fields"):
            if name in items:
                items[name] = replace_type(
                    items[name],
                    spec,
                    loader,
                    found,
                    find_embeds=find_embeds,
                    deepen=find_embeds,
                )
                if isinstance(items[name], MutableSequence):
                    items[name] = flatten(items[name])

        return items
    if isinstance(items, MutableSequence):
        # recursively transform list
        return [
            replace_type(i, spec, loader, found, find_embeds=find_embeds, deepen=deepen)
            for i in items
        ]
    if isinstance(items, string_types):
        # found a string which is a symbol corresponding to a type.
        replace_with = None
        if items in loader.vocab:
            # If it's a vocabulary term, first expand it to its fully qualified
            # URI
            items = loader.vocab[items]

        if items in spec:
            # Look up in specialization map
            replace_with = spec[items]

        if replace_with:
            return replace_type(
                replace_with, spec, loader, found, find_embeds=find_embeds
            )
        found.add(items)
    return items


def avro_name(url):  # type: (Text) -> Text
    """
    Turn a URL into an Avro-safe name.

    If the URL has no fragment, return the URL unchanged. Otherwise, return
    the part of the fragment after the last slash, or the whole fragment if
    it contains no slash.
    """
    frg = urllib.parse.urldefrag(url)[1]
    if frg != "":
        if "/" in frg:
            return frg[frg.rindex("/") + 1 :]
        return frg
    return url

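# For instance (illustrative URLs):
#     avro_name("https://w3id.org/cwl/salad#RecordSchema")        -> "RecordSchema"
#     avro_name("https://w3id.org/cwl/salad#RecordSchema/fields") -> "fields"
#     avro_name("https://example.com/noFragment")                 -> unchanged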

Avro = TypeVar("Avro", Dict[Text, Any], List[Any], Text)


def make_valid_avro(
    items,  # type: Avro
    alltypes,  # type: Dict[Text, Dict[Text, Any]]
    found,  # type: Set[Text]
    union=False,  # type: bool
):  # type: (...) -> Union[Avro, Dict[Text, Text], Text]
    """Convert our schema to be more Avro-like."""
    # Possibly could be integrated into our fork of avro/schema.py?
    if isinstance(items, MutableMapping):
        items = copy.copy(items)
        if items.get("name") and items.get("inVocab", True):
            items["name"] = avro_name(items["name"])

        if "type" in items and items["type"] in (
            saladp + "record",
            saladp + "enum",
            "record",
            "enum",
        ):
            if (hasattr(items, "get") and items.get("abstract")) or (
                "abstract" in items
            ):
                return items
            if items["name"] in found:
                return cast(Text, items["name"])
            found.add(items["name"])
        for field in ("type", "items", "values", "fields"):
            if field in items:
                items[field] = make_valid_avro(
                    items[field], alltypes, found, union=True
                )
        if "symbols" in items:
            items["symbols"] = [avro_name(sym) for sym in items["symbols"]]
        return items
    if isinstance(items, MutableSequence):
        ret = []
        for i in items:
            ret.append(make_valid_avro(i, alltypes, found, union=union))
        return ret
    if union and isinstance(items, string_types):
        if items in alltypes and avro_name(items) not in found:
            return cast(
                Dict[Text, Text],
                make_valid_avro(alltypes[items], alltypes, found, union=union),
            )
        items = avro_name(items)
    return items


def deepcopy_strip(item):  # type: (Any) -> Any
    """
    Make a deep copy of list and dict objects.

    Intentionally do not copy attributes. This is to discard CommentedMap and
    CommentedSeq metadata which is very expensive with regular copy.deepcopy.
    """
    if isinstance(item, MutableMapping):
        return {k: deepcopy_strip(v) for k, v in iteritems(item)}
    if isinstance(item, MutableSequence):
        return [deepcopy_strip(k) for k in item]
    return item


def extend_and_specialize(items, loader):
    # type: (List[Dict[Text, Any]], Loader) -> List[Dict[Text, Any]]
    """Apply 'extend' and 'specialize' to fully materialize derived record types."""
    items = deepcopy_strip(items)
    types = {i["name"]: i for i in items}  # type: Dict[Text, Any]
    results = []

    for stype in items:
        if "extends" in stype:
            specs = {}  # type: Dict[Text, Text]
            if "specialize" in stype:
                for spec in aslist(stype["specialize"]):
                    specs[spec["specializeFrom"]] = spec["specializeTo"]

            exfields = []  # type: List[Text]
            exsym = []  # type: List[Text]
            for ex in aslist(stype["extends"]):
                if ex not in types:
                    raise ValidationException(
                        "Extends {} in {} refers to invalid base type.".format(
                            stype["extends"], stype["name"]
                        )
                    )

                basetype = copy.copy(types[ex])

                if stype["type"] == "record":
                    if specs:
                        basetype["fields"] = replace_type(
                            basetype.get("fields", []), specs, loader, set()
                        )

                    for field in basetype.get("fields", []):
                        if "inherited_from" not in field:
                            field["inherited_from"] = ex

                    exfields.extend(basetype.get("fields", []))
                elif stype["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))

            if stype["type"] == "record":
                stype = copy.copy(stype)
                exfields.extend(stype.get("fields", []))
                stype["fields"] = exfields

                fieldnames = set()  # type: Set[Text]
                for field in stype["fields"]:
                    if field["name"] in fieldnames:
                        raise ValidationException(
                            "Field name {} appears twice in {}".format(
                                field["name"], stype["name"]
                            )
                        )
                    else:
                        fieldnames.add(field["name"])
            elif stype["type"] == "enum":
                stype = copy.copy(stype)
                exsym.extend(stype.get("symbols", []))
                stype["symbols"] = exsym

        types[stype["name"]] = stype

        results.append(stype)

    ex_types = {}
    for result in results:
        ex_types[result["name"]] = result

    extended_by = {}  # type: Dict[Text, Text]
    for result in results:
        if "extends" in result:
            for ex in aslist(result["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[result["name"]])
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])

    for result in results:
        if result.get("abstract") and result["name"] not in extended_by:
            raise ValidationException(
                "{} is abstract but missing a concrete subtype".format(result["name"])
            )

    for result in results:
        if "fields" in result:
            result["fields"] = replace_type(
                result["fields"], extended_by, loader, set()
            )

    return results

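# Sketch of the effect on a derived record (hypothetical types): if "Derived"
# extends "Base", the returned "Derived" lists Base's fields first (each
# tagged "inherited_from": "Base") followed by its own; duplicate field names
# raise ValidationException, and specializeFrom/specializeTo pairs rewrite
# the inherited field types via replace_type().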

def make_avro(
    i,  # type: List[Dict[Text, Any]]
    loader,  # type: Loader
):  # type: (...) -> List[Any]
    j = extend_and_specialize(i, loader)

    name_dict = {}  # type: Dict[Text, Dict[Text, Any]]
    for entry in j:
        name_dict[entry["name"]] = entry
    avro = make_valid_avro(j, name_dict, set())

    return [
        t
        for t in avro
        if isinstance(t, MutableMapping)
        and not t.get("abstract")
        and t.get("type") != "documentation"
    ]


def make_avro_schema(
    i,  # type: List[Any]
    loader,  # type: Loader
):  # type: (...) -> Names
    """
    All in one convenience function.

    Call make_avro() and make_avro_schema_from_avro() separately if you need
    the intermediate result for diagnostic output.
    """
    names = Names()
    avro = make_avro(i, loader)
    make_avsc_object(convert_to_dict(avro), names)
    return names


def make_avro_schema_from_avro(avro):
    # type: (List[Union[Avro, Dict[Text, Text], Text]]) -> Names
    names = Names()
    make_avsc_object(convert_to_dict(avro), names)
    return names


def shortname(inputid):  # type: (Text) -> Text
    """Return the last segment of the provided fragment or path."""
    parsed_id = urllib.parse.urlparse(inputid)
    if parsed_id.fragment:
        return parsed_id.fragment.split(u"/")[-1]
    return parsed_id.path.split(u"/")[-1]

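# For example (illustrative inputs):
#     shortname("https://w3id.org/cwl/salad#RecordSchema/fields") -> "fields"
#     shortname("file:///tmp/schemas/myschema.yml")               -> "myschema.yml"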

def print_inheritance(doc, stream):
    # type: (List[Dict[Text, Any]], IO[Any]) -> None
    """Write a Graphviz inheritance graph for the supplied document."""
    stream.write("digraph {\n")
    for entry in doc:
        if entry["type"] == "record":
            label = name = shortname(entry["name"])
            fields = entry.get("fields", [])
            if fields:
                label += "\\n* {}\\l".format(
                    "\\l* ".join(shortname(field["name"]) for field in fields)
                )
            shape = "ellipse" if entry.get("abstract") else "box"
            stream.write('"{}" [shape={} label="{}"];\n'.format(name, shape, label))
            if "extends" in entry:
                for target in aslist(entry["extends"]):
                    stream.write('"{}" -> "{}";\n'.format(shortname(target), name))
    stream.write("}\n")


def print_fieldrefs(doc, loader, stream):
    # type: (List[Dict[Text, Any]], Loader, IO[Any]) -> None
    """Write a Graphviz graph of the relationships between the fields."""
    obj = extend_and_specialize(doc, loader)

    primitives = set(
        (
            "http://www.w3.org/2001/XMLSchema#string",
            "http://www.w3.org/2001/XMLSchema#boolean",
            "http://www.w3.org/2001/XMLSchema#int",
            "http://www.w3.org/2001/XMLSchema#long",
            saladp + "null",
            saladp + "enum",
            saladp + "array",
            saladp + "record",
            saladp + "Any",
        )
    )

    stream.write("digraph {\n")
    for entry in obj:
        if entry.get("abstract"):
            continue
        if entry["type"] == "record":
            label = shortname(entry["name"])
            for field in entry.get("fields", []):
                found = set()  # type: Set[Text]
                field_name = shortname(field["name"])
                replace_type(field["type"], {}, loader, found, find_embeds=False)
                for each_type in found:
                    if each_type not in primitives:
                        stream.write(
                            '"{}" -> "{}" [label="{}"];\n'.format(
                                label, shortname(each_type), field_name
                            )
                        )
    stream.write("}\n")
```
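Taken together, `load_schema` and `load_and_validate` are the module's main entry points. A minimal end-to-end sketch (file names are hypothetical; assumes this vendored `schema_salad` package is importable):

```python
from schema_salad.schema import load_and_validate, load_schema

# Load a Salad schema; the tuple mirrors load_schema's documented return value.
document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
    "myschema.yml"  # hypothetical schema document
)

# Resolve and validate a target document against the loaded schema.
data, metadata = load_and_validate(
    document_loader, avsc_names, "mydocument.yml", strict=True  # hypothetical document
)
```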
