comparison env/lib/python3.9/site-packages/rdflib/term.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 This module defines the different types of terms. Terms are the kinds of
3 objects that can appear in a quoted/asserted triple. This includes those
4 that are core to RDF:
5
6 * :class:`Blank Nodes <rdflib.term.BNode>`
7 * :class:`URI References <rdflib.term.URIRef>`
8 * :class:`Literals <rdflib.term.Literal>` (which consist of a literal value,datatype and language tag)
9
10 Those that extend the RDF model into N3:
11
12 * :class:`Formulae <rdflib.graph.QuotedGraph>`
13 * :class:`Universal Quantifications (Variables) <rdflib.term.Variable>`
14
15 And those that are primarily for matching against 'Nodes' in the
16 underlying Graph:
17
18 * REGEX Expressions
19 * Date Ranges
20 * Numerical Ranges
21
22 """
23 from __future__ import absolute_import
24 from __future__ import division
25 from __future__ import print_function
26 # from __future__ import unicode_literals
27 from fractions import Fraction
28
29 __all__ = [
30 'bind',
31
32 'Node',
33 'Identifier',
34
35 'URIRef',
36 'BNode',
37 'Literal',
38
39 'Variable',
40 'Statement',
41 ]
42
43 import logging
44 logger = logging.getLogger(__name__)
45 import warnings
46 import math
47
48 import base64
49 import xml.dom.minidom
50
51 from datetime import date, time, datetime, timedelta
52 from re import sub, compile
53 from collections import defaultdict
54 from unicodedata import category
55
56 from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat
57 from binascii import hexlify, unhexlify
58
59 import rdflib
60 from six import PY2
61 from six import PY3
62 from six import b
63 from rdflib.compat import long_type
64 from six import string_types
65 from six import text_type
66 from six.moves.urllib.parse import urldefrag
67 from six.moves.urllib.parse import urljoin
68 from six.moves.urllib.parse import urlparse
69
70 skolem_genid = "/.well-known/genid/"
71 rdflib_skolem_genid = "/.well-known/genid/rdflib/"
72 skolems = {}
73
74
# Characters rejected by the limited URI validity check below.
_invalid_uri_chars = '<>" {}|\\^`'


def _is_valid_uri(uri):
    """
    Return True when no character of *uri* is listed in
    ``_invalid_uri_chars``; an empty *uri* is considered valid.
    """
    for char in uri:
        # code points above 256 are accepted without consulting the list
        if ord(char) <= 256 and char in _invalid_uri_chars:
            return False
    return True
80
81
# A run of ASCII letters, optionally followed by "-" separated
# alphanumeric subtags.
_lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')


def _is_valid_langtag(tag):
    """Return True if *tag* matches ``_lang_tag_regex``, False otherwise."""
    return _lang_tag_regex.match(tag) is not None
87
88
def _is_valid_unicode(value):
    """
    Check that *value* can be turned into a Python unicode object.

    Byte strings are decoded as UTF-8; any other value is handed to the
    text constructor (``str`` on Python 3, ``unicode`` on Python 2).

    :returns: False if the conversion raises ``UnicodeError``, else True
    """
    if isinstance(value, bytes):
        converter, argument = value.decode, 'utf-8'
    elif PY3:
        converter, argument = str, value
    else:
        converter, argument = unicode, value

    # only the conversion itself is guarded
    try:
        converter(argument)
    except UnicodeError:
        return False
    else:
        return True
107
108
class Node(object):
    """
    A Node in the Graph.

    The class holds no state; its empty ``__slots__`` means a bare
    ``Node`` instance has no ``__dict__``.
    """

    __slots__ = ()
115
116
class Identifier(Node, text_type):  # allow Identifiers to be Nodes in the Graph
    """
    Text-valued base class of the graph terms defined in this module.

    See http://www.w3.org/2002/07/rdf-identifer-terminology/
    regarding choice of terminology.
    """

    __slots__ = ()

    def __new__(cls, value):
        return text_type.__new__(cls, value)

    def eq(self, other):
        """A "semantic"/interpreted equality function,
        by default, same as __eq__"""
        return self.__eq__(other)

    def neq(self, other):
        """A "semantic"/interpreted not equal function,
        by default, same as __ne__"""
        return self.__ne__(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        """
        Equality for Nodes.

        Two identifiers are equal only if they are of exactly the same
        type and have the same text.

        >>> BNode("foo")==None
        False
        >>> BNode("foo")==URIRef("foo")
        False
        >>> URIRef("foo")==BNode("foo")
        False
        >>> BNode("foo")!=URIRef("foo")
        True
        >>> URIRef("foo")!=BNode("foo")
        True
        >>> Variable('a')!=URIRef('a')
        True
        >>> Variable('a')!=Variable('a')
        False
        """

        if type(self) == type(other):
            return text_type(self) == text_type(other)
        else:
            return False

    def __gt__(self, other):
        """
        This implements ordering for Nodes,

        This tries to implement this:
        http://www.w3.org/TR/sparql11-query/#modOrderBy

        Variables are not included in the SPARQL list, but
        they are greater than BNodes and smaller than everything else

        Returns NotImplemented when *other* is neither None nor a Node.
        """
        if other is None:
            return True  # everything bigger than None
        elif type(self) == type(other):
            return text_type(self) > text_type(other)
        elif isinstance(other, Node):
            return _ORDERING[type(self)] > _ORDERING[type(other)]

        return NotImplemented

    def __lt__(self, other):
        """
        Counterpart of ``__gt__``; returns NotImplemented when *other*
        is neither None nor a Node.
        """
        if other is None:
            return False  # Nothing is less than None
        elif type(self) == type(other):
            return text_type(self) < text_type(other)
        elif isinstance(other, Node):
            return _ORDERING[type(self)] < _ORDERING[type(other)]

        return NotImplemented

    def __le__(self, other):
        """
        Less-than-or-equal, built from ``__lt__`` and ``==``.

        NotImplemented coming back from ``__lt__`` is passed on unchanged:
        it is truthy, so testing it directly would report every
        non-Node operand as "less than or equal".
        """
        r = self.__lt__(other)
        if r is NotImplemented:
            return NotImplemented
        if r:
            return True
        return self == other

    def __ge__(self, other):
        """
        Greater-than-or-equal, built from ``__gt__`` and ``==``.

        NotImplemented coming back from ``__gt__`` is passed on unchanged
        instead of being interpreted as a true result.
        """
        r = self.__gt__(other)
        if r is NotImplemented:
            return NotImplemented
        if r:
            return True
        return self == other

    # use parent's hash for efficiency reasons
    # clashes of 'foo', URIRef('foo') and Literal('foo') are typically so rare
    # that they don't justify additional overhead. Notice that even in case of
    # clash __eq__ is still the fallback and very quick in those cases.
    __hash__ = text_type.__hash__
213
214
class URIRef(Identifier):
    """
    RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref
    """

    __slots__ = ()

    def __new__(cls, value, base=None):
        """
        Create the reference, resolving *value* against *base* when a
        base is given.  A value that fails the limited validity check is
        still accepted, but a warning is logged.
        """
        if base is not None:
            ends_in_hash = value.endswith("#")
            value = urljoin(base, value, allow_fragments=1)
            # put back a trailing "#" that did not survive the join
            if ends_in_hash and not value.endswith("#"):
                value += "#"

        if not _is_valid_uri(value):
            logger.warning('%s does not look like a valid URI, trying to serialize this will break.'%value)

        try:
            return text_type.__new__(cls, value)
        except UnicodeDecodeError:
            return text_type.__new__(cls, value, 'utf-8')

    def toPython(self):
        return text_type(self)

    def n3(self, namespace_manager=None):
        """
        This will do a limited check for valid URIs,
        essentially just making sure that the string includes no illegal
        characters (``<, >, ", {, }, |, \\, `, ^``)

        :param namespace_manager: if not None, will be used to make up
             a prefixed name
        """
        if not _is_valid_uri(self):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?'%self)

        if not namespace_manager:
            return "<%s>" % self
        return namespace_manager.normalizeUri(self)

    def defrag(self):
        """Return a URIRef without the fragment, or self if there is no "#"."""
        if "#" not in self:
            return self
        return URIRef(urldefrag(self)[0])

    def __reduce__(self):
        return (URIRef, (text_type(self),))

    def __getnewargs__(self):
        return (text_type(self), )

    if PY2:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        # subclasses are shown under their bare class name
        clsName = ("rdflib.term.URIRef" if self.__class__ is URIRef
                   else self.__class__.__name__)
        return """%s(%s)""" % (clsName, super(URIRef, self).__repr__())

    def __add__(self, other):
        return self.__class__(text_type(self) + other)

    def __radd__(self, other):
        return self.__class__(other + text_type(self))

    def __mod__(self, other):
        return self.__class__(text_type(self) % other)

    def de_skolemize(self):
        """ Create a Blank Node from a skolem URI, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization.
        This function accepts only rdflib type skolemization, to provide
        a round-tripping within the system.

        .. versionadded:: 4.0
        """
        if isinstance(self, RDFLibGenid):
            # the BNode id is whatever follows the rdflib genid prefix
            path = urlparse("%s" % self).path
            return BNode(value=path[len(rdflib_skolem_genid):])

        if isinstance(self, Genid):
            # one BNode per distinct URI, remembered in ``skolems``
            bnode_id = "%s" % self
            if bnode_id not in skolems:
                skolems[bnode_id] = BNode()
            return skolems[bnode_id]

        raise Exception("<%s> is not a skolem URI" % self)
317
318
class Genid(URIRef):
    """URIRef subclass recognised by ``URIRef.de_skolemize``."""

    __slots__ = ()

    @staticmethod
    def _is_external_skolem(uri):
        """
        Return True when the last occurrence of ``skolem_genid`` in the
        path of *uri* is at position 0.  Non-string input is converted
        with ``str`` first.
        """
        if not isinstance(uri, string_types):
            uri = str(uri)
        return urlparse(uri).path.rfind(skolem_genid) == 0
331
332
class RDFLibGenid(Genid):
    """Genid subclass whose path embeds the BNode id (see ``URIRef.de_skolemize``)."""

    __slots__ = ()

    @staticmethod
    def _is_rdflib_skolem(uri):
        """
        Return True when *uri* has empty params, query and fragment and
        the last occurrence of ``rdflib_skolem_genid`` in its path is at
        position 0.  Non-string input is converted with ``str`` first.
        """
        if not isinstance(uri, string_types):
            uri = str(uri)
        parts = urlparse(uri)
        extras = (parts.params, parts.query, parts.fragment)
        if any(extra != "" for extra in extras):
            return False
        return parts.path.rfind(rdflib_skolem_genid) == 0
349
350
def _unique_id():
    # Used to read: """Create a (hopefully) unique prefix"""
    # now retained merely to leave the internal API unchanged.
    # From BNode.__new__() below ...
    #
    # acceptable bnode value range for RDF/XML needs to be
    # something that can be serialized as a nodeID for N3
    #
    # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
    # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
    prefix = "N"  # ensure that id starts with a letter
    return prefix
362
363
def _serial_number_generator():
    """
    Generates UUID4-based but ncname-compliant identifiers.

    Returns a zero-argument callable; every call yields the hex form of
    a freshly generated ``uuid4``.
    """
    from uuid import uuid4

    def _next_serial():
        fresh = uuid4()
        return fresh.hex

    return _next_serial
374
375
class BNode(Identifier):
    """
    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes

    """
    __slots__ = ()

    def __new__(cls, value=None,
                _sn_gen=_serial_number_generator(), _prefix=_unique_id()):
        """
        # only store implementations should pass in a value

        Without a value, an identifier is made from ``_prefix`` followed
        by a serial number obtained from ``_sn_gen``.
        """
        if value is None:
            # so that BNode values do not collide with ones created with
            # a different instance of this module at some other time.
            value = "%s%s" % (_prefix, _sn_gen())
        # TODO: check that a supplied value falls within acceptable bnode
        # value range for RDF/XML needs to be something that can be
        # serialized as a nodeID for N3 ??  Unless we require these
        # constraints be enforced elsewhere?
        # assert is_ncname(text_type(value)), "BNode identifiers
        # must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
        # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
        return Identifier.__new__(cls, value)

    def toPython(self):
        return text_type(self)

    def n3(self, namespace_manager=None):
        return "_:%s" % self

    def __getnewargs__(self):
        return (text_type(self), )

    def __reduce__(self):
        return (BNode, (text_type(self),))

    if PY2:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        # subclasses are shown under their bare class name
        clsName = ("rdflib.term.BNode" if self.__class__ is BNode
                   else self.__class__.__name__)
        return """%s('%s')""" % (clsName, str(self))

    def skolemize(self, authority=None, basepath=None):
        """ Create a URIRef "skolem" representation of the BNode, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization

        :param authority: base the skolem path is joined onto;
            "http://rdlib.net/" when None
        :param basepath: path prefix placed before the node id;
            ``rdflib_skolem_genid`` when None

        .. versionadded:: 4.0
        """
        base = "http://rdlib.net/" if authority is None else authority
        prefix = rdflib_skolem_genid if basepath is None else basepath
        return URIRef(urljoin(base, "%s%s" % (prefix, text_type(self))))
438
439
440 class Literal(Identifier):
441 __doc__ = """
442 RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal
443
444 The lexical value of the literal is the unicode object
445 The interpreted, datatyped value is available from .value
446
447 Language tags must be valid according to :rfc:5646
448
449 For valid XSD datatypes, the lexical form is optionally normalized
450 at construction time. Default behaviour is set by rdflib.NORMALIZE_LITERALS
451 and can be overridden by the normalize parameter to __new__
452
453 Equality and hashing of Literals are done based on the lexical form, i.e.:
454
455 >>> from rdflib.namespace import XSD
456
457 >>> Literal('01')!=Literal('1') # clear - strings differ
458 True
459
460 but with data-type they get normalized:
461
462 >>> Literal('01', datatype=XSD.integer)!=Literal('1', datatype=XSD.integer)
463 False
464
465 unless disabled:
466
467 >>> Literal('01', datatype=XSD.integer, normalize=False)!=Literal('1', datatype=XSD.integer)
468 True
469
470
471 Value based comparison is possible:
472
473 >>> Literal('01', datatype=XSD.integer).eq(Literal('1', datatype=XSD.float))
474 True
475
476 The eq method also provides limited support for basic python types:
477
478 >>> Literal(1).eq(1) # fine - int compatible with xsd:integer
479 True
480 >>> Literal('a').eq('b') # fine - str compatible with plain-lit
481 False
482 >>> Literal('a', datatype=XSD.string).eq('a') # fine - str compatible with xsd:string
483 True
484 >>> Literal('a').eq(1) # not fine, int incompatible with plain-lit
485 NotImplemented
486
487 Greater-than/less-than ordering comparisons are also done in value
488 space, when compatible datatypes are used. Incompatible datatypes
489 are ordered by DT, or by lang-tag. For other nodes the ordering
490 is None < BNode < URIRef < Literal
491
492 Any comparison with non-rdflib Node are "NotImplemented"
493 In PY2.X some stable order will be made up by python
494
495 In PY3 this is an error.
496
497 >>> from rdflib import Literal, XSD
498 >>> lit2006 = Literal('2006-01-01',datatype=XSD.date)
499 >>> lit2006.toPython()
500 datetime.date(2006, 1, 1)
501 >>> lit2006 < Literal('2007-01-01',datatype=XSD.date)
502 True
503 >>> Literal(datetime.utcnow()).datatype
504 rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#dateTime')
505 >>> Literal(1) > Literal(2) # by value
506 False
507 >>> Literal(1) > Literal(2.0) # by value
508 False
509 >>> Literal('1') > Literal(1) # by DT
510 True
511 >>> Literal('1') < Literal('1') # by lexical form
512 False
513 >>> Literal('a', lang='en') > Literal('a', lang='fr') # by lang-tag
514 False
515 >>> Literal(1) > URIRef('foo') # by node-type
516 True
517
518 The > < operators will eat this NotImplemented and either make up
519 an ordering (py2.x) or throw a TypeError (py3k):
520
521 >>> Literal(1).__gt__(2.0)
522 NotImplemented
523
524
525 """
526
527 if not PY3:
528 __slots__ = ("language", "datatype", "value", "_language",
529 "_datatype", "_value")
530 else:
531 __slots__ = ("_language", "_datatype", "_value")
532
def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None):
    """
    Build a Literal from another Literal, a string/bytes lexical form,
    or an arbitrary Python object.

    :param lexical_or_value: the source of the literal
    :param lang: language tag; an empty string is treated as None
    :param datatype: datatype URI; wrapped in URIRef when truthy
    :param normalize: whether a string's lexical form is replaced by the
        one derived from its parsed value; when None,
        ``rdflib.NORMALIZE_LITERALS`` decides
    :raises TypeError: if both *lang* and *datatype* are given
    :raises Exception: if *lang* is not a valid language tag
    """
    if lang == '':
        lang = None  # no empty lang-tags in RDF

    if normalize is None:
        normalize = rdflib.NORMALIZE_LITERALS

    if lang is not None and datatype is not None:
        raise TypeError(
            "A Literal can only have one of lang or datatype, "
            "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")

    if lang and not _is_valid_langtag(lang):
        raise Exception("'%s' is not a valid language tag!" % lang)

    if datatype:
        datatype = URIRef(datatype)

    value = None
    if isinstance(lexical_or_value, Literal):
        # create from another Literal instance
        source = lexical_or_value
        lang = lang or source.language
        if datatype:
            # override datatype
            value = _castLexicalToPython(source, datatype)
        else:
            datatype = source.datatype
            value = source.value

    elif isinstance(lexical_or_value, string_types) or (PY3 and isinstance(lexical_or_value, bytes)):
        # passed a string
        # try parsing lexical form of datatyped literal
        value = _castLexicalToPython(lexical_or_value, datatype)

        if value is not None and normalize:
            # the datatype reported by the cast is not used here
            normalized, _ = _castPythonToLiteral(value, datatype)
            if normalized is not None and _is_valid_unicode(normalized):
                lexical_or_value = normalized

    else:
        # passed some python object
        value = lexical_or_value
        lexical, inferred_datatype = _castPythonToLiteral(lexical_or_value, datatype)

        datatype = datatype or inferred_datatype
        if lexical is not None:
            lexical_or_value = lexical
        if datatype:
            lang = None

    if PY3 and isinstance(lexical_or_value, bytes):
        lexical_or_value = lexical_or_value.decode('utf-8')

    try:
        literal = text_type.__new__(cls, lexical_or_value)
    except UnicodeDecodeError:
        literal = text_type.__new__(cls, lexical_or_value, 'utf-8')

    literal._language = lang
    literal._datatype = datatype
    literal._value = value
    return literal
596
def normalize(self):
    """
    Returns a new literal with a normalised lexical representation
    of this literal
    >>> from rdflib import XSD
    >>> Literal("01", datatype=XSD.integer, normalize=False).normalize()
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    Illegal lexical forms for the datatype given are simply passed on
    >>> Literal("a", datatype=XSD.integer, normalize=False)
    rdflib.term.Literal(u'a', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    """
    # without a value there is nothing to rebuild the literal from
    if self.value is None:
        return self
    return Literal(self.value, datatype=self.datatype, lang=self.language)
615
@property
def value(self):
    """Read-only access to the ``_value`` slot."""
    stored = self._value
    return stored
619
@property
def language(self):
    """Read-only access to the ``_language`` slot."""
    stored = self._language
    return stored
623
@property
def datatype(self):
    """Read-only access to the ``_datatype`` slot."""
    stored = self._datatype
    return stored
627
def __reduce__(self):
    """Rebuild through ``Literal(lexical form, language, datatype)``."""
    constructor_args = (text_type(self), self.language, self.datatype)
    return (Literal, constructor_args,)
630
def __getstate__(self):
    """Return ``(None, state)`` where state holds language and datatype."""
    state = {"language": self.language, "datatype": self.datatype}
    return (None, state)
633
def __setstate__(self, arg):
    """Restore language and datatype from the pair made by ``__getstate__``."""
    state = arg[1] if len(arg) == 2 else tuple.__getitem__(tuple(arg), slice(None))
    _, state = arg
    self._language = state["language"]
    self._datatype = state["datatype"]
638
def __add__(self, val):
    """
    Add *val* to this literal.

    Equal datatypes add the Python values; two different numeric
    datatypes give an xsd:decimal; anything else is concatenated as text.

    >>> Literal(1) + 1
    rdflib.term.Literal(u'2', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> Literal("1") + "1"
    rdflib.term.Literal(u'11')
    """

    # if no val is supplied, return this Literal
    if val is None:
        return self

    # convert the val to a Literal, if it isn't already one
    if not isinstance(val, Literal):
        val = Literal(val)

    # if the datatypes are the same, just add the Python values and convert back
    if self.datatype == val.datatype:
        total = self.toPython() + val.toPython()
        return Literal(total, self.language, datatype=self.datatype)

    # if the datatypes are not the same but are both numeric, add the Python values and strip off decimal junk
    # (i.e. tiny numbers (more than 17 decimal places) and trailing zeros) and return as a decimal
    if (self.datatype in _NUMERIC_LITERAL_TYPES
            and val.datatype in _NUMERIC_LITERAL_TYPES):
        rounded = round(Decimal(self.toPython()) + Decimal(val.toPython()), 15)
        digits = ('%f' % rounded).rstrip('0').rstrip('.')
        return Literal(Decimal(digits), datatype=_XSD_DECIMAL)

    # in all other cases, perform string concatenation
    try:
        joined = text_type.__add__(self, val)
    except TypeError:
        joined = str(self.value) + str(val)

    # if the original datatype is string-like, use that; if not, use string
    new_datatype = (self.datatype if self.datatype in _STRING_LITERAL_TYPES
                    else _XSD_STRING)

    return Literal(joined, self.language, datatype=new_datatype)
686
def __bool__(self):
    """
    Is the Literal "True"
    This is used for if statements, bool(literal), etc.

    The truth of the value is used when there is one; otherwise the
    literal is true when its lexical form is not empty.
    """
    if self.value is None:
        return len(self) != 0
    return bool(self.value)

if PY2:
    __nonzero__ = __bool__
698
def __neg__(self):
    """
    Negate a literal whose value is an int, long or float.

    >>> (- Literal(1))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (- Literal(10.5))
    rdflib.term.Literal(u'-10.5', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#double'))
    >>> from rdflib.namespace import XSD
    >>> (- Literal("1", datatype=XSD.integer))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (- Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')
    >>>
    """
    number = self.value
    if not isinstance(number, (int, long_type, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(-number)
720
def __pos__(self):
    """
    Unary plus for a literal whose value is an int, long or float.

    >>> (+ Literal(1))
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (+ Literal(-1))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> from rdflib.namespace import XSD
    >>> (+ Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (+ Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')
    """
    number = self.value
    if not isinstance(number, (int, long_type, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(+number)
740
def __abs__(self):
    """
    Absolute value of a literal whose value is an int, long or float.

    >>> abs(Literal(-1))
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> abs( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> abs(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')
    """
    number = self.value
    if not isinstance(number, (int, long_type, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(abs(number))
759
def __invert__(self):
    """
    Bitwise inversion of a literal whose value is an integer.

    >>> ~(Literal(-1))
    rdflib.term.Literal(u'0', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> ~( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(u'0', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    Not working:

    >>> ~(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')

    :raises TypeError: if the value is not a number, or is a float
        (floats pass the number check but have no ``__invert__``, which
        previously surfaced as an AttributeError)
    """
    number = self.value
    if not isinstance(number, (int, long_type, float)):
        raise TypeError("Not a number; %s" % repr(self))
    # the operator form raises TypeError for floats instead of failing
    # on a missing ``__invert__`` attribute
    return Literal(~number)
780
def __gt__(self, other):
    """

    This implements ordering for Literals,
    the other comparison methods delegate here

    This tries to implement this:
    http://www.w3.org/TR/sparql11-query/#modOrderBy

    In short, Literals with compatible data-types are ordered in value
    space, i.e.
    >>> from rdflib import XSD

    >>> Literal(1) > Literal(2) # int/int
    False
    >>> Literal(2.0) > Literal(1) # double/int
    True
    >>> from decimal import Decimal
    >>> Literal(Decimal("3.3")) > Literal(2.0) # decimal/double
    True
    >>> Literal(Decimal("3.3")) < Literal(4.0) # decimal/double
    True
    >>> Literal('b') > Literal('a') # plain lit/plain lit
    True
    >>> Literal('b') > Literal('a', datatype=XSD.string) # plain lit/xsd:str
    True

    Incompatible datatype mismatches ordered by DT

    >>> Literal(1) > Literal("2") # int>string
    False

    Langtagged literals by lang tag
    >>> Literal("a", lang="en") > Literal("a", lang="fr")
    False
    """
    if other is None:
        return True  # Everything is greater than None

    if not isinstance(other, Literal):
        if isinstance(other, Node):
            return True  # Literal are the greatest!
        return NotImplemented  # we can only compare to nodes

    # two numeric literals: plain value comparison
    if (self.datatype in _NUMERIC_LITERAL_TYPES
            and other.datatype in _NUMERIC_LITERAL_TYPES):
        return self.value > other.value

    # plain-literals and xsd:string literals
    # are "the same"
    own_dt = self.datatype or _XSD_STRING
    their_dt = other.datatype or _XSD_STRING

    if own_dt != their_dt:
        if rdflib.DAWG_LITERAL_COLLATION:
            return NotImplemented
        return own_dt > their_dt

    if self.language != other.language:
        # a literal without language sorts before one with a language
        if not self.language:
            return False
        if not other.language:
            return True
        return self.language > other.language

    if self.value is not None and other.value is not None:
        value_type = type(self.value)
        if value_type in _TOTAL_ORDER_CASTERS:
            caster = _TOTAL_ORDER_CASTERS[value_type]
            return caster(self.value) > caster(other.value)

        try:
            return self.value > other.value
        except TypeError:
            # values cannot be ordered; fall back to the lexical form
            pass

    own_text = text_type(self)
    their_text = text_type(other)
    if own_text != their_text:
        return own_text > their_text

    # same language, same lexical form, check real dt
    # plain-literals come before xsd:string!
    if self.datatype != other.datatype:
        if not self.datatype:
            return False
        if not other.datatype:
            return True
        return self.datatype > other.datatype

    return False  # they are the same
873
def __lt__(self, other):
    """
    Less-than for Literals, derived from ``__gt__`` and ``eq``.

    Nothing is less than None and no Literal is less than a non-Literal
    Node.  For another Literal the result is "not greater and not
    equal"; NotImplemented is returned when ``__gt__`` itself returns
    NotImplemented or when either call raises TypeError.  Objects that
    are not Nodes also give NotImplemented.
    """
    if other is None:
        return False  # Nothing is less than None
    if isinstance(other, Literal):
        try:
            greater = self.__gt__(other)
            # NotImplemented is not a truth value: negating it would
            # report a definite answer for an undecided comparison
            if greater is NotImplemented:
                return NotImplemented
            return not greater and not self.eq(other)
        except TypeError:
            return NotImplemented
    if isinstance(other, Node):
        return False  # all nodes are less-than Literals

    return NotImplemented
886
def __le__(self, other):
    """
    Less-than-or-equal, built from ``__lt__`` and ``eq``.

    >>> from rdflib.namespace import XSD
    >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
    ...     ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
    True

    NotImplemented from ``__lt__`` is handed back unchanged; it is
    truthy, so testing it directly would make every comparison with a
    non-Node object succeed.  A TypeError raised by ``eq`` also gives
    NotImplemented.
    """
    r = self.__lt__(other)
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
901
def __ge__(self, other):
    """
    Greater-than-or-equal, built from ``__gt__`` and ``eq``.

    NotImplemented from ``__gt__`` is handed back unchanged instead of
    being read as a true result; a TypeError raised by ``eq`` also
    gives NotImplemented.
    """
    r = self.__gt__(other)
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
910
def _comparable_to(self, other):
    """
    Helper method to decide which things are meaningful to
    rich-compare with this literal

    For another Literal:

    * two datatyped literals are comparable unless one of the datatypes
      is missing from ``XSDToPython`` and the two datatypes differ;
    * if only one side is datatyped, that datatype must be xsd:string
      (xsd:string may be compared with plain literals, in either order);
    * when at most one side is datatyped, the language tags must also be
      equal, ignoring case.

    Anything that is not a Literal is reported as comparable.
    """
    if isinstance(other, Literal):
        if (self.datatype and other.datatype):
            # two datatyped literals
            if self.datatype not in XSDToPython or other.datatype not in XSDToPython:
                # non XSD DTs must match
                if self.datatype != other.datatype:
                    return False

        else:
            # at most one side carries a datatype; it has to be
            # xsd:string to be comparable with a plain literal.  The
            # previous test, ``not (A) or (B)``, rejected the case where
            # *other* is the xsd:string side, and two plain literals.
            typed = self.datatype or other.datatype
            if typed and typed != _XSD_STRING:
                return False

            # if given lang-tag has to be case insensitive equal
            if (self.language or "").lower() != (other.language or "").lower():
                return False

    return True
935
def __hash__(self):
    """
    Hash combining the lexical form, the lower-cased language tag and
    the datatype.

    >>> from rdflib.namespace import XSD
    >>> a = {Literal('1', datatype=XSD.integer):'one'}
    >>> Literal('1', datatype=XSD.double) in a
    False


    "Called for the key object for dictionary operations,
    and by the built-in function hash(). Should return
    a 32-bit integer usable as a hash value for
    dictionary operations. The only required property
    is that objects which compare equal have the same
    hash value; it is advised to somehow mix together
    (e.g., using exclusive or) the hash values for the
    components of the object that also play a part in
    comparison of objects." -- 3.4.1 Basic customization (Python)

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by
    character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by
    character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    """
    # don't use super()... for efficiency reasons, see Identifier.__hash__
    digest = text_type.__hash__(self)
    language = self.language
    if language:
        digest ^= hash(language.lower())
    datatype = self.datatype
    if datatype:
        digest ^= hash(datatype)
    return digest
972
def __eq__(self, other):
    """
    Literals are only equal to other literals.

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo"))
    True
    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo2"))
    False

    >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo"))
    False
    >>> Literal("1", datatype=URIRef("foo")) == "asdf"
    False
    >>> from rdflib import XSD
    >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date)
    True
    >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1)
    False
    >>> Literal("one", lang="en") == Literal("one", lang="en")
    True
    >>> Literal("hast", lang='en') == Literal("hast", lang='de')
    False
    >>> Literal("1", datatype=XSD.integer) == Literal(1)
    True
    >>> Literal("1", datatype=XSD.integer) == Literal("01", datatype=XSD.integer)
    True

    """
    if self is other:
        return True
    if other is None or not isinstance(other, Literal):
        return False

    # checks run in the order: datatype, language (case-insensitive),
    # lexical form
    if not (self.datatype == other.datatype):
        return False

    own_lang = self.language.lower() if self.language else None
    their_lang = other.language.lower() if other.language else None
    if not (own_lang == their_lang):
        return False

    return text_type.__eq__(self, other)
1019
def eq(self, other):
    """
    Compare the value of this literal with something else.

    Either, with the value of another literal:
    comparisons are then done in literal "value space",
    and according to the rules of XSD subtype-substitution/type-promotion

    OR, with a python object:

    basestring objects can be compared with plain-literals,
    or those with datatype xsd:string

    bool objects with xsd:boolean

    an int, long or float with numeric xsd types

    isodate date, time, datetime objects with xsd:date, xsd:time or
    xsd:dateTime

    timedelta or isodate Duration objects with the xsd duration types

    :returns: True or False; ``NotImplemented`` for any combination that
              is not covered above
    :raises TypeError: when two literals have no usable python value and
                       different lexical forms (so equality of their
                       values cannot be decided), or when the datatypes
                       differ and ``rdflib.DAWG_LITERAL_COLLATION`` is set
    """
    if isinstance(other, Literal):

        if self.datatype in _NUMERIC_LITERAL_TYPES \
                and other.datatype in _NUMERIC_LITERAL_TYPES:
            if self.value is not None and other.value is not None:
                return self.value == other.value
            else:
                if text_type.__eq__(self, other):
                    return True
                raise TypeError(
                    'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))
        if (self.language or "").lower() != (other.language or "").lower():
            return False

        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        if (dtself == _XSD_STRING and dtother == _XSD_STRING):
            # string/plain literals, compare on lexical form
            return text_type.__eq__(self, other)

        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                raise TypeError("I don't know how to compare literals with datatypes %s and %s" % (
                    self.datatype, other.datatype))
            else:
                return False

        # matching non-string DTs now - do we compare values or
        # lexical form first? comparing two ints is far quicker -
        # maybe there are counter examples

        if self.value is not None and other.value is not None:

            if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL):
                return _isEqualXMLNode(self.value, other.value)

            return self.value == other.value
        else:

            if text_type.__eq__(self, other):
                return True

            if self.datatype == _XSD_STRING:
                return False  # string value space=lexical space

            # matching DTs, but not matching, we cannot compare!
            raise TypeError(
                'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))

    elif isinstance(other, Node):
        return False  # no non-Literal nodes are equal to a literal

    elif isinstance(other, string_types):
        # only plain-literals can be directly compared to strings

        # TODO: Is "blah"@en eq "blah" ?
        if self.language is not None:
            return False

        if (self.datatype == _XSD_STRING or self.datatype is None):
            return text_type(self) == other

    elif isinstance(other, bool) and self.datatype == _XSD_BOOLEAN:
        # bool is a subclass of int, so this test has to come before the
        # numeric branch below; otherwise it can never be reached.
        # A bool compared with a non-boolean literal still falls through
        # to the numeric branch, exactly as it did before.
        return self.value == other
    elif isinstance(other, (int, long_type, float)):
        if self.datatype in _NUMERIC_LITERAL_TYPES:
            return self.value == other
    elif isinstance(other, (date, datetime, time)):
        if self.datatype in (_XSD_DATETIME, _XSD_DATE, _XSD_TIME):
            return self.value == other
    elif isinstance(other, (timedelta, Duration)):
        if self.datatype in (_XSD_DURATION, _XSD_DAYTIMEDURATION, _XSD_YEARMONTHDURATION):
            return self.value == other

    return NotImplemented
1119
def neq(self, other):
    """
    Value-space inequality: the negation of :meth:`eq`.

    :returns: ``not self.eq(other)``, except that ``NotImplemented`` is
              passed through unchanged so callers can tell "cannot
              compare" apart from "equal"
    :raises TypeError: whenever :meth:`eq` raises it
    """
    outcome = self.eq(other)
    # Negating NotImplemented would silently yield False (and is
    # deprecated in a boolean context), so hand it back as-is.
    if outcome is NotImplemented:
        return NotImplemented
    return not outcome
1122
def n3(self, namespace_manager=None):
    r'''
    Serialize this literal in N3 notation.

    Examples::

        >>> Literal("foo").n3()
        u'"foo"'

    Strings with newlines or triple-quotes::

        >>> Literal("foo\nbar").n3()
        u'"""foo\nbar"""'

        >>> Literal("''\'").n3()
        u'"\'\'\'"'

        >>> Literal('"""').n3()
        u'"\\"\\"\\""'

    Language::

        >>> Literal("hello", lang="en").n3()
        u'"hello"@en'

    Datatypes::

        >>> Literal(1).n3()
        u'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

        >>> Literal(1.0).n3()
        u'"1.0"^^<http://www.w3.org/2001/XMLSchema#double>'

        >>> Literal(True).n3()
        u'"true"^^<http://www.w3.org/2001/XMLSchema#boolean>'

    Datatype and language isn't allowed (datatype takes precedence)::

        >>> Literal(1, lang="en").n3()
        u'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

    Custom datatype::

        >>> footype = URIRef("http://example.org/ns#foo")
        >>> Literal("1", datatype=footype).n3()
        u'"1"^^<http://example.org/ns#foo>'

    Passing a namespace-manager will use it to abbreviate datatype URIs:

        >>> from rdflib import Graph
        >>> Literal(1).n3(Graph().namespace_manager)
        u'"1"^^xsd:integer'
    '''
    # Without a (truthy) namespace manager the datatype URI is written
    # out in full by _literal_n3.
    if not namespace_manager:
        return self._literal_n3()
    # Otherwise its normalizeUri method is used to shorten the datatype.
    return self._literal_n3(qname_callback=namespace_manager.normalizeUri)
1180
1181 def _literal_n3(self, use_plain=False, qname_callback=None):
1182 '''
1183 Using plain literal (shorthand) output::
1184 >>> from rdflib.namespace import XSD
1185
1186 >>> Literal(1)._literal_n3(use_plain=True)
1187 u'1'
1188
1189 >>> Literal(1.0)._literal_n3(use_plain=True)
1190 u'1e+00'
1191
1192 >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True)
1193 u'1.0'
1194
1195 >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True)
1196 u'"1.0"^^<http://www.w3.org/2001/XMLSchema#float>'
1197
1198 >>> Literal("foo", datatype=XSD.string)._literal_n3(
1199 ... use_plain=True)
1200 u'"foo"^^<http://www.w3.org/2001/XMLSchema#string>'
1201
1202 >>> Literal(True)._literal_n3(use_plain=True)
1203 u'true'
1204
1205 >>> Literal(False)._literal_n3(use_plain=True)
1206 u'false'
1207
1208 >>> Literal(1.91)._literal_n3(use_plain=True)
1209 u'1.91e+00'
1210
1211 Only limited precision available for floats:
1212 >>> Literal(0.123456789)._literal_n3(use_plain=True)
1213 u'1.234568e-01'
1214
1215 >>> Literal('0.123456789',
1216 ... datatype=XSD.decimal)._literal_n3(use_plain=True)
1217 u'0.123456789'
1218
1219 Using callback for datatype QNames::
1220
1221 >>> Literal(1)._literal_n3(
1222 ... qname_callback=lambda uri: "xsd:integer")
1223 u'"1"^^xsd:integer'
1224
1225 '''
1226 if use_plain and self.datatype in _PLAIN_LITERAL_TYPES:
1227 if self.value is not None:
1228 # If self is inf or NaN, we need a datatype
1229 # (there is no plain representation)
1230 if self.datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
1231 try:
1232 v = float(self)
1233 if math.isinf(v) or math.isnan(v):
1234 return self._literal_n3(False, qname_callback)
1235 except ValueError:
1236 return self._literal_n3(False, qname_callback)
1237
1238 # this is a bit of a mess -
1239 # in py >=2.6 the string.format function makes this easier
1240 # we try to produce "pretty" output
1241 if self.datatype == _XSD_DOUBLE:
1242 return sub("\\.?0*e", "e", u'%e' % float(self))
1243 elif self.datatype == _XSD_DECIMAL:
1244 s = '%s' % self
1245 if '.' not in s:
1246 s += '.0'
1247 return s
1248
1249 elif self.datatype == _XSD_BOOLEAN:
1250 return (u'%s' % self).lower()
1251 else:
1252 return u'%s' % self
1253
1254 encoded = self._quote_encode()
1255
1256 datatype = self.datatype
1257 quoted_dt = None
1258 if datatype:
1259 if qname_callback:
1260 quoted_dt = qname_callback(datatype)
1261 if not quoted_dt:
1262 quoted_dt = "<%s>" % datatype
1263 if datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
1264 try:
1265 v = float(self)
1266 if math.isinf(v):
1267 # py string reps: float: 'inf', Decimal: 'Infinity"
1268 # both need to become "INF" in xsd datatypes
1269 encoded = encoded.replace('inf', 'INF').replace(
1270 'Infinity', 'INF')
1271 if math.isnan(v):
1272 encoded = encoded.replace('nan', 'NaN')
1273 except ValueError:
1274 # if we can't cast to float something is wrong, but we can
1275 # still serialize. Warn user about it
1276 warnings.warn("Serializing weird numerical %r" % self)
1277
1278 language = self.language
1279 if language:
1280 return '%s@%s' % (encoded, language)
1281 elif datatype:
1282 return '%s^^%s' % (encoded, quoted_dt)
1283 else:
1284 return '%s' % encoded
1285
1286 def _quote_encode(self):
1287 # This simpler encoding doesn't work; a newline gets encoded as "\\n",
1288 # which is ok in sourcecode, but we want "\n".
1289 # encoded = self.encode('unicode-escape').replace(
1290 # '\\', '\\\\').replace('"','\\"')
1291 # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"')
1292
1293 # NOTE: Could in theory chose quotes based on quotes appearing in the
1294 # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?).
1295
1296 if "\n" in self:
1297 # Triple quote this string.
1298 encoded = self.replace('\\', '\\\\')
1299 if '"""' in self:
1300 # is this ok?
1301 encoded = encoded.replace('"""', '\\"\\"\\"')
1302 if encoded[-1] == '"' and encoded[-2] != '\\':
1303 encoded = encoded[:-1] + '\\' + '"'
1304
1305 return '"""%s"""' % encoded.replace('\r', '\\r')
1306 else:
1307 return '"%s"' % self.replace(
1308 '\n', '\\n').replace(
1309 '\\', '\\\\').replace(
1310 '"', '\\"').replace(
1311 '\r', '\\r')
1312
if PY2:
    def __str__(self):
        # Only defined when PY2 is true: str() then yields the result of
        # encode() called with its default arguments.
        encoded = self.encode()
        return encoded
1316
def __repr__(self):
    """
    Return a constructor-like representation: the class name followed by
    the underlying string repr and, when set, ``lang=`` and ``datatype=``.
    """
    pieces = [super(Literal, self).__repr__()]
    if self.language is not None:
        pieces.append("lang=%s" % repr(self.language))
    if self.datatype is not None:
        pieces.append("datatype=%s" % repr(self.datatype))
    # Literal itself is shown with its full module path; subclasses use
    # their bare class name.
    own_class = self.__class__
    label = "rdflib.term.Literal" if own_class == Literal else own_class.__name__
    return """%s(%s)""" % (label, ", ".join(pieces))
1328
def toPython(self):
    """
    Return the python value held by this literal, or the literal itself
    when no value is available (``self.value`` is None).
    """
    if self.value is None:
        return self
    return self.value
1337
1338
def _parseXML(xmlstring):
    """
    Parse an XML fragment into a normalized minidom Document.

    The fragment is wrapped in a synthetic ``rdflibtoplevelelement``
    element before it is handed to the parser.
    """
    payload = xmlstring.encode('utf-8') if PY2 else xmlstring
    wrapped = "<rdflibtoplevelelement>%s</rdflibtoplevelelement>" % payload
    document = xml.dom.minidom.parseString(wrapped)
    document.normalize()
    return document
1346
1347
1348 def _parseHTML(htmltext):
1349 try:
1350 import html5lib
1351 parser = html5lib.HTMLParser(
1352 tree=html5lib.treebuilders.getTreeBuilder("dom"))
1353 retval = parser.parseFragment(htmltext)
1354 retval.normalize()
1355 return retval
1356 except ImportError:
1357 raise ImportError(
1358 "HTML5 parser not available. Try installing" +
1359 " html5lib <http://code.google.com/p/html5lib>")
1360
1361
def _writeXML(xmlnode):
    """
    Serialize a minidom node to UTF-8 encoded bytes.

    The XML declaration and the synthetic ``rdflibtoplevelelement``
    wrapper are stripped from the output when present.
    """
    if isinstance(xmlnode, xml.dom.minidom.DocumentFragment):
        # move the fragment's children under a fresh Document and
        # serialize that instead
        holder = xml.dom.minidom.Document()
        holder.childNodes += xmlnode.childNodes
        xmlnode = holder
    serialized = xmlnode.toxml('utf-8')
    # for clean round-tripping, remove headers -- I have great and
    # specific worries that this will blow up later, but this margin
    # is too narrow to contain them
    if serialized.startswith(b('<?xml version="1.0" encoding="utf-8"?>')):
        # 38 == length of the XML declaration tested for above
        serialized = serialized[38:]
    if serialized.startswith(b('<rdflibtoplevelelement>')):
        # drop the 23-char opening tag and the 24-char closing tag
        serialized = serialized[23:-24]
    if serialized == b('<rdflibtoplevelelement/>'):
        # an empty wrapper means there was no content at all
        serialized = b('')
    return serialized
1378
1379
def _unhexlify(value):
    """
    Decode hexadecimal text to binary data via ``binascii.unhexlify``.

    Under PY3 a ``str`` argument is encoded to bytes first, because in
    Python 3.2 unhexlify does not support str (only bytes).
    """
    needs_encoding = PY3 and isinstance(value, str)
    raw = value.encode() if needs_encoding else value
    return unhexlify(raw)
1385
1386 def _parseBoolean(value):
1387 true_accepted_values = ['1', 'true']
1388 false_accepted_values = ['0', 'false']
1389 new_value = value.lower()
1390 if new_value in true_accepted_values:
1391 return True
1392 if new_value not in false_accepted_values:
1393 warnings.warn('Parsing weird boolean, % r does not map to True or False' % value, category = DeprecationWarning)
1394 return False
1395
# The namespace prefixes are spelled out by hand: importing Namespace/XSD
# here is not possible because of circular dependencies.
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

# --- RDF datatypes
_RDF_XMLLITERAL = URIRef(_RDF_PFX + 'XMLLiteral')
_RDF_HTMLLITERAL = URIRef(_RDF_PFX + 'HTML')

# --- XSD string / numeric / boolean datatypes
_XSD_STRING = URIRef(_XSD_PFX + 'string')

_XSD_FLOAT = URIRef(_XSD_PFX + 'float')
_XSD_DOUBLE = URIRef(_XSD_PFX + 'double')
_XSD_DECIMAL = URIRef(_XSD_PFX + 'decimal')
_XSD_INTEGER = URIRef(_XSD_PFX + 'integer')
_XSD_BOOLEAN = URIRef(_XSD_PFX + 'boolean')

# --- XSD date / time / duration datatypes
_XSD_DATETIME = URIRef(_XSD_PFX + 'dateTime')
_XSD_DATE = URIRef(_XSD_PFX + 'date')
_XSD_TIME = URIRef(_XSD_PFX + 'time')
_XSD_DURATION = URIRef(_XSD_PFX + 'duration')
_XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration')
_XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration')

_OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational')
_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary')
# TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth

# datatypes whose literals are treated as numbers
_NUMERIC_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_DECIMAL,
    _XSD_DOUBLE,
    _XSD_FLOAT,
) + tuple(
    URIRef(_XSD_PFX + local_name)
    for local_name in (
        'byte',
        'int',
        'long',
        'negativeInteger',
        'nonNegativeInteger',
        'nonPositiveInteger',
        'positiveInteger',
        'short',
        'unsignedByte',
        'unsignedInt',
        'unsignedLong',
        'unsignedShort',
    )
)

# these have "native" syntax in N3/SPARQL
_PLAIN_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_BOOLEAN,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
    _OWL_RATIONAL,
)

# these have special INF and NaN XSD representations
_NUMERIC_INF_NAN_LITERAL_TYPES = (
    _XSD_FLOAT,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
)

# the following types need special treatment for reasonable sorting because
# certain instances can't be compared to each other. We treat this by
# partitioning and then sorting within those partitions.
_TOTAL_ORDER_CASTERS = {
    # datetime / time: partition into naive vs. aware first
    datetime: lambda value: (
        value.tzinfo is not None and value.tzinfo.utcoffset(value) is not None,
        value,
    ),
    time: lambda value: (
        value.tzinfo is not None and value.tzinfo.utcoffset(None) is not None,
        value,
    ),
    # XML documents are ordered by their serialized text
    xml.dom.minidom.Document: lambda value: value.toxml(),
}


_STRING_LITERAL_TYPES = (
    _XSD_STRING,
    _RDF_XMLLITERAL,
    _RDF_HTMLLITERAL,
) + tuple(
    URIRef(_XSD_PFX + local_name)
    for local_name in ('normalizedString', 'token')
)
1484
1485
1486 def _py2literal(obj, pType, castFunc, dType):
1487 if castFunc:
1488 return castFunc(obj), dType
1489 elif dType:
1490 return obj, dType
1491 else:
1492 return obj, None
1493
1494
def _castPythonToLiteral(obj, datatype):
    """
    Casts a tuple of a python type and a special datatype URI to a tuple of
    the lexical value and a datatype URI (or None).

    Datatype-specific rules are consulted first, then the generic
    per-type rules; in both lists the first matching rule wins.
    """
    # rules keyed on (python type, datatype)
    for rule_key, caster in _SpecificPythonToXSDRules:
        py_type, rule_datatype = rule_key
        if isinstance(obj, py_type) and rule_datatype == datatype:
            return _py2literal(obj, py_type, caster, rule_datatype)

    # rules keyed on the python type alone
    for py_type, rule_value in _GenericPythonToXSDRules:
        if isinstance(obj, py_type):
            caster, rule_datatype = rule_value
            return _py2literal(obj, py_type, caster, rule_datatype)

    return obj, None  # TODO: is this right for the fall through case?
1508
1509
1510 from decimal import Decimal
1511
# Mappings from Python types to XSD datatypes and back (borrowed from sparta)
# datetime instances are also instances of date... so we need to order these.

# SPARQL/Turtle/N3 has shortcuts for integer, double, decimal
# python has only float - to be in tune with sparql/n3/turtle
# we default to XSD.double for float literals

# python ints are promoted to longs when overflowing
# python longs have no limit
# both map to the abstract integer type,
# rather than some concrete bit-limited datatype

# Each entry is (python type, (lexicalizer or None, datatype)); the list is
# searched in order, so more specific types must come before their bases.
_GenericPythonToXSDRules = [
    (string_types, (None, None)),
    (float, (None, _XSD_DOUBLE)),
    (bool, (lambda flag: str(flag).lower(), _XSD_BOOLEAN)),
    (int, (None, _XSD_INTEGER)),
    (long_type, (None, _XSD_INTEGER)),
    (Decimal, (None, _XSD_DECIMAL)),
    (datetime, (lambda moment: moment.isoformat(), _XSD_DATETIME)),
    (date, (lambda day: day.isoformat(), _XSD_DATE)),
    (time, (lambda clock: clock.isoformat(), _XSD_TIME)),
    (Duration, (lambda span: duration_isoformat(span), _XSD_DURATION)),
    (timedelta, (lambda span: duration_isoformat(span), _XSD_DAYTIMEDURATION)),
    (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)),
    # this is a bit dirty - by accident the html5lib parser produces
    # DocumentFragments, and the xml parser Documents, letting this
    # decide what datatype to use makes roundtripping easier, but it a
    # bit random
    (xml.dom.minidom.DocumentFragment, (_writeXML, _RDF_HTMLLITERAL)),
    (Fraction, (None, _OWL_RATIONAL)),
]

# Each entry is ((python type, datatype), lexicalizer).
_SpecificPythonToXSDRules = [
    ((string_types, _XSD_HEXBINARY), hexlify),
]
if PY3:
    _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify))

# datatype -> function turning a lexical form into a python value;
# None means the lexical form is used as the value directly.
XSDToPython = {
    None: None,  # plain literals map directly to value space
    _XSD_TIME: parse_time,
    _XSD_DATE: parse_date,
    URIRef(_XSD_PFX + 'gYear'): parse_date,
    URIRef(_XSD_PFX + 'gYearMonth'): parse_date,
    _XSD_DATETIME: parse_datetime,
    _XSD_DURATION: parse_duration,
    _XSD_DAYTIMEDURATION: parse_duration,
    _XSD_YEARMONTHDURATION: parse_duration,
    _XSD_HEXBINARY: _unhexlify,
    _XSD_STRING: None,
    URIRef(_XSD_PFX + 'normalizedString'): None,
    URIRef(_XSD_PFX + 'token'): None,
    URIRef(_XSD_PFX + 'language'): None,
    _XSD_BOOLEAN: _parseBoolean,
    _XSD_DECIMAL: Decimal,
    _XSD_INTEGER: long_type,
    URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
    URIRef(_XSD_PFX + 'long'): long_type,
    URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
    URIRef(_XSD_PFX + 'negativeInteger'): int,
    URIRef(_XSD_PFX + 'int'): long_type,
    URIRef(_XSD_PFX + 'unsignedLong'): long_type,
    URIRef(_XSD_PFX + 'positiveInteger'): int,
    URIRef(_XSD_PFX + 'short'): int,
    URIRef(_XSD_PFX + 'unsignedInt'): long_type,
    URIRef(_XSD_PFX + 'byte'): int,
    URIRef(_XSD_PFX + 'unsignedShort'): int,
    URIRef(_XSD_PFX + 'unsignedByte'): int,
    _XSD_FLOAT: float,
    _XSD_DOUBLE: float,
    URIRef(_XSD_PFX + 'base64Binary'): lambda text: base64.b64decode(text),
    URIRef(_XSD_PFX + 'anyURI'): None,
    _RDF_XMLLITERAL: _parseXML,
    _RDF_HTMLLITERAL: _parseHTML,
}

# Working copy that is consulted (and extended) at runtime, so that
# XSDToPython itself keeps the built-in defaults.
_toPythonMapping = dict(XSDToPython)
1591
1592
def _castLexicalToPython(lexical, datatype):
    """
    Map a lexical form to the value-space for the given datatype

    :param lexical: the lexical form to convert
    :param datatype: the datatype used to look up the conversion function
                     in ``_toPythonMapping``
    :returns: a python object for the value or ``None`` (unknown datatype,
              or the lexical form is not valid for the datatype)
    """
    # False is the "datatype not registered" marker; a registered value of
    # None means "lexical form is the value".
    convFunc = _toPythonMapping.get(datatype, False)
    if convFunc:
        try:
            return convFunc(lexical)
        except Exception:
            # not a valid lexical representation for this dt
            # (Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit are no longer swallowed here)
            return None
    elif convFunc is None:
        # no conv func means 1-1 lexical<->value-space mapping
        try:
            return text_type(lexical)
        except UnicodeDecodeError:
            return text_type(lexical, 'utf-8')
    else:
        # no convFunc - unknown data-type
        return None
1614
1615
def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False):
    """
    register a new datatype<->pythontype binding

    :param constructor: an optional function for converting lexical forms
                        into a Python instances, if not given the pythontype
                        is used directly

    :param lexicalizer: an optional function for converting python objects to
                        lexical form, if not given object.__str__ is used

    :param datatype_specific: makes the lexicalizer function be accessible
                              from the pair (pythontype, datatype) if set to True
                              or from the pythontype otherwise.  False by default

    :raises Exception: if ``datatype_specific`` is set but ``datatype`` is None
    """
    if datatype_specific and datatype is None:
        raise Exception("No datatype given for a datatype-specific binding")

    already_bound = datatype in _toPythonMapping
    if already_bound:
        logger.warning("datatype '%s' was already bound. Rebinding." %
                       datatype)

    # lexical form -> python value
    _toPythonMapping[datatype] = pythontype if constructor is None else constructor

    # python value -> lexical form
    if datatype_specific:
        rule = ((pythontype, datatype), lexicalizer)
        _SpecificPythonToXSDRules.append(rule)
    else:
        rule = (pythontype, (lexicalizer, datatype))
        _GenericPythonToXSDRules.append(rule)
1645
1646
class Variable(Identifier):
    """
    A Variable - this is used for querying, or in Formula aware
    graphs, where Variables can stored in the graph
    """
    __slots__ = ()

    def __new__(cls, value):
        """
        Create a variable named ``value``; one leading '?' is dropped.

        :raises Exception: if the name is empty, either as given or once
                           the leading '?' has been removed
        """
        if len(value) > 0 and value[0] == '?':
            value = value[1:]
        # Checked after stripping, so that a bare "?" is rejected too
        # instead of producing a variable with an empty name.
        if len(value) == 0:
            raise Exception(
                "Attempted to create variable with empty string as name!")
        return text_type.__new__(cls, value)

    def __repr__(self):
        """Return ``ClassName(<string repr>)``; Variable itself is shown
        with its full module path."""
        if self.__class__ is Variable:
            clsName = "rdflib.term.Variable"
        else:
            clsName = self.__class__.__name__

        return """%s(%s)""" % (clsName, super(Variable, self).__repr__())

    def toPython(self):
        """Return the variable name prefixed with '?'."""
        return "?%s" % self

    def n3(self, namespace_manager=None):
        """Return the variable name prefixed with '?'; the namespace
        manager is not used."""
        return "?%s" % self

    def __reduce__(self):
        """Pickle support: rebuild from the plain name."""
        return (Variable, (text_type(self),))
1678
1679
class Statement(Node, tuple):
    """
    A ``(triple, context)`` pair stored as a tuple.

    Deprecated: creating an instance emits a DeprecationWarning.
    """

    def __new__(cls, triple, context):
        # unpacking first means a malformed triple fails before the warning
        subj, pred, obj = triple
        warnings.warn(
            "Class Statement is deprecated, and will be removed in " +
            "the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        payload = ((subj, pred, obj), context)
        return tuple.__new__(cls, payload)

    def __reduce__(self):
        # pickle support: rebuild from the stored triple and context
        triple, context = self[0], self[1]
        return (Statement, (triple, context))

    def toPython(self):
        triple, context = self[0], self[1]
        return (triple, context)
1695
1696
# Relative sort order of the node classes.
# See http://www.w3.org/TR/sparql11-query/#modOrderBy
# The gaps between the numbers leave "space" for more subclasses of Node
# defined elsewhere; a defaultdict is used so that lookups for such new
# subclasses fail gracefully (they get 0) instead of raising.
_ORDERING = defaultdict(int, {
    BNode: 10,
    Variable: 20,
    URIRef: 30,
    Literal: 40,
})
1708
1709
1710 def _isEqualXMLNode(node, other):
1711 from xml.dom.minidom import Node
1712
1713 def recurse():
1714 # Recursion through the children
1715 # In Python2, the semantics of 'map' is such that the check on
1716 # length would be unnecessary. In Python 3,
1717 # the semantics of map has changed (why, oh why???) and the check
1718 # for the length becomes necessary...
1719 if len(node.childNodes) != len(other.childNodes):
1720 return False
1721 for (nc, oc) in map(
1722 lambda x, y: (x, y), node.childNodes, other.childNodes):
1723 if not _isEqualXMLNode(nc, oc):
1724 return False
1725 # if we got here then everything is fine:
1726 return True
1727
1728 if node is None or other is None:
1729 return False
1730
1731 if node.nodeType != other.nodeType:
1732 return False
1733
1734 if node.nodeType in [Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]:
1735 return recurse()
1736
1737 elif node.nodeType == Node.ELEMENT_NODE:
1738 # Get the basics right
1739 if not (node.tagName == other.tagName and
1740 node.namespaceURI == other.namespaceURI):
1741 return False
1742
1743 # Handle the (namespaced) attributes; the namespace setting key
1744 # should be ignored, though
1745 # Note that the minidom orders the keys already, so we do not have
1746 # to worry about that, which is a bonus...
1747 n_keys = [
1748 k for k in node.attributes.keysNS()
1749 if k[0] != 'http://www.w3.org/2000/xmlns/']
1750 o_keys = [
1751 k for k in other.attributes.keysNS()
1752 if k[0] != 'http://www.w3.org/2000/xmlns/']
1753 if len(n_keys) != len(o_keys):
1754 return False
1755 for k in n_keys:
1756 if not (k in o_keys
1757 and node.getAttributeNS(k[0], k[1]) ==
1758 other.getAttributeNS(k[0], k[1])):
1759 return False
1760
1761 # if we got here, the attributes are all right, we can go down
1762 # the tree recursively
1763 return recurse()
1764
1765 elif node.nodeType in [
1766 Node.TEXT_NODE, Node.COMMENT_NODE, Node.CDATA_SECTION_NODE,
1767 Node.NOTATION_NODE]:
1768 return node.data == other.data
1769
1770 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1771 return node.data == other.data and node.target == other.target
1772
1773 elif node.nodeType == Node.ENTITY_NODE:
1774 return node.nodeValue == other.nodeValue
1775
1776 elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1777 return node.publicId == other.publicId \
1778 and node.systemId == other.system.Id
1779
1780 else:
1781 # should not happen, in fact
1782 raise Exception(
1783 'I dont know how to compare XML Node type: %s' % node.nodeType)
1784
1785
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()