comparison env/lib/python3.9/site-packages/dateutil/parser/_parser.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """
3 This module offers a generic date/time string parser which is able to parse
4 most known formats to represent a date and/or time.
5
6 This module attempts to be forgiving with regards to unlikely input formats,
7 returning a datetime object even for dates which are ambiguous. If an element
8 of a date/time stamp is omitted, the following rules are applied:
9
10 - If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
12 specified.
13 - If a time zone is omitted, a timezone-naive datetime is returned.
14
15 If any other elements are missing, they are taken from the
16 :class:`datetime.datetime` object passed to the parameter ``default``. If this
17 results in a day number exceeding the valid number of days per month, the
18 value falls back to the end of the month.
19
20 Additional resources about date/time string formats can be found below:
21
22 - `A summary of the international standard date and time notation
23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
24 - `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
25 - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
26 - `CPAN ParseDate module
27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
28 - `Java SimpleDateFormat Class
29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
30 """
31 from __future__ import unicode_literals
32
33 import datetime
34 import re
35 import string
36 import time
37 import warnings
38
39 from calendar import monthrange
40 from io import StringIO
41
42 import six
43 from six import integer_types, text_type
44
45 from decimal import Decimal
46
47 from warnings import warn
48
49 from .. import relativedelta
50 from .. import tz
51
52 __all__ = ["parse", "parserinfo", "ParserError"]
53
54
55 # TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
56 # making public and/or figuring out if there is something we can
57 # take off their plate.
58 class _timelex(object):
59 # Fractional seconds are sometimes split by a comma
60 _split_decimal = re.compile("([.,])")
61
62 def __init__(self, instream):
63 if six.PY2:
64 # In Python 2, we can't duck type properly because unicode has
65 # a 'decode' function, and we'd be double-decoding
66 if isinstance(instream, (bytes, bytearray)):
67 instream = instream.decode()
68 else:
69 if getattr(instream, 'decode', None) is not None:
70 instream = instream.decode()
71
72 if isinstance(instream, text_type):
73 instream = StringIO(instream)
74 elif getattr(instream, 'read', None) is None:
75 raise TypeError('Parser must be a string or character stream, not '
76 '{itype}'.format(itype=instream.__class__.__name__))
77
78 self.instream = instream
79 self.charstack = []
80 self.tokenstack = []
81 self.eof = False
82
83 def get_token(self):
84 """
85 This function breaks the time string into lexical units (tokens), which
86 can be parsed by the parser. Lexical units are demarcated by changes in
87 the character set, so any continuous string of letters is considered
88 one unit, any continuous string of numbers is considered one unit.
89
90 The main complication arises from the fact that dots ('.') can be used
91 both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
92 "4:30:21.447"). As such, it is necessary to read the full context of
93 any dot-separated strings before breaking it into tokens; as such, this
94 function maintains a "token stack", for when the ambiguous context
95 demands that multiple tokens be parsed at once.
96 """
97 if self.tokenstack:
98 return self.tokenstack.pop(0)
99
100 seenletters = False
101 token = None
102 state = None
103
104 while not self.eof:
105 # We only realize that we've reached the end of a token when we
106 # find a character that's not part of the current token - since
107 # that character may be part of the next token, it's stored in the
108 # charstack.
109 if self.charstack:
110 nextchar = self.charstack.pop(0)
111 else:
112 nextchar = self.instream.read(1)
113 while nextchar == '\x00':
114 nextchar = self.instream.read(1)
115
116 if not nextchar:
117 self.eof = True
118 break
119 elif not state:
120 # First character of the token - determines if we're starting
121 # to parse a word, a number or something else.
122 token = nextchar
123 if self.isword(nextchar):
124 state = 'a'
125 elif self.isnum(nextchar):
126 state = '0'
127 elif self.isspace(nextchar):
128 token = ' '
129 break # emit token
130 else:
131 break # emit token
132 elif state == 'a':
133 # If we've already started reading a word, we keep reading
134 # letters until we find something that's not part of a word.
135 seenletters = True
136 if self.isword(nextchar):
137 token += nextchar
138 elif nextchar == '.':
139 token += nextchar
140 state = 'a.'
141 else:
142 self.charstack.append(nextchar)
143 break # emit token
144 elif state == '0':
145 # If we've already started reading a number, we keep reading
146 # numbers until we find something that doesn't fit.
147 if self.isnum(nextchar):
148 token += nextchar
149 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
150 token += nextchar
151 state = '0.'
152 else:
153 self.charstack.append(nextchar)
154 break # emit token
155 elif state == 'a.':
156 # If we've seen some letters and a dot separator, continue
157 # parsing, and the tokens will be broken up later.
158 seenletters = True
159 if nextchar == '.' or self.isword(nextchar):
160 token += nextchar
161 elif self.isnum(nextchar) and token[-1] == '.':
162 token += nextchar
163 state = '0.'
164 else:
165 self.charstack.append(nextchar)
166 break # emit token
167 elif state == '0.':
168 # If we've seen at least one dot separator, keep going, we'll
169 # break up the tokens later.
170 if nextchar == '.' or self.isnum(nextchar):
171 token += nextchar
172 elif self.isword(nextchar) and token[-1] == '.':
173 token += nextchar
174 state = 'a.'
175 else:
176 self.charstack.append(nextchar)
177 break # emit token
178
179 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
180 token[-1] in '.,')):
181 l = self._split_decimal.split(token)
182 token = l[0]
183 for tok in l[1:]:
184 if tok:
185 self.tokenstack.append(tok)
186
187 if state == '0.' and token.count('.') == 0:
188 token = token.replace(',', '.')
189
190 return token
191
192 def __iter__(self):
193 return self
194
195 def __next__(self):
196 token = self.get_token()
197 if token is None:
198 raise StopIteration
199
200 return token
201
202 def next(self):
203 return self.__next__() # Python 2.x support
204
205 @classmethod
206 def split(cls, s):
207 return list(cls(s))
208
209 @classmethod
210 def isword(cls, nextchar):
211 """ Whether or not the next character is part of a word """
212 return nextchar.isalpha()
213
214 @classmethod
215 def isnum(cls, nextchar):
216 """ Whether the next character is part of a number """
217 return nextchar.isdigit()
218
219 @classmethod
220 def isspace(cls, nextchar):
221 """ Whether the next character is whitespace """
222 return nextchar.isspace()
223
224
225 class _resultbase(object):
226
227 def __init__(self):
228 for attr in self.__slots__:
229 setattr(self, attr, None)
230
231 def _repr(self, classname):
232 l = []
233 for attr in self.__slots__:
234 value = getattr(self, attr)
235 if value is not None:
236 l.append("%s=%s" % (attr, repr(value)))
237 return "%s(%s)" % (classname, ", ".join(l))
238
239 def __len__(self):
240 return (sum(getattr(self, attr) is not None
241 for attr in self.__slots__))
242
243 def __repr__(self):
244 return self._repr(self.__class__.__name__)
245
246
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference year/century used by convertyear() for 2-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased name: index}`` mapping from ``lst``.

        Each entry of ``lst`` is either a single string or a tuple of
        alternative spellings; every spelling maps to the entry's position.
        """
        dct = {}
        for i, entry in enumerate(lst):
            names = entry if isinstance(entry, tuple) else (entry,)
            for name in names:
                dct[name.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` is a token that may be skipped over """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Index of ``name`` in ``WEEKDAYS`` (Monday is 0), or ``None`` """
        try:
            return self._weekdays[name.lower()]
        except KeyError:
            return None

    def month(self, name):
        """ Month number (1-12) for ``name``, or ``None`` if unknown """
        try:
            return self._months[name.lower()] + 1
        except KeyError:
            return None

    def hms(self, name):
        """ Index of ``name`` in ``HMS`` (0=hour, 1=minute, 2=second) or ``None`` """
        try:
            return self._hms[name.lower()]
        except KeyError:
            return None

    def ampm(self, name):
        """ Index of ``name`` in ``AMPM`` (0 for AM, 1 for PM), or ``None`` """
        try:
            return self._ampm[name.lower()]
        except KeyError:
            return None

    def pertain(self, name):
        """ Whether ``name`` is one of the ``PERTAIN`` words (e.g. "of") """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` is one of the ``UTCZONE`` names (any case) """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset, in seconds, known for time zone ``name``.

        :return:
            ``0`` for any ``UTCZONE`` name (compared case-insensitively),
            otherwise ``TZOFFSET.get(name)``, which is ``None`` for an
            unknown name.
        """
        # The keys of self._utczone are lower-cased by _convert(), so the
        # name must be lower-cased too; comparing the raw name would miss
        # "UTC", "GMT" and "Z". Objects without ``lower`` (e.g. None) simply
        # fall through to the TZOFFSET lookup.
        lower = getattr(name, 'lower', None)
        if lower is not None and lower() in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise the year and time zone fields of ``res`` in place.

        Always returns ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC name always wins over whatever offset was recorded.
            res.tzoffset = 0
        return True
398
399
400 class _ymd(list):
401 def __init__(self, *args, **kwargs):
402 super(self.__class__, self).__init__(*args, **kwargs)
403 self.century_specified = False
404 self.dstridx = None
405 self.mstridx = None
406 self.ystridx = None
407
408 @property
409 def has_year(self):
410 return self.ystridx is not None
411
412 @property
413 def has_month(self):
414 return self.mstridx is not None
415
416 @property
417 def has_day(self):
418 return self.dstridx is not None
419
420 def could_be_day(self, value):
421 if self.has_day:
422 return False
423 elif not self.has_month:
424 return 1 <= value <= 31
425 elif not self.has_year:
426 # Be permissive, assume leap year
427 month = self[self.mstridx]
428 return 1 <= value <= monthrange(2000, month)[1]
429 else:
430 month = self[self.mstridx]
431 year = self[self.ystridx]
432 return 1 <= value <= monthrange(year, month)[1]
433
434 def append(self, val, label=None):
435 if hasattr(val, '__len__'):
436 if val.isdigit() and len(val) > 2:
437 self.century_specified = True
438 if label not in [None, 'Y']: # pragma: no cover
439 raise ValueError(label)
440 label = 'Y'
441 elif val > 100:
442 self.century_specified = True
443 if label not in [None, 'Y']: # pragma: no cover
444 raise ValueError(label)
445 label = 'Y'
446
447 super(self.__class__, self).append(int(val))
448
449 if label == 'M':
450 if self.has_month:
451 raise ValueError('Month is already set')
452 self.mstridx = len(self) - 1
453 elif label == 'D':
454 if self.has_day:
455 raise ValueError('Day is already set')
456 self.dstridx = len(self) - 1
457 elif label == 'Y':
458 if self.has_year:
459 raise ValueError('Year is already set')
460 self.ystridx = len(self) - 1
461
462 def _resolve_from_stridxs(self, strids):
463 """
464 Try to resolve the identities of year/month/day elements using
465 ystridx, mstridx, and dstridx, if enough of these are specified.
466 """
467 if len(self) == 3 and len(strids) == 2:
468 # we can back out the remaining stridx value
469 missing = [x for x in range(3) if x not in strids.values()]
470 key = [x for x in ['y', 'm', 'd'] if x not in strids]
471 assert len(missing) == len(key) == 1
472 key = key[0]
473 val = missing[0]
474 strids[key] = val
475
476 assert len(self) == len(strids) # otherwise this should not be called
477 out = {key: self[strids[key]] for key in strids}
478 return (out.get('y'), out.get('m'), out.get('d'))
479
480 def resolve_ymd(self, yearfirst, dayfirst):
481 len_ymd = len(self)
482 year, month, day = (None, None, None)
483
484 strids = (('y', self.ystridx),
485 ('m', self.mstridx),
486 ('d', self.dstridx))
487
488 strids = {key: val for key, val in strids if val is not None}
489 if (len(self) == len(strids) > 0 or
490 (len(self) == 3 and len(strids) == 2)):
491 return self._resolve_from_stridxs(strids)
492
493 mstridx = self.mstridx
494
495 if len_ymd > 3:
496 raise ValueError("More than three YMD values")
497 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
498 # One member, or two members with a month string
499 if mstridx is not None:
500 month = self[mstridx]
501 # since mstridx is 0 or 1, self[mstridx-1] always
502 # looks up the other element
503 other = self[mstridx - 1]
504 else:
505 other = self[0]
506
507 if len_ymd > 1 or mstridx is None:
508 if other > 31:
509 year = other
510 else:
511 day = other
512
513 elif len_ymd == 2:
514 # Two members with numbers
515 if self[0] > 31:
516 # 99-01
517 year, month = self
518 elif self[1] > 31:
519 # 01-99
520 month, year = self
521 elif dayfirst and self[1] <= 12:
522 # 13-01
523 day, month = self
524 else:
525 # 01-13
526 month, day = self
527
528 elif len_ymd == 3:
529 # Three members
530 if mstridx == 0:
531 if self[1] > 31:
532 # Apr-2003-25
533 month, year, day = self
534 else:
535 month, day, year = self
536 elif mstridx == 1:
537 if self[0] > 31 or (yearfirst and self[2] <= 31):
538 # 99-Jan-01
539 year, month, day = self
540 else:
541 # 01-Jan-01
542 # Give precedence to day-first, since
543 # two-digit years is usually hand-written.
544 day, month, year = self
545
546 elif mstridx == 2:
547 # WTF!?
548 if self[1] > 31:
549 # 01-99-Jan
550 day, year, month = self
551 else:
552 # 99-01-Jan
553 year, day, month = self
554
555 else:
556 if (self[0] > 31 or
557 self.ystridx == 0 or
558 (yearfirst and self[1] <= 12 and self[2] <= 31)):
559 # 99-01-01
560 if dayfirst and self[2] <= 12:
561 year, day, month = self
562 else:
563 year, month, day = self
564 elif self[0] > 12 or (dayfirst and self[1] <= 12):
565 # 13-01-01
566 day, month, year = self
567 else:
568 # 01-13-01
569 month, day, year = self
570
571 return year, month, day
572
573
574 class parser(object):
def __init__(self, info=None):
    # Fall back to a default-configured parserinfo when no (or a falsy)
    # info object is supplied.
    if not info:
        info = parserinfo()
    self.info = info
577
def parse(self, timestr, default=None,
          ignoretz=False, tzinfos=None, **kwargs):
    """
    Parse the date/time string into a :class:`datetime.datetime` object.

    :param timestr:
        Any date/time string using the supported formats.

    :param default:
        A datetime object supplying the elements that ``timestr`` does not
        specify. When ``None``, today's date at midnight (local time, as
        given by ``datetime.datetime.now()``) is used.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime.datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param \\*\\*kwargs:
        Keyword arguments as passed to ``_parse()``.

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises TypeError:
        Raised for non-string or character stream input.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """

    if default is None:
        midnight = dict(hour=0, minute=0, second=0, microsecond=0)
        default = datetime.datetime.now().replace(**midnight)

    res, skipped_tokens = self._parse(timestr, **kwargs)

    # _parse() signals failure with None; an empty result means that no
    # date/time field was recognised at all.
    if res is None:
        raise ParserError("Unknown string format: %s", timestr)

    if len(res) == 0:
        raise ParserError("String does not contain a date: %s", timestr)

    try:
        ret = self._build_naive(res, default)
    except ValueError as e:
        six.raise_from(ParserError(e.args[0] + ": %s", timestr), e)

    if not ignoretz:
        ret = self._build_tzaware(ret, res, tzinfos)

    wants_tokens = kwargs.get('fuzzy_with_tokens', False)
    if wants_tokens:
        return ret, skipped_tokens
    return ret
666
class _result(_resultbase):
    # Fields filled in while parsing; the _resultbase constructor sets each
    # of them to None.
    __slots__ = [
        "year", "month", "day", "weekday",
        "hour", "minute", "second", "microsecond",
        "tzname", "tzoffset", "ampm", "any_unused_tokens",
    ]
671
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
           fuzzy_with_tokens=False):
    """
    Private method which performs the heavy lifting of parsing, called from
    ``parse()``, which passes on its ``kwargs`` to this function.

    :param timestr:
        The string to parse.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        A ``(res, skipped_tokens)`` pair. ``res`` is a ``_result`` instance
        and ``skipped_tokens`` is a tuple when ``fuzzy_with_tokens`` is set,
        otherwise ``None``. ``(None, None)`` is returned when an
        ``IndexError`` or ``ValueError`` occurs while processing the tokens,
        or when ``info.validate()`` returns a false value.
    """
    if fuzzy_with_tokens:
        fuzzy = True

    info = self.info

    if dayfirst is None:
        dayfirst = info.dayfirst

    if yearfirst is None:
        yearfirst = info.yearfirst

    res = self._result()
    l = _timelex.split(timestr)         # Splits the timestr into tokens

    # Indexes of tokens that were skipped over rather than interpreted
    skipped_idxs = []

    # year/month/day list
    ymd = _ymd()

    len_l = len(l)
    i = 0
    # Look-ahead below indexes past the current token without always
    # checking bounds; the resulting IndexError (like any ValueError) is
    # turned into a (None, None) result by the handler at the bottom.
    try:
        while i < len_l:

            # Check if it's a number
            value_repr = l[i]
            try:
                value = float(value_repr)
            except ValueError:
                value = None

            if value is not None:
                # Numeric token
                # The helper returns the index of the last token it used.
                i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

            # Check weekday
            elif info.weekday(l[i]) is not None:
                value = info.weekday(l[i])
                res.weekday = value

            # Check month name
            elif info.month(l[i]) is not None:
                value = info.month(l[i])
                ymd.append(value, 'M')

                if i + 1 < len_l:
                    if l[i + 1] in ('-', '/'):
                        # Jan-01[-99]
                        sep = l[i + 1]
                        ymd.append(l[i + 2])

                        if i + 3 < len_l and l[i + 3] == sep:
                            # Jan-01-99
                            ymd.append(l[i + 4])
                            i += 2

                        i += 2

                    elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                          info.pertain(l[i + 2])):
                        # Jan of 01
                        # In this case, 01 is clearly year
                        if l[i + 4].isdigit():
                            # Convert it here to become unambiguous
                            value = int(l[i + 4])
                            year = str(info.convertyear(value))
                            ymd.append(year, 'Y')
                        else:
                            # Wrong guess
                            pass
                            # TODO: not hit in tests
                        i += 4

            # Check am/pm
            elif info.ampm(l[i]) is not None:
                value = info.ampm(l[i])
                val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                if val_is_ampm:
                    res.hour = self._adjust_ampm(res.hour, value)
                    res.ampm = value

                elif fuzzy:
                    skipped_idxs.append(i)

            # Check for a timezone name
            elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                res.tzname = l[i]
                res.tzoffset = info.tzoffset(res.tzname)

                # Check for something like GMT+3, or BRST+3. Notice
                # that it doesn't mean "I am 3 hours after GMT", but
                # "my time +3 is GMT". If found, we reverse the
                # logic so that timezone parsing code will get it
                # right.
                if i + 1 < len_l and l[i + 1] in ('+', '-'):
                    # Swap the sign token in place; the numbered-timezone
                    # branch handles it on a later iteration.
                    l[i + 1] = ('+', '-')[l[i + 1] == '+']
                    res.tzoffset = None
                    if info.utczone(res.tzname):
                        # With something like GMT+3, the timezone
                        # is *not* GMT.
                        res.tzname = None

            # Check for a numbered timezone
            elif res.hour is not None and l[i] in ('+', '-'):
                signal = (-1, 1)[l[i] == '+']
                len_li = len(l[i + 1])

                # TODO: check that l[i + 1] is integer?
                if len_li == 4:
                    # -0300
                    hour_offset = int(l[i + 1][:2])
                    min_offset = int(l[i + 1][2:])
                elif i + 2 < len_l and l[i + 2] == ':':
                    # -03:00
                    hour_offset = int(l[i + 1])
                    min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                    i += 2
                elif len_li <= 2:
                    # -[0]3
                    hour_offset = int(l[i + 1][:2])
                    min_offset = 0
                else:
                    raise ValueError(timestr)

                # Offset is stored in seconds
                res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                # Look for a timezone name between parenthesis
                if (i + 5 < len_l and
                        info.jump(l[i + 2]) and l[i + 3] == '(' and
                        l[i + 5] == ')' and
                        3 <= len(l[i + 4]) and
                        self._could_be_tzname(res.hour, res.tzname,
                                              None, l[i + 4])):
                    # -0300 (BRST)
                    res.tzname = l[i + 4]
                    i += 4

                i += 1

            # Check jumps
            elif not (info.jump(l[i]) or fuzzy):
                raise ValueError(timestr)

            else:
                skipped_idxs.append(i)
            i += 1

        # Process year/month/day
        year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

        res.century_specified = ymd.century_specified
        res.year = year
        res.month = month
        res.day = day

    except (IndexError, ValueError):
        return None, None

    if not info.validate(res):
        return None, None

    if fuzzy_with_tokens:
        skipped_tokens = self._recombine_skipped(l, skipped_idxs)
        return res, tuple(skipped_tokens)
    else:
        return res, None
880
def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
    """
    Interpret the numeric token at ``tokens[idx]``, together with any
    following tokens that belong to it, updating ``ymd`` and ``res`` in
    place.

    :param tokens:
        The full list of tokens being parsed.

    :param idx:
        Index of the numeric token to interpret.

    :param info:
        The object queried via ``hms()``, ``jump()``, ``month()`` and
        ``ampm()`` to classify neighbouring tokens.

    :param ymd:
        The year/month/day accumulator; date components are appended to it.

    :param res:
        The result object; time components are assigned to its attributes.

    :param fuzzy:
        When false, a number that fits none of the recognised patterns
        raises ``ValueError``; when true it is left uninterpreted.

    :return:
        ``idx``, advanced past any additional tokens consumed here.

    :raises ValueError:
        If the token cannot be converted by ``_to_decimal()``, or it does
        not fit any recognised pattern.
    """
    # Token is a number
    value_repr = tokens[idx]
    try:
        value = self._to_decimal(value_repr)
    except Exception as e:
        six.raise_from(ValueError('Unknown numeric token'), e)

    # Length of the token text (not of the token list)
    len_li = len(value_repr)

    len_l = len(tokens)

    if (len(ymd) == 3 and len_li in (2, 4) and
        res.hour is None and
        (idx + 1 >= len_l or
         (tokens[idx + 1] != ':' and
          info.hms(tokens[idx + 1]) is None))):
        # 19990101T23[59]
        s = tokens[idx]
        res.hour = int(s[:2])

        if len_li == 4:
            res.minute = int(s[2:])

    elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
        # YYMMDD or HHMMSS[.ss]
        s = tokens[idx]

        if not ymd and '.' not in tokens[idx]:
            ymd.append(s[:2])
            ymd.append(s[2:4])
            ymd.append(s[4:])
        else:
            # 19990101T235959[.59]

            # TODO: Check if res attributes already set.
            res.hour = int(s[:2])
            res.minute = int(s[2:4])
            res.second, res.microsecond = self._parsems(s[4:])

    elif len_li in (8, 12, 14):
        # YYYYMMDD
        s = tokens[idx]
        ymd.append(s[:4], 'Y')
        ymd.append(s[4:6])
        ymd.append(s[6:8])

        if len_li > 8:
            res.hour = int(s[8:10])
            res.minute = int(s[10:12])

            if len_li > 12:
                res.second = int(s[12:])

    elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
        hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
        (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
        if hms is not None:
            # TODO: checking that hour/minute/second are not
            # already set?
            self._assign_hms(res, value_repr, hms)

    elif idx + 2 < len_l and tokens[idx + 1] == ':':
        # HH:MM[:SS[.ss]]
        res.hour = int(value)
        value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
        (res.minute, res.second) = self._parse_min_sec(value)

        if idx + 4 < len_l and tokens[idx + 3] == ':':
            res.second, res.microsecond = self._parsems(tokens[idx + 4])

            idx += 2

        idx += 2

    elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
        # A separated date such as 01-01-01 or 01/Jan/01
        sep = tokens[idx + 1]
        ymd.append(value_repr)

        if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
            if tokens[idx + 2].isdigit():
                # 01-01[-01]
                ymd.append(tokens[idx + 2])
            else:
                # 01-Jan[-01]
                value = info.month(tokens[idx + 2])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    raise ValueError()

            if idx + 3 < len_l and tokens[idx + 3] == sep:
                # We have three members
                value = info.month(tokens[idx + 4])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    ymd.append(tokens[idx + 4])
                idx += 2

            idx += 1
        idx += 1

    elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
        if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
            # 12 am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
            idx += 1
        else:
            # Year, month or day
            ymd.append(value)
        idx += 1

    elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
        # 12am
        hour = int(value)
        res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
        idx += 1

    elif ymd.could_be_day(value):
        ymd.append(value)

    elif not fuzzy:
        raise ValueError()

    return idx
1011
1012 def _find_hms_idx(self, idx, tokens, info, allow_jump):
1013 len_l = len(tokens)
1014
1015 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
1016 # There is an "h", "m", or "s" label following this token. We take
1017 # assign the upcoming label to the current token.
1018 # e.g. the "12" in 12h"
1019 hms_idx = idx + 1
1020
1021 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
1022 info.hms(tokens[idx+2]) is not None):
1023 # There is a space and then an "h", "m", or "s" label.
1024 # e.g. the "12" in "12 h"
1025 hms_idx = idx + 2
1026
1027 elif idx > 0 and info.hms(tokens[idx-1]) is not None:
1028 # There is a "h", "m", or "s" preceding this token. Since neither
1029 # of the previous cases was hit, there is no label following this
1030 # token, so we use the previous label.
1031 # e.g. the "04" in "12h04"
1032 hms_idx = idx-1
1033
1034 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
1035 info.hms(tokens[idx-2]) is not None):
1036 # If we are looking at the final token, we allow for a
1037 # backward-looking check to skip over a space.
1038 # TODO: Are we sure this is the right condition here?
1039 hms_idx = idx - 2
1040
1041 else:
1042 hms_idx = None
1043
1044 return hms_idx
1045
1046 def _assign_hms(self, res, value_repr, hms):
1047 # See GH issue #427, fixing float rounding
1048 value = self._to_decimal(value_repr)
1049
1050 if hms == 0:
1051 # Hour
1052 res.hour = int(value)
1053 if value % 1:
1054 res.minute = int(60*(value % 1))
1055
1056 elif hms == 1:
1057 (res.minute, res.second) = self._parse_min_sec(value)
1058
1059 elif hms == 2:
1060 (res.second, res.microsecond) = self._parsems(value_repr)
1061
1062 def _could_be_tzname(self, hour, tzname, tzoffset, token):
1063 return (hour is not None and
1064 tzname is None and
1065 tzoffset is None and
1066 len(token) <= 5 and
1067 (all(x in string.ascii_uppercase for x in token)
1068 or token in self.info.UTCZONE))
1069
1070 def _ampm_valid(self, hour, ampm, fuzzy):
1071 """
1072 For fuzzy parsing, 'a' or 'am' (both valid English words)
1073 may erroneously trigger the AM/PM flag. Deal with that
1074 here.
1075 """
1076 val_is_ampm = True
1077
1078 # If there's already an AM/PM flag, this one isn't one.
1079 if fuzzy and ampm is not None:
1080 val_is_ampm = False
1081
1082 # If AM/PM is found and hour is not, raise a ValueError
1083 if hour is None:
1084 if fuzzy:
1085 val_is_ampm = False
1086 else:
1087 raise ValueError('No hour specified with AM or PM flag.')
1088 elif not 0 <= hour <= 12:
1089 # If AM/PM is found, it's a 12 hour clock, so raise
1090 # an error for invalid range
1091 if fuzzy:
1092 val_is_ampm = False
1093 else:
1094 raise ValueError('Invalid hour specified for 12-hour clock.')
1095
1096 return val_is_ampm
1097
1098 def _adjust_ampm(self, hour, ampm):
1099 if hour < 12 and ampm == 1:
1100 hour += 12
1101 elif hour == 12 and ampm == 0:
1102 hour = 0
1103 return hour
1104
1105 def _parse_min_sec(self, value):
1106 # TODO: Every usage of this function sets res.second to the return
1107 # value. Are there any cases where second will be returned as None and
1108 # we *don't* want to set res.second = None?
1109 minute = int(value)
1110 second = None
1111
1112 sec_remainder = value % 1
1113 if sec_remainder:
1114 second = int(60 * sec_remainder)
1115 return (minute, second)
1116
1117 def _parse_hms(self, idx, tokens, info, hms_idx):
1118 # TODO: Is this going to admit a lot of false-positives for when we
1119 # just happen to have digits and "h", "m" or "s" characters in non-date
1120 # text? I guess hex hashes won't have that problem, but there's plenty
1121 # of random junk out there.
1122 if hms_idx is None:
1123 hms = None
1124 new_idx = idx
1125 elif hms_idx > idx:
1126 hms = info.hms(tokens[hms_idx])
1127 new_idx = hms_idx
1128 else:
1129 # Looking backwards, increment one.
1130 hms = info.hms(tokens[hms_idx]) + 1
1131 new_idx = idx
1132
1133 return (new_idx, hms)
1134
1135 # ------------------------------------------------------------------
1136 # Handling for individual tokens. These are kept as methods instead
1137 # of functions for the sake of customizability via subclassing.
1138
1139 def _parsems(self, value):
1140 """Parse a I[.F] seconds value into (seconds, microseconds)."""
1141 if "." not in value:
1142 return int(value), 0
1143 else:
1144 i, f = value.split(".")
1145 return int(i), int(f.ljust(6, "0")[:6])
1146
1147 def _to_decimal(self, val):
1148 try:
1149 decimal_value = Decimal(val)
1150 # See GH 662, edge case, infinite value should not be converted
1151 # via `_to_decimal`
1152 if not decimal_value.is_finite():
1153 raise ValueError("Converted decimal value is infinite or NaN")
1154 except Exception as e:
1155 msg = "Could not convert %s to decimal" % val
1156 six.raise_from(ValueError(msg), e)
1157 else:
1158 return decimal_value
1159
1160 # ------------------------------------------------------------------
1161 # Post-Parsing construction of datetime output. These are kept as
1162 # methods instead of functions for the sake of customizability via
1163 # subclassing.
1164
1165 def _build_tzinfo(self, tzinfos, tzname, tzoffset):
1166 if callable(tzinfos):
1167 tzdata = tzinfos(tzname, tzoffset)
1168 else:
1169 tzdata = tzinfos.get(tzname)
1170 # handle case where tzinfo is paased an options that returns None
1171 # eg tzinfos = {'BRST' : None}
1172 if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
1173 tzinfo = tzdata
1174 elif isinstance(tzdata, text_type):
1175 tzinfo = tz.tzstr(tzdata)
1176 elif isinstance(tzdata, integer_types):
1177 tzinfo = tz.tzoffset(tzname, tzdata)
1178 else:
1179 raise TypeError("Offset must be tzinfo subclass, tz string, "
1180 "or int offset.")
1181 return tzinfo
1182
1183 def _build_tzaware(self, naive, res, tzinfos):
1184 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
1185 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
1186 aware = naive.replace(tzinfo=tzinfo)
1187 aware = self._assign_tzname(aware, res.tzname)
1188
1189 elif res.tzname and res.tzname in time.tzname:
1190 aware = naive.replace(tzinfo=tz.tzlocal())
1191
1192 # Handle ambiguous local datetime
1193 aware = self._assign_tzname(aware, res.tzname)
1194
1195 # This is mostly relevant for winter GMT zones parsed in the UK
1196 if (aware.tzname() != res.tzname and
1197 res.tzname in self.info.UTCZONE):
1198 aware = aware.replace(tzinfo=tz.UTC)
1199
1200 elif res.tzoffset == 0:
1201 aware = naive.replace(tzinfo=tz.UTC)
1202
1203 elif res.tzoffset:
1204 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
1205
1206 elif not res.tzname and not res.tzoffset:
1207 # i.e. no timezone information was found.
1208 aware = naive
1209
1210 elif res.tzname:
1211 # tz-like string was parsed but we don't know what to do
1212 # with it
1213 warnings.warn("tzname {tzname} identified but not understood. "
1214 "Pass `tzinfos` argument in order to correctly "
1215 "return a timezone-aware datetime. In a future "
1216 "version, this will raise an "
1217 "exception.".format(tzname=res.tzname),
1218 category=UnknownTimezoneWarning)
1219 aware = naive
1220
1221 return aware
1222
1223 def _build_naive(self, res, default):
1224 repl = {}
1225 for attr in ("year", "month", "day", "hour",
1226 "minute", "second", "microsecond"):
1227 value = getattr(res, attr)
1228 if value is not None:
1229 repl[attr] = value
1230
1231 if 'day' not in repl:
1232 # If the default day exceeds the last day of the month, fall back
1233 # to the end of the month.
1234 cyear = default.year if res.year is None else res.year
1235 cmonth = default.month if res.month is None else res.month
1236 cday = default.day if res.day is None else res.day
1237
1238 if cday > monthrange(cyear, cmonth)[1]:
1239 repl['day'] = monthrange(cyear, cmonth)[1]
1240
1241 naive = default.replace(**repl)
1242
1243 if res.weekday is not None and not res.day:
1244 naive = naive + relativedelta.relativedelta(weekday=res.weekday)
1245
1246 return naive
1247
1248 def _assign_tzname(self, dt, tzname):
1249 if dt.tzname() != tzname:
1250 new_dt = tz.enfold(dt, fold=1)
1251 if new_dt.tzname() == tzname:
1252 return new_dt
1253
1254 return dt
1255
1256 def _recombine_skipped(self, tokens, skipped_idxs):
1257 """
1258 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
1259 >>> skipped_idxs = [0, 1, 2, 5]
1260 >>> _recombine_skipped(tokens, skipped_idxs)
1261 ["foo bar", "baz"]
1262 """
1263 skipped_tokens = []
1264 for i, idx in enumerate(sorted(skipped_idxs)):
1265 if i > 0 and idx - 1 == skipped_idxs[i - 1]:
1266 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
1267 else:
1268 skipped_tokens.append(tokens[idx])
1269
1270 return skipped_tokens
1271
1272
# Module-level parser instance, built with default arguments; parse() uses it
# whenever no ``parserinfo`` is supplied.
DEFAULTPARSER = parser()
1274
1275
def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a date/time string in one of the supported formats.

    When ``parserinfo`` is given, a new :class:`parser` is created around
    it; otherwise the module-level default parser is used. All keyword
    arguments are passed on unchanged to the parser's ``parse`` method.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter accepts the following keyword arguments:

    :param default:
        The default datetime object. If this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace the
        corresponding elements of the default object.

    :param ignoretz:
        If set to ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. It can be a dictionary
        mapping time zone names to time zones, or a function taking two
        parameters (``tzname`` and ``tzoffset``) and returning a time zone.

        The time zones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, which accepts strings like "Today
        is January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        returns a tuple whose first element is the parsed
        :class:`datetime.datetime` and whose second element is a tuple
        containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, a tuple whose first
        element is a :class:`datetime.datetime` object and whose second
        element is a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A truthy parserinfo gets its own parser; otherwise reuse the shared one.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
1375
1376
class _tzparser(object):
    """
    Parser for time zone strings such as ``BRST+3BRDT`` or
    ``EST5EDT,M3.2.0/2,M11.1.0``.

    ``parse`` returns a ``_result`` describing the standard/DST
    abbreviations and offsets plus the DST start/end rules, or ``None``
    when the string cannot be interpreted.
    """

    class _result(_resultbase):
        """Container for the pieces extracted from a time zone string."""

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """One DST transition rule (used for both start and end)."""

            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a ``_result``.

        :param tzstr:
            The time zone string to interpret.
        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute tells
            whether tokens other than ``","`` and ``":"`` were left
            unconsumed, or ``None`` if the string is malformed.
        """
        res = self._result()
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr) if x]
        used_idxs = []
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # The first abbreviation is the standard one, the
                    # second the DST one; the offset that follows belongs
                    # to whichever was just read.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    used_idxs.extend(range(j))
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right. See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit checks instead of ``assert`` so that malformed
                # input is still rejected when Python runs with -O.
                if l[i] != ',':
                    raise ValueError("expected ',' before the DST rules")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if not (i == len_l or l[i] == ','):
                        raise ValueError("expected ',' or end after a rule")

                    i += 1

                if i < len_l:
                    raise ValueError("unexpected trailing tokens")

        except (IndexError, ValueError, AssertionError):
            # Any structural problem means the string is not a valid
            # time zone specification.
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",", ":"})
        return res
1586
1587
# Module-level _tzparser instance; _parsetz() delegates to it.
DEFAULTTZPARSER = _tzparser()
1589
1590
def _parsetz(tzstr):
    """
    Parse the time zone string ``tzstr`` with the module-level tz parser.

    Returns whatever ``DEFAULTTZPARSER.parse`` returns for ``tzstr``.
    """
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed
1593
1594
class ParserError(ValueError):
    """
    Error class for representing failure to parse a datetime string.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values, e.g.
    ``ParserError("Unknown string format: %s", timestr)``.
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # TypeError: args[0] is not a format string or the argument
            # count does not match. IndexError: there are no args at all.
            # ValueError: the message contains a stray "%" (for example
            # "100% wrong"). In every case fall back to the plain
            # ValueError text instead of failing while rendering the error.
            return super(ParserError, self).__str__()

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, str(self))
1605
1606
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when a parsed time zone name cannot be turned into a tzinfo."""
1609 # vim:ts=4:sw=4:et