sam_consensus_v3: env/lib/python3.9/site-packages/dateutil/parser/

comparison env/lib/python3.9/site-packages/dateutil/parser/_parser.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"

author	shellac
date	Mon, 22 Mar 2021 18:12:50 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:4f3585e2f14b
+# -*- coding: utf-8 -*-
+"""
+This module offers a generic date/time string parser which is able to parse
+most known formats to represent a date and/or time.
+This module attempts to be forgiving with regards to unlikely input formats,
+returning a datetime object even for dates which are ambiguous. If an element
+of a date/time stamp is omitted, the following rules are applied:
+- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
+on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
+specified.
+- If a time zone is omitted, a timezone-naive datetime is returned.
+If any other elements are missing, they are taken from the
+:class:`datetime.datetime` object passed to the parameter ``default``. If this
+results in a day number exceeding the valid number of days per month, the
+value falls back to the end of the month.
+Additional resources about date/time string formats can be found below:
+- `A summary of the international standard date and time notation
+<http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
+- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
+- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
+- `CPAN ParseDate module
+<http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
+- `Java SimpleDateFormat Class
+<https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
+"""
+from __future__ import unicode_literals
+import datetime
+import re
+import string
+import time
+import warnings
+from calendar import monthrange
+from io import StringIO
+import six
+from six import integer_types, text_type
+from decimal import Decimal
+from warnings import warn
+from .. import relativedelta
+from .. import tz
+__all__ = ["parse", "parserinfo", "ParserError"]
+# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
+# making public and/or figuring out if there is something we can
+# take off their plate.
+class _timelex(object):
+# Fractional seconds are sometimes split by a comma
+_split_decimal = re.compile("([.,])")
+def __init__(self, instream):
+if six.PY2:
+# In Python 2, we can't duck type properly because unicode has
+# a 'decode' function, and we'd be double-decoding
+if isinstance(instream, (bytes, bytearray)):
+instream = instream.decode()
+else:
+if getattr(instream, 'decode', None) is not None:
+instream = instream.decode()
+if isinstance(instream, text_type):
+instream = StringIO(instream)
+elif getattr(instream, 'read', None) is None:
+raise TypeError('Parser must be a string or character stream, not '
+'{itype}'.format(itype=instream.__class__.__name__))
+self.instream = instream
+self.charstack = []
+self.tokenstack = []
+self.eof = False
+def get_token(self):
+"""
+This function breaks the time string into lexical units (tokens), which
+can be parsed by the parser. Lexical units are demarcated by changes in
+the character set, so any continuous string of letters is considered
+one unit, any continuous string of numbers is considered one unit.
+The main complication arises from the fact that dots ('.') can be used
+both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
+"4:30:21.447"). As such, it is necessary to read the full context of
+any dot-separated strings before breaking it into tokens; as such, this
+function maintains a "token stack", for when the ambiguous context
+demands that multiple tokens be parsed at once.
+"""
+if self.tokenstack:
+return self.tokenstack.pop(0)
+seenletters = False
+token = None
+state = None
+while not self.eof:
+# We only realize that we've reached the end of a token when we
+# find a character that's not part of the current token - since
+# that character may be part of the next token, it's stored in the
+# charstack.
+if self.charstack:
+nextchar = self.charstack.pop(0)
+else:
+nextchar = self.instream.read(1)
+while nextchar == '\x00':
+nextchar = self.instream.read(1)
+if not nextchar:
+self.eof = True
+break
+elif not state:
+# First character of the token - determines if we're starting
+# to parse a word, a number or something else.
+token = nextchar
+if self.isword(nextchar):
+state = 'a'
+elif self.isnum(nextchar):
+state = '0'
+elif self.isspace(nextchar):
+token = ' '
+break  # emit token
+else:
+break  # emit token
+elif state == 'a':
+# If we've already started reading a word, we keep reading
+# letters until we find something that's not part of a word.
+seenletters = True
+if self.isword(nextchar):
+token += nextchar
+elif nextchar == '.':
+token += nextchar
+state = 'a.'
+else:
+self.charstack.append(nextchar)
+break  # emit token
+elif state == '0':
+# If we've already started reading a number, we keep reading
+# numbers until we find something that doesn't fit.
+if self.isnum(nextchar):
+token += nextchar
+elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
+token += nextchar
+state = '0.'
+else:
+self.charstack.append(nextchar)
+break  # emit token
+elif state == 'a.':
+# If we've seen some letters and a dot separator, continue
+# parsing, and the tokens will be broken up later.
+seenletters = True
+if nextchar == '.' or self.isword(nextchar):
+token += nextchar
+elif self.isnum(nextchar) and token[-1] == '.':
+token += nextchar
+state = '0.'
+else:
+self.charstack.append(nextchar)
+break  # emit token
+elif state == '0.':
+# If we've seen at least one dot separator, keep going, we'll
+# break up the tokens later.
+if nextchar == '.' or self.isnum(nextchar):
+token += nextchar
+elif self.isword(nextchar) and token[-1] == '.':
+token += nextchar
+state = 'a.'
+else:
+self.charstack.append(nextchar)
+break  # emit token
+if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
+token[-1] in '.,')):
+l = self._split_decimal.split(token)
+token = l[0]
+for tok in l[1:]:
+if tok:
+self.tokenstack.append(tok)
+if state == '0.' and token.count('.') == 0:
+token = token.replace(',', '.')
+return token
+def __iter__(self):
+return self
+def __next__(self):
+token = self.get_token()
+if token is None:
+raise StopIteration
+return token
+def next(self):
+return self.__next__()  # Python 2.x support
+@classmethod
+def split(cls, s):
+return list(cls(s))
+@classmethod
+def isword(cls, nextchar):
+""" Whether or not the next character is part of a word """
+return nextchar.isalpha()
+@classmethod
+def isnum(cls, nextchar):
+""" Whether the next character is part of a number """
+return nextchar.isdigit()
+@classmethod
+def isspace(cls, nextchar):
+""" Whether the next character is whitespace """
+return nextchar.isspace()
+class _resultbase(object):
+def __init__(self):
+for attr in self.__slots__:
+setattr(self, attr, None)
+def _repr(self, classname):
+l = []
+for attr in self.__slots__:
+value = getattr(self, attr)
+if value is not None:
+l.append("%s=%s" % (attr, repr(value)))
+return "%s(%s)" % (classname, ", ".join(l))
+def __len__(self):
+return (sum(getattr(self, attr) is not None
+for attr in self.__slots__))
+def __repr__(self):
+return self._repr(self.__class__.__name__)
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference point for expanding two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Map every (lower-cased) word in ``lst`` to the index of its entry.
        Entries may be a single string or a tuple of aliases.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """Whether ``name`` is a filler token that may be skipped."""
        return name.lower() in self._jump

    def weekday(self, name):
        """Index of the weekday in ``WEEKDAYS``, or ``None``."""
        return self._weekdays.get(name.lower())

    def month(self, name):
        """Month number (1-12) for a month name, or ``None``."""
        index = self._months.get(name.lower())
        return None if index is None else index + 1

    def hms(self, name):
        """0/1/2 for an hour/minute/second label, or ``None``."""
        return self._hms.get(name.lower())

    def ampm(self, name):
        """0 for an AM marker, 1 for a PM marker, or ``None``."""
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """Whether ``name`` is a "pertain" word (e.g. "of")."""
        return name.lower() in self._pertain

    def utczone(self, name):
        """Whether ``name`` names UTC (case-insensitive)."""
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Offset in seconds for the zone ``name``: 0 for any UTC alias,
        otherwise whatever ``TZOFFSET`` holds (``None`` if unknown).
        """
        # ``_utczone`` keys are lower-cased, so the lookup has to go through
        # ``utczone()``; testing the raw name never matched "UTC"/"GMT"/"Z".
        if name is not None and self.utczone(name):
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise a parse result in place (expand the year, canonicalise
        UTC zone information) and return ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True
+class _ymd(list):
+def __init__(self, *args, **kwargs):
+super(self.__class__, self).__init__(*args, **kwargs)
+self.century_specified = False
+self.dstridx = None
+self.mstridx = None
+self.ystridx = None
+@property
+def has_year(self):
+return self.ystridx is not None
+@property
+def has_month(self):
+return self.mstridx is not None
+@property
+def has_day(self):
+return self.dstridx is not None
+def could_be_day(self, value):
+if self.has_day:
+return False
+elif not self.has_month:
+return 1 <= value <= 31
+elif not self.has_year:
+# Be permissive, assume leap year
+month = self[self.mstridx]
+return 1 <= value <= monthrange(2000, month)[1]
+else:
+month = self[self.mstridx]
+year = self[self.ystridx]
+return 1 <= value <= monthrange(year, month)[1]
+def append(self, val, label=None):
+if hasattr(val, '__len__'):
+if val.isdigit() and len(val) > 2:
+self.century_specified = True
+if label not in [None, 'Y']:  # pragma: no cover
+raise ValueError(label)
+label = 'Y'
+elif val > 100:
+self.century_specified = True
+if label not in [None, 'Y']:  # pragma: no cover
+raise ValueError(label)
+label = 'Y'
+super(self.__class__, self).append(int(val))
+if label == 'M':
+if self.has_month:
+raise ValueError('Month is already set')
+self.mstridx = len(self) - 1
+elif label == 'D':
+if self.has_day:
+raise ValueError('Day is already set')
+self.dstridx = len(self) - 1
+elif label == 'Y':
+if self.has_year:
+raise ValueError('Year is already set')
+self.ystridx = len(self) - 1
+def _resolve_from_stridxs(self, strids):
+"""
+Try to resolve the identities of year/month/day elements using
+ystridx, mstridx, and dstridx, if enough of these are specified.
+"""
+if len(self) == 3 and len(strids) == 2:
+# we can back out the remaining stridx value
+missing = [x for x in range(3) if x not in strids.values()]
+key = [x for x in ['y', 'm', 'd'] if x not in strids]
+assert len(missing) == len(key) == 1
+key = key[0]
+val = missing[0]
+strids[key] = val
+assert len(self) == len(strids)  # otherwise this should not be called
+out = {key: self[strids[key]] for key in strids}
+return (out.get('y'), out.get('m'), out.get('d'))
+def resolve_ymd(self, yearfirst, dayfirst):
+len_ymd = len(self)
+year, month, day = (None, None, None)
+strids = (('y', self.ystridx),
+('m', self.mstridx),
+('d', self.dstridx))
+strids = {key: val for key, val in strids if val is not None}
+if (len(self) == len(strids) > 0 or
+(len(self) == 3 and len(strids) == 2)):
+return self._resolve_from_stridxs(strids)
+mstridx = self.mstridx
+if len_ymd > 3:
+raise ValueError("More than three YMD values")
+elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
+# One member, or two members with a month string
+if mstridx is not None:
+month = self[mstridx]
+# since mstridx is 0 or 1, self[mstridx-1] always
+# looks up the other element
+other = self[mstridx - 1]
+else:
+other = self[0]
+if len_ymd > 1 or mstridx is None:
+if other > 31:
+year = other
+else:
+day = other
+elif len_ymd == 2:
+# Two members with numbers
+if self[0] > 31:
+# 99-01
+year, month = self
+elif self[1] > 31:
+# 01-99
+month, year = self
+elif dayfirst and self[1] <= 12:
+# 13-01
+day, month = self
+else:
+# 01-13
+month, day = self
+elif len_ymd == 3:
+# Three members
+if mstridx == 0:
+if self[1] > 31:
+# Apr-2003-25
+month, year, day = self
+else:
+month, day, year = self
+elif mstridx == 1:
+if self[0] > 31 or (yearfirst and self[2] <= 31):
+# 99-Jan-01
+year, month, day = self
+else:
+# 01-Jan-01
+# Give precedence to day-first, since
+# two-digit years is usually hand-written.
+day, month, year = self
+elif mstridx == 2:
+# WTF!?
+if self[1] > 31:
+# 01-99-Jan
+day, year, month = self
+else:
+# 99-01-Jan
+year, day, month = self
+else:
+if (self[0] > 31 or
+self.ystridx == 0 or
+(yearfirst and self[1] <= 12 and self[2] <= 31)):
+# 99-01-01
+if dayfirst and self[2] <= 12:
+year, day, month = self
+else:
+year, month, day = self
+elif self[0] > 12 or (dayfirst and self[1] <= 12):
+# 13-01-01
+day, month, year = self
+else:
+# 01-13-01
+month, day, year = self
+return year, month, day
+class parser(object):
+def __init__(self, info=None):
+self.info = info or parserinfo()
def parse(self, timestr, default=None,
          ignoretz=False, tzinfos=None, **kwargs):
    """
    Parse the date/time string into a :class:`datetime.datetime` object.

    :param timestr:
        Any date/time string using the supported formats.

    :param default:
        A datetime whose fields fill in whatever ``timestr`` leaves
        unspecified.  When ``None``, today's date at midnight is used.

    :param ignoretz:
        If ``True``, time zone information found in the string is
        discarded and a naive :class:`datetime.datetime` is returned.

    :param tzinfos:
        Extra time zone names / aliases that may appear in the string:
        either a dictionary keyed by zone name, or a function taking
        ``tzname`` and ``tzoffset``.  Values may be an integer offset
        from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param \\*\\*kwargs:
        Keyword arguments as passed to ``_parse()``.

    :return:
        A :class:`datetime.datetime`, or, when ``fuzzy_with_tokens`` is
        ``True``, a tuple of that datetime and a tuple of the fuzzy
        tokens that were skipped.

    :raises ParserError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises TypeError:
        Raised for non-string or character stream input.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    if default is None:
        today = datetime.datetime.now()
        default = today.replace(hour=0, minute=0, second=0, microsecond=0)

    parsed, skipped_tokens = self._parse(timestr, **kwargs)

    if parsed is None:
        raise ParserError("Unknown string format: %s", timestr)

    if len(parsed) == 0:
        raise ParserError("String does not contain a date: %s", timestr)

    try:
        result = self._build_naive(parsed, default)
    except ValueError as e:
        # Re-raise as ParserError while keeping the original as the cause.
        six.raise_from(ParserError(e.args[0] + ": %s", timestr), e)

    if not ignoretz:
        result = self._build_tzaware(result, parsed, tzinfos)

    if kwargs.get('fuzzy_with_tokens', False):
        return result, skipped_tokens
    return result
class _result(_resultbase):
    # Fields filled in while parsing a string.
    __slots__ = [
        "year", "month", "day", "weekday",
        "hour", "minute", "second", "microsecond",
        "tzname", "tzoffset", "ampm", "any_unused_tokens",
    ]
+def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
+fuzzy_with_tokens=False):
+"""
+Private method which performs the heavy lifting of parsing, called from
+``parse()``, which passes on its ``kwargs`` to this function.
+:param timestr:
+The string to parse.
+:param dayfirst:
+Whether to interpret the first value in an ambiguous 3-integer date
+(e.g. 01/05/09) as the day (``True``) or month (``False``). If
+``yearfirst`` is set to ``True``, this distinguishes between YDM
+and YMD. If set to ``None``, this value is retrieved from the
+current :class:`parserinfo` object (which itself defaults to
+``False``).
+:param yearfirst:
+Whether to interpret the first value in an ambiguous 3-integer date
+(e.g. 01/05/09) as the year. If ``True``, the first number is taken
+to be the year, otherwise the last number is taken to be the year.
+If this is set to ``None``, the value is retrieved from the current
+:class:`parserinfo` object (which itself defaults to ``False``).
+:param fuzzy:
+Whether to allow fuzzy parsing, allowing for string like "Today is
+January 1, 2047 at 8:21:00AM".
+:param fuzzy_with_tokens:
+If ``True``, ``fuzzy`` is automatically set to True, and the parser
+will return a tuple where the first element is the parsed
+:class:`datetime.datetime` datetimestamp and the second element is
+a tuple containing the portions of the string which were ignored:
+.. doctest::
+>>> from dateutil.parser import parse
+>>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
+(datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
+"""
+if fuzzy_with_tokens:
+fuzzy = True
+info = self.info
+if dayfirst is None:
+dayfirst = info.dayfirst
+if yearfirst is None:
+yearfirst = info.yearfirst
+res = self._result()  # accumulates every field recognised below
+l = _timelex.split(timestr)         # Splits the timestr into tokens
+skipped_idxs = []  # indices of tokens that were ignored (jump words / fuzzy text)
+# year/month/day list
+ymd = _ymd()
+len_l = len(l)
+i = 0
+try:
+while i < len_l:
+# Check if it's a number
+value_repr = l[i]
+try:
+value = float(value_repr)  # used only to decide whether the token is numeric
+except ValueError:
+value = None
+if value is not None:
+# Numeric token
+i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
+# Check weekday
+elif info.weekday(l[i]) is not None:
+value = info.weekday(l[i])
+res.weekday = value
+# Check month name
+elif info.month(l[i]) is not None:
+value = info.month(l[i])
+ymd.append(value, 'M')
+if i + 1 < len_l:
+if l[i + 1] in ('-', '/'):
+# Jan-01[-99]
+sep = l[i + 1]
+ymd.append(l[i + 2])
+if i + 3 < len_l and l[i + 3] == sep:
+# Jan-01-99
+ymd.append(l[i + 4])
+i += 2
+i += 2
+elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
+info.pertain(l[i + 2])):
+# Jan of 01
+# In this case, 01 is clearly year
+if l[i + 4].isdigit():
+# Convert it here to become unambiguous
+value = int(l[i + 4])
+year = str(info.convertyear(value))
+ymd.append(year, 'Y')
+else:
+# Wrong guess
+pass
+# TODO: not hit in tests
+i += 4
+# Check am/pm
+elif info.ampm(l[i]) is not None:
+value = info.ampm(l[i])
+val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
+if val_is_ampm:
+res.hour = self._adjust_ampm(res.hour, value)
+res.ampm = value
+elif fuzzy:
+skipped_idxs.append(i)
+# Check for a timezone name
+elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
+res.tzname = l[i]
+res.tzoffset = info.tzoffset(res.tzname)
+# Check for something like GMT+3, or BRST+3. Notice
+# that it doesn't mean "I am 3 hours after GMT", but
+# "my time +3 is GMT". If found, we reverse the
+# logic so that timezone parsing code will get it
+# right.
+if i + 1 < len_l and l[i + 1] in ('+', '-'):
+l[i + 1] = ('+', '-')[l[i + 1] == '+']  # flip the sign token in place
+res.tzoffset = None
+if info.utczone(res.tzname):
+# With something like GMT+3, the timezone
+# is *not* GMT.
+res.tzname = None
+# Check for a numbered timezone
+elif res.hour is not None and l[i] in ('+', '-'):
+signal = (-1, 1)[l[i] == '+']  # sign multiplier applied to the offset
+len_li = len(l[i + 1])
+# TODO: check that l[i + 1] is integer?
+if len_li == 4:
+# -0300
+hour_offset = int(l[i + 1][:2])
+min_offset = int(l[i + 1][2:])
+elif i + 2 < len_l and l[i + 2] == ':':
+# -03:00
+hour_offset = int(l[i + 1])
+min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
+i += 2
+elif len_li <= 2:
+# -[0]3
+hour_offset = int(l[i + 1][:2])
+min_offset = 0
+else:
+raise ValueError(timestr)
+res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)  # offset in seconds
+# Look for a timezone name between parentheses
+if (i + 5 < len_l and
+info.jump(l[i + 2]) and l[i + 3] == '(' and
+l[i + 5] == ')' and
+3 <= len(l[i + 4]) and
+self._could_be_tzname(res.hour, res.tzname,
+None, l[i + 4])):
+# -0300 (BRST)
+res.tzname = l[i + 4]
+i += 4
+i += 1
+# Check jumps
+elif not (info.jump(l[i]) or fuzzy):
+raise ValueError(timestr)
+else:
+skipped_idxs.append(i)
+i += 1
+# Process year/month/day
+year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
+res.century_specified = ymd.century_specified  # note: not listed in _result.__slots__
+res.year = year
+res.month = month
+res.day = day
+except (IndexError, ValueError):  # lookahead ran off the token list, or a token was rejected
+return None, None
+if not info.validate(res):
+return None, None
+if fuzzy_with_tokens:
+skipped_tokens = self._recombine_skipped(l, skipped_idxs)
+return res, tuple(skipped_tokens)
+else:
+return res, None
+def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
+# Interpret the numeric token at tokens[idx], updating ymd/res in place; returns the (possibly advanced) token index.
+value_repr = tokens[idx]
+try:
+value = self._to_decimal(value_repr)
+except Exception as e:
+six.raise_from(ValueError('Unknown numeric token'), e)
+len_li = len(value_repr)  # length of the token text, used to recognise packed formats
+len_l = len(tokens)
+if (len(ymd) == 3 and len_li in (2, 4) and
+res.hour is None and
+(idx + 1 >= len_l or
+(tokens[idx + 1] != ':' and
+info.hms(tokens[idx + 1]) is None))):
+# 19990101T23[59]
+s = tokens[idx]
+res.hour = int(s[:2])
+if len_li == 4:
+res.minute = int(s[2:])
+elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
+# YYMMDD or HHMMSS[.ss]
+s = tokens[idx]
+if not ymd and '.' not in tokens[idx]:
+ymd.append(s[:2])
+ymd.append(s[2:4])
+ymd.append(s[4:])
+else:
+# 19990101T235959[.59]
+# TODO: Check if res attributes already set.
+res.hour = int(s[:2])
+res.minute = int(s[2:4])
+res.second, res.microsecond = self._parsems(s[4:])
+elif len_li in (8, 12, 14):
+# YYYYMMDD[HHMM[SS]]
+s = tokens[idx]
+ymd.append(s[:4], 'Y')
+ymd.append(s[4:6])
+ymd.append(s[6:8])
+if len_li > 8:
+res.hour = int(s[8:10])
+res.minute = int(s[10:12])
+if len_li > 12:
+res.second = int(s[12:])
+elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
+# HH[ ]h or MM[ ]m or SS[.ss][ ]s
+hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
+(idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
+if hms is not None:
+# TODO: checking that hour/minute/second are not
+# already set?
+self._assign_hms(res, value_repr, hms)
+elif idx + 2 < len_l and tokens[idx + 1] == ':':
+# HH:MM[:SS[.ss]]
+res.hour = int(value)
+value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
+(res.minute, res.second) = self._parse_min_sec(value)
+if idx + 4 < len_l and tokens[idx + 3] == ':':
+res.second, res.microsecond = self._parsems(tokens[idx + 4])
+idx += 2
+idx += 2
+elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
+sep = tokens[idx + 1]  # a third member must use the same separator
+ymd.append(value_repr)
+if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
+if tokens[idx + 2].isdigit():
+# 01-01[-01]
+ymd.append(tokens[idx + 2])
+else:
+# 01-Jan[-01]
+value = info.month(tokens[idx + 2])
+if value is not None:
+ymd.append(value, 'M')
+else:
+raise ValueError()
+if idx + 3 < len_l and tokens[idx + 3] == sep:
+# We have three members
+value = info.month(tokens[idx + 4])
+if value is not None:
+ymd.append(value, 'M')
+else:
+ymd.append(tokens[idx + 4])
+idx += 2
+idx += 1
+idx += 1
+elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
+if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
+# 12 am
+hour = int(value)
+res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
+idx += 1
+else:
+# Year, month or day
+ymd.append(value)
+idx += 1
+elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
+# 12am
+hour = int(value)
+res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
+idx += 1
+elif ymd.could_be_day(value):
+ymd.append(value)
+elif not fuzzy:
+raise ValueError()
+return idx
+def _find_hms_idx(self, idx, tokens, info, allow_jump):
+len_l = len(tokens)
+if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
+# There is an "h", "m", or "s" label following this token.  We take
+# assign the upcoming label to the current token.
+# e.g. the "12" in 12h"
+hms_idx = idx + 1
+elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
+info.hms(tokens[idx+2]) is not None):
+# There is a space and then an "h", "m", or "s" label.
+# e.g. the "12" in "12 h"
+hms_idx = idx + 2
+elif idx > 0 and info.hms(tokens[idx-1]) is not None:
+# There is a "h", "m", or "s" preceding this token.  Since neither
+# of the previous cases was hit, there is no label following this
+# token, so we use the previous label.
+# e.g. the "04" in "12h04"
+hms_idx = idx-1
+elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
+info.hms(tokens[idx-2]) is not None):
+# If we are looking at the final token, we allow for a
+# backward-looking check to skip over a space.
+# TODO: Are we sure this is the right condition here?
+hms_idx = idx - 2
+else:
+hms_idx = None
+return hms_idx
+def _assign_hms(self, res, value_repr, hms):
+# See GH issue #427, fixing float rounding
+value = self._to_decimal(value_repr)
+if hms == 0:
+# Hour
+res.hour = int(value)
+if value % 1:
+res.minute = int(60*(value % 1))
+elif hms == 1:
+(res.minute, res.second) = self._parse_min_sec(value)
+elif hms == 2:
+(res.second, res.microsecond) = self._parsems(value_repr)
+def _could_be_tzname(self, hour, tzname, tzoffset, token):
+return (hour is not None and
+tzname is None and
+tzoffset is None and
+len(token) <= 5 and
+(all(x in string.ascii_uppercase for x in token)
+or token in self.info.UTCZONE))
+def _ampm_valid(self, hour, ampm, fuzzy):
+"""
+For fuzzy parsing, 'a' or 'am' (both valid English words)
+may erroneously trigger the AM/PM flag. Deal with that
+here.
+"""
+val_is_ampm = True
+# If there's already an AM/PM flag, this one isn't one.
+if fuzzy and ampm is not None:
+val_is_ampm = False
+# If AM/PM is found and hour is not, raise a ValueError
+if hour is None:
+if fuzzy:
+val_is_ampm = False
+else:
+raise ValueError('No hour specified with AM or PM flag.')
+elif not 0 <= hour <= 12:
+# If AM/PM is found, it's a 12 hour clock, so raise
+# an error for invalid range
+if fuzzy:
+val_is_ampm = False
+else:
+raise ValueError('Invalid hour specified for 12-hour clock.')
+return val_is_ampm
+def _adjust_ampm(self, hour, ampm):
+if hour < 12 and ampm == 1:
+hour += 12
+elif hour == 12 and ampm == 0:
+hour = 0
+return hour
+def _parse_min_sec(self, value):
+# TODO: Every usage of this function sets res.second to the return
+# value. Are there any cases where second will be returned as None and
+# we *don't* want to set res.second = None?
+minute = int(value)
+second = None
+sec_remainder = value % 1
+if sec_remainder:
+second = int(60 * sec_remainder)
+return (minute, second)
+def _parse_hms(self, idx, tokens, info, hms_idx):
+# TODO: Is this going to admit a lot of false-positives for when we
+# just happen to have digits and "h", "m" or "s" characters in non-date
+# text?  I guess hex hashes won't have that problem, but there's plenty
+# of random junk out there.
+if hms_idx is None:
+hms = None
+new_idx = idx
+elif hms_idx > idx:
+hms = info.hms(tokens[hms_idx])
+new_idx = hms_idx
+else:
+# Looking backwards, increment one.
+hms = info.hms(tokens[hms_idx]) + 1
+new_idx = idx
+return (new_idx, hms)
+# ------------------------------------------------------------------
+# Handling for individual tokens.  These are kept as methods instead
+#  of functions for the sake of customizability via subclassing.
+def _parsems(self, value):
+"""Parse a I[.F] seconds value into (seconds, microseconds)."""
+if "." not in value:
+return int(value), 0
+else:
+i, f = value.split(".")
+return int(i), int(f.ljust(6, "0")[:6])
+def _to_decimal(self, val):
+try:
+decimal_value = Decimal(val)
+# See GH 662, edge case, infinite value should not be converted
+#  via `_to_decimal`
+if not decimal_value.is_finite():
+raise ValueError("Converted decimal value is infinite or NaN")
+except Exception as e:
+msg = "Could not convert %s to decimal" % val
+six.raise_from(ValueError(msg), e)
+else:
+return decimal_value
+# ------------------------------------------------------------------
+# Post-Parsing construction of datetime output.  These are kept as
+#  methods instead of functions for the sake of customizability via
+#  subclassing.
+def _build_tzinfo(self, tzinfos, tzname, tzoffset):
+if callable(tzinfos):
+tzdata = tzinfos(tzname, tzoffset)
+else:
+tzdata = tzinfos.get(tzname)
+# handle case where tzinfo is paased an options that returns None
+# eg tzinfos = {'BRST' : None}
+if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
+tzinfo = tzdata
+elif isinstance(tzdata, text_type):
+tzinfo = tz.tzstr(tzdata)
+elif isinstance(tzdata, integer_types):
+tzinfo = tz.tzoffset(tzname, tzdata)
+else:
+raise TypeError("Offset must be tzinfo subclass, tz string, "
+"or int offset.")
+return tzinfo
+def _build_tzaware(self, naive, res, tzinfos):
+if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
+tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
+aware = naive.replace(tzinfo=tzinfo)
+aware = self._assign_tzname(aware, res.tzname)
+elif res.tzname and res.tzname in time.tzname:
+aware = naive.replace(tzinfo=tz.tzlocal())
+# Handle ambiguous local datetime
+aware = self._assign_tzname(aware, res.tzname)
+# This is mostly relevant for winter GMT zones parsed in the UK
+if (aware.tzname() != res.tzname and
+res.tzname in self.info.UTCZONE):
+aware = aware.replace(tzinfo=tz.UTC)
+elif res.tzoffset == 0:
+aware = naive.replace(tzinfo=tz.UTC)
+elif res.tzoffset:
+aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
+elif not res.tzname and not res.tzoffset:
+# i.e. no timezone information was found.
+aware = naive
+elif res.tzname:
+# tz-like string was parsed but we don't know what to do
+# with it
+warnings.warn("tzname {tzname} identified but not understood.  "
+"Pass `tzinfos` argument in order to correctly "
+"return a timezone-aware datetime.  In a future "
+"version, this will raise an "
+"exception.".format(tzname=res.tzname),
+category=UnknownTimezoneWarning)
+aware = naive
+return aware
+def _build_naive(self, res, default):
+repl = {}
+for attr in ("year", "month", "day", "hour",
+"minute", "second", "microsecond"):
+value = getattr(res, attr)
+if value is not None:
+repl[attr] = value
+if 'day' not in repl:
+# If the default day exceeds the last day of the month, fall back
+# to the end of the month.
+cyear = default.year if res.year is None else res.year
+cmonth = default.month if res.month is None else res.month
+cday = default.day if res.day is None else res.day
+if cday > monthrange(cyear, cmonth)[1]:
+repl['day'] = monthrange(cyear, cmonth)[1]
+naive = default.replace(**repl)
+if res.weekday is not None and not res.day:
+naive = naive + relativedelta.relativedelta(weekday=res.weekday)
+return naive
+def _assign_tzname(self, dt, tzname):
+if dt.tzname() != tzname:
+new_dt = tz.enfold(dt, fold=1)
+if new_dt.tzname() == tzname:
+return new_dt
+return dt
+def _recombine_skipped(self, tokens, skipped_idxs):
+"""
+>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
+>>> skipped_idxs = [0, 1, 2, 5]
+>>> _recombine_skipped(tokens, skipped_idxs)
+["foo bar", "baz"]
+"""
+skipped_tokens = []
+for i, idx in enumerate(sorted(skipped_idxs)):
+if i > 0 and idx - 1 == skipped_idxs[i - 1]:
+skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
+else:
+skipped_tokens.append(tokens[idx])
+return skipped_tokens
# Shared module-level parser instance built with the default constructor
# arguments; parse() falls back to it when no ``parserinfo`` is supplied.
DEFAULTPARSER = parser()
def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a date/time string in one of the supported formats.

    When ``parserinfo`` is given, a new parser is built around it;
    otherwise the shared module-level default parser is used. All keyword
    arguments are forwarded to that parser's ``parse`` method.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object. If this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in
        the default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. It can be a dictionary with
        timezone aliases mapping time zone names to time zones, or a
        function taking two parameters (``tzname`` and ``tzoffset``) and
        returning a time zone.

        The time zones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A truthy parserinfo gets its own parser; otherwise reuse the shared one.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
class _tzparser(object):
    """Parser for TZ-variable-style time zone strings such as ``BRST+3BRDT``.

    ``parse`` fills a ``_result`` with the standard/DST abbreviations and
    offsets plus two ``_attr`` rule objects (``start`` and ``end``)
    describing when DST begins and ends.
    """

    class _result(_resultbase):
        """Container for everything extracted from one time zone string."""

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """One DST transition rule (used for both ``start`` and ``end``)."""
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """Parse *tzstr* and return a ``_result``, or ``None`` on failure.

        :param tzstr:
            The time zone string, e.g. ``"EST5EDT,M3.2.0/2,M11.1.0/2"``.

        :return:
            A populated ``_result`` whose ``any_unused_tokens`` attribute is
            ``True`` when tokens other than ``","`` and ``":"`` were left
            unconsumed, or ``None`` if the string is malformed (any
            ``IndexError``/``ValueError`` raised while parsing).

        Malformed input is detected with explicit ``ValueError`` raises
        rather than ``assert`` so that validation still happens when Python
        runs with assertions disabled (``-O``).
        """
        res = self._result()
        # Tokenize into separators, alphabetic runs, digit runs and any
        # leftover text between them (e.g. '+', '-', '/', ';').
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                # Advance j over tokens that contain no digit/sign/separator
                # character: together they form an abbreviation.
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # First abbreviation is the standard zone, second is DST.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    # Current token is not part of an abbreviation: the
                    # abbreviation/offset section is over.
                    break

            if i < len_l:
                # Accept ';' as an alternative rule separator.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                if l[i] != ',':
                    raise ValueError("expected ',' before DST rules")

                i += 1

            if i >= len_l:
                pass

            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    # A non-zero value is a week number (the next field is
                    # then a weekday); zero means the next field is a day.
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    # Optional trailing field: DST offset relative to the
                    # standard offset.
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                # optional seconds component
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if not (i == len_l or l[i] == ','):
                        raise ValueError("expected ',' or end after DST rule")

                    i += 1

                if i < len_l:
                    raise ValueError("unexpected trailing tokens")

        except (IndexError, ValueError, AssertionError):
            # Any structural problem means the string is not a valid zone
            # specification; AssertionError is kept for compatibility.
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
        return res
# Shared module-level _tzparser instance used by _parsetz().
DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
    """Parse *tzstr* with the shared ``DEFAULTTZPARSER`` and return its result."""
    return DEFAULTTZPARSER.parse(tzstr)
class ParserError(ValueError):
    """Error class for representing failure to parse a datetime string.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as the values interpolated into it, e.g.
    ``ParserError("Unknown string format: %s", timestr)``.
    """

    def __str__(self):
        """Return the formatted message, falling back to the default text.

        The fallback is used when there are no arguments (``IndexError``),
        when the arguments do not fit the format (``TypeError``), or when
        the message contains a stray ``%`` that is not a valid conversion
        (``ValueError``), so that ``str()`` on the exception never raises.
        """
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            return super(ParserError, self).__str__()

    def __repr__(self):
        """Return ``ClassName(message)`` using the formatted message."""
        return "%s(%s)" % (self.__class__.__name__, str(self))
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when the parser finds a timezone it cannot parse into a tzinfo."""
+# vim:ts=4:sw=4:et

Mercurial > repos > shellac > sam_consensus_v3

comparison env/lib/python3.9/site-packages/dateutil/parser/_parser.py @ 0:4f3585e2f14b draft default tip