Skip to content

Commit

Permalink
Added support for DDD and DDDD tokens in arrow.get()
Browse files Browse the repository at this point in the history
  • Loading branch information
jadchaar committed Jul 14, 2019
1 parent 3f1a3c9 commit 87f63fb
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 20 deletions.
60 changes: 43 additions & 17 deletions arrow/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from __future__ import absolute_import, unicode_literals

import re
import warnings
from datetime import datetime

from dateutil import tz
Expand All @@ -19,21 +18,6 @@ class ParserError(RuntimeError):
pass


class GetParseWarning(DeprecationWarning):
"""Raised when arrow.get() is passed a string with no formats and matches incorrectly
on one of the default formats.
e.g.
arrow.get('blabla2016') -> <Arrow [2016-01-01T00:00:00+00:00]>
arrow.get('13/4/2045') -> <Arrow [2045-01-01T00:00:00+00:00]>
In version 0.15.0 this warning will become a ParserError.
"""


warnings.simplefilter("default", GetParseWarning)


class DateTimeParser(object):

_FORMAT_RE = re.compile(
Expand All @@ -43,6 +27,8 @@ class DateTimeParser(object):

_ONE_OR_MORE_DIGIT_RE = re.compile(r"\d+")
_ONE_OR_TWO_DIGIT_RE = re.compile(r"\d{1,2}")
_ONE_OR_TWO_OR_THREE_DIGIT_RE = re.compile(r"\d{1,3}")
_THREE_DIGIT_RE = re.compile(r"\d{3}")
_FOUR_DIGIT_RE = re.compile(r"\d{4}")
_TWO_DIGIT_RE = re.compile(r"\d{2}")
_TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z")
Expand All @@ -54,6 +40,8 @@ class DateTimeParser(object):
"YY": _TWO_DIGIT_RE,
"MM": _TWO_DIGIT_RE,
"M": _ONE_OR_TWO_DIGIT_RE,
"DDDD": _THREE_DIGIT_RE,
"DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
"DD": _TWO_DIGIT_RE,
"D": _ONE_OR_TWO_DIGIT_RE,
"HH": _TWO_DIGIT_RE,
Expand Down Expand Up @@ -125,7 +113,7 @@ def parse_iso(self, datetime_string):

# TODO: add tests for all the new formats, especially basic format

# required date formats to test against
# date formats (ISO-8601 and others) to test against
formats = [
"YYYY-MM-DD",
"YYYY-M-DD",
Expand All @@ -137,6 +125,8 @@ def parse_iso(self, datetime_string):
"YYYY.M.DD",
"YYYY.M.D",
"YYYYMMDD",
"YYYY-DDDD",
"YYYYDDDD",
"YYYY-MM",
"YYYY/MM",
"YYYY.MM",
Expand Down Expand Up @@ -301,6 +291,9 @@ def _parse_token(self, token, value, parts):
elif token in ["MM", "M"]:
parts["month"] = int(value)

elif token in ["DDDD", "DDD"]:
parts["day_of_year"] = int(value)

elif token in ["DD", "D"]:
parts["day"] = int(value)

Expand Down Expand Up @@ -354,6 +347,39 @@ def _build_datetime(parts):
tz_utc = tz.tzutc()
return datetime.fromtimestamp(timestamp, tz=tz_utc)

# TODO: add tests for the day-of-year (DDD/DDDD) handling below.
day_of_year = parts.get("day_of_year")

if day_of_year:
year = parts.get("year")
month = parts.get("month")
if year is None:
raise ParserError(
"Year component is required with the DDD and DDDD tokens"
)

if month is not None:
raise ParserError(
"Month component is not allowed with the DDD and DDDD tokens"
)

date_string = "{}-{}".format(year, day_of_year)
try:
dt = datetime.strptime(date_string, "%Y-%j")
except ValueError:
raise ParserError(
"Expected a valid day of year, but received '{}'".format(
day_of_year
)
)

# TODO: write a test for 2015-366. 2015 is not a leap year, but
# datetime.strptime("2015-366", "%Y-%j") does not raise; it rolls over into
# the next year and returns datetime.datetime(2016, 1, 1, 0, 0).
parts["year"] = dt.year
parts["month"] = dt.month
parts["day"] = dt.day

am_pm = parts.get("am_pm")
hour = parts.get("hour", 0)

Expand Down
5 changes: 2 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,9 @@ Use the following tokens in parsing and formatting. Note that they're not the s
+--------------------------------+--------------+-------------------------------------------+
| |M |1, 2, 3 ... 11, 12 |
+--------------------------------+--------------+-------------------------------------------+
|**Day of Year** |DDDD [#t5]_ |001, 002, 003 ... 364, 365 |
|**Day of Year** |DDDD |001, 002, 003 ... 364, 365 |
+--------------------------------+--------------+-------------------------------------------+
| |DDD [#t5]_ |1, 2, 3 ... 4, 5 |
| |DDD |1, 2, 3 ... 364, 365 |
+--------------------------------+--------------+-------------------------------------------+
|**Day of Month** |DD |01, 02, 03 ... 30, 31 |
+--------------------------------+--------------+-------------------------------------------+
Expand Down Expand Up @@ -387,7 +387,6 @@ Any token can be escaped when parsing by enclosing it within square brackets:
.. [#t2] localization support only for formatting
.. [#t3] the result is truncated to microseconds, with `half-to-even rounding <https://en.wikipedia.org/wiki/IEEE_floating_point#Roundings_to_nearest>`_.
.. [#t4] timezone names from `tz database <https://www.iana.org/time-zones>`_ provided via dateutil package
.. [#t5] support for the DDD and DDDD tokens will be added in a future release
API Guide
---------
Expand Down

0 comments on commit 87f63fb

Please sign in to comment.