"""
Time Utilities
--------------
Functions querying time information.
Contents
interval_to_col_name,
truncate_date,
truncate_date_col,
incl_intervals,
make_timespan,
latest_date,
earliest_date,
truncated_latest_date,
truncated_earliest_date
"""
from datetime import date, datetime
from dateutil.rrule import DAILY, MONTHLY, WEEKLY, YEARLY, rrule
def interval_to_col_name(interval):
    """
    Queries the proper name of the column for timespans given an interval.

    Parameters
    ----------
        interval : str
            One of "yearly", "monthly", "weekly" or "daily" (matched case-insensitively).

    Returns
    -------
        col_name : str or None
            "year", "month", "week" or "day"; None if the interval is not recognized.
    """
    col_names = {
        "yearly": "year",
        "monthly": "month",
        "weekly": "week",
        "daily": "day",
    }
    # .get keeps the silent None result for intervals that are not listed.
    return col_names.get(interval.lower())
def truncate_date(d, interval=None):
    """
    Truncates a date object given an interval.

    Parameters
    ----------
        d : datetime.date, tuple or str
            The date to truncate.
            Note 1: a tuple is read as (year, month, day).
            Note 2: a str is treated as already formatted and is returned unchanged.

        interval : str (default=None)
            One of "yearly", "monthly", "weekly" or "daily" (matched case-insensitively).
            Note: if None, then d is returned unchanged.

    Returns
    -------
        truncated : str
            d formatted as "%Y", "%Y-%m", "%Y-%W" or "%Y-%m-%d" for a yearly, monthly,
            weekly or daily interval; d itself when no truncation is applied.

    Raises
    ------
        AssertionError
            If interval is neither None nor a str.

        ValueError
            If d needs truncating and interval is a str that is not a supported interval.
    """
    formats = {
        "yearly": "%Y",
        "monthly": "%Y-%m",
        "weekly": "%Y-%W",
        "daily": "%Y-%m-%d",
    }

    if interval is None:
        return d

    if not isinstance(interval, str):
        # Raised explicitly instead of through an assert statement so that the check
        # is not stripped under `python -O`; the exception type is kept as it was.
        raise AssertionError(
            "'interval' argument must be None or one of " + ", ".join(formats) + "."
        )

    if isinstance(d, str):  # has been formatted already
        return d

    if isinstance(d, tuple):
        d = datetime.strptime(f"{d[0]}-{d[1]}-{d[2]}", "%Y-%m-%d").date()

    try:
        date_format = formats[interval.lower()]
    except KeyError:
        # An unknown interval used to pass through without any error being reported.
        raise ValueError(
            "An invalid value was passed to the 'interval' argument. Please choose one of "
            + ", ".join(formats)
            + "."
        ) from None

    return d.strftime(date_format)
def truncate_date_col(df, col, interval):
    """
    Truncates the date column of a df based on a provided interval.

    Parameters
    ----------
        df : object supporting df[col] access and assignment (presumably a pandas DataFrame)
            The data whose column should be truncated.

        col : column key
            The column of df that holds the dates.

        interval : str or None
            The interval passed on to truncate_date for every entry.

    Returns
    -------
        df : the passed df
            The same object, with df[col] overwritten by the truncated values.
    """

    def _truncate_entry(entry):
        return truncate_date(d=entry, interval=interval)

    truncated_col = df[col].map(_truncate_entry)
    df[col] = truncated_col

    return df
def incl_intervals():
    """
    Queries the included intervals.

    Note: timespans will not be able to be queried if their interval is not included.

    Returns
    -------
        intervals : list (contains strs)
            A new list of the interval names, from the coarsest to the finest.
    """
    intervals = ("yearly", "monthly", "weekly", "daily")

    # A fresh list is built on every call so that callers can modify their copy.
    return list(intervals)
def _timespan_entry_to_date(entry):
    """
    Converts one end of a timespan (a date or a tuple/list of date arguments) to a date.
    """
    if isinstance(entry, date):
        return entry

    if isinstance(entry, (tuple, list)):
        return date(*entry)

    raise ValueError("An invalid value was passed to the 'timespan' argument.")


def make_timespan(timespan=None, interval=None):
    """
    Queries a timespan given user input of strings, ints, or time values.

    Parameters
    ----------
        timespan : two element tuple or list : contains datetime.date or tuple (default=None: (date.today(), date.today()))
            A tuple or list that defines the start and end dates to be queried.
            Note 1: if True, then the full timespan from 1-1-1 to the current day will be queried.
            Note 2: passing a single entry will query for that date only.
            Note 3: entries given as tuples are read as (year, month, day).

        interval : str
            The time interval over which queries will be made.
            Note 1: one of "yearly", "monthly", "weekly" or "daily" (matched case-insensitively).
            Note 2: if None, then only the most recent data will be queried.

    Returns
    -------
        formatted_timespan : list (contains datetime.date) or None
            The timespan formatted going back in time, or going forward in time if the
            later date was passed first. None is returned if interval is None.

    Raises
    ------
        ValueError
            If timespan cannot be read as a start and an end date, or if interval is
            given but is not one of the supported intervals.
    """
    if interval is None and timespan is None:
        # Most recent data wanted.
        return None

    order = -1  # default order is decreasing in time
    if timespan is None:
        start_dt = end_dt = date.today()

    elif timespan is True:
        start_dt, end_dt = date.min, date.today()

    elif isinstance(timespan, date):
        start_dt = end_dt = timespan

    else:
        try:
            first_entry, second_entry = timespan[0], timespan[1]
        except (TypeError, IndexError, KeyError):
            raise ValueError(
                "An invalid value was passed to the 'timespan' argument."
            ) from None

        # Both ends are converted before comparing so that a date can be mixed with a tuple.
        start_dt = _timespan_entry_to_date(first_entry)
        end_dt = _timespan_entry_to_date(second_entry)

        if start_dt > end_dt:
            start_dt, end_dt = end_dt, start_dt
            # user wants the dates to be increasing in df rows instead of the default
            order = 1

    if interval is None:
        # Without an interval no list of dates is generated.
        return None

    supported_intervals = ("yearly", "monthly", "weekly", "daily")
    if not isinstance(interval, str) or interval.lower() not in supported_intervals:
        raise ValueError(
            "An invalid value was passed to the 'interval' argument. Please choose one of "
            + ", ".join(supported_intervals)
            + "."
        )

    # Built only after validation so an invalid interval is reported before any
    # recurrence is generated.
    frequencies = {
        "yearly": YEARLY,
        "monthly": MONTHLY,
        "weekly": WEEKLY,
        "daily": DAILY,
    }
    frequency = frequencies[interval.lower()]

    dates = [dt.date() for dt in rrule(frequency, dtstart=start_dt, until=end_dt)]

    return dates[::order]
def latest_date(timespan):
    """
    Returns the latest date in a timespan.

    Parameters
    ----------
        timespan : two element tuple or list
            The two dates to compare; they must be comparable with each other.

    Returns
    -------
        latest : element of timespan
            The second entry if it is strictly later than the first, else the first entry.
    """
    first_entry, second_entry = timespan[0], timespan[1]

    return second_entry if second_entry > first_entry else first_entry
def earliest_date(timespan):
    """
    Returns the earliest date in a timespan.

    Parameters
    ----------
        timespan : two element tuple or list
            The two dates to compare; they must be comparable with each other.

    Returns
    -------
        earliest : element of timespan
            The first entry if it is strictly earlier than the second, else the second entry.
    """
    first_entry, second_entry = timespan[0], timespan[1]

    return first_entry if first_entry < second_entry else second_entry
def truncated_latest_date(timespan, interval):
    """
    Returns the truncated latest date in a timespan.

    Parameters
    ----------
        timespan : two element tuple or list
            The two dates from which the latest is picked by latest_date.

        interval : str or None
            The interval passed on to truncate_date.

    Returns
    -------
        truncated : result of truncate_date for the latest date
    """
    latest = latest_date(timespan)

    return truncate_date(latest, interval=interval)
def truncated_earliest_date(timespan, interval):
    """
    Returns the truncated earliest date in a timespan.

    Parameters
    ----------
        timespan : two element tuple or list
            The two dates from which the earliest is picked by earliest_date.

        interval : str or None
            The interval passed on to truncate_date.

    Returns
    -------
        truncated : result of truncate_date for the earliest date
    """
    earliest = earliest_date(timespan)

    return truncate_date(earliest, interval=interval)