# Source code for wikirepo.data.time_utils

"""
Time Utilities
--------------

Functions querying time information.

Contents
    interval_to_col_name,
    truncate_date,
    truncate_date_col,
    incl_intervals,
    make_timespan,
    latest_date,
    earliest_date,
    truncated_latest_date,
    truncated_earliest_date
"""

from datetime import date, datetime

from dateutil.rrule import DAILY, MONTHLY, WEEKLY, YEARLY, rrule


def interval_to_col_name(interval):
    """
    Looks up the timespan column name that belongs to an interval.

    Parameters
    ----------
        interval : str
            The name of the interval, matched case-insensitively.

    Returns
    -------
        col_name : str or None
            "year", "month", "week" or "day" for the matching interval,
            and None if the interval is not one of the known ones.
    """
    col_names = {
        "yearly": "year",
        "monthly": "month",
        "weekly": "week",
        "daily": "day",
    }

    # dict.get yields None for an unknown interval, like the fall-through of an if/elif chain.
    return col_names.get(interval.lower())
def truncate_date(d, interval=None):
    """
    Truncates a date object given an interval.

    Parameters
    ----------
        d : datetime.date, tuple or str
            The date to truncate.

            Note 1: a tuple is read as (year, month, day).

            Note 2: a str is treated as already formatted and is returned unchanged.

        interval : str (default=None)
            The interval to truncate to, matched case-insensitively.

            Note 1: see incl_intervals for options.

            Note 2: if None, then d is returned unchanged.

    Returns
    -------
        truncated : str, or d unchanged
            "%Y", "%Y-%m", "%Y-%W" or "%Y-%m-%d" formatted text for yearly, monthly,
            weekly and daily intervals respectively. None is returned for a date
            combined with an interval string that is not included.

    Raises
    ------
        AssertionError
            If interval is neither None nor a str.
    """
    if interval is not None and not isinstance(interval, str):
        # Raised explicitly instead of via an assert statement so that the check
        # is not stripped when Python runs with -O; the exception type is kept.
        raise AssertionError(
            "'interval' argument must be None or one of "
            + ", ".join(incl_intervals())
            + "."
        )

    if interval is None or isinstance(d, str):
        # Nothing to truncate to, or d has been formatted already.
        return d

    if isinstance(d, tuple):
        d = datetime.strptime(f"{d[0]}-{d[1]}-{d[2]}", "%Y-%m-%d").date()

    formats = {
        "yearly": "%Y",
        "monthly": "%Y-%m",
        "weekly": "%Y-%W",
        "daily": "%Y-%m-%d",
    }
    date_format = formats.get(interval.lower())
    if date_format is None:
        # Intervals that are not included have always produced None here.
        return None

    return d.strftime(date_format)
def truncate_date_col(df, col, interval):
    """
    Truncates each entry of a df's date column to the provided interval.

    Parameters
    ----------
        df : object supporting df[col] and .map (presumably a pandas.DataFrame)
            The data whose column is truncated. It is modified in place.

        col : str
            The name of the column holding the dates.

        interval : str
            The interval passed on to truncate_date for every entry.

    Returns
    -------
        df : the same object that was passed in
            Returned after df[col] has been overwritten with the truncated values.
    """

    def _truncate_entry(entry):
        return truncate_date(d=entry, interval=interval)

    truncated_col = df[col].map(_truncate_entry)
    df[col] = truncated_col

    return df
def incl_intervals():
    """
    Lists the intervals that are included.

    Note: timespans will not be able to be queried if their interval is not included.

    Returns
    -------
        intervals : list (contains strs)
            A new list on every call, ordered from the coarsest to the finest interval.
    """
    included = ("yearly", "monthly", "weekly", "daily")

    return list(included)
def make_timespan(timespan=None, interval=None):
    """
    Queries a timespan given user input of strings, ints, or time values.

    Parameters
    ----------
        timespan : two element tuple or list : contains datetime.date or tuple (default=None: (date.today(), date.today()))
            A tuple or list that defines the start and end dates to be queried.

            Note 1: if True, then the full timespan from 1-1-1 to the current day will be queried.

            Note 2: passing a single entry will query for that date only.

            Note 3: if the first entry is later than the second, then the result
            is ordered increasing in time instead of the default decreasing order.

        interval : str
            The time interval over which queries will be made, matched case-insensitively.

            Note 1: see data.time_utils for options.

            Note 2: if None, then only the most recent data will be queried.

    Returns
    -------
        formatted_timespan : list (contains datetime.date)
            The timespan formatted going back in time, or None if interval is None.

    Raises
    ------
        ValueError
            If a timespan entry is neither a datetime.date nor a tuple, or if
            interval is not None and not one of the included intervals.
    """
    if interval is None:
        # Most recent data wanted, so there is no timespan to build.
        return None

    if timespan is None:
        today = date.today()  # read once so both ends always agree
        timespan = (today, today)

    elif timespan is True:
        timespan = (date.min, date.today())

    elif isinstance(timespan, date):
        timespan = (timespan, timespan)

    # Turn both entries into dates before comparing them so that date and
    # tuple entries can be mixed.
    endpoints = []
    for entry in (timespan[0], timespan[1]):
        if isinstance(entry, date):
            endpoints.append(entry)

        elif isinstance(entry, tuple):
            endpoints.append(date(*entry))

        else:
            raise ValueError("An invalid value was passed to the 'timespan' argument.")

    start_dt, end_dt = endpoints

    order = -1  # default order is decreasing in time
    if start_dt > end_dt:
        start_dt, end_dt = end_dt, start_dt
        order = 1  # user wants the dates to be increasing in df rows instead of the default

    frequencies = {
        "yearly": YEARLY,
        "monthly": MONTHLY,
        "weekly": WEEKLY,
        "daily": DAILY,
    }
    frequency = (
        frequencies.get(interval.lower()) if isinstance(interval, str) else None
    )
    if frequency is None:
        raise ValueError(
            "An invalid value was passed to the 'interval' argument. Please choose one of "
            + ", ".join(incl_intervals())
            + "."
        )

    occurrences = rrule(frequency, dtstart=start_dt, until=end_dt)

    return [dt.date() for dt in occurrences][::order]
def latest_date(timespan):
    """
    Picks the later of the first two entries of a timespan.

    Parameters
    ----------
        timespan : two element tuple or list
            The entries to compare; they must support the > operator.

    Returns
    -------
        latest : the second entry if it is greater than the first, else the first
    """
    first, second = timespan[0], timespan[1]

    # max keeps its first argument unless the second compares greater than it.
    return max(first, second)
def earliest_date(timespan):
    """
    Picks the earlier of the first two entries of a timespan.

    Parameters
    ----------
        timespan : two element tuple or list
            The entries to compare; they must support the < operator.

    Returns
    -------
        earliest : the first entry if it is less than the second, else the second
    """
    first, second = timespan[0], timespan[1]

    # min keeps its first argument unless the next one compares less than it,
    # so the second entry goes first to have ties resolve to the second entry.
    return min(second, first)
def truncated_latest_date(timespan, interval):
    """
    Finds the latest date in a timespan and truncates it to an interval.

    Parameters
    ----------
        timespan : two element tuple or list
            The entries that latest_date chooses from.

        interval : str
            The interval passed on to truncate_date.

    Returns
    -------
        truncated : the result of truncate_date for the latest entry
    """
    latest = latest_date(timespan)

    return truncate_date(latest, interval=interval)
def truncated_earliest_date(timespan, interval):
    """
    Finds the earliest date in a timespan and truncates it to an interval.

    Parameters
    ----------
        timespan : two element tuple or list
            The entries that earliest_date chooses from.

        interval : str
            The interval passed on to truncate_date.

    Returns
    -------
        truncated : the result of truncate_date for the earliest entry
    """
    earliest = earliest_date(timespan)

    return truncate_date(earliest, interval=interval)