Source code for learning_machine.engine.date

import pandas as pd
import numpy as np

from .engine import DataEngine
from .engine_type import DataEngineType
from .fourier import sin_cos_transform
from learning_machine.zoo import DATA_ENGINE_ZOO


@DATA_ENGINE_ZOO.regist()
class StringToDatetime(DataEngine):
    """Convert a string column into pandas datetime values.

    The parsed values are written back into the same column of the frame
    that is passed in, so the caller's DataFrame is modified.
    """

    engine_type = [DataEngineType.SIDE_EFFECT]

    def __init__(self, col: str, format: str | None = None):
        """
        Args:
            col (str): name of the column to convert.
            format (str | None, optional): format string forwarded to
                ``pd.to_datetime``. Defaults to None.
        """
        self.format = format
        self.col = col

    def __call__(self, data: pd.DataFrame) -> pd.DataFrame:
        """Parse ``data[self.col]`` in place and return the same frame."""
        target = self.col
        parsed = pd.to_datetime(data[target], format=self.format)
        # Overwrite the original column rather than adding a new one.
        data[target] = parsed
        return data
@DATA_ENGINE_ZOO.regist()
class DatetimeDayOfYearSinCos(DataEngine):
    """Cyclic (sin/cos) encoding of the day of year.

    Produces two columns named ``{prefix}_{col}_sin`` and
    ``{prefix}_{col}_cos``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix="day_of_year"):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column names.
                Defaults to "day_of_year".
        """
        self.prefix = prefix
        self.col = col

    def __call__(self, data: pd.DataFrame):
        """Return a new frame with the sin/cos columns, indexed like ``data``."""
        stamps = data[self.col].dt
        # Per-row year length, so the last day of any year divides to exactly 1.
        year_length = stamps.is_leap_year.map({True: 366, False: 365})
        fraction = stamps.dayofyear.to_numpy().astype(np.float64)
        fraction /= year_length
        # The value is already a fraction of the year, hence the 1 passed here.
        sin_part, cos_part = sin_cos_transform(fraction, 1)
        columns = {
            f"{self.prefix}_{self.col}_sin": sin_part,
            f"{self.prefix}_{self.col}_cos": cos_part,
        }
        return pd.DataFrame(columns, index=data.index)
@DATA_ENGINE_ZOO.regist()
class DatetimeDayOfYear(DataEngine):
    """Extract the day of year from a datetime column.

    Produces a single column named ``{prefix}_{col}``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix="day_of_year"):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column name.
                Defaults to "day_of_year".
        """
        self.prefix = prefix
        self.col = col

    def __call__(self, data: pd.DataFrame):
        """Return a new one-column frame that shares ``data``'s index."""
        source = data[self.col]
        values = source.dt.dayofyear.to_numpy()
        columns = {f"{self.prefix}_{self.col}": values}
        return pd.DataFrame(columns, index=data.index)
@DATA_ENGINE_ZOO.regist()
class DatetimeMonthOfYear(DataEngine):
    """Extract the month number from a datetime column.

    Produces a single column named ``{prefix}_{col}``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix="month_of_year"):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column name.
                Defaults to "month_of_year".
        """
        self.prefix = prefix
        self.col = col

    def __call__(self, data: pd.DataFrame):
        """Return a new one-column frame that shares ``data``'s index."""
        source = data[self.col]
        values = source.dt.month.to_numpy()
        columns = {f"{self.prefix}_{self.col}": values}
        return pd.DataFrame(columns, index=data.index)
@DATA_ENGINE_ZOO.regist()
class DatetimeMonthOfYearSinCos(DataEngine):
    """Cyclic (sin/cos) encoding of the month of year.

    Produces two columns named ``{prefix}_{col}_sin`` and
    ``{prefix}_{col}_cos``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix="month_of_year"):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column names.
                Defaults to "month_of_year".
        """
        self.prefix = prefix
        self.col = col

    def __call__(self, data: pd.DataFrame):
        """Return a new frame with the sin/cos columns, indexed like ``data``."""
        source = data[self.col]
        months = source.dt.month.to_numpy().astype(np.float64)
        # The raw month number is handed over together with 12.
        sin_part, cos_part = sin_cos_transform(months, 12)
        columns = {
            f"{self.prefix}_{self.col}_sin": sin_part,
            f"{self.prefix}_{self.col}_cos": cos_part,
        }
        return pd.DataFrame(columns, index=data.index)
@DATA_ENGINE_ZOO.regist()
class DatetimeDayOfMonth(DataEngine):
    """Extract the day of month from a datetime column.

    Produces a single column named ``{prefix}_{col}``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix="day_of_month"):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column name.
                Defaults to "day_of_month".
        """
        self.prefix = prefix
        self.col = col

    def __call__(self, data: pd.DataFrame):
        """Return a new one-column frame that shares ``data``'s index."""
        source = data[self.col]
        values = source.dt.day.to_numpy()
        columns = {f"{self.prefix}_{self.col}": values}
        return pd.DataFrame(columns, index=data.index)
@DATA_ENGINE_ZOO.regist()
class DatetimeDayOfMonthSinCos(DataEngine):
    """Cyclic (sin/cos) encoding of the day of month.

    Produces two columns named ``{prefix}_{col}_sin`` and
    ``{prefix}_{col}_cos``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix="day_of_month", norm=True):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column names.
                Defaults to "day_of_month".
            norm (bool, optional): how the period length is chosen. If
                True, each day is divided by the number of days in its own
                month; if False, by a fixed 31. Defaults to True.
        """
        self.norm = norm
        self.prefix = prefix
        self.col = col

    def __call__(self, data: pd.DataFrame):
        """Return a new frame with the sin/cos columns, indexed like ``data``."""
        stamps = data[self.col].dt
        fraction = stamps.day.to_numpy().astype(np.float64)
        if not self.norm:
            # Fixed-length month: every row is scaled by 31.
            fraction /= 31
        else:
            # Month-aware scaling: the last day of any month divides to 1.
            fraction /= stamps.days_in_month
        # The value is already a fraction of the month, hence the 1 passed here.
        sin_part, cos_part = sin_cos_transform(fraction, 1)
        columns = {
            f"{self.prefix}_{self.col}_sin": sin_part,
            f"{self.prefix}_{self.col}_cos": cos_part,
        }
        return pd.DataFrame(columns, index=data.index)
@DATA_ENGINE_ZOO.regist()
class DatetimeDayOfWeek(DataEngine):
    """Extract the day of week from a datetime column.

    Produces a single column named ``{prefix}_{col}``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix="day_of_week"):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column name.
                Defaults to "day_of_week".
        """
        self.prefix = prefix
        self.col = col

    def __call__(self, data: pd.DataFrame):
        """Return a new one-column frame that shares ``data``'s index."""
        source = data[self.col]
        values = source.dt.day_of_week.to_numpy()
        columns = {f"{self.prefix}_{self.col}": values}
        return pd.DataFrame(columns, index=data.index)
@DATA_ENGINE_ZOO.regist()
class DatetimeDayOfWeekSinCos(DataEngine):
    """Cyclic (sin/cos) encoding of the day of week.

    Produces two columns named ``{prefix}_{col}_sin`` and
    ``{prefix}_{col}_cos``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(self, col: str, prefix: str = "day_of_week"):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column names.
                Defaults to "day_of_week".
        """
        self.col = col
        self.prefix = prefix

    def __call__(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a new frame with the sin/cos columns, indexed like ``data``."""
        # Use the weekday (Monday=0 .. Sunday=6), not ``.dt.day``: the day of
        # month (1..31) does not repeat with the 7 passed below.
        day_of_week = data[self.col].dt.day_of_week.to_numpy().astype(np.float64)
        sinx, cosx = sin_cos_transform(day_of_week, 7)
        return pd.DataFrame(
            {
                f"{self.prefix}_{self.col}_sin": sinx,
                f"{self.prefix}_{self.col}_cos": cosx,
            },
            index=data.index,
        )
@DATA_ENGINE_ZOO.regist()
class DatetimeIsWeekend(DataEngine):
    """Flag rows whose datetime falls on a weekend.

    Produces a single boolean column named ``{prefix}_{col}``.
    """

    engine_type = [DataEngineType.RETURN_NEW_PD]

    def __init__(
        self, col: str, prefix: str = "is_weekend", include_sat: bool = True
    ):
        """
        Args:
            col (str): name of the datetime column to read.
            prefix (str, optional): prefix of the generated column name.
                Defaults to "is_weekend".
            include_sat (bool, optional): if True, Saturday and Sunday both
                count as weekend; if False, only Sunday does. Defaults to
                True.
        """
        self.col = col
        self.prefix = prefix
        self.include_sat = include_sat

    def __call__(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a new one-column boolean frame that shares ``data``'s index."""
        # pandas numbers weekdays Monday=0 .. Sunday=6, so Saturday is 5.
        first_weekend_day = 5 if self.include_sat else 6
        # Inclusive comparison: a strict ``>`` would drop Saturday when
        # include_sat is True and would never match when it is False.
        is_weekend = data[self.col].dt.day_of_week.to_numpy() >= first_weekend_day
        return pd.DataFrame(
            {
                f"{self.prefix}_{self.col}": is_weekend,
            },
            index=data.index,
        )