Source code for learning_machine.engine.na
from typing import Any
import numpy as np
import pandas as pd
from learning_machine.zoo import DATA_ENGINE_ZOO
from .engine import DataEngine
class NdFillSinkHole(DataEngine):
"""Fill continuous nan value."""
def __init__(self, length: int, fillwith: Any):
self.length = length
self.fillwith = fillwith
def __call__(self, data: np.ndarray) -> np.ndarray:
arr = data.copy()
not_nan_mask = ~np.isnan(arr)
not_nan_mask = np.concatenate(([True], not_nan_mask, [True]))
range_arr = np.flatnonzero(not_nan_mask[1:] != not_nan_mask[:-1]).reshape(-1, 2)
fill_range = range_arr[range_arr[:, 1] - range_arr[:, 0] < self.length]
concat_arr = [np.arange(x[0], x[1]) for x in fill_range]
if len(concat_arr) == 0:
return arr
idxs = np.concatenate(concat_arr)
arr[idxs] = self.fillwith
return arr
@DATA_ENGINE_ZOO.regist()
class FillSinkHole(NdFillSinkHole):
"""Fill in the interval where nan value appear continuously with specific value."""
def __init__(self, col: str, length: int, fillwith: Any):
"""
Args:
col (str): target column
length (int): interval length
fillwith (Any): fill value
name (str, optional): . Defaults to "".
"""
super().__init__(length, fillwith)
self.col = col
def __call__(self, data: pd.DataFrame) -> pd.DataFrame:
arr = data[self.col].to_numpy()
fill = super().__call__(arr)
data[self.col] = fill
return data
[docs]
@DATA_ENGINE_ZOO.regist()
class DropNARow(DataEngine):
"""Drop rows contain missing value in specific columns."""
[docs]
def __init__(self, cols: list[str], copy=True):
"""
Args:
cols (list[str]): columns to drop nan values
copy (bool, optional): copy new dataframe and process. Defaults to True.
"""
self.columns = cols
self.copy = copy
def __call__(self, data: pd.DataFrame) -> pd.DataFrame:
if self.copy:
data = data.copy()
return data.dropna(subset=self.columns)
[docs]
@DATA_ENGINE_ZOO.regist()
class FillNaWithValue(DataEngine):
"""Fill nan rows with specific value."""
[docs]
def __init__(self, cols: list[str], fillwith):
"""
Args:
cols (list[str]): columns to fill
fillwith (_type_): fill value
"""
self.cols = cols
self.fillwith = fillwith
def __call__(self, data: pd.DataFrame) -> pd.DataFrame:
filler = {col: self.fillwith for col in self.cols}
data.fillna(value=filler, inplace=True)
return data
[docs]
@DATA_ENGINE_ZOO.regist()
class FillNaFrom(DataEngine):
"""Fill nan value from another column."""
[docs]
def __init__(self, col: str, from_col: str):
"""
Args:
col (str): target column
from_col (str): from column
"""
self.col = col
self.from_col = from_col
def __call__(self, data: pd.DataFrame) -> pd.DataFrame:
data[self.col] = data[self.col].fillna(data[self.from_col], inplace=False)
return data