|
- from typing import List
- import pandas as pd
- from pandas.tseries import offsets
- from pandas.tseries.frequencies import to_offset
- import numpy as np
-
- import mindspore.numpy as mnp
- import mindspore.dataset as ds
-
class TimeFeature:
    """Base type for callables that map a ``pd.DatetimeIndex`` to one feature.

    Calling the base class itself does nothing and yields ``None``; the
    feature classes in this file subclass it and override ``__call__``.
    """

    def __init__(self):
        """Accept no arguments and store no state."""
        return None

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        """Placeholder meant to be overridden; returns ``None`` here."""
        return None

    def __repr__(self):
        """Render as the concrete class name followed by ``()``."""
        return f"{self.__class__.__name__}()"
-
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        # Seconds run 0..59: divide by 59 to reach [0, 1], then centre on zero.
        unit_scaled = index.second / 59.0
        return unit_scaled - 0.5
-
class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        # Minutes run 0..59: divide by 59 to reach [0, 1], then centre on zero.
        unit_scaled = index.minute / 59.0
        return unit_scaled - 0.5
-
class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        # Hours run 0..23: divide by 23 to reach [0, 1], then centre on zero.
        unit_scaled = index.hour / 23.0
        return unit_scaled - 0.5
-
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        # ``dayofweek`` runs 0..6: divide by 6 to reach [0, 1], then centre on zero.
        unit_scaled = index.dayofweek / 6.0
        return unit_scaled - 0.5
-
class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        # Shift the 1-based day to start at zero before scaling by the
        # largest possible offset (30, i.e. the 31st of a month).
        zero_based_day = index.day - 1
        unit_scaled = zero_based_day / 30.0
        return unit_scaled - 0.5
-
class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        # Shift the 1-based day to start at zero; the divisor 365 means the
        # upper bound 0.5 is only reached on day 366 of a leap year.
        zero_based_day = index.dayofyear - 1
        unit_scaled = zero_based_day / 365.0
        return unit_scaled - 0.5
-
class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        # Shift the 1-based month to start at zero, then scale 0..11 onto [0, 1].
        zero_based_month = index.month - 1
        unit_scaled = zero_based_month / 11.0
        return unit_scaled - 0.5
-
class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> mnp.ndarray:
        """Scale the ISO week number (1..53) of each timestamp onto [-0.5, 0.5].

        Missing timestamps (``NaT``) come out as ``NaN``.
        """
        # ``DatetimeIndex.week`` was deprecated in pandas 1.1 and removed in
        # 2.0; ``isocalendar().week`` is its replacement. Fall back to the old
        # attribute only when the installed pandas predates ``isocalendar``.
        iso_calendar = getattr(index, "isocalendar", None)
        if iso_calendar is None:
            week = index.week
        else:
            # ``isocalendar()`` yields a nullable UInt32 column; convert it to
            # a plain float64 Index so the result stacks with the other
            # features without falling back to object dtype.
            week = pd.Index(
                iso_calendar()["week"].to_numpy(
                    dtype="float64", na_value=float("nan")
                )
            )
        return (week - 1) / 52.0 - 0.5
-
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Returns
    -------
    A list holding one new instance of each feature class registered for the
    first offset type (in table order) that the parsed frequency is an
    instance of. Yearly frequencies map to an empty list.

    Raises
    ------
    RuntimeError
        If the parsed offset is not an instance of any offset type in the table.
    """

    # Coarser frequencies get fewer features: a feature finer than the
    # sampling step is left out of that frequency's list.
    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    # Errors raised by ``to_offset`` itself are not caught here.
    offset = to_offset(freq_str)

    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [feature_cls() for feature_cls in feature_classes]

    # Keep this list in step with the table above (quarterly is supported).
    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y - yearly
            alias: A
        Q - quarterly
        M - monthly
        W - weekly
        D - daily
        B - business days
        H - hourly
        T - minutely
            alias: min
        S - secondly
    """
    raise RuntimeError(supported_freq_msg)
-
def time_features(dates, timeenc=1, freq='h'):
    """
    Build a 2-D array of calendar features from the ``date`` column of ``dates``.

    Parameters
    ----------
    dates
        DataFrame with a ``date`` column. With ``timeenc=0`` each entry must
        expose ``month``, ``day``, ``weekday()``, ``hour`` and ``minute``;
        with ``timeenc=1`` the column values are passed to ``pd.to_datetime``.
    timeenc
        ``0`` for raw integer calendar fields, ``1`` for features scaled to
        [-0.5, 0.5].
    freq
        Sampling frequency; selects which features are produced.

        With ``timeenc=0`` (case-insensitive, looked up as-is):
          * y - no columns
          * m - [month]
          * w - [month]
          * d - [month, day, weekday]
          * b - [month, day, weekday]
          * h - [month, day, weekday, hour]
          * t - [month, day, weekday, hour, minute]

          Here ``minute`` is a number from 0-3 giving the 15 minute period
          the timestamp falls into.

        With ``timeenc=1`` (parsed as a pandas frequency string):
          * Y - no columns
          * Q - [month]
          * M - [month]
          * W - [Day of month, week of year]
          * D - [Day of week, day of month, day of year]
          * B - [Day of week, day of month, day of year]
          * H - [Hour of day, day of week, day of month, day of year]
          * T - [Minute of hour, hour of day, day of week, day of month, day of year]
          * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]

    Returns
    -------
    Array with one row per entry of ``dates`` and one column per feature.

    Raises
    ------
    KeyError
        If ``timeenc`` is 0 and ``freq.lower()`` is not one of the keys above.
    ValueError
        If ``timeenc`` is neither 0 nor 1.

    Notes
    -----
    With ``timeenc=0`` the columns ``month``, ``day``, ``weekday``, ``hour``
    and ``minute`` are added to (or overwritten on) ``dates`` itself.
    """
    if timeenc == 0:
        stamps = dates.date
        # ``Series.apply`` has no axis argument; a second positional value
        # would bind to the deprecated ``convert_dtype`` parameter instead,
        # so each extractor is passed on its own.
        dates['month'] = stamps.apply(lambda row: row.month)
        dates['day'] = stamps.apply(lambda row: row.day)
        dates['weekday'] = stamps.apply(lambda row: row.weekday())
        dates['hour'] = stamps.apply(lambda row: row.hour)
        # Bucket the minute into its quarter of the hour (0-3).
        dates['minute'] = stamps.apply(lambda row: row.minute // 15)
        freq_map = {
            'y': [],
            'm': ['month'],
            'w': ['month'],
            'd': ['month', 'day', 'weekday'],
            'b': ['month', 'day', 'weekday'],
            'h': ['month', 'day', 'weekday', 'hour'],
            't': ['month', 'day', 'weekday', 'hour', 'minute'],
        }
        return dates[freq_map[freq.lower()]].values
    if timeenc == 1:
        stamps = pd.to_datetime(dates.date.values)
        columns = [feat(stamps) for feat in time_features_from_frequency_str(freq)]
        if not columns:
            # Yearly frequencies have no features and ``np.vstack`` rejects an
            # empty list, so return an array with zero feature columns.
            return np.empty((len(stamps), 0))
        # Stack one row per feature, then flip to one row per timestamp.
        return np.vstack(columns).transpose(1, 0)
    # Previously fell through and returned None, which only failed later.
    raise ValueError(f"Unsupported timeenc {timeenc!r}; expected 0 or 1")
|