You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

timefeatures.py 5.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. from typing import List
  2. import numpy as np
  3. import pandas as pd
  4. from pandas.tseries import offsets
  5. from pandas.tseries.frequencies import to_offset
  6. class TimeFeature:
  7. def __init__(self):
  8. pass
  9. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  10. pass
  11. def __repr__(self):
  12. return self.__class__.__name__ + "()"
  13. class SecondOfMinute(TimeFeature):
  14. """Minute of hour encoded as value between [-0.5, 0.5]"""
  15. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  16. return index.second / 59.0 - 0.5
  17. class MinuteOfHour(TimeFeature):
  18. """Minute of hour encoded as value between [-0.5, 0.5]"""
  19. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  20. return index.minute / 59.0 - 0.5
  21. class HourOfDay(TimeFeature):
  22. """Hour of day encoded as value between [-0.5, 0.5]"""
  23. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  24. return index.hour / 23.0 - 0.5
  25. class DayOfWeek(TimeFeature):
  26. """Hour of day encoded as value between [-0.5, 0.5]"""
  27. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  28. return index.dayofweek / 6.0 - 0.5
  29. class DayOfMonth(TimeFeature):
  30. """Day of month encoded as value between [-0.5, 0.5]"""
  31. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  32. return (index.day - 1) / 30.0 - 0.5
  33. class DayOfYear(TimeFeature):
  34. """Day of year encoded as value between [-0.5, 0.5]"""
  35. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  36. return (index.dayofyear - 1) / 365.0 - 0.5
  37. class MonthOfYear(TimeFeature):
  38. """Month of year encoded as value between [-0.5, 0.5]"""
  39. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  40. return (index.month - 1) / 11.0 - 0.5
  41. class WeekOfYear(TimeFeature):
  42. """Week of year encoded as value between [-0.5, 0.5]"""
  43. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  44. return (index.week - 1) / 52.0 - 0.5
  45. def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
  46. """
  47. Returns a list of time features that will be appropriate for the given frequency string.
  48. Parameters
  49. ----------
  50. freq_str
  51. Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
  52. """
  53. features_by_offsets = {
  54. offsets.YearEnd: [],
  55. offsets.QuarterEnd: [MonthOfYear],
  56. offsets.MonthEnd: [MonthOfYear],
  57. offsets.Week: [DayOfMonth, WeekOfYear],
  58. offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
  59. offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
  60. offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
  61. offsets.Minute: [
  62. MinuteOfHour,
  63. HourOfDay,
  64. DayOfWeek,
  65. DayOfMonth,
  66. DayOfYear,
  67. ],
  68. offsets.Second: [
  69. SecondOfMinute,
  70. MinuteOfHour,
  71. HourOfDay,
  72. DayOfWeek,
  73. DayOfMonth,
  74. DayOfYear,
  75. ],
  76. }
  77. offset = to_offset(freq_str)
  78. for offset_type, feature_classes in features_by_offsets.items():
  79. if isinstance(offset, offset_type):
  80. return [cls() for cls in feature_classes]
  81. supported_freq_msg = f"""
  82. Unsupported frequency {freq_str}
  83. The following frequencies are supported:
  84. Y - yearly
  85. alias: A
  86. M - monthly
  87. W - weekly
  88. D - daily
  89. B - business days
  90. H - hourly
  91. T - minutely
  92. alias: min
  93. S - secondly
  94. """
  95. raise RuntimeError(supported_freq_msg)
  96. def time_features(dates, timeenc=1, freq='h'):
  97. """
  98. > `time_features` takes in a `dates` dataframe with a 'dates' column and extracts the date down to `freq` where freq can be any of the following if `timeenc` is 0:
  99. > * m - [month]
  100. > * w - [month]
  101. > * d - [month, day, weekday]
  102. > * b - [month, day, weekday]
  103. > * h - [month, day, weekday, hour]
  104. > * t - [month, day, weekday, hour, *minute]
  105. >
  106. > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]):
  107. > * Q - [month]
  108. > * M - [month]
  109. > * W - [Day of month, week of year]
  110. > * D - [Day of week, day of month, day of year]
  111. > * B - [Day of week, day of month, day of year]
  112. > * H - [Hour of day, day of week, day of month, day of year]
  113. > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]
  114. > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]
  115. *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.
  116. """
  117. if timeenc==0:
  118. dates['month'] = dates.date.apply(lambda row:row.month,1)
  119. dates['day'] = dates.date.apply(lambda row:row.day,1)
  120. dates['weekday'] = dates.date.apply(lambda row:row.weekday(),1)
  121. dates['hour'] = dates.date.apply(lambda row:row.hour,1)
  122. dates['minute'] = dates.date.apply(lambda row:row.minute,1)
  123. dates['minute'] = dates.minute.map(lambda x:x//15)
  124. freq_map = {
  125. 'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'],
  126. 'b':['month','day','weekday'],'h':['month','day','weekday','hour'],
  127. 't':['month','day','weekday','hour','minute'],
  128. }
  129. return dates[freq_map[freq.lower()]].values
  130. if timeenc==1:
  131. dates = pd.to_datetime(dates.date.values)
  132. return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]).transpose(1,0)

基于MindSpore的多模态股票价格预测系统研究 Informer,LSTM,RNN