# Time-series dataset classes: Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_Pred.
- import os
- import mindspore.dataset as ds
- import mindspore.dataset.transforms.c_transforms as C
- import mindspore.dataset.transforms.py_transforms as P
- import mindspore.dataset.vision.c_transforms as CV
- import mindspore.dataset.transforms.vision.py_transforms as VP
- from mindspore import Tensor
-
-
- from utils.tools import StandardScaler
- from utils.timefeatures import time_features
-
- import numpy as np
- import pandas as pd
-
- import warnings
- warnings.filterwarnings('ignore')
-
class Dataset_ETT_hour:
    """Sliding-window view over one split of an ETT hourly CSV file.

    The class is a plain random-access source: it only provides
    ``__getitem__`` and ``__len__``.  It deliberately has no base class,
    because no ``Dataset`` name is imported or defined in this module and
    inheriting from it would raise ``NameError`` at import time.

    Args:
        root_path: Directory that contains ``data_path``.
        flag: Which split to expose: ``'train'``, ``'val'`` or ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; when ``None`` the defaults
            ``24*4*4``, ``24*4`` and ``24*4`` are used.
        features: ``'M'`` or ``'MS'`` selects every column after the first
            one; ``'S'`` selects only the ``target`` column.
        data_path: CSV file name, joined onto ``root_path``.
        target: Name of the target column (used when ``features == 'S'``).
        scale: If true, values are passed through ``StandardScaler`` fitted
            on the training rows only.
        inverse: If true, ``data_y`` keeps the unscaled values.
        timeenc: Forwarded unchanged to ``time_features``.
        freq: Forwarded unchanged to ``time_features``.
        cols: Accepted but not used by this class.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='h', cols=None):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        # Map the split name onto an index into the border tables.
        type_map = {'train': 0, 'val': 1, 'test': 2}
        assert flag in type_map
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``.

        Raises:
            ValueError: If ``self.features`` is not ``'M'``, ``'MS'`` or ``'S'``.
        """
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Fixed row counts per split: 12*30*24 training rows, then 4*30*24
        # rows each for validation and test (presumably months x days x
        # hours -- confirm against the data file).
        train_rows = 12 * 30 * 24
        split_rows = 4 * 30 * 24
        # Validation/test start seq_len rows early so that the first window
        # of the split can take its input from the preceding rows.
        border1s = [0, train_rows - self.seq_len, train_rows + split_rows - self.seq_len]
        border2s = [train_rows, train_rows + split_rows, train_rows + 2 * split_rows]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            # Every column except the first one.
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                "features must be 'M', 'MS' or 'S', got {!r}".format(self.features))

        if self.scale:
            # Fit on the training rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # .copy() so the datetime conversion writes to an independent frame
        # instead of assigning into a slice of df_raw.
        df_stamp = df_raw[['date']][border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq)

        self.data_x = data[border1:border2]
        if self.inverse:
            # Targets stay in the original (unscaled) units.
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers rows ``[index, index + seq_len)``.  ``seq_y`` covers
        the last ``label_len`` rows of that span followed by the next
        ``pred_len`` rows.  The ``*_mark`` values are the matching slices of
        ``data_stamp``.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        if self.inverse:
            # Label part comes from data_x, prediction part from data_y.
            label_end = r_begin + self.label_len
            seq_y = np.concatenate(
                [self.data_x[r_begin:label_end], self.data_y[label_end:r_end]], 0)
        else:
            seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Return the number of complete windows (never negative)."""
        # len() rejects negative values, so a too-short split reports 0.
        return max(0, len(self.data_x) - self.seq_len - self.pred_len + 1)

    def inverse_transform(self, data):
        """Return ``self.scaler.inverse_transform(data)``."""
        return self.scaler.inverse_transform(data)
-
class Dataset_ETT_minute:
    """Sliding-window view over one split of an ETT minute-level CSV file.

    The class is a plain random-access source: it only provides
    ``__getitem__`` and ``__len__``.  It deliberately has no base class,
    because no ``Dataset`` name is imported or defined in this module and
    inheriting from it would raise ``NameError`` at import time.

    Args:
        root_path: Directory that contains ``data_path``.
        flag: Which split to expose: ``'train'``, ``'val'`` or ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; when ``None`` the defaults
            ``24*4*4``, ``24*4`` and ``24*4`` are used.
        features: ``'M'`` or ``'MS'`` selects every column after the first
            one; ``'S'`` selects only the ``target`` column.
        data_path: CSV file name, joined onto ``root_path``.
        target: Name of the target column (used when ``features == 'S'``).
        scale: If true, values are passed through ``StandardScaler`` fitted
            on the training rows only.
        inverse: If true, ``data_y`` keeps the unscaled values.
        timeenc: Forwarded unchanged to ``time_features``.
        freq: Forwarded unchanged to ``time_features``.
        cols: Accepted but not used by this class.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTm1.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='t', cols=None):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        # Map the split name onto an index into the border tables.
        type_map = {'train': 0, 'val': 1, 'test': 2}
        assert flag in type_map
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``.

        Raises:
            ValueError: If ``self.features`` is not ``'M'``, ``'MS'`` or ``'S'``.
        """
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Fixed row counts per split: 12*30*24*4 training rows, then
        # 4*30*24*4 rows each for validation and test (presumably four
        # samples per hour -- confirm against the data file).
        train_rows = 12 * 30 * 24 * 4
        split_rows = 4 * 30 * 24 * 4
        # Validation/test start seq_len rows early so that the first window
        # of the split can take its input from the preceding rows.
        border1s = [0, train_rows - self.seq_len, train_rows + split_rows - self.seq_len]
        border2s = [train_rows, train_rows + split_rows, train_rows + 2 * split_rows]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            # Every column except the first one.
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                "features must be 'M', 'MS' or 'S', got {!r}".format(self.features))

        if self.scale:
            # Fit on the training rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # .copy() so the datetime conversion writes to an independent frame
        # instead of assigning into a slice of df_raw.
        df_stamp = df_raw[['date']][border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq)

        self.data_x = data[border1:border2]
        if self.inverse:
            # Targets stay in the original (unscaled) units.
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers rows ``[index, index + seq_len)``.  ``seq_y`` covers
        the last ``label_len`` rows of that span followed by the next
        ``pred_len`` rows.  The ``*_mark`` values are the matching slices of
        ``data_stamp``.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        if self.inverse:
            # Label part comes from data_x, prediction part from data_y.
            label_end = r_begin + self.label_len
            seq_y = np.concatenate(
                [self.data_x[r_begin:label_end], self.data_y[label_end:r_end]], 0)
        else:
            seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Return the number of complete windows (never negative)."""
        # len() rejects negative values, so a too-short split reports 0.
        return max(0, len(self.data_x) - self.seq_len - self.pred_len + 1)

    def inverse_transform(self, data):
        """Return ``self.scaler.inverse_transform(data)``."""
        return self.scaler.inverse_transform(data)
-
-
class Dataset_Custom:
    """Sliding-window view over one split of an arbitrary CSV file.

    Rows are split 70% / remainder / 20% into train, validation and test.
    The class is a plain random-access source: it only provides
    ``__getitem__`` and ``__len__``.  It deliberately has no base class,
    because no ``Dataset`` name is imported or defined in this module and
    inheriting from it would raise ``NameError`` at import time.

    Args:
        root_path: Directory that contains ``data_path``.
        flag: Which split to expose: ``'train'``, ``'val'`` or ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; when ``None`` the defaults
            ``24*4*4``, ``24*4`` and ``24*4`` are used.
        features: ``'M'`` or ``'MS'`` selects every non-date column;
            ``'S'`` selects only the ``target`` column.
        data_path: CSV file name, joined onto ``root_path``.
        target: Name of the target column; it is moved to the last position.
        scale: If true, values are passed through ``StandardScaler`` fitted
            on the training rows only.
        inverse: If true, ``data_y`` keeps the unscaled values.
        timeenc: Forwarded unchanged to ``time_features``.
        freq: Forwarded unchanged to ``time_features``.
        cols: Optional list of feature columns to keep.  ``target`` and
            ``'date'`` may be included or omitted; they are always placed
            last and first respectively.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='h', cols=None):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        # Map the split name onto an index into the border tables.
        type_map = {'train': 0, 'val': 1, 'test': 2}
        assert flag in type_map
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.cols = cols
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``.

        Raises:
            ValueError: If ``self.features`` is not ``'M'``, ``'MS'`` or
                ``'S'``, or (when ``cols`` is not given) if the CSV lacks
                the ``target`` or ``'date'`` column.
        """
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Reorder columns to: ['date', ...(other features), target feature]
        if self.cols:
            # Filter instead of list.remove() so that a cols list without
            # the target does not crash and one containing 'date' does not
            # yield a duplicated date column.
            cols = [c for c in self.cols if c not in (self.target, 'date')]
        else:
            cols = list(df_raw.columns)
            cols.remove(self.target)
            cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        total = len(df_raw)
        num_train = int(total * 0.7)
        num_test = int(total * 0.2)
        num_vali = total - num_train - num_test
        # Validation/test start seq_len rows early so that the first window
        # of the split can take its input from the preceding rows.
        border1s = [0, num_train - self.seq_len, total - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, total]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            # Every column except the leading 'date' column.
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                "features must be 'M', 'MS' or 'S', got {!r}".format(self.features))

        if self.scale:
            # Fit on the training rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # .copy() so the datetime conversion writes to an independent frame
        # instead of assigning into a slice of df_raw.
        df_stamp = df_raw[['date']][border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq)

        self.data_x = data[border1:border2]
        if self.inverse:
            # Targets stay in the original (unscaled) units.
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers rows ``[index, index + seq_len)``.  ``seq_y`` covers
        the last ``label_len`` rows of that span followed by the next
        ``pred_len`` rows.  The ``*_mark`` values are the matching slices of
        ``data_stamp``.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        if self.inverse:
            # Label part comes from data_x, prediction part from data_y.
            label_end = r_begin + self.label_len
            seq_y = np.concatenate(
                [self.data_x[r_begin:label_end], self.data_y[label_end:r_end]], 0)
        else:
            seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Return the number of complete windows (never negative)."""
        # len() rejects negative values, so a too-short split reports 0.
        return max(0, len(self.data_x) - self.seq_len - self.pred_len + 1)

    def inverse_transform(self, data):
        """Return ``self.scaler.inverse_transform(data)``."""
        return self.scaler.inverse_transform(data)
-
class Dataset_Pred:
    """Prediction-time view over the last ``seq_len`` rows of a CSV file.

    The timestamp features are extended ``pred_len`` steps past the last
    observed date so that ``seq_y_mark`` covers the horizon to be predicted.
    The class is a plain random-access source: it only provides
    ``__getitem__`` and ``__len__``.  It deliberately has no base class,
    because no ``Dataset`` name is imported or defined in this module and
    inheriting from it would raise ``NameError`` at import time.

    Args:
        root_path: Directory that contains ``data_path``.
        flag: Must be ``'pred'``.
        size: ``[seq_len, label_len, pred_len]``; when ``None`` the defaults
            ``24*4*4``, ``24*4`` and ``24*4`` are used.
        features: ``'M'`` or ``'MS'`` selects every non-date column;
            ``'S'`` selects only the ``target`` column.
        data_path: CSV file name, joined onto ``root_path``.
        target: Name of the target column; it is moved to the last position.
        scale: If true, values are passed through ``StandardScaler`` fitted
            on every row of the file.
        inverse: If true, ``data_y`` keeps the unscaled values.
        timeenc: Forwarded unchanged to ``time_features``.
        freq: Passed in full to ``pd.date_range``; only its last character
            is passed to ``time_features``.
        cols: Optional list of feature columns to keep.  ``target`` and
            ``'date'`` may be included or omitted; they are always placed
            last and first respectively.
    """

    def __init__(self, root_path, flag='pred', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['pred']

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.cols = cols
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``.

        Raises:
            ValueError: If ``self.features`` is not ``'M'``, ``'MS'`` or
                ``'S'``, or (when ``cols`` is not given) if the CSV lacks
                the ``target`` or ``'date'`` column.
        """
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Reorder columns to: ['date', ...(other features), target feature]
        if self.cols:
            # Filter instead of list.remove() so that a cols list without
            # the target does not crash and one containing 'date' does not
            # yield a duplicated date column.
            cols = [c for c in self.cols if c not in (self.target, 'date')]
        else:
            cols = list(df_raw.columns)
            cols.remove(self.target)
            cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        # Only the trailing seq_len rows are used as model input.
        border1 = len(df_raw) - self.seq_len
        border2 = len(df_raw)

        if self.features in ('M', 'MS'):
            # Every column except the leading 'date' column.
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                "features must be 'M', 'MS' or 'S', got {!r}".format(self.features))

        if self.scale:
            # Unlike the train/val/test datasets, the scaler is fitted on
            # every row of the file.
            self.scaler.fit(df_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # .copy() so the datetime conversion writes to an independent frame
        # instead of assigning into a slice of df_raw.
        tmp_stamp = df_raw[['date']][border1:border2].copy()
        tmp_stamp['date'] = pd.to_datetime(tmp_stamp['date'])
        # pred_len + 1 periods starting at the last observed timestamp; the
        # first entry duplicates that timestamp and is dropped below.
        pred_dates = pd.date_range(tmp_stamp.date.values[-1],
                                   periods=self.pred_len + 1, freq=self.freq)

        df_stamp = pd.DataFrame(
            {'date': list(tmp_stamp.date.values) + list(pred_dates[1:])})
        # NOTE(review): only the last character of freq is forwarded; with
        # the default '15min' that is 'n' -- confirm time_features accepts it.
        data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq[-1:])

        self.data_x = data[border1:border2]
        if self.inverse:
            # Targets stay in the original (unscaled) units.
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers rows ``[index, index + seq_len)``.  ``seq_y`` holds
        only the trailing ``label_len`` rows of that span (there are no
        observed values for the horizon), while ``seq_y_mark`` spans
        ``label_len + pred_len`` rows of ``data_stamp``.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        # With inverse set, the label rows are read from data_x rather
        # than from data_y.
        label_source = self.data_x if self.inverse else self.data_y
        seq_y = label_source[r_begin:r_begin + self.label_len]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Return the number of complete input windows (never negative)."""
        # len() rejects negative values, so a too-short file reports 0.
        return max(0, len(self.data_x) - self.seq_len + 1)

    def inverse_transform(self, data):
        """Return ``self.scaler.inverse_transform(data)``."""
        return self.scaler.inverse_transform(data)
|