Browse Source

fix concept drift feature

tags/v1.3.0
mindspore-ci-bot wuxiaoyu (F) 4 years ago
parent
commit
a82050a984
4 changed files with 49 additions and 60 deletions
  1. +0
    -1
      examples/reliability/concept_drift_time_series.py
  2. +10
    -7
      mindarmour/reliability/concept_drift/README.md
  3. +39
    -40
      mindarmour/reliability/concept_drift/concept_drift_check_time_series.py
  4. +0
    -12
      setup.py

+ 0
- 1
examples/reliability/concept_drift_time_series.py View File

@@ -34,7 +34,6 @@ Please choose one column or multiple columns.
import numpy as np import numpy as np
from mindarmour import ConceptDriftCheckTimeSeries from mindarmour import ConceptDriftCheckTimeSeries



# input data # input data
DATA_FILE = r'archive/individual_stocks_5yr/individual_stocks_5yr/AEE_data.csv' DATA_FILE = r'archive/individual_stocks_5yr/individual_stocks_5yr/AEE_data.csv'
data = np.loadtxt(DATA_FILE, str, delimiter=",") data = np.loadtxt(DATA_FILE, str, delimiter=",")


+ 10
- 7
mindarmour/reliability/concept_drift/README.md View File

@@ -53,11 +53,11 @@ concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10, step=1
need_label=False) need_label=False)
``` ```


>window_size(int): Size of a concept window, belongs to [10, 1/3*len(input data)]. If the data is periodic, usually window_size equals 2-5 periods, such as, for monthly/weekly data, the data volume of 30/7 days is a period. Default: 100.
>window_size(int): Size of a concept window, no less than 10. If given the input data, window_size belongs to [10, 1/3*len(input data)]. If the data is periodic, usually window_size equals 2-5 periods, such as, for monthly/weekly data, the data volume of 30/7 days is a period. Default: 100.
rolling_window(int): Smoothing window size, belongs to [1, window_size]. Default:10. rolling_window(int): Smoothing window size, belongs to [1, window_size]. Default:10.
step(int): The jump length of the sliding window, belongs to [1,window_size]. Default:10. step(int): The jump length of the sliding window, belongs to [1,window_size]. Default:10.
threshold_index(float): The threshold index, (-∞,+∞), Default: 1.5.
need_label(bool: False or True. If need_label=True, concept drift labels are needed. Default: False.
threshold_index(float): The threshold index. Default: 1.5.
need_label(bool): False or True. If need_label=True, concept drift labels are needed. Default: False.


### Data ### Data


@@ -74,19 +74,22 @@ data = data[1:, 2].astype('float64') # here we choose one column or multiple co


```python ```python
drift_score, threshold, concept_drift_location = concept.concept_check(data) drift_score, threshold, concept_drift_location = concept.concept_check(data)
# the result is saved as pdf named 'concept_drift_check.pdf'
``` ```


>drift_score(numpy.ndarray): The concept drift score of the example series. >drift_score(numpy.ndarray): The concept drift score of the example series.
threshold(float): The threshold to judge concept drift. threshold(float): The threshold to judge concept drift.
concept_drift_location(list): The location of the concept drift.
concept_drift_location(list): The location of the concept drift.



## Script Description ## Script Description


```python
```bash
├── mindarmour ├── mindarmour
├── reliability # descriptions about GhostNet # shell script for evaluation with CPU, GPU or Ascend ├── reliability # descriptions about GhostNet # shell script for evaluation with CPU, GPU or Ascend
├──concept_drift ├──concept_drift
├──concept_drift.py
├──readme.md
├──__init__.py
├──concept_drift_check_time_series.py
├──README.md
``` ```



+ 39
- 40
mindarmour/reliability/concept_drift/concept_drift_check_time_series.py View File

@@ -24,23 +24,24 @@ from mindarmour.utils._check_param import check_param_type, check_param_in_range


class ConceptDriftCheckTimeSeries: class ConceptDriftCheckTimeSeries:
""" """
Concept is used for example series distribution change detection.
ConceptDriftCheckTimeSeries is used for example series distribution change detection.


Args: Args:
window_size(int): Size of a concept window, belongs to [10, 1/3*len(input data)].
If the data is periodic, usually window_size equals 2-5 periods, such as,
for monthly/weekly data, the data volume of 30/7 days is a period. Default: 100.
window_size(int): Size of a concept window, no less than 10. If given the input data,
window_size belongs to [10, 1/3*len(input data)]. If the data is periodic, usually
window_size equals 2-5 periods, such as, for monthly/weekly data, the data volume
of 30/7 days is a period. Default: 100.
rolling_window(int): Smoothing window size, belongs to [1, window_size]. Default:10. rolling_window(int): Smoothing window size, belongs to [1, window_size]. Default:10.
step(int): The jump length of the sliding window, belongs to [1, window_size]. Default:10. step(int): The jump length of the sliding window, belongs to [1, window_size]. Default:10.
threshold_index(float): The threshold index, (-∞, +∞), Default: 1.5.
threshold_index(float): The threshold index, :math:`(-\infty, +\infty)`. Default: 1.5.
need_label(bool): False or True. If need_label=True, concept drift labels are needed. need_label(bool): False or True. If need_label=True, concept drift labels are needed.
Default: False. Default: False.


Examples: Examples:
>>> concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10, >>> concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10,
>>> step=10, threshold_index=1.5, need_label=False) >>> step=10, threshold_index=1.5, need_label=False)
>>> data_example = np.array([np.random.rand(1000),
>>> np.random.rand(1000),np.random.rand(1000)]).T
>>> data_example = 5*np.random.rand(1000)
>>> data_example[200: 800] = 20*np.random.rand(600)
>>> score, threshold, concept_drift_location = concept.concept_check(data_example) >>> score, threshold, concept_drift_location = concept.concept_check(data_example)
""" """


@@ -48,10 +49,7 @@ class ConceptDriftCheckTimeSeries:
step=10, threshold_index=1.5, need_label=False): step=10, threshold_index=1.5, need_label=False):
self.window_size = check_param_type('window_size', window_size, int) self.window_size = check_param_type('window_size', window_size, int)
self.rolling_window = check_param_type('rolling_window', rolling_window, int) self.rolling_window = check_param_type('rolling_window', rolling_window, int)
self.rolling_window = check_param_in_range('rolling_window',
rolling_window, 1, window_size)
self.step = check_param_type('step', step, int) self.step = check_param_type('step', step, int)
self.step = check_param_in_range('step', step, 1, window_size)
self.threshold_index = check_param_type('threshold_index', threshold_index, float) self.threshold_index = check_param_type('threshold_index', threshold_index, float)
self.need_label = check_param_type('need_label', need_label, bool) self.need_label = check_param_type('need_label', need_label, bool)
self._in_size = window_size self._in_size = window_size
@@ -66,8 +64,8 @@ class ConceptDriftCheckTimeSeries:
window_data(numpy.ndarray): The input data (in one window). window_data(numpy.ndarray): The input data (in one window).


Returns: Returns:
w_out(numpy.ndarray): The output weight of reservoir model.
x_state(numpy.ndarray): The state of the reservoir model in the latent space.
- numpy.ndarray, the output weight of reservoir model.
- numpy.ndarray, the state of the reservoir model in the latent space.


Examples: Examples:
>>> input_data = np.random.rand(100) >>> input_data = np.random.rand(100)
@@ -99,7 +97,7 @@ class ConceptDriftCheckTimeSeries:
data_y(numpy.ndarray): Data y. data_y(numpy.ndarray): Data y.


Returns: Returns:
distance_score_mean(float): Distance between data_x and data_y.
- float, distance between data_x and data_y.


Examples: Examples:
>>> x = np.random.rand(100) >>> x = np.random.rand(100)
@@ -125,7 +123,7 @@ class ConceptDriftCheckTimeSeries:
data(numpy.ndarray): Input data. data(numpy.ndarray): Input data.


Returns: Returns:
smooth_data(numpy.ndarray): Data after smoothing.
- numpy.ndarray, data after smoothing.


Examples: Examples:
>>> data_example = np.random.rand(100) >>> data_example = np.random.rand(100)
@@ -150,28 +148,30 @@ class ConceptDriftCheckTimeSeries:


def concept_check(self, data): def concept_check(self, data):
""" """
Find concept drift locations in a example series.
Find concept drift locations in a data series.


Args: Args:
data(numpy.ndarray): Input data. The shape of data could be (n,1) or (n,m). data(numpy.ndarray): Input data. The shape of data could be (n,1) or (n,m).
Note that each column (m columns) is one data series. Note that each column (m columns) is one data series.


Returns: Returns:
drift_score(numpy.ndarray): The concept drift score of the example series.
threshold(float): The threshold to judge concept drift.
concept_drift_location(list): The location of the concept drift.
- numpy.ndarray, the concept drift score of the example series.
- float, the threshold to judge concept drift.
- list, the location of the concept drift.


Examples: Examples:
>>> concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10, >>> concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10,
>>> step=10, threshold_index=1.5, need_label=False) >>> step=10, threshold_index=1.5, need_label=False)
>>> data_example = np.array([np.random.rand(1000),
>>> np.random.rand(1000), np.random.rand(1000)]).T
>>> score, drift_threshold, point = concept.concept_check(data_example)
>>> data_example = 5*np.random.rand(1000)
>>> data_example[200: 800] = 20*np.random.rand(600)
>>> score, drift_threshold, drift_location = concept.concept_check(data_example)
""" """
# data check # data check
data = _check_array_not_empty('data', data) data = _check_array_not_empty('data', data)
data = check_param_type('data', data, np.ndarray) data = check_param_type('data', data, np.ndarray)
check_param_in_range('window_size', self.window_size, 10, int((1 / 3)*len(data))) check_param_in_range('window_size', self.window_size, 10, int((1 / 3)*len(data)))
check_param_in_range('rolling_window', self.rolling_window, 1, self.window_size)
check_param_in_range('step', self.step, 1, self.window_size)
original_data = data original_data = data
data = self._data_process(data) data = self._data_process(data)
# calculate drift score # calculate drift score
@@ -190,39 +190,38 @@ class ConceptDriftCheckTimeSeries:
# find drift blocks # find drift blocks
concept_drift_location, drift_point = _drift_blocks(drift_score, concept_drift_location, drift_point = _drift_blocks(drift_score,
label_continue, label_location) label_continue, label_location)
# show result
_plot_show(original_data, threshold, concept_drift_location,
drift_point, drift_score)
# save result
_result_save(original_data, threshold, concept_drift_location, drift_point, drift_score)
return drift_score, threshold, concept_drift_location return drift_score, threshold, concept_drift_location




def _plot_show(original_data, threshold, concept_location, drift_point, drift_score):
def _result_save(original_data, threshold, concept_location, drift_point, drift_score):
""" """
To show the result.
To save the result.


Args: Args:
original_data(numpy.ndarray): The input data. original_data(numpy.ndarray): The input data.
threshold(float): The concept drift threshold. threshold(float): The concept drift threshold.
concept_location(list): The concept drift locations(x-axis). concept_location(list): The concept drift locations(x-axis).
drift_point(list): The precise drift location of a drift.
drift_point(list): The precise drift point of a drift.
drift_score(numpy.ndarray): The drift score of input data. drift_score(numpy.ndarray): The drift score of input data.
""" """
plt.figure(figsize=(20, 8)) plt.figure(figsize=(20, 8))
plt.subplot(2, 1, 1) plt.subplot(2, 1, 1)
# Plot input data and drift points # Plot input data and drift points
plt.plot(original_data, label="data") plt.plot(original_data, label="data")
plt.title('concept drift check, threshold=' + str(threshold))
plt.title('concept drift check, threshold=' + str(threshold), fontsize=25)
plt.scatter(concept_location, np.ones(len(concept_location)), plt.scatter(concept_location, np.ones(len(concept_location)),
marker='*', s=200, color="b", label="concept drift occurred") marker='*', s=200, color="b", label="concept drift occurred")
for _, i in enumerate(drift_point): for _, i in enumerate(drift_point):
plt.axvline(x=i, color='r', linestyle='--') plt.axvline(x=i, color='r', linestyle='--')
plt.legend()
plt.legend(fontsize=15)
plt.subplot(2, 1, 2) plt.subplot(2, 1, 2)
# Plot drift score # Plot drift score
plt.plot(drift_score, label="drift_score") plt.plot(drift_score, label="drift_score")
plt.axhline(y=threshold, color='r', linestyle='--', label="threshold") plt.axhline(y=threshold, color='r', linestyle='--', label="threshold")
plt.legend()
plt.show()
plt.legend(fontsize=15)
plt.savefig('concept_drift_check.pdf')




def _original_label(original_data, threshold, drift_score, window_size, step_size): def _original_label(original_data, threshold, drift_score, window_size, step_size):
@@ -238,9 +237,9 @@ def _original_label(original_data, threshold, drift_score, window_size, step_siz
step_size(int): The jump length of the sliding window. step_size(int): The jump length of the sliding window.


Returns: Returns:
label(list): The drift label of input data.
- list, the drift label of input data.
0 means no drift, and 1 means drift happens. 0 means no drift, and 1 means drift happens.
label_location(list): The locations of drifts(x-axis).
- list, the locations of drifts(x-axis).
""" """
label = [] label = []
label_location = [] label_location = []
@@ -262,7 +261,7 @@ def _label_continue_process(label):
label(list): The original drift label. label(list): The original drift label.


Returns: Returns:
label_continue(numpy.ndarray): The continual drift label.
- numpy.ndarray, The continual drift label.
The drift may happen occasionally, we hope to avoid The drift may happen occasionally, we hope to avoid
frequent alarms, so label continue process is necessary. frequent alarms, so label continue process is necessary.
""" """
@@ -290,7 +289,7 @@ def _continue_block(location):
location(numpy.ndarray): The locations of concept drift. location(numpy.ndarray): The locations of concept drift.


Returns: Returns:
area(list): Continue blocks of concept drift.
- list, continue blocks of concept drift.
""" """
area = [] area = []
for _, loc in groupby(enumerate(location), _find_loc): for _, loc in groupby(enumerate(location), _find_loc):
@@ -309,8 +308,8 @@ def _drift_blocks(drift_score, label_continue, label_location):
label_location(numpy.ndarray): The locations of concept drift(x-axis). label_location(numpy.ndarray): The locations of concept drift(x-axis).


Returns: Returns:
concept_location(list): The concept drift locations(x-axis) after continual blocks finding.
drift_point(list): Return a precise beginning location of a drift.
- list, the concept drift locations(x-axis) after continual blocks finding.
- list, return a precise beginning location of a drift.
""" """
# Find drift blocks # Find drift blocks
area = _continue_block(np.where(label_continue == 1)[0]) area = _continue_block(np.where(label_continue == 1)[0])
@@ -341,7 +340,7 @@ def _w_generate(res_size, in_size, input_data):
input_data(numpy.ndarray): Input data. input_data(numpy.ndarray): Input data.


Returns: Returns:
x_state(numpy.ndarray): The state of reservoir.
- numpy.ndarray, the state of reservoir.
""" """
# Weight generates randomly # Weight generates randomly
np.random.seed(42) np.random.seed(42)
@@ -373,7 +372,7 @@ def _cal_distance(matrix1, matrix2):
matrix2(numpy.ndarray): Input array. matrix2(numpy.ndarray): Input array.


Returns: Returns:
distance(numpy.ndarray): Distance between two arrays.
- numpy.ndarray, distance between two arrays.
""" """
w_mean_x = np.mean(matrix1, axis=0) w_mean_x = np.mean(matrix1, axis=0)
w_mean_y = np.mean(matrix2, axis=0) w_mean_y = np.mean(matrix2, axis=0)
@@ -390,7 +389,7 @@ def _cal_threshold(distance, threshold_index):
threshold_index(float): Threshold adjusted index, [-∞, +∞]. threshold_index(float): Threshold adjusted index, [-∞, +∞].


Returns: Returns:
threshold(float): [0, 1].
- float, [0, 1].
""" """
distance = distance[distance > 0] distance = distance[distance > 0]
# Threshold calculation # Threshold calculation


+ 0
- 12
setup.py View File

@@ -24,17 +24,6 @@ version = '1.2.0'
cur_dir = os.path.dirname(os.path.realpath(__file__)) cur_dir = os.path.dirname(os.path.realpath(__file__))
pkg_dir = os.path.join(cur_dir, 'build') pkg_dir = os.path.join(cur_dir, 'build')


try:
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel


class bdist_wheel(_bdist_wheel):
def finalize_options(self):
_bdist_wheel.finalize_options(self)
self.root_is_pure = False
except ImportError:
bdist_wheel = None



def write_version(file): def write_version(file):
file.write("__version__ = '{}'\n".format(version)) file.write("__version__ = '{}'\n".format(version))
@@ -122,7 +111,6 @@ setup(
cmdclass={ cmdclass={
'egg_info': EggInfo, 'egg_info': EggInfo,
'build_py': BuildPy, 'build_py': BuildPy,
'bdist_wheel': bdist_wheel
}, },
install_requires=[ install_requires=[
'scipy >= 1.5.3', 'scipy >= 1.5.3',


Loading…
Cancel
Save