hummingbird
/
mindarmour

 
			
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Attacker of Membership Inference.
"""
import warnings

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.exceptions import ConvergenceWarning

from mindarmour.utils.logger import LogUtil

# Module-level logger obtained from LogUtil; used below to report errors.
LOGGER = LogUtil.get_instance()
# Tag passed as the first argument of LOGGER calls made from this module.
TAG = "Attacker"


def _attack_knn(features, labels, param_grid, n_jobs):
    """
    Fit a k-nearest-neighbors attack model via grid search.

    Args:
        features (numpy.ndarray): Loss and logits characteristics of each sample.
        labels (numpy.ndarray): Labels of each sample whether belongs to training set.
        param_grid (dict): Parameter grid handed to GridSearchCV.
        n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
            otherwise the value of n_jobs must be a positive integer.

    Returns:
        sklearn.model_selection.GridSearchCV, the fitted search object.
    """
    # cv=3 asks the search for 3 cross-validation folds per candidate.
    search = GridSearchCV(
        estimator=KNeighborsClassifier(),
        param_grid=param_grid,
        n_jobs=n_jobs,
        cv=3,
        verbose=0,
    )
    search.fit(features, labels)
    return search


def _attack_lr(features, labels, param_grid, n_jobs):
    """
    Fit a logistic-regression attack model via grid search.

    Args:
        features (numpy.ndarray): Loss and logits characteristics of each sample.
        labels (numpy.ndarray): Labels of each sample whether belongs to training set.
        param_grid (dict): Parameter grid handed to GridSearchCV.
        n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
            otherwise the value of n_jobs must be a positive integer.

    Returns:
        sklearn.model_selection.GridSearchCV, the fitted search object.
    """
    # Base estimator settings; entries in param_grid override them per candidate.
    base_estimator = LogisticRegression(penalty="l2", C=1.0, max_iter=300)
    search = GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid,
        n_jobs=n_jobs,
        cv=3,
        verbose=0,
    )
    search.fit(features, labels)
    return search


def _attack_mlpc(features, labels, param_grid, n_jobs):
    """
    Fit a multi-layer-perceptron attack model via grid search.

    Args:
        features (numpy.ndarray): Loss and logits characteristics of each sample.
        labels (numpy.ndarray): Labels of each sample whether belongs to training set.
        param_grid (dict): Parameter grid handed to GridSearchCV.
        n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
            otherwise the value of n_jobs must be a positive integer.

    Returns:
        sklearn.model_selection.GridSearchCV, the fitted search object.
    """
    # The classifier is seeded (random_state=1) and capped at 300 iterations.
    base_estimator = MLPClassifier(max_iter=300, random_state=1)
    search = GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid,
        n_jobs=n_jobs,
        cv=3,
        verbose=0,
    )
    search.fit(features, labels)
    return search


def _attack_rf(features, labels, random_grid, n_jobs):
    """
    Fit a random-forest attack model via randomized search.

    Args:
        features (numpy.ndarray): Loss and logits characteristics of each sample.
        labels (numpy.ndarray): Labels of each sample whether belongs to training set.
        random_grid (dict): Parameter distributions handed to RandomizedSearchCV.
        n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
            otherwise the value of n_jobs must be a positive integer.

    Returns:
        sklearn.model_selection.RandomizedSearchCV, the fitted search object.
    """
    # The forest is seeded (random_state=0); no random_state is passed to the
    # search itself, which is asked for n_iter=7 sampled settings.
    base_estimator = RandomForestClassifier(random_state=0, max_depth=2)
    search = RandomizedSearchCV(
        estimator=base_estimator,
        param_distributions=random_grid,
        n_iter=7,
        n_jobs=n_jobs,
        cv=3,
        verbose=0,
    )
    search.fit(features, labels)
    return search


def get_attack_model(features, labels, config, n_jobs=-1):
    """
    Train the attack model selected by config and return it.

    Args:
        features (numpy.ndarray): Loss and logits characteristics of each sample.
        labels (numpy.ndarray): Labels of each sample whether belongs to training set.
        config (dict): Config of attacker, with key in ["method", "params"].
            The format is {"method": "knn", "params": {"n_neighbors": [3, 5, 7]}}.
            "method" is lower-cased and must be one of "knn", "lr", "mlp" or "rf";
            "params" is forwarded to the hyper-parameter search of that method and
            must stay within the range of its changeable parameters.
            Tips of params implement can be found in
            "https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html".
        n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
            otherwise the value of n_jobs must be a positive integer.

    Returns:
        sklearn.BaseEstimator, trained model specify by config["method"]
        (the fitted GridSearchCV, or RandomizedSearchCV for "rf").

    Raises:
        NameError: If config["method"] is not one of the supported methods.

    Examples:
        >>> features = np.random.randn(10, 10)
        >>> labels = np.random.randint(0, 2, 10)
        >>> config = {"method": "knn", "params": {"n_neighbors": [3, 5, 7]}}
        >>> attack_model = get_attack_model(features, labels, config)
    """
    # Supported method names mapped to the helper that trains them.
    trainers = {
        "knn": _attack_knn,
        "lr": _attack_lr,
        "mlp": _attack_mlpc,
        "rf": _attack_rf,
    }
    requested = str.lower(config["method"])

    # Convergence warnings raised while fitting candidates are suppressed only
    # for the duration of the training call.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        trainer = trainers.get(requested)
        if trainer is not None:
            return trainer(features, labels, config["params"], n_jobs)

    # Reached only when no trainer matched the requested method.
    msg = "Method {} is not supported.".format(config["method"])
    LOGGER.error(TAG, msg)
    raise NameError(msg)