diff --git a/RELEASE.md b/RELEASE.md index e90906f..281c0cc 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,26 @@ +# Release 0.5.0-beta + +## Major Features and Improvements + +### Differential privacy model training + +* Optimizers with differential privacy + + * Differential privacy model training now supports both Pynative mode and graph mode. + + * Graph mode is recommended for its performance. + +## Bugfixes + +## Contributors + +Thanks goes to these wonderful people: + +Liu Liu, Huanhuan Zheng, Xiulang Jin, Zhidan Liu. + +Contributions of any kind are welcome! + + # Release 0.3.0-alpha ## Major Features and Improvements diff --git a/mindarmour/attacks/iterative_gradient_method.py b/mindarmour/attacks/iterative_gradient_method.py index 9a212b2..2b6ffc9 100644 --- a/mindarmour/attacks/iterative_gradient_method.py +++ b/mindarmour/attacks/iterative_gradient_method.py @@ -476,7 +476,6 @@ class DiverseInputIterativeMethod(BasicIterativeMethod): is_targeted=is_targeted, nb_iter=nb_iter, loss_fn=loss_fn) - # FGSM default alpha is None equal alpha=1 self.prob = check_param_type('prob', prob, float) diff --git a/mindarmour/diff_privacy/mechanisms/mechanisms.py b/mindarmour/diff_privacy/mechanisms/mechanisms.py index 1a4cc6a..b7a6cd1 100644 --- a/mindarmour/diff_privacy/mechanisms/mechanisms.py +++ b/mindarmour/diff_privacy/mechanisms/mechanisms.py @@ -19,7 +19,6 @@ from abc import abstractmethod from mindspore import Tensor from mindspore.nn import Cell from mindspore.ops import operations as P -from mindspore.ops import functional as F from mindspore.common.parameter import Parameter from mindspore.common import dtype as mstype @@ -124,6 +123,8 @@ class GaussianRandom(Mechanisms): seed(int): Original random seed, if seed=0 random normal will use secure random number. IF seed!=0 random normal will generate values using given seed. Default: 0. + policy(str): Mechanisms parameters update policy. Default: None, no + parameters need update. 
class _MechanismsParamsUpdater(Cell):
    """
    Update a mechanism's parameter in place during training.

    The cell holds references to the mechanism's current and initial
    parameter values and, each time it is called, assigns the next value to
    the current parameter according to the given policy.

    Args:
        policy(str): Passed in by the mechanisms class, the parameter update
            policy. 'Time' selects the time-based decay; any other value
            selects the multiplicative (step) decay.
        decay_rate(Tensor): Passed in by the mechanisms class, hyper parameter
            controlling the decay size.
        cur_params(Parameter): Passed in by the mechanisms class, the current
            parameter value; this is the object that gets overwritten.
        init_params(Parameter): Passed in by the mechanisms class, the initial
            parameter value used by the 'Time' policy.

    Returns:
        The result of assigning the next value to `cur_params`.
    """
    def __init__(self, policy, decay_rate, cur_params, init_params):
        super(_MechanismsParamsUpdater, self).__init__()
        self._policy = policy
        self._decay_rate = decay_rate
        self._cur_params = cur_params
        self._init_params = init_params

        # Fixed: this was bound to P.Sub(), which turned the 'Time' policy
        # into `cur - decay_rate` instead of the intended division formula.
        self._div = P.Div()
        self._add = P.TensorAdd()
        self._assign = P.Assign()
        self._sub = P.Sub()
        self._one = Tensor(1, mstype.float32)
        self._mul = P.Mul()

    def construct(self):
        """
        Compute the next parameter value and assign it to `self._cur_params`.

        Returns:
            The result of the assign operation on `self._cur_params`.
        """
        if self._policy == 'Time':
            # next = init / (init / cur + decay_rate)
            temp = self._div(self._init_params, self._cur_params)
            temp = self._add(temp, self._decay_rate)
            next_params = self._assign(self._cur_params, self._div(self._init_params, temp))
        else:
            # next = (1 - decay_rate) * cur
            temp = self._sub(self._one, self._decay_rate)
            next_params = self._assign(self._cur_params, self._mul(temp, self._cur_params))
        return next_params
mindarmour.utils._check_param import check_int_positive +LOGGER = LogUtil.get_instance() +TAG = 'DP optimizer' + _grad_scale = C.MultitypeFuncGraph("grad_scale") _reciprocal = P.Reciprocal() @@ -97,7 +102,9 @@ class DPOptimizerClassFactory: if policy == 'Adam': cls = self._get_dp_optimizer_class(nn.Adam, self.mech, self._micro_batches, *args, **kwargs) return cls - raise NameError("The {} is not implement, please choose ['SGD', 'Momentum', 'Adam']".format(policy)) + msg = "The {} is not implement, please choose ['SGD', 'Momentum', 'Adam']".format(policy) + LOGGER.error(TAG, msg) + raise NameError(msg) def _get_dp_optimizer_class(self, cls, mech, micro_batches): """ @@ -119,6 +126,14 @@ class DPOptimizerClassFactory: self._hyper_map = C.HyperMap() self._micro_float = Tensor(micro_batches, mstype.float32) + self._mech_param_updater = None + if self._mech is not None and self._mech._decay_policy is not None: + self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy, + decay_rate=self._mech._noise_decay_rate, + cur_params=self._mech._noise_multiplier, + init_params= + self._mech._initial_noise_multiplier) + def construct(self, gradients): """ construct a compute flow. 
@@ -126,6 +141,10 @@ class DPOptimizerClassFactory: grad_noise = self._hyper_map(self._mech, gradients) grads = self._tuple_add(gradients, grad_noise) grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads) + # update mech parameters + if self._mech_param_updater is not None: + multiplier = self._mech_param_updater() + grads = F.depend(grads, multiplier) gradients = super(DPOptimizer, self).construct(grads) return gradients diff --git a/mindarmour/diff_privacy/train/model.py b/mindarmour/diff_privacy/train/model.py index a915922..40d130a 100644 --- a/mindarmour/diff_privacy/train/model.py +++ b/mindarmour/diff_privacy/train/model.py @@ -47,10 +47,15 @@ from mindspore.nn.wrap.loss_scale import _grad_overflow from mindspore.nn import Cell from mindspore import ParameterTuple +from mindarmour.utils.logger import LogUtil +from mindarmour.diff_privacy.mechanisms.mechanisms import _MechanismsParamsUpdater from mindarmour.utils._check_param import check_param_type from mindarmour.utils._check_param import check_value_positive from mindarmour.utils._check_param import check_int_positive +LOGGER = LogUtil.get_instance() +TAG = 'DP model' + GRADIENT_CLIP_TYPE = 1 _grad_scale = C.MultitypeFuncGraph("grad_scale") _reciprocal = P.Reciprocal() @@ -105,13 +110,19 @@ class DPModel(Model): norm_clip = check_param_type('norm_clip', norm_clip, float) self._norm_clip = check_value_positive('norm_clip', norm_clip) if mech is not None and "DPOptimizer" in kwargs['optimizer'].__class__.__name__: - raise ValueError('DPOptimizer is not supported while mech is not None') + msg = 'DPOptimizer is not supported while mech is not None' + LOGGER.error(TAG, msg) + raise ValueError(msg) if mech is None: if "DPOptimizer" in kwargs['optimizer'].__class__.__name__: if context.get_context('mode') != context.PYNATIVE_MODE: - raise ValueError('DPOptimizer just support pynative mode currently.') + msg = 'DPOptimizer just support pynative mode currently.' 
+ LOGGER.error(TAG, msg) + raise ValueError(msg) else: - raise ValueError('DPModel should set mech or DPOptimizer configure, please refer to example.') + msg = 'DPModel should set mech or DPOptimizer configure, please refer to example.' + LOGGER.error(TAG, msg) + raise ValueError(msg) self._mech = mech super(DPModel, self).__init__(**kwargs) @@ -163,10 +174,11 @@ class DPModel(Model): if update_cell is not None: # only cpu not support `TrainOneStepWithLossScaleCell` for control flow. if not context.get_context("enable_ge") and context.get_context("device_target") == "CPU": - raise ValueError("Only `loss_scale_manager=None` and " - "`loss_scale_manager=FixedLossScaleManager(drop_overflow_update=False)`" - "are supported in current version. If you use `O2` option, please" - "use `loss_scale_manager=None` or `FixedLossScaleManager`") + msg = "Only `loss_scale_manager=None` and `loss_scale_manager=FixedLossScaleManager(drop_overflow" \ + "_update=False)` are supported in current version. If you use `O2` option, please use " \ + "`loss_scale_manager=None` or `FixedLossScaleManager`" + LOGGER.error(TAG, msg) + raise ValueError(msg) network = _TrainOneStepWithLossScaleCell(network, optimizer, scale_update_cell=update_cell, @@ -174,6 +186,7 @@ class DPModel(Model): norm_clip=self._norm_clip, mech=self._mech).set_train() return network + network = _TrainOneStepCell(network, optimizer, loss_scale, @@ -182,47 +195,48 @@ class DPModel(Model): mech=self._mech).set_train() return network - def _build_train_network(self): - """Build train network""" - network = self._network - if self._micro_batches: - if self._optimizer: - if self._loss_scale_manager_set: - network = self._amp_build_train_network(network, - self._optimizer, - self._loss_fn, - level=self._amp_level, - loss_scale_manager=self._loss_scale_manager, - keep_batchnorm_fp32=self._keep_bn_fp32) - else: - network = self._amp_build_train_network(network, - self._optimizer, - self._loss_fn, - level=self._amp_level, - 
keep_batchnorm_fp32=self._keep_bn_fp32) - elif self._loss_fn: - network = nn.WithLossCell(network, self._loss_fn) - else: - if self._optimizer: - if self._loss_scale_manager_set: - network = amp.build_train_network(network, - self._optimizer, - self._loss_fn, - level=self._amp_level, - loss_scale_manager=self._loss_scale_manager, - keep_batchnorm_fp32=self._keep_bn_fp32) - else: - network = amp.build_train_network(network, - self._optimizer, - self._loss_fn, - level=self._amp_level, - keep_batchnorm_fp32=self._keep_bn_fp32) - elif self._loss_fn: - network = nn.WithLossCell(network, self._loss_fn) - - if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL): - network.set_auto_parallel() - return network + +def _build_train_network(self): + """Build train network""" + network = self._network + if self._micro_batches: + if self._optimizer: + if self._loss_scale_manager_set: + network = self._amp_build_train_network(network, + self._optimizer, + self._loss_fn, + level=self._amp_level, + loss_scale_manager=self._loss_scale_manager, + keep_batchnorm_fp32=self._keep_bn_fp32) + else: + network = self._amp_build_train_network(network, + self._optimizer, + self._loss_fn, + level=self._amp_level, + keep_batchnorm_fp32=self._keep_bn_fp32) + elif self._loss_fn: + network = nn.WithLossCell(network, self._loss_fn) + else: + if self._optimizer: + if self._loss_scale_manager_set: + network = amp.build_train_network(network, + self._optimizer, + self._loss_fn, + level=self._amp_level, + loss_scale_manager=self._loss_scale_manager, + keep_batchnorm_fp32=self._keep_bn_fp32) + else: + network = amp.build_train_network(network, + self._optimizer, + self._loss_fn, + level=self._amp_level, + keep_batchnorm_fp32=self._keep_bn_fp32) + elif self._loss_fn: + network = nn.WithLossCell(network, self._loss_fn) + + if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL): + network.set_auto_parallel() + return network class 
_ClipGradients(nn.Cell): @@ -358,6 +372,13 @@ class _TrainOneStepWithLossScaleCell(Cell): self._hyper_map = C.HyperMap() self._micro_float = Tensor(micro_batches, mstype.float32) + self._mech_param_updater = None + if self._mech is not None and self._mech._decay_policy is not None: + self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy, + decay_rate=self._mech._noise_decay_rate, + cur_params=self._mech._noise_multiplier, + init_params=self._mech._initial_noise_multiplier) + def construct(self, data, label, sens=None): """ construct a compute flow. @@ -380,14 +401,14 @@ class _TrainOneStepWithLossScaleCell(Cell): record_labels = self._split(label) # first index loss = self.network(record_datas[0], record_labels[0]) - scaling_sens_filled = C.ones_like(loss)*F.cast(scaling_sens, F.dtype(loss)) + scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss)) record_grad = self.grad(self.network, weights)(record_datas[0], record_labels[0], scaling_sens_filled) record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm) grads = record_grad total_loss = loss for i in range(1, self._micro_batches): loss = self.network(record_datas[i], record_labels[i]) - scaling_sens_filled = C.ones_like(loss)*F.cast(scaling_sens, F.dtype(loss)) + scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss)) record_grad = self.grad(self.network, weights)(record_datas[i], record_labels[i], scaling_sens_filled) record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm) grads = self._tuple_add(grads, record_grad) @@ -398,6 +419,10 @@ class _TrainOneStepWithLossScaleCell(Cell): grad_noise = self._hyper_map(self._mech, grads) grads = self._tuple_add(grads, grad_noise) grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads) + # update mech parameters + if self._mech_param_updater is not None: + multiplier = self._mech_param_updater() + loss = F.depend(loss, 
multiplier) grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads) # apply grad reducer on grads @@ -474,6 +499,10 @@ class _TrainOneStepCell(Cell): self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) # dp params + if micro_batches is None: + msg = 'micro_batches must give in differential privacy, but got value: {}'.format(micro_batches) + LOGGER.error(TAG, msg) + raise ValueError(msg) self._micro_batches = micro_batches norm_clip = check_param_type('norm_clip', norm_clip, float) self._l2_norm = check_value_positive('norm_clip', norm_clip) @@ -484,6 +513,13 @@ class _TrainOneStepCell(Cell): self._hyper_map = C.HyperMap() self._micro_float = Tensor(micro_batches, mstype.float32) + self._mech_param_updater = None + if self._mech is not None and self._mech._decay_policy is not None: + self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy, + decay_rate=self._mech._noise_decay_rate, + cur_params=self._mech._noise_multiplier, + init_params=self._mech._initial_noise_multiplier) + def construct(self, data, label): """ construct a compute flow. 
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_card
@pytest.mark.component_mindarmour
def test_dp_model_with_graph_mode_ada_gaussian():
    """
    Train LeNet5 for one epoch through DPModel in graph mode on Ascend,
    using the 'AdaGaussian' mechanism and two micro-batches.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

    # Hyper-parameters of the run.
    clip_bound = 1.0
    noise_multiplier = 0.01
    samples_per_batch = 32
    num_batches = 128
    num_epochs = 1

    # Objects are created in the same order as before: network, loss,
    # mechanism, optimizer, model.
    net = LeNet5()
    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    noise_mech = MechanismsFactory().create(
        'AdaGaussian',
        norm_bound=clip_bound,
        initial_noise_multiplier=noise_multiplier)
    optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    dp_model = DPModel(
        micro_batches=2,
        norm_clip=clip_bound,
        mech=noise_mech,
        network=net,
        loss_fn=loss_fn,
        optimizer=optimizer,
        metrics=None)

    train_set = ds.GeneratorDataset(
        dataset_generator(samples_per_batch, num_batches),
        ['data', 'label'])
    train_set.set_dataset_size(samples_per_batch * num_batches)
    dp_model.train(num_epochs, train_set, dataset_sink_mode=False)