You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

importances.py 1.6 kB

12345678910111213141516171819202122232425262728293031323334353637
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Utils for calculating importance."""
  16. import numpy as np
  17. from mindinsight.optimizer.common.exceptions import SamplesNotEnoughError, CorrelationNanError
  18. from mindinsight.optimizer.common.log import logger
  def calc_hyper_param_importance(df, hyper_param, target):
      """
      Score a hyper parameter by the strength of its correlation with a target.

      Rows in which either of the two columns has a missing value are dropped
      before the correlation is computed.

      Args:
          df: Table holding both columns; presumably a pandas DataFrame
              (it must support ``df[[a, b]]`` selection and ``dropna``).
          hyper_param: Label of the hyper parameter column in ``df``.
          target: Label of the target column in ``df``.

      Returns:
          The absolute value of the correlation between the two columns.

      Raises:
          SamplesNotEnoughError: If two or fewer complete rows remain.
          CorrelationNanError: If the computed correlation is NaN.
      """
      logger.debug("Calculating importance for hyper_param %s, target is %s.", hyper_param, target)

      # Keep only the rows where both the hyper parameter and the target are present.
      paired = df[[hyper_param, target]].dropna()

      # At least three complete samples are required before the correlation
      # coefficient is considered usable.
      if len(paired) < 3:
          raise SamplesNotEnoughError("Number of samples is less or equal than 2.")

      target_values = paired[target]
      param_values = paired[hyper_param]
      coefficient = target_values.corr(param_values)

      # Only the magnitude matters for importance; the sign is discarded.
      if not np.isnan(coefficient):
          return abs(coefficient)

      logger.warning("Correlation is nan!")
      raise CorrelationNanError("Correlation is nan!")