You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

histogram_container.py 8.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Histogram data container."""
  16. import math
  17. from mindinsight.datavisual.proto_files.mindinsight_summary_pb2 import Summary
  18. from mindinsight.utils.exceptions import ParamValueError
  19. from mindinsight.datavisual.utils.utils import calc_histogram_bins
  20. def _mask_invalid_number(num):
  21. """Mask invalid number to 0."""
  22. if math.isnan(num) or math.isinf(num):
  23. return type(num)(0)
  24. return num
  25. class Bucket:
  26. """
  27. Bucket data class.
  28. Args:
  29. left (double): Left edge of the histogram bucket.
  30. width (double): Width of the histogram bucket.
  31. count (int): Count of numbers fallen in the histogram bucket.
  32. """
  33. def __init__(self, left, width, count):
  34. self._left = left
  35. self._width = width
  36. self._count = count
  37. @property
  38. def left(self):
  39. """Gets left edge of the histogram bucket."""
  40. return self._left
  41. @property
  42. def count(self):
  43. """Gets count of numbers fallen in the histogram bucket."""
  44. return self._count
  45. @property
  46. def width(self):
  47. """Gets width of the histogram bucket."""
  48. return self._width
  49. @property
  50. def right(self):
  51. """Gets right edge of the histogram bucket."""
  52. return self._left + self._width
  53. def as_tuple(self):
  54. """Gets the bucket as tuple."""
  55. return self._left, self._width, self._count
  56. def __repr__(self):
  57. """Returns repr(self)."""
  58. return "Bucket(left={}, width={}, count={})".format(self._left, self._width, self._count)
  59. class HistogramContainer:
  60. """
  61. Histogram data container.
  62. Args:
  63. histogram_message (Summary.Histogram): Histogram message in summary file.
  64. """
  65. def __init__(self, histogram_message: Summary.Histogram):
  66. self._msg = histogram_message
  67. original_buckets = [Bucket(bucket.left, bucket.width, bucket.count) for bucket in self._msg.buckets]
  68. # Ensure buckets are sorted from min to max.
  69. original_buckets.sort(key=lambda bucket: bucket.left)
  70. self._original_buckets = tuple(original_buckets)
  71. self._count = sum(bucket.count for bucket in self._original_buckets)
  72. self._max = _mask_invalid_number(histogram_message.max)
  73. self._min = _mask_invalid_number(histogram_message.min)
  74. self._visual_max = self._max
  75. self._visual_min = self._min
  76. # default bin number
  77. self._visual_bins = calc_histogram_bins(self._count)
  78. # Note that tuple is immutable, so sharing tuple is often safe.
  79. self._re_sampled_buckets = ()
  80. @property
  81. def max(self):
  82. """Gets max value of the tensor."""
  83. return self._max
  84. @property
  85. def min(self):
  86. """Gets min value of the tensor."""
  87. return self._min
  88. @property
  89. def count(self):
  90. """Gets valid number count of the tensor."""
  91. return self._count
  92. @property
  93. def original_msg(self):
  94. """Gets original proto message."""
  95. return self._msg
  96. def set_visual_range(self, max_val: float, min_val: float, bins: int) -> None:
  97. """
  98. Sets visual range for later re-sampling.
  99. It's caller's duty to ensure input is valid.
  100. Args:
  101. max_val (float): Max value for visual histogram.
  102. min_val (float): Min value for visual histogram.
  103. bins (int): Bins number for visual histogram.
  104. """
  105. if max_val < min_val:
  106. raise ParamValueError(
  107. "Invalid input. max_val({}) is less or equal than min_val({}).".format(max_val, min_val))
  108. if bins < 1:
  109. raise ParamValueError("Invalid input bins({}). Must be greater than 0.".format(bins))
  110. self._visual_max = max_val
  111. self._visual_min = min_val
  112. self._visual_bins = bins
  113. # mark _re_sampled_buckets to empty
  114. self._re_sampled_buckets = ()
  115. def _calc_intersection_len(self, max1, min1, max2, min2):
  116. """Calculates intersection length of [min1, max1] and [min2, max2]."""
  117. if max1 < min1:
  118. raise ParamValueError(
  119. "Invalid input. max1({}) is less than min1({}).".format(max1, min1))
  120. if max2 < min2:
  121. raise ParamValueError(
  122. "Invalid input. max2({}) is less than min2({}).".format(max2, min2))
  123. if min1 <= min2:
  124. if max1 <= min2:
  125. # return value must be calculated by max1.__sub__
  126. return max1 - max1
  127. if max1 <= max2:
  128. return max1 - min2
  129. # max1 > max2
  130. return max2 - min2
  131. # min1 > min2
  132. if max2 <= min1:
  133. return max2 - max2
  134. if max2 <= max1:
  135. return max2 - min1
  136. return max1 - min1
  137. def _re_sample_buckets(self):
  138. """Re-samples buckets according to visual_max, visual_min and visual_bins."""
  139. if self._visual_max == self._visual_min:
  140. # Adjust visual range if max equals min.
  141. self._visual_max += 0.5
  142. self._visual_min -= 0.5
  143. width = (self._visual_max - self._visual_min) / self._visual_bins
  144. if not self.count:
  145. self._re_sampled_buckets = tuple(
  146. Bucket(self._visual_min + width * i, width, 0)
  147. for i in range(self._visual_bins))
  148. return
  149. re_sampled = []
  150. original_pos = 0
  151. original_bucket = self._original_buckets[original_pos]
  152. for i in range(self._visual_bins):
  153. cur_left = self._visual_min + width * i
  154. cur_right = cur_left + width
  155. cur_estimated_count = 0.0
  156. # Skip no bucket range.
  157. if cur_right <= original_bucket.left:
  158. re_sampled.append(Bucket(cur_left, width, math.ceil(cur_estimated_count)))
  159. continue
  160. # Skip no intersect range.
  161. while cur_left >= original_bucket.right:
  162. original_pos += 1
  163. if original_pos >= len(self._original_buckets):
  164. break
  165. original_bucket = self._original_buckets[original_pos]
  166. # entering with this condition: cur_right > original_bucket.left and cur_left < original_bucket.right
  167. while True:
  168. if original_pos >= len(self._original_buckets):
  169. break
  170. original_bucket = self._original_buckets[original_pos]
  171. intersection = self._calc_intersection_len(
  172. min1=cur_left, max1=cur_right,
  173. min2=original_bucket.left, max2=original_bucket.right)
  174. estimated_count = (intersection / original_bucket.width) * original_bucket.count
  175. cur_estimated_count += estimated_count
  176. if cur_right > original_bucket.right:
  177. # Need to sample next original bucket to this visual bucket.
  178. original_pos += 1
  179. else:
  180. # Current visual bucket has taken all intersect buckets into account.
  181. break
  182. re_sampled.append(Bucket(cur_left, width, math.ceil(cur_estimated_count)))
  183. self._re_sampled_buckets = tuple(re_sampled)
  184. def buckets(self, convert_to_tuple=True):
  185. """
  186. Get visual buckets instead of original buckets.
  187. Args:
  188. convert_to_tuple (bool): Whether convert bucket object to tuple.
  189. Returns:
  190. tuple, contains buckets.
  191. """
  192. if not self._re_sampled_buckets:
  193. self._re_sample_buckets()
  194. if not convert_to_tuple:
  195. return self._re_sampled_buckets
  196. return tuple(bucket.as_tuple() for bucket in self._re_sampled_buckets)

MindInsight为MindSpore提供了简单易用的调优调试能力。在训练过程中,可以将标量、张量、图像、计算图、模型超参、训练耗时等数据记录到文件中,通过MindInsight可视化页面进行查看及分析。