Browse Source

!480 fix dsmi_get_device_utilization_rate

Merge pull request !480 from LiHongzhang/fix_unpack
tags/v0.6.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
582463f78b
1 changed file with 9 additions and 9 deletions
  1. +9
    -9
      mindinsight/sysmetric/collector/_collect_npu.py

+ 9
- 9
mindinsight/sysmetric/collector/_collect_npu.py View File

@@ -270,8 +270,7 @@ def dsmi_get_hbm_info(device_id):
} }




@_timeout(0.2, 0)
@_fallback_to_prev_result
@_timeout(0.2, -1)
def dsmi_get_device_utilization_rate(device_id, device_type): def dsmi_get_device_utilization_rate(device_id, device_type):
""" """
Get device utilization rate, %. Get device utilization rate, %.
@@ -282,13 +281,14 @@ def dsmi_get_device_utilization_rate(device_id, device_type):
device_id (int): The specific device id device_id (int): The specific device id
device_type (int): The device type, 1 for memory, 2 AI Core, 5 memory bandwidth, 6 HBM, 10 HBM bandwidth. device_type (int): The device type, 1 for memory, 2 AI Core, 5 memory bandwidth, 6 HBM, 10 HBM bandwidth.
Returns: Returns:
int, the utilization rate.
int, the utilization rate, returning -1 to indicate querying failed.
""" """
device_id = c_int(device_id) device_id = c_int(device_id)
device_type = c_int(device_type) device_type = c_int(device_type)
utilization_rate = c_uint() utilization_rate = c_uint()
success = _libsmicall(device_id, device_type, byref(utilization_rate))
return success, utilization_rate.value
if _libsmicall(device_id, device_type, byref(utilization_rate)):
return utilization_rate.value
return -1




@_fallback_to_prev_result @_fallback_to_prev_result
@@ -388,14 +388,14 @@ def _collect_one(device_id):
Raises: Raises:
RuntimeError, when querying dsmi returning non-zero. RuntimeError, when querying dsmi returning non-zero.
""" """
kb_to_mb, memory_threshold, success = 1024, 4, [True] * 7
kb_to_mb, memory_threshold, success = 1024, 4, [True] * 6
success[0], health = dsmi_get_device_health(device_id) success[0], health = dsmi_get_device_health(device_id)
success[1], hbm_info = dsmi_get_hbm_info(device_id) success[1], hbm_info = dsmi_get_hbm_info(device_id)
success[2], chip_info = dsmi_get_chip_info(device_id) success[2], chip_info = dsmi_get_chip_info(device_id)
success[3], ip_addr = dsmi_get_device_ip_address(device_id) success[3], ip_addr = dsmi_get_device_ip_address(device_id)
success[4], aicore_rate = dsmi_get_device_utilization_rate(device_id, 2)
success[5], power_info = dsmi_get_device_power_info(device_id)
success[6], temperature = dsmi_get_device_temperature(device_id)
success[4], power_info = dsmi_get_device_power_info(device_id)
success[5], temperature = dsmi_get_device_temperature(device_id)
aicore_rate = dsmi_get_device_utilization_rate(device_id, 2)
return { return {
'chip_name': chip_info.get('chip_name'), 'chip_name': chip_info.get('chip_name'),
'device_id': device_id, 'device_id': device_id,


Loading…
Cancel
Save