From 726c0b9fdaa60034d01d30fabb9b4102a6e9909c Mon Sep 17 00:00:00 2001 From: Li Hongzhang Date: Wed, 16 Sep 2020 17:02:34 +0800 Subject: [PATCH] fix the impl of get_flatten_lineage --- mindinsight/lineagemgr/model.py | 110 +++++++++++--------------------- 1 file changed, 37 insertions(+), 73 deletions(-) diff --git a/mindinsight/lineagemgr/model.py b/mindinsight/lineagemgr/model.py index d974f918..11998432 100644 --- a/mindinsight/lineagemgr/model.py +++ b/mindinsight/lineagemgr/model.py @@ -206,80 +206,44 @@ def get_flattened_lineage(data_manager, search_condition=None): Dict[str, list]: A dict contains keys and values from lineages. """ - summary_base_dir = data_manager.summary_base_dir - lineages = filter_summary_lineage(data_manager=data_manager, search_condition=search_condition) - lineage_objects = lineages.get("object", []) - - # Step 1, get column names - column_names = _get_columns_name(lineage_objects) - - # Step 2, collect data - column_data = _organize_data_to_matrix(lineage_objects, column_names, summary_base_dir) - - return column_data - - -def _get_columns_name(lineage_objects): - """Get columns name.""" - column_names = set() - user_defined_num = 0 - for lineage in lineage_objects: - model_lineage = lineage.get("model_lineage", {}) - metric = model_lineage.get("metric", {}) - metric_names = tuple('{}{}'.format(_METRIC_PREFIX, key) for key in metric.keys()) - user_defined = model_lineage.get("user_defined", {}) - user_defined_names = tuple('{}{}'.format(_USER_DEFINED_PREFIX, key) for key in user_defined.keys()) - model_lineage_temp = list(model_lineage.keys()) - for key in model_lineage_temp: - if key in ["metric", "user_defined"]: - model_lineage_temp.remove(key) - column_names.update(model_lineage_temp) - column_names.update(metric_names) - if user_defined_num + len(user_defined_names) <= USER_DEFINED_INFO_LIMIT: - column_names.update(user_defined_names) - user_defined_num += len(user_defined_names) - elif user_defined_num < USER_DEFINED_INFO_LIMIT <= user_defined_num + len(user_defined_names): - names = [] - for i in range(USER_DEFINED_INFO_LIMIT - user_defined_num): - names.append(user_defined_names[i]) - column_names.update(names) - user_defined_num += len(names) - log.info("Partial user_defined_info is deleted. Currently saved length is: %s.", user_defined_num) + summary_base_dir, flatten_dict, user_count = data_manager.summary_base_dir, {'train_id': []}, 0 + lineages = filter_summary_lineage(data_manager=data_manager, search_condition=search_condition).get("object", []) + for index, lineage in enumerate(lineages): + flatten_dict['train_id'].append(get_relative_path(lineage.get("summary_dir"), summary_base_dir)) + for key, val in _flatten_lineage(lineage.get('model_lineage', {})): + if key.startswith(_USER_DEFINED_PREFIX) and key not in flatten_dict: + if user_count > USER_DEFINED_INFO_LIMIT: + log.warning("The user_defined_info has reached the limit %s. %r is ignored", + USER_DEFINED_INFO_LIMIT, key) + continue + user_count += 1 + if key not in flatten_dict: + flatten_dict[key] = [None] * index + flatten_dict[key].append(_parse_value(val)) + for vals in flatten_dict.values(): + if len(vals) == index: + vals.append(None) + return flatten_dict + + +def _flatten_lineage(lineage): + """Flatten the lineage.""" + for key, val in lineage.items(): + if key == 'metric': + for k, v in val.items(): + yield f'{_METRIC_PREFIX}{k}', v + elif key == 'user_defined': + for k, v in val.items(): + yield f'{_USER_DEFINED_PREFIX}{k}', v else: - log.warning("The quantity of user_defined_info has reached the limit %s.", USER_DEFINED_INFO_LIMIT) - column_names.update(["train_id"]) - - return column_names - - -def _organize_data_to_matrix(lineage_objects, column_names, summary_base_dir): - """Collect data and transform to matrix.""" - cnt_lineages = len(lineage_objects) - column_data = {key: [None] * cnt_lineages for key in column_names} - for ind, lineage in enumerate(lineage_objects): - - train_id = get_relative_path(lineage.get("summary_dir"), summary_base_dir) - - model_lineage = lineage.get("model_lineage", {}) - metric = model_lineage.pop("metric", {}) - metric_content = { - '{}{}'.format(_METRIC_PREFIX, key): val for key, val in metric.items() - } - user_defined = model_lineage.pop("user_defined", {}) - user_defined_content = { - '{}{}'.format(_USER_DEFINED_PREFIX, key): val for key, val in user_defined.items() - } - final_content = {} - final_content.update(model_lineage) - final_content.update(metric_content) - final_content.update(user_defined_content) - final_content.update({"train_id": train_id}) - for key, val in final_content.items(): - if isinstance(val, str) and val.lower() in ['nan', 'inf']: - val = np.nan - if key in column_data: - column_data[key][ind] = val - return column_data + yield key, val + + +def _parse_value(val): + """Parse value.""" + if isinstance(val, str) and val.lower() in ['nan', 'inf']: + return np.nan + return val class LineageTable: