You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

querier.py 16 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """This file is used to define lineage info querier."""
  16. import enum
  17. import functools
  18. import operator
  19. from mindinsight.lineagemgr.common.exceptions.exceptions import LineageQuerierParamException, LineageParamTypeError
  20. from mindinsight.lineagemgr.common.utils import enum_to_list
  21. from mindinsight.lineagemgr.lineage_parser import SuperLineageObj
  22. from mindinsight.lineagemgr.querier.query_model import FIELD_MAPPING
  23. @enum.unique
  24. class ConditionParam(enum.Enum):
  25. """
  26. Filtering and sorting field names.
  27. `LIMIT` represents the number of lineage info per page. `OFFSET` represents
  28. page number. `SORTED_NAME` means to sort by this field. `SORTED_TYPE` means
  29. ascending or descending.
  30. """
  31. LIMIT = 'limit'
  32. OFFSET = 'offset'
  33. SORTED_NAME = 'sorted_name'
  34. SORTED_TYPE = 'sorted_type'
  35. LINEAGE_TYPE = 'lineage_type'
  36. @classmethod
  37. def is_condition_type(cls, value):
  38. """
  39. Judge that the input param is one of field names in the class.
  40. Args:
  41. value (str): The input field name.
  42. Returns:
  43. bool, `True` if the input field name in the class, else `False`.
  44. """
  45. return value in cls._value2member_map_
  46. @enum.unique
  47. class ExpressionType(enum.Enum):
  48. """
  49. Filter condition name definition.
  50. `EQ` means `==`. `LT` means `<`. `GT` means `>`. `LE` means `<=`. `GE` means
  51. `>=`. `IN` means filter value in the specified list.
  52. """
  53. EQ = 'eq'
  54. LT = 'lt'
  55. GT = 'gt'
  56. LE = 'le'
  57. GE = 'ge'
  58. IN = 'in'
  59. NOT_IN = 'not_in'
  60. @classmethod
  61. def is_valid_exp(cls, key):
  62. """
  63. Judge that the input param is one of filter condition names in the class.
  64. Args:
  65. key (str): The input filter condition name.
  66. Returns:
  67. bool, `True` if the input filter condition name in the class,
  68. else `False`.
  69. """
  70. return key in cls._value2member_map_
  71. @classmethod
  72. def is_match(cls, except_key, except_value, actual_value):
  73. """
  74. Determine whether the value meets the expected requirement.
  75. Args:
  76. except_key (str): The expression key.
  77. except_value (Union[str, int, float, list, tuple]): The expected
  78. value.
  79. actual_value (Union[str, int, float]): The actual value.
  80. Returns:
  81. bool, `True` if the actual value meets the expected requirement,
  82. else `False`.
  83. """
  84. if actual_value is None and except_key in [cls.LT.value, cls.GT.value,
  85. cls.LE.value, cls.GE.value]:
  86. return False
  87. try:
  88. if except_key == cls.IN.value:
  89. state = operator.contains(except_value, actual_value)
  90. elif except_key == cls.NOT_IN.value:
  91. state = not operator.contains(except_value, actual_value)
  92. else:
  93. state = getattr(operator, except_key)(actual_value, except_value)
  94. except TypeError:
  95. # actual_value can not compare with except_value
  96. return False
  97. return state
  98. @enum.unique
  99. class LineageFilterKey(enum.Enum):
  100. """Summary lineage information filter key."""
  101. METRIC = 'metric'
  102. HYPER_PARAM = 'hyper_parameters'
  103. ALGORITHM = 'algorithm'
  104. TRAIN_DATASET = 'train_dataset'
  105. VALID_DATASET = 'valid_dataset'
  106. MODEL = 'model'
  107. DATASET_GRAPH = 'dataset_graph'
  108. @classmethod
  109. def is_valid_filter_key(cls, key):
  110. """
  111. Judge that the input param is one of field names in the class.
  112. Args:
  113. key (str): The input field name.
  114. Returns:
  115. bool, `True` if the input field name in the class, else `False`.
  116. """
  117. return key in cls._value2member_map_
  118. @classmethod
  119. def get_key_list(cls):
  120. """
  121. Get the filter key name list.
  122. Returns:
  123. list[str], the filter key name list.
  124. """
  125. return [member.value for member in cls]
  126. @enum.unique
  127. class LineageType(enum.Enum):
  128. """Lineage search type."""
  129. DATASET = 'dataset'
  130. MODEL = 'model'
  131. class Querier:
  132. """
  133. The querier of model lineage information.
  134. The class provides model lineage information query function. The information
  135. includes hyper parameters, train dataset, algorithm, model information,
  136. metric, valid dataset, etc.
  137. The class also provides search and sorting capabilities about model lineage
  138. information. You can search and sort by the specified condition.
  139. The condition explain in `ConditionParam` and `ExpressionType` class.
  140. See the method `filter_summary_lineage` for supported fields.
  141. Args:
  142. super_lineage_objs (dict): A dict of <summary_dir, SuperLineageObject>.
  143. Raises:
  144. LineageParamTypeError: If the input parameter type is invalid.
  145. LineageQuerierParamException: If the input parameter value is invalid.
  146. LineageSummaryParseException: If all summary logs parsing failed.
  147. """
  148. def __init__(self, super_lineage_objs):
  149. self._super_lineage_objs = self._check_objs(super_lineage_objs)
  150. def _check_objs(self, super_lineage_objs):
  151. if super_lineage_objs is None:
  152. raise LineageQuerierParamException(
  153. 'querier_init_param', 'The querier init param is empty.'
  154. )
  155. if not isinstance(super_lineage_objs, dict):
  156. raise LineageParamTypeError("Init param should be a dict.")
  157. return super_lineage_objs
  158. def filter_summary_lineage(self, condition=None):
  159. """
  160. Filter and sort lineage information based on the specified condition.
  161. See `ConditionType` and `ExpressionType` class for the rule of filtering
  162. and sorting. The filtering and sorting fields are defined in
  163. `FIELD_MAPPING` or prefixed with `metric/` or 'user_defined/'.
  164. If the condition is `None`, all model lineage information will be
  165. returned.
  166. Args:
  167. condition (Union[dict, None]): Filter and sort condition.
  168. Default: None.
  169. Returns:
  170. dict, filtered and sorted model lineage information.
  171. """
  172. def _filter(super_lineage_obj: SuperLineageObj):
  173. for condition_key, condition_value in condition.items():
  174. if ConditionParam.is_condition_type(condition_key):
  175. continue
  176. if self._is_valid_field(condition_key):
  177. raise LineageQuerierParamException(
  178. 'condition',
  179. 'The field {} not supported'.format(condition_key)
  180. )
  181. value = super_lineage_obj.lineage_obj.get_value_by_key(condition_key)
  182. for exp_key, exp_value in condition_value.items():
  183. if not ExpressionType.is_valid_exp(exp_key):
  184. raise LineageQuerierParamException(
  185. 'condition',
  186. 'The expression {} not supported.'.format(exp_key)
  187. )
  188. if not ExpressionType.is_match(exp_key, exp_value, value):
  189. return False
  190. return True
  191. if condition is None:
  192. condition = {}
  193. self._add_dataset_mark()
  194. super_lineage_objs = list(self._super_lineage_objs.values())
  195. super_lineage_objs.sort(key=lambda x: x.update_time, reverse=True)
  196. results = list(filter(_filter, super_lineage_objs))
  197. results = self._sorted_results(results, condition)
  198. offset_results = self._handle_limit_and_offset(condition, results)
  199. customized = self._organize_customized(offset_results)
  200. lineage_types = condition.get(ConditionParam.LINEAGE_TYPE.value)
  201. lineage_types = self._get_lineage_types(lineage_types)
  202. object_items = []
  203. for item in offset_results:
  204. lineage_object = dict()
  205. if LineageType.MODEL.value in lineage_types:
  206. lineage_object.update(item.lineage_obj.to_model_lineage_dict())
  207. if LineageType.DATASET.value in lineage_types:
  208. lineage_object.update(item.lineage_obj.to_dataset_lineage_dict())
  209. lineage_object.update({"added_info": item.added_info})
  210. object_items.append(lineage_object)
  211. lineage_info = {
  212. 'customized': customized,
  213. 'object': object_items,
  214. 'count': len(results)
  215. }
  216. return lineage_info
  217. def _sorted_results(self, results, condition):
  218. """Get sorted results."""
  219. def _cmp(value1, value2):
  220. if value1 is None and value2 is None:
  221. cmp_result = 0
  222. elif value1 is None:
  223. cmp_result = -1
  224. elif value2 is None:
  225. cmp_result = 1
  226. else:
  227. try:
  228. cmp_result = (value1 > value2) - (value1 < value2)
  229. except TypeError:
  230. type1 = type(value1).__name__
  231. type2 = type(value2).__name__
  232. cmp_result = (type1 > type2) - (type1 < type2)
  233. return cmp_result
  234. def _cmp_added_info(obj1: SuperLineageObj, obj2: SuperLineageObj):
  235. value1 = obj1.added_info.get(sorted_name)
  236. value2 = obj2.added_info.get(sorted_name)
  237. return _cmp(value1, value2)
  238. def _cmp_super_lineage_obj(obj1: SuperLineageObj, obj2: SuperLineageObj):
  239. value1 = obj1.lineage_obj.get_value_by_key(sorted_name)
  240. value2 = obj2.lineage_obj.get_value_by_key(sorted_name)
  241. return _cmp(value1, value2)
  242. if ConditionParam.SORTED_NAME.value in condition:
  243. sorted_name = condition.get(ConditionParam.SORTED_NAME.value)
  244. sorted_type = condition.get(ConditionParam.SORTED_TYPE.value)
  245. reverse = sorted_type == 'descending'
  246. if sorted_name in ['tag']:
  247. results = sorted(
  248. results, key=functools.cmp_to_key(_cmp_added_info), reverse=reverse
  249. )
  250. return results
  251. if self._is_valid_field(sorted_name):
  252. raise LineageQuerierParamException(
  253. 'condition',
  254. 'The sorted name {} not supported.'.format(sorted_name)
  255. )
  256. results = sorted(
  257. results, key=functools.cmp_to_key(_cmp_super_lineage_obj), reverse=reverse
  258. )
  259. return results
  260. def _organize_customized(self, offset_results):
  261. """Organize customized."""
  262. customized = dict()
  263. for offset_result in offset_results:
  264. for obj_name in ["metric", "user_defined"]:
  265. self._organize_customized_item(customized, offset_result.lineage_obj, obj_name)
  266. # If types contain numbers and string, it will be "mixed".
  267. # If types contain "int" and "float", it will be "float".
  268. for key, value in customized.items():
  269. types = value["type"]
  270. if len(types) == 1:
  271. customized[key]["type"] = list(types)[0]
  272. elif types.issubset(["int", "float"]):
  273. customized[key]["type"] = "float"
  274. else:
  275. customized[key]["type"] = "mixed"
  276. return customized
  277. def _organize_customized_item(self, customized, offset_result, obj_name):
  278. """Organize customized item."""
  279. obj = getattr(offset_result, obj_name)
  280. require = bool(obj_name == "metric")
  281. if obj and isinstance(obj, dict):
  282. for key, value in obj.items():
  283. label = f'{obj_name}/{key}'
  284. current_type = type(value).__name__
  285. if customized.get(label) is None:
  286. customized[label] = dict()
  287. customized[label]["label"] = label
  288. # user defined info is not displayed by default
  289. customized[label]["required"] = require
  290. customized[label]["type"] = set()
  291. customized[label]["type"].add(current_type)
  292. def _get_lineage_types(self, lineage_type_param):
  293. """
  294. Get lineage types.
  295. Args:
  296. lineage_type_param (dict): A dict contains "in" or "eq".
  297. Returns:
  298. list, lineage type.
  299. """
  300. # lineage_type_param is None or an empty dict
  301. if not lineage_type_param:
  302. return enum_to_list(LineageType)
  303. if lineage_type_param.get("in") is not None:
  304. return lineage_type_param.get("in")
  305. return [lineage_type_param.get("eq")]
  306. def _is_valid_field(self, field_name):
  307. """
  308. Check if field name is valid.
  309. Args:
  310. field_name (str): Field name.
  311. Returns:
  312. bool, `True` if the field name is valid, else `False`.
  313. """
  314. return field_name not in FIELD_MAPPING \
  315. and not field_name.startswith(('metric/', 'user_defined/'))
  316. def _handle_limit_and_offset(self, condition, result):
  317. """
  318. Handling the condition of `limit` and `offset`.
  319. Args:
  320. condition (dict): Filter and sort condition.
  321. result (list[SuperLineageObj]): Filtered and sorted result.
  322. Returns:
  323. list[SuperLineageObj], paginated result.
  324. """
  325. offset = 0
  326. limit = 10
  327. if ConditionParam.OFFSET.value in condition:
  328. offset = condition.get(ConditionParam.OFFSET.value)
  329. if ConditionParam.LIMIT.value in condition:
  330. limit = condition.get(ConditionParam.LIMIT.value)
  331. if ConditionParam.OFFSET.value not in condition \
  332. and ConditionParam.LIMIT.value not in condition:
  333. offset_result = result
  334. else:
  335. offset_result = result[offset * limit: limit * (offset + 1)]
  336. return offset_result
  337. def _add_dataset_mark(self):
  338. """Add dataset mark into LineageObj."""
  339. # give a dataset mark for each dataset graph in lineage information
  340. marked_dataset_group = {'1': None}
  341. for super_lineage_obj in self._super_lineage_objs.values():
  342. lineage = super_lineage_obj.lineage_obj
  343. dataset_mark = '0'
  344. for dataset_graph_mark, marked_dataset_graph in marked_dataset_group.items():
  345. if marked_dataset_graph == lineage.dataset_graph:
  346. dataset_mark = dataset_graph_mark
  347. break
  348. # if no matched, add the new dataset graph into group
  349. if dataset_mark == '0':
  350. dataset_mark = str(int(max(marked_dataset_group.keys())) + 1)
  351. marked_dataset_group.update({
  352. dataset_mark:
  353. lineage.dataset_graph
  354. })
  355. lineage.dataset_mark = dataset_mark