You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

iam.py 27 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Fri Apr 26 11:49:12 2019
  5. Iterative alternate minimizations using GED.
  6. @author: ljia
  7. """
  8. import numpy as np
  9. import random
  10. import networkx as nx
  11. import sys
  12. #from Cython_GedLib_2 import librariesImport, script
  13. import librariesImport, script
  14. sys.path.insert(0, "../")
  15. from pygraph.utils.graphfiles import saveDataset
  16. from pygraph.utils.graphdataset import get_dataset_attributes
  17. from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
  18. #from pygraph.utils.utils import graph_deepcopy
def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
        connected=True):
    """Compute a median (pre-image) graph of the graphs in ``Gn`` by
    iterative alternate minimization based on graph edit distance (GED).

    Parameters
    ----------
    Gn : list of networkx.Graph
        Graphs whose median is sought.
    c_ei, c_er, c_es : int, optional
        Edit costs for edge insertion, removal and substitution; they weight
        the thresholds deciding whether an edge is kept in the median.
    node_label : str, optional
        Name of the symbolic node attribute.
    edge_label : str, optional
        Name of the symbolic edge attribute.
    connected : bool, optional
        NOTE(review): not referenced in this function's body.

    Returns
    -------
    networkx.Graph
        The median graph after a fixed number of refinement iterations.
    """
#    Gn = Gn[0:10]
    # relabel nodes to consecutive integers so GED node maps can index them.
    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]

    # phase 1: initilize.
    # compute set-median: the input graph minimizing the sum of GEDs to all
    # other input graphs.
    dis_min = np.inf
    pi_p = []
    pi_all = []  # pi_all[i][j]: forward node map from Gn[i] to Gn[j].
    for idx1, G_p in enumerate(Gn):
        dist_sum = 0
        pi_all.append([])
        for idx2, G_p_prime in enumerate(Gn):
            dist_tmp, pi_tmp, _ = GED(G_p, G_p_prime)
            pi_all[idx1].append(pi_tmp)
            dist_sum += dist_tmp
        if dist_sum < dis_min:
            dis_min = dist_sum
            G = G_p.copy()
            idx_min = idx1
    # list of edit operations (node maps from the set-median to each graph).
    pi_p = pi_all[idx_min]

    # phase 2: iteration.
    ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
                                      edge_label=edge_label)
    for itr in range(0, 10): # @todo: the convergence condition?
        G_new = G.copy()
        # update vertex labels.
        # pre-compute h_i0 for each label.
#        for label in get_node_labels(Gn, node_label):
#            print(label)
#        for nd in G.nodes(data=True):
#            pass
        if not ds_attrs['node_attr_dim']: # labels are symbolic
            for nd, _ in G.nodes(data=True):
                h_i0_list = []   # vote count per candidate label.
                label_list = []
                for label in get_node_labels(Gn, node_label):
                    # h_i0: number of input graphs whose node mapped from nd
                    # carries this label.
                    h_i0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p[idx][nd]
                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
                            h_i0 += 1
                    h_i0_list.append(h_i0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
                idx_rdm = random.randint(0, len(idx_max) - 1)
                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
        else: # labels are non-symbolic
            for nd, _ in G.nodes(data=True):
                Si_norm = 0
                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd]
                    if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
                        Si_norm += 1
                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
                # NOTE(review): divides by zero when Si_norm == 0 (no graph
                # has the mapped node) — see the @todo above.
                phi_i_bar /= Si_norm
                G_new.nodes[nd]['attributes'] = phi_i_bar

        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            for nd1, nd2, _ in G.edges(data=True):
                h_ij0_list = []  # vote count per candidate edge label.
                label_list = []
                for label in get_edge_labels(Gn, edge_label):
                    # h_ij0: number of graphs with an edge of this label
                    # between the mapped endpoints.
                    h_ij0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p[idx][nd1]
                        pi_j = pi_p[idx][nd2]
                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
                                   g.has_edge(pi_i, pi_j) and
                                   g.edges[pi_i, pi_j][edge_label] == label)
                        h_ij0 += h_ij0_p
                    h_ij0_list.append(h_ij0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
                h_ij0_max = h_ij0_list[idx_max[0]]
                idx_rdm = random.randint(0, len(idx_max) - 1)
                best_label = label_list[idx_max[idx_rdm]]

                # check whether a_ij is 0 or 1.
                # sij_norm: number of graphs where the mapped endpoints are
                # adjacent (regardless of label).
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd1]
                    pi_j = pi_p[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                # keep/insert the edge only when the best label's support beats
                # the cost-weighted threshold; otherwise delete it.
                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                        G_new.edges[nd1, nd2][edge_label] = best_label
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        else: # if edges are unlabeled
            for nd1, nd2, _ in G.edges(data=True):
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd1]
                    pi_j = pi_p[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                if sij_norm > len(Gn) * c_er / (c_er + c_ei):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        G = G_new.copy()

        # update pi_p: recompute the node maps from the new median to each
        # input graph for the next iteration.
        pi_p = []
        for idx1, G_p in enumerate(Gn):
            dist_tmp, pi_tmp, _ = GED(G, G_p)
            pi_p.append(pi_tmp)

    return G
  137. def GED(g1, g2, lib='gedlib'):
  138. """
  139. Compute GED.
  140. """
  141. if lib == 'gedlib':
  142. # transform dataset to the 'xml' file as the GedLib required.
  143. saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp')
  144. # script.appel()
  145. script.PyRestartEnv()
  146. script.PyLoadGXLGraph('ged_tmp/', 'ged_tmp/tmp.xml')
  147. listID = script.PyGetGraphIds()
  148. script.PySetEditCost("CHEM_1")
  149. script.PyInitEnv()
  150. script.PySetMethod("IPFP", "")
  151. script.PyInitMethod()
  152. g = listID[0]
  153. h = listID[1]
  154. script.PyRunMethod(g, h)
  155. pi_forward, pi_backward = script.PyGetAllMap(g, h)
  156. upper = script.PyGetUpperBound(g, h)
  157. lower = script.PyGetLowerBound(g, h)
  158. dis = (upper + lower) / 2
  159. return dis, pi_forward, pi_backward
  160. # --------------------------- These are tests --------------------------------#
def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1,
                                      node_label='atom', edge_label='bond_type'):
    """Variant of ``iam`` that initializes the median search from the best
    graph in a separate candidate set ``G_candidate`` instead of ``Gn``
    itself.

    Parameters
    ----------
    Gn : list of networkx.Graph
        Graphs whose median is sought.
    G_candidate : list of networkx.Graph
        Candidate starting graphs; the one with the smallest summed GED to
        ``Gn`` seeds the iteration.
    c_ei, c_er, c_es : int, optional
        Edge insertion / removal / substitution costs.
    node_label, edge_label : str, optional
        Names of the symbolic node / edge attributes.

    Returns
    -------
    networkx.Graph
        The refined median graph.
    """
    from tqdm import tqdm
#    Gn = Gn[0:10]
    # relabel nodes to consecutive integers so GED node maps can index them.
    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]

    # phase 1: initilize.
    # compute set-median over the candidate set.
    dis_min = np.inf
#    pi_p = []
    pi_all_forward = []
    pi_all_backward = []
    for idx1, G_p in tqdm(enumerate(G_candidate), desc='computing GEDs', file=sys.stdout):
        dist_sum = 0
        pi_all_forward.append([])
        pi_all_backward.append([])
        for idx2, G_p_prime in enumerate(Gn):
            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_p, G_p_prime)
            pi_all_forward[idx1].append(pi_tmp_forward)
            pi_all_backward[idx1].append(pi_tmp_backward)
            dist_sum += dist_tmp
        # '<=' (unlike '<' in iam) keeps the LAST candidate among ties.
        if dist_sum <= dis_min:
            dis_min = dist_sum
            G = G_p.copy()
            idx_min = idx1
    # list of edit operations.
    pi_p_forward = pi_all_forward[idx_min]
    pi_p_backward = pi_all_backward[idx_min]  # NOTE(review): unused below.

    # phase 2: iteration.
    ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
                                      edge_label=edge_label)
    # candidate node labels come from the inputs AND the starting graph.
    label_set = get_node_labels(Gn + [G], node_label)
    for itr in range(0, 10): # @todo: the convergence condition?
        G_new = G.copy()
        # update vertex labels.
        # pre-compute h_i0 for each label.
#        for label in get_node_labels(Gn, node_label):
#            print(label)
#        for nd in G.nodes(data=True):
#            pass
        if not ds_attrs['node_attr_dim']: # labels are symbolic
            for nd in G.nodes():
                h_i0_list = []   # vote count per candidate label.
                label_list = []
                for label in label_set:
                    # h_i0: number of input graphs whose node mapped from nd
                    # carries this label.
                    h_i0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p_forward[idx][nd]
                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
                            h_i0 += 1
                    h_i0_list.append(h_i0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
                idx_rdm = random.randint(0, len(idx_max) - 1)
                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
        else: # labels are non-symbolic
            for nd in G.nodes():
                Si_norm = 0
                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
                for idx, g in enumerate(Gn):
                    pi_i = pi_p_forward[idx][nd]
                    if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
                        Si_norm += 1
                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
                # NOTE(review): divides by zero when Si_norm == 0 — see @todo.
                phi_i_bar /= Si_norm
                G_new.nodes[nd]['attributes'] = phi_i_bar

        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            for nd1, nd2, _ in G.edges(data=True):
                h_ij0_list = []  # vote count per candidate edge label.
                label_list = []
                for label in get_edge_labels(Gn, edge_label):
                    h_ij0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p_forward[idx][nd1]
                        pi_j = pi_p_forward[idx][nd2]
                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
                                   g.has_edge(pi_i, pi_j) and
                                   g.edges[pi_i, pi_j][edge_label] == label)
                        h_ij0 += h_ij0_p
                    h_ij0_list.append(h_ij0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
                h_ij0_max = h_ij0_list[idx_max[0]]
                idx_rdm = random.randint(0, len(idx_max) - 1)
                best_label = label_list[idx_max[idx_rdm]]

                # check whether a_ij is 0 or 1.
                # sij_norm: number of graphs where the mapped endpoints are
                # adjacent (regardless of label).
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p_forward[idx][nd1]
                    pi_j = pi_p_forward[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                # keep/insert the edge only when the best label's support beats
                # the cost-weighted threshold; otherwise delete it.
                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                        G_new.edges[nd1, nd2][edge_label] = best_label
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        else: # if edges are unlabeled
            # @todo: works only for undirected graphs.
            # iterate over ALL node pairs (not only existing edges) so that
            # new edges can be inserted anywhere.
            for nd1 in range(nx.number_of_nodes(G)):
                for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
                    sij_norm = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p_forward[idx][nd1]
                        pi_j = pi_p_forward[idx][nd2]
                        if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                            sij_norm += 1
                    if sij_norm > len(Gn) * c_er / (c_er + c_ei):
                        if not G_new.has_edge(nd1, nd2):
                            G_new.add_edge(nd1, nd2)
                    elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
                        if G_new.has_edge(nd1, nd2):
                            G_new.remove_edge(nd1, nd2)
                    # do not change anything when equal.
        G = G_new.copy()

        # update pi_p: recompute node maps from the new median to each graph.
        pi_p_forward = []
        for G_p in Gn:
            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
            pi_p_forward.append(pi_tmp_forward)
    return G
def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
        Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, node_label='atom',
        edge_label='bond_type', connected=True):
    """Compute median graphs of ``Gn_median`` by starting from EVERY best
    candidate in ``Gn_candidate``, keeping all equally good graphs at each
    refinement step, and allowing node removal during the iterations.

    Parameters
    ----------
    Gn_median : list of networkx.Graph
        Graphs whose median is sought.
    Gn_candidate : list of networkx.Graph
        Candidate starting graphs.
    c_ei, c_er, c_es : int, optional
        Edge insertion / removal / substitution costs.
    node_label, edge_label : str, optional
        Names of the symbolic node / edge attributes.
    connected : bool, optional
        When True, disconnected refined graphs are discarded.

    Returns
    -------
    list of networkx.Graph
        All best median graphs found (ties included).
    """
    from tqdm import tqdm
#    Gn_median = Gn_median[0:10]
#    Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
    node_ir = sys.maxsize * 2 # Max number for c++, corresponding to the node remove and insertion.
    label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable.
    ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
                                      attr_names=['edge_labeled', 'node_attr_dim'],
                                      edge_label=edge_label)

    def generate_graph(G, pi_p_forward, label_set):
        """One refinement step on G: relabel or remove nodes and update
        edges, returning all equally good new graphs and their node maps."""
        G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
#        nx.draw_networkx(G)
#        import matplotlib.pyplot as plt
#        plt.show()
#        print(pi_p_forward)

        # update vertex labels.
        # pre-compute h_i0 for each label.
#        for label in get_node_labels(Gn, node_label):
#            print(label)
#        for nd in G.nodes(data=True):
#            pass
        if not ds_attrs['node_attr_dim']: # labels are symbolic
            # ndi is the positional index used by the GED node maps; nd is
            # the actual node id (they may differ once nodes are removed).
            for ndi, (nd, _) in enumerate(G.nodes(data=True)):
                h_i0_list = []   # vote count per candidate label.
                label_list = []
                for label in label_set:
                    h_i0 = 0
                    for idx, g in enumerate(Gn_median):
                        pi_i = pi_p_forward[idx][ndi]
                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
                            h_i0 += 1
                    h_i0_list.append(h_i0)
                    label_list.append(label)
                # case when the node is to be removed: count the maps that
                # send this node to the special "removed" id.
                h_i0_remove = 0
                for idx, g in enumerate(Gn_median):
                    pi_i = pi_p_forward[idx][ndi]
                    if pi_i == node_ir:
                        h_i0_remove += 1
                h_i0_list.append(h_i0_remove)
                label_list.append(label_r)
                # get the best labels.
                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
                nlabel_best = [label_list[idx] for idx in idx_max]
                # generate "best" graphs with regard to "best" node labels:
                # branch every graph kept so far on every tied label choice.
                G_new_list_nd = []
                for g in G_new_list:
                    for nl in nlabel_best:
                        g_tmp = g.copy()
                        if nl == label_r:
                            g_tmp.remove_node(nd)
                        else:
                            g_tmp.nodes[nd][node_label] = nl
                        G_new_list_nd.append(g_tmp)
#                        nx.draw_networkx(g_tmp)
#                        import matplotlib.pyplot as plt
#                        plt.show()
#                        print(g_tmp.nodes(data=True))
#                        print(g_tmp.edges(data=True))
                G_new_list = G_new_list_nd[:]
        else: # labels are non-symbolic
            # FIXME(review): ``G_new`` is not defined in this scope (this
            # branch looks copied from an older single-graph variant); it
            # would raise NameError on a dataset with non-symbolic node
            # attributes — confirm and port to G_new_list.
            for nd in G.nodes():
                Si_norm = 0
                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
                for idx, g in enumerate(Gn_median):
                    pi_i = pi_p_forward[idx][nd]
                    if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
                        Si_norm += 1
                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
                phi_i_bar /= Si_norm
                G_new.nodes[nd]['attributes'] = phi_i_bar

        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            # FIXME(review): this branch also references the undefined
            # ``G_new`` instead of the graphs in ``G_new_list`` — confirm.
            for nd1, nd2, _ in G.edges(data=True):
                h_ij0_list = []
                label_list = []
                for label in get_edge_labels(Gn_median, edge_label):
                    h_ij0 = 0
                    for idx, g in enumerate(Gn_median):
                        pi_i = pi_p_forward[idx][nd1]
                        pi_j = pi_p_forward[idx][nd2]
                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
                                   g.has_edge(pi_i, pi_j) and
                                   g.edges[pi_i, pi_j][edge_label] == label)
                        h_ij0 += h_ij0_p
                    h_ij0_list.append(h_ij0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
                h_ij0_max = h_ij0_list[idx_max[0]]
                idx_rdm = random.randint(0, len(idx_max) - 1)
                best_label = label_list[idx_max[idx_rdm]]

                # check whether a_ij is 0 or 1.
                sij_norm = 0
                for idx, g in enumerate(Gn_median):
                    pi_i = pi_p_forward[idx][nd1]
                    pi_j = pi_p_forward[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
                if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                        G_new.edges[nd1, nd2][edge_label] = best_label
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        else: # if edges are unlabeled
            # @todo: works only for undirected graphs.
            nd_list = [n for n in G.nodes()]
            for g_tmp in G_new_list:
                for nd1i in range(nx.number_of_nodes(G)):
                    nd1 = nd_list[nd1i]
                    for nd2i in range(nd1i + 1, nx.number_of_nodes(G)):
                        nd2 = nd_list[nd2i]
                        sij_norm = 0
                        for idx, g in enumerate(Gn_median):
                            pi_i = pi_p_forward[idx][nd1i]
                            pi_j = pi_p_forward[idx][nd2i]
                            if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                                sij_norm += 1
                        if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
                            # @todo: should we consider if nd1 and nd2 in g_tmp?
                            # or just add the edge anyway?
                            if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
                                and not g_tmp.has_edge(nd1, nd2):
                                g_tmp.add_edge(nd1, nd2)
                        elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
                            if g_tmp.has_edge(nd1, nd2):
                                g_tmp.remove_edge(nd1, nd2)
                        # do not change anything when equal.

        # find the best graph generated in this iteration and update pi_p.
        # @todo: should we update all graphs generated or just the best ones?
        dis_list, pi_forward_list = median_distance(G_new_list, Gn_median)
        # @todo: should we remove the identical and connectivity check?
        # Don't know which is faster.
        G_new_list, idx_list = remove_duplicates(G_new_list)
        pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
#        if connected == True:
#            G_new_list, idx_list = remove_disconnected(G_new_list)
#            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
#        idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
#        dis_min = dis_list[idx_min_tmp_list[0]]
#        pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
#        G_new_list = [G_new_list[idx] for idx in idx_min_list]

        # debug output: draw and dump every generated graph.
        for g in G_new_list:
            import matplotlib.pyplot as plt
            nx.draw_networkx(g)
            plt.show()
            print(g.nodes(data=True))
            print(g.edges(data=True))
        return G_new_list, pi_forward_list

    def median_distance(Gn, Gn_median, measure='ged', verbose=False):
        """Return, for each graph in Gn, its summed GED to all graphs in
        Gn_median, together with the forward node maps.
        NOTE(review): ``measure`` is currently unused."""
        dis_list = []
        pi_forward_list = []
        for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
                           file=sys.stdout) if verbose else enumerate(Gn):
            dis_sum = 0
            pi_forward_list.append([])
            for G_p in Gn_median:
                dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
                pi_forward_list[idx].append(pi_tmp_forward)
                dis_sum += dis_tmp
            dis_list.append(dis_sum)
        return dis_list, pi_forward_list

    def best_median_graphs(Gn_candidate, dis_all, pi_all_forward):
        """Select all candidates tied for the minimum summed distance."""
        idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
        dis_min = dis_all[idx_min_list[0]]
        pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
        G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
        return G_min_list, pi_forward_min_list, dis_min

    def iteration_proc(G, pi_p_forward):
        """Run the refinement iterations from one starting graph, carrying
        the whole set of equally good graphs forward each round."""
        G_list = [G]
        pi_forward_list = [pi_p_forward]
        # iterations.
        for itr in range(0, 10): # @todo: the convergence condition?
#            print('itr is', itr)
            G_new_list = []
            pi_forward_new_list = []
            for idx, G in enumerate(G_list):
                # candidate labels come from the medians AND the current graph.
                label_set = get_node_labels(Gn_median + [G], node_label)
                G_tmp_list, pi_forward_tmp_list = generate_graph(
                    G, pi_forward_list[idx], label_set)
                G_new_list += G_tmp_list
                pi_forward_new_list += pi_forward_tmp_list
            G_list = G_new_list[:]
            pi_forward_list = pi_forward_new_list[:]
            G_list, idx_list = remove_duplicates(G_list)
            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
#        import matplotlib.pyplot as plt
#        for g in G_list:
#            nx.draw_networkx(g)
#            plt.show()
#            print(g.nodes(data=True))
#            print(g.edges(data=True))
        return G_list, pi_forward_list # do we return all graphs or the best ones?

    def remove_duplicates(Gn):
        """Remove duplicate graphs from list.
        """
        Gn_new = []
        idx_list = []
        for idx, g in enumerate(Gn):
            dupl = False
            for g_new in Gn_new:
                if graph_isIdentical(g_new, g):
                    dupl = True
                    break
            if not dupl:
                Gn_new.append(g)
                idx_list.append(idx)
        return Gn_new, idx_list

    def remove_disconnected(Gn):
        """Remove disconnected graphs from list.
        """
        Gn_new = []
        idx_list = []
        for idx, g in enumerate(Gn):
            if nx.is_connected(g):
                Gn_new.append(g)
                idx_list.append(idx)
        return Gn_new, idx_list

    # phase 1: initilize.
    # compute set-median.
    dis_min = np.inf
    # NOTE(review): distances are computed on the REVERSED candidate list,
    # but ``idx_min`` below indexes the unreversed ``Gn_candidate`` — this
    # looks like an index mismatch; confirm the intent.
    dis_all, pi_all_forward = median_distance(Gn_candidate[::-1], Gn_median)
    # find all smallest distances.
    idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
    dis_min = dis_all[idx_min_list[0]]

    # phase 2: iteration.
    G_list = []
    for idx_min in idx_min_list[::-1]:
#        print('idx_min is', idx_min)
        G = Gn_candidate[idx_min].copy()
        # list of edit operations.
        pi_p_forward = pi_all_forward[idx_min]
#        pi_p_backward = pi_all_backward[idx_min]
        Gi_list, pi_i_forward_list = iteration_proc(G, pi_p_forward)
        G_list += Gi_list

    G_list, _ = remove_duplicates(G_list)
    if connected == True:
        G_list, _ = remove_disconnected(G_list)

    # debug output: draw and dump every refined graph.
    import matplotlib.pyplot as plt
    for g in G_list:
        nx.draw_networkx(g)
        plt.show()
        print(g.nodes(data=True))
        print(g.edges(data=True))

    # get the best median graphs
    dis_all, pi_all_forward = median_distance(G_list, Gn_median)
    G_min_list, pi_forward_min_list, dis_min = best_median_graphs(
        G_list, dis_all, pi_all_forward)
    for g in G_min_list:
        nx.draw_networkx(g)
        plt.show()
        print(g.nodes(data=True))
        print(g.edges(data=True))
    return G_min_list
if __name__ == '__main__':
    from pygraph.utils.graphfiles import loadDataset
    # dataset configuration; MUTAG carries symbolic node and edge labels.
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
          'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
#    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
#          'extra_params': {}} # node nsymb
#    ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
#          'extra_params': {}}
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    # smoke-run the median computation on the whole dataset.
    iam(Gn)

A Python package for graph kernels, graph edit distances and the graph pre-image problem.