| @@ -0,0 +1,103 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Wed Oct 16 14:20:06 2019 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| from tqdm import tqdm | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from ged import GED, get_nb_edit_operations | |||
| from utils import kernel_distance_matrix | |||
def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
    """Tune six constant edit costs so that GEDs approximate kernel distances.

    All costs start at 1. Each of the ``itr_max`` rounds
      1. computes, for every graph pair (i <= j), the GED under the current
         costs and the number of edit operations of each type,
      2. records the Euclidean norm of (GEDs - kernel distances),
      3. replaces the costs by the least-squares solution of
         ``operation_counts @ costs ~= kernel distances``.

    Parameters
    ----------
    Gn : list
        Graphs to fit on.
    gkernel : str
        Kernel name forwarded to ``kernel_distance_matrix``.
    itr_max : int
        Number of fitting rounds.

    Returns
    -------
    tuple
        ``(c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list,
        edit_cost_list)``: the costs after the last round, the residual
        recorded in every round and the cost vector used in every round.
    """
    c_vi = c_vr = c_vs = c_ei = c_er = c_es = 1

    # Distances in feature space, flattened over the upper triangle
    # (diagonal included) in the same pair order as the GED loop below.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
    nb_k = len(dis_k_mat)
    dis_k_vec = np.array([dis_k_mat[row, col]
                          for row in range(nb_k)
                          for col in range(row, nb_k)])

    residual_list, edit_cost_list = [], []
    nb_graphs = len(Gn)
    for itr in range(itr_max):
        print('iteration', itr)

        edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
        edit_cost_list.append(edit_cost_constant)

        # GEDs and numbers of edit operations for every pair.
        ged_all = []
        nb_ops_all = []  # one (n_vi, n_vr, n_vs, n_ei, n_er, n_es) row per pair
        for i in tqdm(range(nb_graphs), desc='computing GEDs', file=sys.stdout):
            for j in range(i, nb_graphs):
                dis, pi_forward, pi_backward = GED(
                    Gn[i], Gn[j], lib='gedlibpy', cost='CONSTANT',
                    method='IPFP', edit_cost_constant=edit_cost_constant,
                    stabilizer='min', repeat=30)
                ged_all.append(dis)
                n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(
                    Gn[i], Gn[j], pi_forward, pi_backward)
                nb_ops_all.append((n_vi, n_vr, n_vs, n_ei, n_er, n_es))

        diff = np.array(ged_all) - dis_k_vec
        residual_list.append(np.sqrt(np.sum(np.square(diff))))

        # "Fit" GEDs to the distances in feature space by tuning the edit
        # costs with the least squares method.
        nb_cost_mat = np.array(nb_ops_all).reshape(-1, 6)
        edit_costs = np.linalg.lstsq(nb_cost_mat, dis_k_vec, rcond=None)[0]

        # Snap slightly negative costs to zero; larger negative values are
        # deliberately left untouched.
        tiny_negative = (edit_costs < 0) & (edit_costs > -1e-3)
        edit_costs[tiny_negative] = 0

        c_vi, c_vr, c_vs, c_ei, c_er, c_es = edit_costs

    return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list
if __name__ == '__main__':
    from utils import remove_edges

    # Data set with symbolic node/edge labels.
    ds = dict(name='MUTAG', dataset='../datasets/MUTAG/MUTAG_A.txt',
              extra_params={})
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    Gn = Gn[:10]  # only the first ten graphs are used
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    itr_max = 10
    (c_vi, c_vr, c_vs, c_ei, c_er, c_es,
     residual_list, edit_cost_list) = fit_GED_to_kernel_distance(
         Gn, gkernel, itr_max)
| @@ -0,0 +1,197 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Oct 17 18:44:59 2019 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import networkx as nx | |||
| from tqdm import tqdm | |||
| import sys | |||
| from gedlibpy import librariesImport, gedlibpy | |||
def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
        edit_cost_constant=None, saveGXL='benoit', stabilizer='min', repeat=50):
    """Compute an approximate graph edit distance (GED) between two graphs.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Every node must carry an ``'atom'`` attribute. Edge attributes are
        dropped when the graphs are handed to gedlibpy.
    lib : str
        Backend library; only ``'gedlibpy'`` is supported.
    cost : str
        Edit cost name forwarded to ``gedlibpy.set_edit_cost``.
    method : str
        Method name forwarded to ``gedlibpy.set_method``.
    edit_cost_constant : list, optional
        Cost constants forwarded to ``gedlibpy.set_edit_cost``. ``None``
        (the default) is passed on as an empty list.
    saveGXL : str
        Not used by this function; kept for interface compatibility.
    stabilizer : None or 'min'
        ``None`` runs the method once. ``'min'`` runs it up to ``repeat``
        times and keeps the run with the smallest upper bound, stopping
        early once that bound is 0.
    repeat : int
        Maximum number of runs when ``stabilizer == 'min'``.

    Returns
    -------
    dis : float
        The (smallest) upper bound reported by gedlibpy.
    pi_forward : list
        For each node of ``g1`` the matched node of ``g2``, or ``np.inf``
        when the node is removed.
    pi_backward : list
        For each node of ``g2`` the matched node of ``g1``, or ``np.inf``
        when the node is inserted.

    Raises
    ------
    ValueError
        If ``lib`` or ``stabilizer`` is not supported, or if
        ``stabilizer == 'min'`` and ``repeat < 1``.
    """
    # Validate up front: these cases used to fall through and fail later
    # with an unbound-variable error.
    if lib != 'gedlibpy':
        raise ValueError('unsupported GED library: %r' % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError('unsupported stabilizer: %r' % (stabilizer,))
    if stabilizer == 'min' and repeat < 1:
        raise ValueError('repeat must be >= 1 when stabilizer is "min"')
    if edit_cost_constant is None:
        edit_cost_constant = []

    def convertGraph(G):
        """Convert a graph to the proper NetworkX format that can be
        recognized by library gedlibpy (string node ids, ``chem`` label,
        unlabeled edges).
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2 in G.edges():
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")

    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()

    g = listID[0]
    h = listID[1]
    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
    else:  # stabilizer == 'min'
        upper = np.inf
        for _ in range(repeat):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            if upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
            if upper == 0:
                break

    dis = upper

    # Make the map labels correct: translate positional indices back to the
    # original node ids and mark removed/inserted nodes with np.inf.
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = len(nodes1)
    nb2 = len(nodes2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward
def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
        edit_cost_constant=[], stabilizer='min', repeat=50):
    """
    Compute GEDs for a group of graphs.

    NOTE(review): the body below is the same as the two-graph ``GED``
    function and refers to ``g1`` and ``g2``, which are neither parameters
    nor locals of this function (it only receives ``Gn``). Unless those
    names exist as module globals, calling it with ``lib='gedlibpy'`` fails
    with a NameError. What should be returned for a whole group of graphs
    is not visible here -- TODO confirm the intended contract and implement.
    """
    if lib == 'gedlibpy':
        def convertGraph(G):
            """Convert a graph to the proper NetworkX format that can be
            recognized by library gedlibpy.
            """
            G_new = nx.Graph()
            for nd, attrs in G.nodes(data=True):
                G_new.add_node(str(nd), chem=attrs['atom'])
            for nd1, nd2, attrs in G.edges(data=True):
                # Edge labels are currently not passed to gedlibpy:
#                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
                G_new.add_edge(str(nd1), str(nd2))
            return G_new

        gedlibpy.restart_env()
        # NOTE(review): g1 / g2 are undefined in this scope (see docstring).
        gedlibpy.add_nx_graph(convertGraph(g1), "")
        gedlibpy.add_nx_graph(convertGraph(g2), "")

        listID = gedlibpy.get_all_graph_ids()
        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
        gedlibpy.init()
        gedlibpy.set_method(method, "")
        gedlibpy.init_method()

        g = listID[0]
        h = listID[1]
        if stabilizer == None:
            # Single run.
            gedlibpy.run_method(g, h)
            pi_forward = gedlibpy.get_forward_map(g, h)
            pi_backward = gedlibpy.get_backward_map(g, h)
            upper = gedlibpy.get_upper_bound(g, h)
            lower = gedlibpy.get_lower_bound(g, h)
        elif stabilizer == 'min':
            # Repeat and keep the run with the smallest upper bound.
            upper = np.inf
            for itr in range(repeat):
                gedlibpy.run_method(g, h)
                upper_tmp = gedlibpy.get_upper_bound(g, h)
                if upper_tmp < upper:
                    upper = upper_tmp
                    pi_forward = gedlibpy.get_forward_map(g, h)
                    pi_backward = gedlibpy.get_backward_map(g, h)
                    lower = gedlibpy.get_lower_bound(g, h)
                if upper == 0:
                    break

        dis = upper

        # make the map label correct (label remove map as np.inf)
        nodes1 = [n for n in g1.nodes()]
        nodes2 = [n for n in g2.nodes()]
        nb1 = nx.number_of_nodes(g1)
        nb2 = nx.number_of_nodes(g2)
        pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
        pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward
def ged_median(Gn, Gn_median, measure='ged', verbose=False,
               ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for every graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    Parameters
    ----------
    Gn : iterable of graphs
        Candidate graphs.
    Gn_median : iterable of graphs
        Graphs each candidate is compared with.
    measure : str
        Not used by this function.
    verbose : bool
        Show a tqdm progress bar over ``Gn``.
    ged_cost, ged_method, saveGXL
        Forwarded to ``GED`` as ``cost``, ``method`` and ``saveGXL``.

    Returns
    -------
    dis_list : list
        One summed distance per graph in ``Gn``.
    pi_forward_list : list of lists
        ``pi_forward_list[i][k]`` is the forward node map from ``Gn[i]`` to
        the k-th graph of ``Gn_median``.
    """
    candidates = enumerate(Gn)
    if verbose:
        candidates = tqdm(candidates, desc='computing median distances',
                          file=sys.stdout)

    sums_of_distances = []
    forward_maps = []
    for _, graph in candidates:
        total = 0
        maps_of_graph = []
        forward_maps.append(maps_of_graph)
        for median_graph in Gn_median:
            dist, forward, _ = GED(graph, median_graph, cost=ged_cost,
                                   method=ged_method, saveGXL=saveGXL)
            maps_of_graph.append(forward)
            total += dist
        sums_of_distances.append(total)

    return sums_of_distances, forward_maps
def get_nb_edit_operations(g1, g2, forward_map, backward_map):
    """Count the edit operations of each type implied by a node mapping.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Nodes must carry an ``'atom'`` attribute.
    forward_map : sequence
        For the i-th node of ``g1`` the matched node of ``g2``, or
        ``np.inf`` if the node is removed.
    backward_map : sequence
        For each node of ``g2`` the matched node of ``g1``, or ``np.inf``
        if the node is inserted.

    Returns
    -------
    tuple of int
        ``(n_vi, n_vr, n_vs, n_ei, n_er, n_es)``: numbers of node
        insertions, removals and substitutions and of edge insertions,
        removals and substitutions. Edge labels are not compared, so
        ``n_es`` is always 0.
    """
    n_vi = 0
    n_vr = 0
    n_vs = 0
    n_er = 0
    n_es = 0  # edge labels are not considered, so this is never incremented

    nodes1 = list(g1.nodes())

    # Nodes: removed when mapped to np.inf, substituted when labels differ.
    # ``G.nodes[n]`` replaces ``G.node[n]``, which NetworkX 2.4 removed.
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
            n_vs += 1
    for map_i in backward_map:
        if map_i == np.inf:
            n_vi += 1

    # Edges: look up positions through a dict instead of list.index().
    position = {node: idx for idx, node in enumerate(nodes1)}
    nb_edges2_cnted = 0
    for n1, n2 in g1.edges():
        mapped1 = forward_map[position[n1]]
        mapped2 = forward_map[position[n2]]
        # one of the nodes is removed, thus the edge is removed.
        if mapped1 == np.inf or mapped2 == np.inf:
            n_er += 1
        # corresponding edge is in g2. Edge label is not considered.
        elif g2.has_edge(mapped1, mapped2) or g2.has_edge(mapped2, mapped1):
            nb_edges2_cnted += 1
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
    # Every edge of g2 that no edge of g1 was matched to is an insertion.
    n_ei = g2.number_of_edges() - nb_edges2_cnted

    return n_vi, n_vr, n_vs, n_ei, n_er, n_es
| @@ -12,10 +12,10 @@ import networkx as nx | |||
| from tqdm import tqdm | |||
| import sys | |||
| from gedlibpy import librariesImport, gedlibpy | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphdataset import get_dataset_attributes | |||
| from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels | |||
| from ged import GED, ged_median | |||
| def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
| @@ -237,7 +237,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
| # # find the best graph generated in this iteration and update pi_p. | |||
| # @todo: should we update all graphs generated or just the best ones? | |||
| dis_list, pi_forward_list = median_distance(G_new_list, Gn_median, | |||
| dis_list, pi_forward_list = ged_median(G_new_list, Gn_median, | |||
| **params_ged) | |||
| # @todo: should we remove the identical and connectivity check? | |||
| # Don't know which is faster. | |||
| @@ -362,7 +362,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
| # phase 1: initialize. | |||
| # compute set-median. | |||
| dis_min = np.inf | |||
| dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median, | |||
| dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median, | |||
| **params_ged) | |||
| # find all smallest distances. | |||
| if allBestInit: # try all best init graphs. | |||
| @@ -426,96 +426,6 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
| ############################################################################### | |||
| # Useful functions. | |||
def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit',
        stabilizer='min'):
    """Compute an approximate graph edit distance (GED) between two graphs.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Every node must carry an ``'atom'`` attribute. Edge attributes are
        dropped when the graphs are handed to gedlibpy.
    lib : str
        Backend library; only ``'gedlibpy'`` is supported.
    cost : str
        Edit cost name forwarded to ``gedlibpy.set_edit_cost``.
    method : str
        Method name forwarded to ``gedlibpy.set_method``.
    saveGXL : str
        Not used by this function; kept for interface compatibility.
    stabilizer : None or 'min'
        ``None`` runs the method once. ``'min'`` runs it up to 50 times and
        keeps the run with the smallest upper bound, stopping early once
        that bound is 0.

    Returns
    -------
    dis : float
        The (smallest) upper bound reported by gedlibpy.
    pi_forward, pi_backward : list
        Node maps g1 -> g2 and g2 -> g1 expressed with the original node
        ids; removed/inserted nodes are marked with ``np.inf``.

    Raises
    ------
    ValueError
        If ``lib`` or ``stabilizer`` is not supported.
    """
    # Validate up front: these cases used to fall through and fail later
    # with an unbound-variable error.
    if lib != 'gedlibpy':
        raise ValueError('unsupported GED library: %r' % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError('unsupported stabilizer: %r' % (stabilizer,))

    def convertGraph(G):
        """Convert a graph to the proper NetworkX format that can be
        recognized by library gedlibpy (string node ids, ``chem`` label,
        unlabeled edges).
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2 in G.edges():
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")

    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()

    g = listID[0]
    h = listID[1]
    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
    else:  # stabilizer == 'min'
        upper = np.inf
        for _ in range(50):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            if upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
            if upper == 0:
                break

    dis = upper

    # Make the map labels correct: translate positional indices back to the
    # original node ids and mark removed/inserted nodes with np.inf.
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = len(nodes1)
    nb2 = len(nodes2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward
def median_distance(Gn, Gn_median, measure='ged', verbose=False,
                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for every graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    ``measure`` is not used. ``ged_cost``, ``ged_method`` and ``saveGXL`` are
    forwarded to ``GED``; ``verbose`` enables a tqdm progress bar.

    Returns
    -------
    dis_list : list
        One summed distance per graph in ``Gn``.
    pi_forward_list : list of lists
        ``pi_forward_list[i][k]`` is the forward node map from ``Gn[i]`` to
        the k-th graph of ``Gn_median``.
    """
    candidates = enumerate(Gn)
    if verbose:
        candidates = tqdm(candidates, desc='computing median distances',
                          file=sys.stdout)

    sums_of_distances = []
    forward_maps = []
    for _, graph in candidates:
        total = 0
        maps_of_graph = []
        forward_maps.append(maps_of_graph)
        for median_graph in Gn_median:
            dist, forward, _ = GED(graph, median_graph, cost=ged_cost,
                                   method=ged_method, saveGXL=saveGXL)
            maps_of_graph.append(forward)
            total += dist
        sums_of_distances.append(total)

    return sums_of_distances, forward_maps
| ############################################################################### | |||
| # Old implementations. | |||
| @@ -13,20 +13,13 @@ and the iterative alternate minimizations (IAM) in reference [2]. | |||
| """ | |||
| import sys | |||
| import numpy as np | |||
| import multiprocessing | |||
| from tqdm import tqdm | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import random | |||
| from iam import iam_upgraded | |||
| sys.path.insert(0, "../") | |||
| from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||
| from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||
| from pygraph.kernels.spKernel import spkernel | |||
| import functools | |||
| from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
| from pygraph.kernels.structuralspKernel import structuralspkernel | |||
| from utils import dis_gstar, compute_kernel | |||
| def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, | |||
| @@ -72,13 +65,13 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, | |||
| # print(g.nodes(data=True)) | |||
| # print(g.edges(data=True)) | |||
| Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors | |||
| for gi in Gk: | |||
| nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) | |||
| # nx.draw_networkx(gi) | |||
| plt.show() | |||
| # draw_Letter_graph(g) | |||
| print(gi.nodes(data=True)) | |||
| print(gi.edges(data=True)) | |||
| # for gi in Gk: | |||
| # nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) | |||
| ## nx.draw_networkx(gi) | |||
| # plt.show() | |||
| ## draw_Letter_graph(g) | |||
| # print(gi.nodes(data=True)) | |||
| # print(gi.edges(data=True)) | |||
| # i = 1 | |||
| r = 0 | |||
| @@ -173,7 +166,7 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, | |||
| print('\nthe k shortest distances are', dis_k) | |||
| print('the shortest distances for previous iterations are', dis_of_each_itr) | |||
| print('\nthe graph is updated', nb_updated, 'times.') | |||
| print('\n\nthe graph is updated', nb_updated, 'times.') | |||
| print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.') | |||
| print('distances in kernel space:', dis_of_each_itr, '\n') | |||
| @@ -227,13 +220,13 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
| # print(g.nodes(data=True)) | |||
| # print(g.edges(data=True)) | |||
| Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors | |||
| for gi in Gk: | |||
| nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) | |||
| # nx.draw_networkx(gi) | |||
| plt.show() | |||
| # draw_Letter_graph(g) | |||
| print(gi.nodes(data=True)) | |||
| print(gi.edges(data=True)) | |||
| # for gi in Gk: | |||
| # nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) | |||
| ## nx.draw_networkx(gi) | |||
| # plt.show() | |||
| ## draw_Letter_graph(g) | |||
| # print(gi.nodes(data=True)) | |||
| # print(gi.edges(data=True)) | |||
| r = 0 | |||
| itr_total = 0 | |||
| @@ -394,7 +387,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
| # compute distance between \psi and the new generated graph. | |||
| knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False) | |||
| dhat_new = dis_gstar(0, [1, 2], alpha, knew, withterm3=False) | |||
| dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1), | |||
| alpha, knew, withterm3=False) | |||
| # @todo: the new distance is smaller or also equal? | |||
| if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon: | |||
| # check if the new distance is the same as one in D_k. | |||
| @@ -448,7 +442,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
| print('\nthe k shortest distances are', dis_k) | |||
| print('the shortest distances for previous iterations are', dis_of_each_itr) | |||
| print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation', | |||
| print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation', | |||
| nb_updated_random, 'times.') | |||
| print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam, | |||
| 'times, and by random generation', nb_updated_k_random, 'times.') | |||
| @@ -458,60 +452,6 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
| nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random | |||
| ############################################################################### | |||
| # useful functions. | |||
def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Kernel-space distance between one graph and a weighted combination.

    Evaluates ``sqrt(K[g, g] - 2 * sum_i a_i K[g, gi_i] + term3)`` where
    ``g = idx_g``, ``gi = idx_gi`` and ``a = alpha``.

    When ``withterm3`` is False, ``sum_ij a_i a_j K[gi_i, gi_j]`` is computed
    here and added onto ``term3``; otherwise the passed-in ``term3`` is used
    unchanged.
    """
    self_similarity = Kmatrix[idx_g, idx_g]

    cross = 0
    for pos, weight in enumerate(alpha):
        cross += weight * Kmatrix[idx_g, idx_gi[pos]]
    cross *= 2

    if withterm3 == False:  # noqa: E712 - equality test kept on purpose
        for pos_a, weight_a in enumerate(alpha):
            row = idx_gi[pos_a]
            for pos_b, weight_b in enumerate(alpha):
                term3 += weight_a * weight_b * Kmatrix[row, idx_gi[pos_b]]

    return np.sqrt(self_similarity - cross + term3)
def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for the chosen kernel.

    Parameters
    ----------
    Gn : list of graphs
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry divided by
        ``sqrt(K[i, i] * K[j, j])``; the lower triangle mirrors the upper.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names (previously
        this ended in an unbound-variable error).
    """
    supported = ('marginalizedkernel', 'untilhpathkernel', 'spkernel',
                 'structuralspkernel')
    if graph_kernel not in supported:
        raise ValueError('unknown graph kernel: %r' % (graph_kernel,))

    n_jobs = multiprocessing.cpu_count()
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10,
                                        remove_totters=False,
                                        n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax',
                                      compute_method='trie',
                                      n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    else:  # 'structuralspkernel'
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)

    # Normalization. The diagonal is copied first because the matrix is
    # modified in place.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix
def gram2distances(Kmatrix):
    """Turn a Gram matrix into the matrix of pairwise kernel-space distances.

    ``d[i, j] = sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.

    Squared distances that come out slightly negative because of
    floating-point round-off (e.g. for duplicate graphs) are clamped to 0
    so that the result contains 0 instead of NaN.

    Parameters
    ----------
    Kmatrix : square array-like
        Gram matrix.

    Returns
    -------
    numpy.ndarray
        Square float array of the same size as ``Kmatrix``.
    """
    size = len(Kmatrix)
    gram = np.asarray(Kmatrix, dtype=float).reshape(size, size)
    diag = gram.diagonal()
    squared = diag[:, None] + diag[None, :] - 2 * gram
    return np.sqrt(np.maximum(squared, 0))
| ############################################################################### | |||
| # Old implementations. | |||
| @@ -10,51 +10,14 @@ pre-image | |||
| import sys | |||
| import numpy as np | |||
| import random | |||
| import multiprocessing | |||
| from tqdm import tqdm | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||
| from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||
| from pygraph.kernels.spKernel import spkernel | |||
| import functools | |||
| from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
| from pygraph.kernels.structuralspKernel import structuralspkernel | |||
| from gk_iam import dis_gstar | |||
def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for the chosen kernel.

    Parameters
    ----------
    Gn : list of graphs
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry divided by
        ``sqrt(K[i, i] * K[j, j])``; the lower triangle mirrors the upper.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names (previously
        this ended in an unbound-variable error).
    """
    supported = ('marginalizedkernel', 'untilhpathkernel', 'spkernel',
                 'structuralspkernel')
    if graph_kernel not in supported:
        raise ValueError('unknown graph kernel: %r' % (graph_kernel,))

    n_jobs = multiprocessing.cpu_count()
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10,
                                        remove_totters=False,
                                        n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax',
                                      compute_method='trie',
                                      n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    else:  # 'structuralspkernel'
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)

    # Normalization. The diagonal is copied first because the matrix is
    # modified in place.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix
| from utils import compute_kernel, dis_gstar | |||
| def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): | |||
| @@ -105,6 +68,7 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke | |||
| r = 0 | |||
| # sod_list = [dhat] | |||
| # found = False | |||
| dis_of_each_itr = [dhat] | |||
| nb_updated = 0 | |||
| g_best = [] | |||
| while r < r_max: | |||
| @@ -162,7 +126,8 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke | |||
| # p_quit=lmbda, n_iteration=20, remove_totters=False, | |||
| # n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
| knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False) | |||
| dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False) | |||
| dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew, | |||
| withterm3=False) | |||
| if dnew <= dhat: # @todo: the new distance is smaller or also equal? | |||
| if dnew < dhat: | |||
| print('\nI am smaller!') | |||
| @@ -184,13 +149,19 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke | |||
| dihat_list = [dhat] | |||
| else: | |||
| r += 1 | |||
| dis_of_each_itr.append(dhat) | |||
| print('the shortest distances for previous iterations are', dis_of_each_itr) | |||
| # dis_best.append(dhat) | |||
| g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||
| g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||
| print('distances in kernel space:', dis_of_each_itr, '\n') | |||
| return dhat, g_best, nb_updated | |||
| # return 0, 0, 0 | |||
| if __name__ == '__main__': | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
| # 'extra_params': {}} # node/edge symb | |||
| @@ -80,5 +80,6 @@ def testNxGrapĥ(): | |||
| print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h)) | |||
| print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h))) | |||
| #test() | |||
| init() | |||
| #testNxGrapĥ() | |||
| @@ -0,0 +1,167 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Sep 5 15:59:00 2019 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import time | |||
| import random | |||
| #from tqdm import tqdm | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from iam import iam_upgraded | |||
| from utils import remove_edges, compute_kernel, get_same_item_indices | |||
| from ged import ged_median | |||
| ############################################################################### | |||
| # tests on different numbers of median-sets. | |||
def test_iam_median_nb():
    """Run IAM on randomly chosen median sets of increasing size (MUTAG).

    For every size in ``nb_median_range`` the function samples that many
    graphs from the class-1 graphs of MUTAG (fixed random seed), runs
    ``iam_upgraded`` with these graphs as both median set and candidates,
    and prints/collects timing and distance statistics.

    NOTE(review): after the ``iam_upgraded`` call the loop body reads
    ``dhat``, ``ghat_list``, ``nb_updated`` and ``nb_updated_k``. None of
    them is assigned in this function (the call binds ``ghat_new_list`` and
    ``dis_min``) and none is defined in the visible part of this module, so
    the first iteration is expected to stop with a NameError -- TODO confirm
    and map these names onto what ``iam_upgraded`` actually returns.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
#    Gn = Gn[0:50]
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'  # not used below in this function

#    lmbda = 0.03 # termination probability
#    r_max = 10 # iteration limit for pre-image.
#    alpha_range = np.linspace(0.5, 0.5, 1)
#    k = 5 # k nearest neighbors
#    epsilon = 1e-6
#    InitIAMWithAllDk = True

    # parameters for GED function
    ged_cost='CHEM_1'
    ged_method='IPFP'
    saveGXL='gedlib'

    # parameters for IAM function
    c_ei=1
    c_er=1
    c_es=1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

#    # compute Gram matrix.
#    time0 = time.time()
#    km = compute_kernel(Gn, gkernel, True)
#    time_km = time.time() - time0
#    # write Gram matrix to file.
#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)

    # Statistics collected over all median-set sizes.
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    nb_updated_k_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # Re-seeding on every iteration makes each sample reproducible.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn_median]

#        for g in Gn_median:
#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
#            plt.show()
#            plt.clf()

        ###################################################################
        # Load the precomputed Gram matrix of the class-1 graphs.
        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
        # modify mixed gram matrix: the first len(Gn) rows/columns are the
        # loaded matrix, the last nb_median rows/columns repeat the entries
        # of the chosen median graphs.
        # NOTE(review): km, time_km and alpha_range are built here but not
        # passed to any call visible in this function.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]

        ###################################################################
        # Uniform weights over the median graphs.
        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        ghat_new_list, dis_min = iam_upgraded(Gn_median, Gn_candidate,
            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
            epsilon=epsilon_iam, removeNodes=removeNodes,
            connected=connected_iam,
            params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                        'saveGXL': saveGXL})

        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)
        # NOTE(review): dhat, ghat_list, nb_updated and nb_updated_k are
        # not defined in this function (see docstring).
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)
        print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
        nb_updated_k_list.append(nb_updated_k)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                with_labels=True)
        # NOTE(review): savefig() is called after show(); with interactive
        # backends the figure may already be gone at that point, so the
        # saved image can be blank -- confirm and consider swapping them.
        plt.show()
        plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.clf()
#        print(ghat_list[0].nodes(data=True))
#        print(ghat_list[0].edges(data=True))

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    # Summary over all median-set sizes.
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
          nb_updated_list)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
          nb_updated_k_list)
    print('\ntimes:', time_list)
| ############################################################################### | |||
if __name__ == '__main__':
    # Script entry point: run the experiment that varies the number of
    # graphs in the median set.
    test_iam_median_nb()
| @@ -15,6 +15,9 @@ import sys | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from median import draw_Letter_graph | |||
| from ged import GED, ged_median | |||
| from utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||
| dis_gstar, remove_edges | |||
| # --------------------------- These are tests --------------------------------# | |||
| @@ -47,7 +50,6 @@ def test_who_is_the_closest_in_kernel_space(Gn): | |||
| def test_who_is_the_closest_in_GED_space(Gn): | |||
| from iam import GED | |||
| idx_gi = [0, 6] | |||
| g1 = Gn[idx_gi[0]] | |||
| g2 = Gn[idx_gi[1]] | |||
| @@ -142,7 +144,7 @@ def test_new_IAM_allGraph_deleteNodes(Gn): | |||
| def test_the_simple_two(Gn, gkernel): | |||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||
| from gk_iam import gk_iam_nearest_multi | |||
| lmbda = 0.03 # termination probalility | |||
| r_max = 10 # recursions | |||
| l = 500 | |||
| @@ -199,7 +201,7 @@ def test_the_simple_two(Gn, gkernel): | |||
| def test_remove_bests(Gn, gkernel): | |||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||
| from gk_iam import gk_iam_nearest_multi | |||
| lmbda = 0.03 # termination probalility | |||
| r_max = 10 # recursions | |||
| l = 500 | |||
| @@ -249,8 +251,7 @@ def test_remove_bests(Gn, gkernel): | |||
| # Tests on dataset Letter-H. | |||
| def test_gkiam_letter_h(): | |||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||
| from iam import median_distance | |||
| from gk_iam import gk_iam_nearest_multi | |||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
| 'extra_params': {}} # node nsymb | |||
| # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
| @@ -305,7 +306,7 @@ def test_gkiam_letter_h(): | |||
| print(g.edges(data=True)) | |||
| # compute the corresponding sod in graph space. (alpha range not considered.) | |||
| sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER', | |||
| sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER', | |||
| ged_method='IPFP', saveGXL='gedlib-letter') | |||
| sod_gs_list.append(sod_tmp) | |||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||
| @@ -318,19 +319,6 @@ def test_gkiam_letter_h(): | |||
| print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) | |||
| print('\nnumber of updates for each letter: ', nb_updated_list) | |||
| print('\ntimes:', time_list) | |||
def get_same_item_indices(ls):
    """Group the positions of a list by the item stored at each position.

    Returns a plain dict mapping every distinct item of ``ls`` to the list of
    indices (in increasing order) at which that item occurs. Items must be
    hashable because they are used as dict keys.
    """
    positions = {}
    for position, value in enumerate(ls):
        # Create the bucket on first sight of the value, then record the index.
        positions.setdefault(value, []).append(position)
    return positions
| #def compute_letter_median_by_average(Gn): | |||
| # return g_median | |||
| @@ -338,7 +326,6 @@ def get_same_item_indices(ls): | |||
| def test_iam_letter_h(): | |||
| from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations | |||
| from gk_iam import dis_gstar, compute_kernel | |||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
| 'extra_params': {}} # node nsymb | |||
| # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
| @@ -402,7 +389,7 @@ def test_iam_letter_h(): | |||
| def test_random_preimage_letter_h(): | |||
| from preimage_random import preimage_random, compute_kernel | |||
| from preimage_random import preimage_random | |||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
| 'extra_params': {}} # node nsymb | |||
| # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
| @@ -463,7 +450,7 @@ def test_random_preimage_letter_h(): | |||
| print(g.edges(data=True)) | |||
| # compute the corresponding sod in graph space. (alpha range not considered.) | |||
| sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||
| sod_tmp, _ = ged_median(g_best[0], Gn_let) | |||
| sod_list.append(sod_tmp) | |||
| sod_min_list.append(np.min(sod_tmp)) | |||
| @@ -479,8 +466,7 @@ def test_random_preimage_letter_h(): | |||
| def test_gkiam_mutag(): | |||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||
| from iam import median_distance | |||
| from gk_iam import gk_iam_nearest_multi | |||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
| 'extra_params': {}} # node nsymb | |||
| # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
| @@ -535,7 +521,7 @@ def test_gkiam_mutag(): | |||
| print(g.edges(data=True)) | |||
| # compute the corresponding sod in graph space. (alpha range not considered.) | |||
| sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||
| sod_tmp, _ = ged_median(g_best[0], Gn_let) | |||
| sod_gs_list.append(sod_tmp) | |||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||
| sod_ks_min_list.append(sod_ks) | |||
| @@ -553,9 +539,7 @@ def test_gkiam_mutag(): | |||
| # Re-test. | |||
| def retest_the_simple_two(): | |||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||
| from iam import median_distance | |||
| from test_random_mutag import remove_edges | |||
| from gk_iam import gk_iam_nearest_multi | |||
| # The two simple graphs. | |||
| # g1 = nx.Graph(name='haha') | |||
| @@ -653,7 +637,7 @@ def retest_the_simple_two(): | |||
| # compute the corresponding sod in graph space. | |||
| for idx, item in enumerate(alpha_range): | |||
| sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||
| sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost, | |||
| ged_method=ged_method, saveGXL=saveGXL) | |||
| sod_gs_list.append(sod_tmp) | |||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||
| @@ -10,20 +10,23 @@ import numpy as np | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import time | |||
| from tqdm import tqdm | |||
| import random | |||
| #from tqdm import tqdm | |||
| import os | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from utils import remove_edges, compute_kernel, get_same_item_indices | |||
| from ged import ged_median | |||
| from preimage_iam import preimage_iam | |||
| ############################################################################### | |||
| # test on the combination of the two randomly chosen graphs. (the same as in the | |||
| # random pre-image paper.) | |||
| # tests on different values on grid of median-sets and k. | |||
| def test_preimage_mix_2combination_all_pairs(): | |||
| from preimage_iam import preimage_iam_random_mix, compute_kernel | |||
| from iam import median_distance | |||
| def test_preimage_iam_grid_k_median_nb(): | |||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
| 'extra_params': {}} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
| @@ -32,13 +35,11 @@ def test_preimage_mix_2combination_all_pairs(): | |||
| gkernel = 'marginalizedkernel' | |||
| lmbda = 0.03 # termination probalility | |||
| r_max = 10 # iteration limit for pre-image. | |||
| l_max = 500 # update limit for random generation | |||
| alpha_range = np.linspace(0.5, 0.5, 1) | |||
| k = 5 # k nearest neighbors | |||
| r_max = 5 # iteration limit for pre-image. | |||
| # alpha_range = np.linspace(0.5, 0.5, 1) | |||
| # k = 5 # k nearest neighbors | |||
| epsilon = 1e-6 | |||
| InitIAMWithAllDk = True | |||
| InitRandomWithAllDk = True | |||
| # parameters for GED function | |||
| ged_cost='CHEM_1' | |||
| ged_method='IPFP' | |||
| @@ -52,153 +53,280 @@ def test_preimage_mix_2combination_all_pairs(): | |||
| removeNodes = True | |||
| connected_iam = False | |||
| nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf) | |||
| nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf) | |||
| # test on each pair of graphs. | |||
| # for idx1 in range(len(Gn) - 1, -1, -1): | |||
| # for idx2 in range(idx1, -1, -1): | |||
| for idx1 in range(187, 188): | |||
| for idx2 in range(167, 168): | |||
| g1 = Gn[idx1].copy() | |||
| g2 = Gn[idx2].copy() | |||
| # Gn[10] = [] | |||
| # Gn[10] = [] | |||
| # number of graphs; we what to compute the median of these graphs. | |||
| nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
| # number of nearest neighbors. | |||
| k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] | |||
| # find out all the graphs classified to positive group 1. | |||
| idx_dict = get_same_item_indices(y_all) | |||
| Gn = [Gn[i] for i in idx_dict[1]] | |||
| # # compute Gram matrix. | |||
| # time0 = time.time() | |||
| # km = compute_kernel(Gn, gkernel, True) | |||
| # time_km = time.time() - time0 | |||
| # # write Gram matrix to file. | |||
| # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
| time_list = [] | |||
| dis_ks_min_list = [] | |||
| sod_gs_list = [] | |||
| sod_gs_min_list = [] | |||
| nb_updated_list = [] | |||
| nb_updated_k_list = [] | |||
| g_best = [] | |||
| for idx_nb, nb_median in enumerate(nb_median_range): | |||
| print('\n-------------------------------------------------------') | |||
| print('number of median graphs =', nb_median) | |||
| random.seed(1) | |||
| idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
| print('graphs chosen:', idx_rdm) | |||
| Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
| # for g in Gn_median: | |||
| # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
| ## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
| # plt.show() | |||
| # plt.clf() | |||
| ################################################################### | |||
| gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
| km_tmp = gmfile['gm'] | |||
| time_km = gmfile['gmtime'] | |||
| # modify mixed gram matrix. | |||
| km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
| for i in range(len(Gn)): | |||
| for j in range(i, len(Gn)): | |||
| km[i, j] = km_tmp[i, j] | |||
| km[j, i] = km[i, j] | |||
| for i in range(len(Gn)): | |||
| for j, idx in enumerate(idx_rdm): | |||
| km[i, len(Gn) + j] = km[i, idx] | |||
| km[len(Gn) + j, i] = km[i, idx] | |||
| for i, idx1 in enumerate(idx_rdm): | |||
| for j, idx2 in enumerate(idx_rdm): | |||
| km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
| ################################################################### | |||
| alpha_range = [1 / nb_median] * nb_median | |||
| time_list.append([]) | |||
| dis_ks_min_list.append([]) | |||
| sod_gs_list.append([]) | |||
| sod_gs_min_list.append([]) | |||
| nb_updated_list.append([]) | |||
| nb_updated_k_list.append([]) | |||
| g_best.append([]) | |||
| for k in k_range: | |||
| print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |||
| print('k =', k) | |||
| time0 = time.time() | |||
| dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \ | |||
| preimage_iam(Gn, Gn_median, | |||
| alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||
| gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
| params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
| 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
| 'removeNodes': removeNodes, 'connected': connected_iam}, | |||
| params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
| 'saveGXL': saveGXL}) | |||
| time_total = time.time() - time0 + time_km | |||
| print('time: ', time_total) | |||
| time_list[idx_nb].append(time_total) | |||
| print('\nsmallest distance in kernel space: ', dhat) | |||
| dis_ks_min_list[idx_nb].append(dhat) | |||
| g_best[idx_nb].append(ghat_list) | |||
| print('\nnumber of updates of the best graph by IAM: ', nb_updated) | |||
| nb_updated_list[idx_nb].append(nb_updated) | |||
| print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k) | |||
| nb_updated_k_list[idx_nb].append(nb_updated_k) | |||
| nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||
| plt.savefig("results/preimage_mix/mutag187.png", format="PNG") | |||
| plt.show() | |||
| plt.clf() | |||
| nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||
| plt.savefig("results/preimage_mix/mutag167.png", format="PNG") | |||
| plt.show() | |||
| # show the best graph and save it to file. | |||
| print('the shortest distance is', dhat) | |||
| print('one of the possible corresponding pre-images is') | |||
| nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||
| with_labels=True) | |||
| plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + | |||
| '_k' + str(k) + '.png', format="PNG") | |||
| # plt.show() | |||
| plt.clf() | |||
| # print(ghat_list[0].nodes(data=True)) | |||
| # print(ghat_list[0].edges(data=True)) | |||
| # compute the corresponding sod in graph space. | |||
| sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||
| ged_method=ged_method, saveGXL=saveGXL) | |||
| sod_gs_list[idx_nb].append(sod_tmp) | |||
| sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) | |||
| print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
| print('\nsods in graph space: ', sod_gs_list) | |||
| print('\nsmallest sod in graph space for each set of median graphs and k: ', | |||
| sod_gs_min_list) | |||
| print('\nsmallest distance in kernel space for each set of median graphs and k: ', | |||
| dis_ks_min_list) | |||
| print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', | |||
| nb_updated_list) | |||
| print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', | |||
| nb_updated_k_list) | |||
| print('\ntimes:', time_list) | |||
| ################################################################### | |||
| # Gn_mix = [g.copy() for g in Gn] | |||
| # Gn_mix.append(g1.copy()) | |||
| # Gn_mix.append(g2.copy()) | |||
| # | |||
| # # compute | |||
| # time0 = time.time() | |||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||
| # time_km = time.time() - time0 | |||
| # | |||
| # # write Gram matrix to file and read it. | |||
| # np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km) | |||
| ################################################################### | |||
| gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||
| km = gmfile['gm'] | |||
| time_km = gmfile['gmtime'] | |||
| # modify mixed gram matrix. | |||
| for i in range(len(Gn)): | |||
| km[i, len(Gn)] = km[i, idx1] | |||
| km[i, len(Gn) + 1] = km[i, idx2] | |||
| km[len(Gn), i] = km[i, idx1] | |||
| km[len(Gn) + 1, i] = km[i, idx2] | |||
| km[len(Gn), len(Gn)] = km[idx1, idx1] | |||
| km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||
| km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||
| km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||
| ################################################################### | |||
| # # use only the two graphs in median set as candidates. | |||
| # Gn = [g1.copy(), g2.copy()] | |||
| # Gn_mix = Gn + [g1.copy(), g2.copy()] | |||
| # # compute | |||
| # time0 = time.time() | |||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||
| # time_km = time.time() - time0 | |||
| ############################################################################### | |||
| # tests on different numbers of median-sets. | |||
| def test_preimage_iam_median_nb(): | |||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
| 'extra_params': {}} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
| # Gn = Gn[0:50] | |||
| remove_edges(Gn) | |||
| gkernel = 'marginalizedkernel' | |||
| time_list = [] | |||
| dis_ks_min_list = [] | |||
| sod_gs_list = [] | |||
| sod_gs_min_list = [] | |||
| nb_updated_list_iam = [] | |||
| nb_updated_list_random = [] | |||
| nb_updated_k_list_iam = [] | |||
| nb_updated_k_list_random = [] | |||
| g_best = [] | |||
| # for each alpha | |||
| for alpha in alpha_range: | |||
| print('\n-------------------------------------------------------\n') | |||
| print('alpha =', alpha) | |||
| time0 = time.time() | |||
| dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||
| nb_updated_k_iam, nb_updated_k_random = \ | |||
| preimage_iam_random_mix(Gn, [g1, g2], | |||
| [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||
| l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
| InitRandomWithAllDk=InitRandomWithAllDk, | |||
| params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
| 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
| 'removeNodes': removeNodes, 'connected': connected_iam}, | |||
| params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
| 'saveGXL': saveGXL}) | |||
| time_total = time.time() - time0 + time_km | |||
| print('time: ', time_total) | |||
| time_list.append(time_total) | |||
| dis_ks_min_list.append(dhat) | |||
| g_best.append(ghat_list) | |||
| nb_updated_list_iam.append(nb_updated_iam) | |||
| nb_updated_list_random.append(nb_updated_random) | |||
| nb_updated_k_list_iam.append(nb_updated_k_iam) | |||
| nb_updated_k_list_random.append(nb_updated_k_random) | |||
| # show best graphs and save them to file. | |||
| for idx, item in enumerate(alpha_range): | |||
| print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||
| print('one of the possible corresponding pre-images is') | |||
| nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||
| with_labels=True) | |||
| plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) | |||
| + '_alpha' + str(item) + '.png', format="PNG") | |||
| # plt.show() | |||
| plt.clf() | |||
| # print(g_best[idx][0].nodes(data=True)) | |||
| # print(g_best[idx][0].edges(data=True)) | |||
| # for g in g_best[idx]: | |||
| # draw_Letter_graph(g, savepath='results/gk_iam/') | |||
| ## nx.draw_networkx(g) | |||
| ## plt.show() | |||
| # print(g.nodes(data=True)) | |||
| # print(g.edges(data=True)) | |||
| lmbda = 0.03 # termination probalility | |||
| r_max = 10 # iteration limit for pre-image. | |||
| # alpha_range = np.linspace(0.5, 0.5, 1) | |||
| k = 5 # k nearest neighbors | |||
| epsilon = 1e-6 | |||
| InitIAMWithAllDk = True | |||
| # parameters for GED function | |||
| ged_cost='CHEM_1' | |||
| ged_method='IPFP' | |||
| saveGXL='gedlib' | |||
| # parameters for IAM function | |||
| c_ei=1 | |||
| c_er=1 | |||
| c_es=1 | |||
| ite_max_iam = 50 | |||
| epsilon_iam = 0.001 | |||
| removeNodes = True | |||
| connected_iam = False | |||
| # number of graphs; we want to compute the median of these graphs. | |||
| nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
| # find out all the graphs classified to positive group 1. | |||
| idx_dict = get_same_item_indices(y_all) | |||
| Gn = [Gn[i] for i in idx_dict[1]] | |||
| # # compute Gram matrix. | |||
| # time0 = time.time() | |||
| # km = compute_kernel(Gn, gkernel, True) | |||
| # time_km = time.time() - time0 | |||
| # # write Gram matrix to file. | |||
| # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
| time_list = [] | |||
| dis_ks_min_list = [] | |||
| sod_gs_list = [] | |||
| sod_gs_min_list = [] | |||
| nb_updated_list = [] | |||
| nb_updated_k_list = [] | |||
| g_best = [] | |||
| for nb_median in nb_median_range: | |||
| print('\n-------------------------------------------------------') | |||
| print('number of median graphs =', nb_median) | |||
| random.seed(1) | |||
| idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
| print('graphs chosen:', idx_rdm) | |||
| Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
| # for g in Gn_median: | |||
| # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
| ## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
| # plt.show() | |||
| # plt.clf() | |||
| # compute the corresponding sod in graph space. | |||
| for idx, item in enumerate(alpha_range): | |||
| sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||
| ged_method=ged_method, saveGXL=saveGXL) | |||
| sod_gs_list.append(sod_tmp) | |||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||
| ################################################################### | |||
| gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
| km_tmp = gmfile['gm'] | |||
| time_km = gmfile['gmtime'] | |||
| # modify mixed gram matrix. | |||
| km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
| for i in range(len(Gn)): | |||
| for j in range(i, len(Gn)): | |||
| km[i, j] = km_tmp[i, j] | |||
| km[j, i] = km[i, j] | |||
| for i in range(len(Gn)): | |||
| for j, idx in enumerate(idx_rdm): | |||
| km[i, len(Gn) + j] = km[i, idx] | |||
| km[len(Gn) + j, i] = km[i, idx] | |||
| for i, idx1 in enumerate(idx_rdm): | |||
| for j, idx2 in enumerate(idx_rdm): | |||
| km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
| print('\nsods in graph space: ', sod_gs_list) | |||
| print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||
| print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||
| print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam) | |||
| print('\nnumber of updates of the best graph for each alpha by random generation: ', | |||
| nb_updated_list_random) | |||
| print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', | |||
| nb_updated_k_list_iam) | |||
| print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', | |||
| nb_updated_k_list_random) | |||
| print('\ntimes:', time_list) | |||
| nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0] | |||
| nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0] | |||
| ################################################################### | |||
| alpha_range = [1 / nb_median] * nb_median | |||
| time0 = time.time() | |||
| dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \ | |||
| preimage_iam(Gn, Gn_median, | |||
| alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||
| gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
| params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
| 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
| 'removeNodes': removeNodes, 'connected': connected_iam}, | |||
| params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
| 'saveGXL': saveGXL}) | |||
| str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \ | |||
| % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0]) | |||
| with open('results/preimage_mix/nb_updates.txt', 'r+') as file: | |||
| content = file.read() | |||
| file.seek(0, 0) | |||
| file.write(str_fw + content) | |||
| time_total = time.time() - time0 + time_km | |||
| print('\ntime: ', time_total) | |||
| time_list.append(time_total) | |||
| print('\nsmallest distance in kernel space: ', dhat) | |||
| dis_ks_min_list.append(dhat) | |||
| g_best.append(ghat_list) | |||
| print('\nnumber of updates of the best graph: ', nb_updated) | |||
| nb_updated_list.append(nb_updated) | |||
| print('\nnumber of updates of k nearest graphs: ', nb_updated_k) | |||
| nb_updated_k_list.append(nb_updated_k) | |||
| # show the best graph and save it to file. | |||
| print('the shortest distance is', dhat) | |||
| print('one of the possible corresponding pre-images is') | |||
| nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||
| with_labels=True) | |||
| # plt.show() | |||
| plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + | |||
| '.png', format="PNG") | |||
| plt.clf() | |||
| # print(ghat_list[0].nodes(data=True)) | |||
| # print(ghat_list[0].edges(data=True)) | |||
| # compute the corresponding sod in graph space. | |||
| sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||
| ged_method=ged_method, saveGXL=saveGXL) | |||
| sod_gs_list.append(sod_tmp) | |||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||
| print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
| print('\nsods in graph space: ', sod_gs_list) | |||
| print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||
| print('\nsmallest distance in kernel space for each set of median graphs: ', | |||
| dis_ks_min_list) | |||
| print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', | |||
| nb_updated_list) | |||
| print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', | |||
| nb_updated_k_list) | |||
| print('\ntimes:', time_list) | |||
| ############################################################################### | |||
| # test on the combination of the two randomly chosen graphs. (the same as in the | |||
| # random pre-image paper.) | |||
| def test_gkiam_2combination_all_pairs(): | |||
| from preimage_iam import preimage_iam, compute_kernel | |||
| from iam import median_distance | |||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
| 'extra_params': {}} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
| @@ -334,7 +462,7 @@ def test_gkiam_2combination_all_pairs(): | |||
| # compute the corresponding sod in graph space. | |||
| for idx, item in enumerate(alpha_range): | |||
| sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||
| sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, | |||
| ged_method=ged_method, saveGXL=saveGXL) | |||
| sod_gs_list.append(sod_tmp) | |||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||
| @@ -358,8 +486,7 @@ def test_gkiam_2combination_all_pairs(): | |||
| def test_gkiam_2combination(): | |||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||
| from iam import median_distance | |||
| from gk_iam import gk_iam_nearest_multi | |||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
| 'extra_params': {}} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
| @@ -451,7 +578,7 @@ def test_gkiam_2combination(): | |||
| # compute the corresponding sod in graph space. | |||
| for idx, item in enumerate(alpha_range): | |||
| sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||
| sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, | |||
| ged_method=ged_method, saveGXL=saveGXL) | |||
| sod_gs_list.append(sod_tmp) | |||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||
| @@ -463,148 +590,6 @@ def test_gkiam_2combination(): | |||
| print('\ntimes:', time_list) | |||
def test_random_preimage_2combination():
    """Random pre-image experiment on a weighted pair of MUTAG graphs.

    Loads the MUTAG dataset, clears all edge attributes, and copies graphs
    187 and 167. A Gram matrix read from
    'results/gram_matrix_marg_itr10_pq0.03.gm.npz' is overwritten in its two
    rows/columns at indices len(Gn) and len(Gn) + 1 with the kernel values of
    the two chosen graphs. For each alpha in np.linspace(0, 1, 11),
    ``preimage_random`` is called with weights [alpha, 1 - alpha]; the
    returned graph is drawn, saved as PNG, shown and printed, and summary
    lists are printed at the end.
    """
    from preimage_random import preimage_random

    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    lmbda = 0.03  # termination probability (not referenced below)
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # passed positionally to preimage_random after r_max
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors

    # The pair is fixed; the seed is still set because it affects the global
    # NumPy random state.
    np.random.seed(1)
    idx_gi = [187, 167]
    idx1, idx2 = idx_gi[0], idx_gi[1]
    g1 = Gn[idx1].copy()
    g2 = Gn[idx2].copy()

    # Load the precomputed Gram matrix and the time it took to compute.
    gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
    km = gmfile['gm']
    time_km = gmfile['gmtime']

    # Fill the two extra rows/columns with the kernel values of the chosen
    # graphs. Sources lie in the original block, which is never written.
    nb_graphs = len(Gn)
    chosen = (idx1, idx2)
    for offset, src in enumerate(chosen):
        for i in range(nb_graphs):
            km[i, nb_graphs + offset] = km[i, src]
            km[nb_graphs + offset, i] = km[i, src]
    for row_offset, row_src in enumerate(chosen):
        for col_offset, col_src in enumerate(chosen):
            km[nb_graphs + row_offset, nb_graphs + col_offset] = \
                km[row_src, col_src]

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []

    # One pre-image search per alpha.
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        weights = [alpha, 1 - alpha]
        # len(Gn) is re-read on every call, exactly as the original does.
        median_indices = range(len(Gn), len(Gn) + 2)
        dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], weights,
                                                 median_indices, km,
                                                 k, r_max, l_max, gkernel)
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # Show each best graph and save it to file.
    for item, dis_ks_min, best_graph in zip(alpha_range, dis_ks_min_list,
                                            g_best):
        print('when alpha is', item, 'the shortest distance is', dis_ks_min)
        print('one of the possible corresponding pre-images is')
        atom_labels = nx.get_node_attributes(best_graph, 'atom')
        nx.draw(best_graph, labels=atom_labels, with_labels=True)
        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
        plt.show()
        plt.clf()
        print(best_graph.nodes(data=True))
        print(best_graph.edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
| ############################################################################### | |||
| # help functions | |||
def remove_edges(Gn):
    """Clear the attributes of every edge of every graph in ``Gn``, in place.

    Despite its name, this helper does not delete any edge: it only empties
    the attribute mapping that ``G.edges(data=True)`` yields for each edge.

    Parameters
    ----------
    Gn : iterable of graphs
        Each graph must provide ``edges(data=True)`` yielding
        ``(u, v, attrs)`` triples whose ``attrs`` supports ``clear()``.

    Returns
    -------
    None
    """
    edge_views = (graph.edges(data=True) for graph in Gn)
    for view in edge_views:
        for _src, _dst, edge_attrs in view:
            edge_attrs.clear()
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Compute pairwise distances between graphs in kernel (feature) space.

    The distance between graphs ``i`` and ``j`` is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``. Only the entries of ``Kmatrix``
    with ``i <= j < len(Gn)`` are read; the result is mirrored to make the
    distance matrix symmetric.

    Parameters
    ----------
    Gn : sequence of graphs
        Only ``len(Gn)`` is used when ``Kmatrix`` is supplied.
    Kmatrix : 2-D array indexable as ``Kmatrix[i, j]``, optional
        Precomputed Gram matrix. When ``None`` it is computed with
        ``compute_kernel(Gn, gkernel, True)`` from ``gk_iam``.
    gkernel : optional
        Kernel identifier forwarded to ``compute_kernel``; unused when
        ``Kmatrix`` is supplied.

    Returns
    -------
    dis_mat : numpy.ndarray of shape ``(len(Gn), len(Gn))``
        Symmetric matrix of distances.
    dis_max : float
        Largest distance.
    dis_min : float
        Smallest non-zero distance, or ``0.0`` when every distance is zero.
    dis_mean : float
        Mean over all entries of ``dis_mat`` (diagonal included).

    Raises
    ------
    ValueError
        If a squared distance is below ``-1e-10``.
    """
    nb_graphs = len(Gn)

    # ``is None`` is required here: ``Kmatrix == None`` on an ndarray compares
    # elementwise and its truth value is ambiguous.
    if Kmatrix is None:
        # Imported lazily so callers that pass a Gram matrix do not need
        # the gk_iam module at all.
        from gk_iam import compute_kernel
        Kmatrix = compute_kernel(Gn, gkernel, True)

    dis_mat = np.empty((nb_graphs, nb_graphs))
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                # Tiny negative values are treated as rounding noise and
                # clamped to zero; anything larger is reported as an error.
                if dis > -1e-10:
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]

    dis_max = np.max(dis_mat)
    nonzero_distances = dis_mat[dis_mat != 0]
    # np.min would raise on an empty selection (e.g. a single graph).
    dis_min = np.min(nonzero_distances) if nonzero_distances.size > 0 else 0.0
    dis_mean = np.mean(dis_mat)

    return dis_mat, dis_max, dis_min, dis_mean
| ############################################################################### | |||
| @@ -612,7 +597,13 @@ if __name__ == '__main__': | |||
| ############################################################################### | |||
| # test on the combination of the two randomly chosen graphs. (the same as in the | |||
| # random pre-image paper.) | |||
| # test_random_preimage_2combination() | |||
| # test_gkiam_2combination() | |||
| # test_gkiam_2combination_all_pairs() | |||
| test_preimage_mix_2combination_all_pairs() | |||
| ############################################################################### | |||
| # tests on different numbers of median-sets. | |||
| test_preimage_iam_median_nb() | |||
| ############################################################################### | |||
| # tests on different values on grid of median-sets and k. | |||
| # test_preimage_iam_grid_k_median_nb() | |||
| @@ -0,0 +1,542 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Sep 5 15:59:00 2019 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import time | |||
| import random | |||
| #from tqdm import tqdm | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from ged import ged_median | |||
| from utils import compute_kernel, get_same_item_indices, remove_edges | |||
| from preimage_iam import preimage_iam_random_mix | |||
| ############################################################################### | |||
| # tests on different values on grid of median-sets and k. | |||
def test_preimage_mix_grid_k_median_nb():
    """Run the mixed pre-image experiment on a grid of median-set sizes and k.

    The MUTAG dataset is loaded, ``remove_edges`` is applied, and only the
    graphs of class 1 are kept. For every size in ``nb_median_range`` a median
    set is sampled (the random seed is reset to 1 before each sampling), the
    precomputed Gram matrix is extended with one row/column per median graph,
    and ``preimage_iam_random_mix`` is run once for every ``k`` in
    ``k_range``. For each run the first returned graph is drawn to
    ``results/preimage_mix/`` and its sum of distances (SOD) to the median set
    is computed with ``ged_median``. All collected statistics are printed at
    the end.

    Requires ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``) and the ``results/preimage_mix/`` directory
    to exist.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    params_ged = {'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
                  'saveGXL': 'gedlib'}
    # parameters for IAM function
    params_iam = {'c_ei': 1, 'c_er': 1, 'c_es': 1,
                  'ite_max': 50, 'epsilon': 0.001,
                  'removeNodes': True, 'connected': False}

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The Gram matrix file can be (re)generated with:
    #     time0 = time.time()
    #     km = compute_kernel(Gn, gkernel, True)
    #     time_km = time.time() - time0
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    # It does not depend on the loop variables, so it is loaded once, and the
    # NpzFile is closed as soon as both arrays have been read.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    # Symmetric copy of the stored matrix built from its upper triangle only.
    km_upper = np.triu(km_tmp[:nb_graphs, :nb_graphs])
    km_graphs = km_upper + np.triu(km_upper, 1).T

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list_iam = []
    nb_updated_list_random = []
    nb_updated_k_list_iam = []
    nb_updated_k_list_random = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Extend the Gram matrix: the median graphs are copies of dataset
        # graphs, so their kernel values are copies of existing rows/columns.
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        km[:nb_graphs, :nb_graphs] = km_graphs
        km_cross = km[:nb_graphs, idx_rdm]
        km[:nb_graphs, nb_graphs:] = km_cross
        km[nb_graphs:, :nb_graphs] = km_cross.T
        km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_rdm, idx_rdm)]

        alpha_range = [1 / nb_median] * nb_median

        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list_iam.append([])
        nb_updated_list_random.append([])
        nb_updated_k_list_iam.append([])
        nb_updated_k_list_random.append([])
        g_best.append([])

        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
                nb_updated_k_iam, nb_updated_k_random = \
                preimage_iam_random_mix(
                    Gn, Gn_median, alpha_range,
                    range(nb_graphs, nb_graphs + nb_median), km, k, r_max,
                    l_max, gkernel, epsilon=epsilon,
                    InitIAMWithAllDk=InitIAMWithAllDk,
                    InitRandomWithAllDk=InitRandomWithAllDk,
                    params_iam=params_iam, params_ged=params_ged)

            # The stored Gram matrix computation time is added to the run time.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat)
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat_list)
            print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
            nb_updated_list_iam[idx_nb].append(nb_updated_iam)
            print('\nnumber of updates of the best graph by random generation: ',
                  nb_updated_random)
            nb_updated_list_random[idx_nb].append(nb_updated_random)
            print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
            nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam)
            print('\nnumber of updates of k nearest graphs by random generation: ',
                  nb_updated_k_random)
            nb_updated_k_list_random[idx_nb].append(nb_updated_k_random)

            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                    with_labels=True)
            plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()

            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, **params_ged)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
          nb_updated_list_iam)
    print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ',
          nb_updated_list_random)
    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
          nb_updated_k_list_iam)
    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ',
          nb_updated_k_list_random)
    print('\ntimes:', time_list)
| ############################################################################### | |||
| # tests on different numbers of median-sets. | |||
def test_preimage_mix_median_nb():
    """Run the mixed pre-image experiment for several median-set sizes.

    The MUTAG dataset is loaded, ``remove_edges`` is applied, and only the
    graphs of class 1 are kept. For every size in ``nb_median_range`` a median
    set is sampled (the random seed is reset to 1 before each sampling), the
    precomputed Gram matrix is extended with one row/column per median graph,
    and ``preimage_iam_random_mix`` is run with a fixed ``k = 5``. The first
    returned graph is drawn to ``results/preimage_mix/`` and its sum of
    distances (SOD) to the median set is computed with ``ged_median``. All
    collected statistics are printed at the end.

    Requires ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``) and the ``results/preimage_mix/`` directory
    to exist.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    params_ged = {'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
                  'saveGXL': 'gedlib'}
    # parameters for IAM function
    params_iam = {'c_ei': 1, 'c_er': 1, 'c_es': 1,
                  'ite_max': 50, 'epsilon': 0.001,
                  'removeNodes': True, 'connected': False}

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The Gram matrix file can be (re)generated with:
    #     time0 = time.time()
    #     km = compute_kernel(Gn, gkernel, True)
    #     time_km = time.time() - time0
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    # It does not depend on the loop variable, so it is loaded once, and the
    # NpzFile is closed as soon as both arrays have been read.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    # Symmetric copy of the stored matrix built from its upper triangle only.
    km_upper = np.triu(km_tmp[:nb_graphs, :nb_graphs])
    km_graphs = km_upper + np.triu(km_upper, 1).T

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list_iam = []
    nb_updated_list_random = []
    nb_updated_k_list_iam = []
    nb_updated_k_list_random = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Extend the Gram matrix: the median graphs are copies of dataset
        # graphs, so their kernel values are copies of existing rows/columns.
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        km[:nb_graphs, :nb_graphs] = km_graphs
        km_cross = km[:nb_graphs, idx_rdm]
        km[:nb_graphs, nb_graphs:] = km_cross
        km[nb_graphs:, :nb_graphs] = km_cross.T
        km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_rdm, idx_rdm)]

        alpha_range = [1 / nb_median] * nb_median

        time0 = time.time()
        dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
            nb_updated_k_iam, nb_updated_k_random = \
            preimage_iam_random_mix(
                Gn, Gn_median, alpha_range,
                range(nb_graphs, nb_graphs + nb_median), km, k, r_max,
                l_max, gkernel, epsilon=epsilon,
                InitIAMWithAllDk=InitIAMWithAllDk,
                InitRandomWithAllDk=InitRandomWithAllDk,
                params_iam=params_iam, params_ged=params_ged)

        # The stored Gram matrix computation time is added to the run time.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
        nb_updated_list_iam.append(nb_updated_iam)
        print('\nnumber of updates of the best graph by random generation: ',
              nb_updated_random)
        nb_updated_list_random.append(nb_updated_random)
        print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
        nb_updated_k_list_iam.append(nb_updated_k_iam)
        print('\nnumber of updates of k nearest graphs by random generation: ',
              nb_updated_k_random)
        nb_updated_k_list_random.append(nb_updated_k_random)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, **params_ged)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
          nb_updated_list_iam)
    print('\nnumber of updates of the best graph for each set of median graphs by random generation: ',
          nb_updated_list_random)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
          nb_updated_k_list_iam)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ',
          nb_updated_k_list_random)
    print('\ntimes:', time_list)
| ############################################################################### | |||
| # test on the combination of the two randomly chosen graphs. (the same as in the | |||
| # random pre-image paper.) | |||
def test_preimage_mix_2combination_all_pairs():
    """Run the mixed pre-image experiment on combinations of two graphs.

    The MUTAG dataset is loaded and ``remove_edges`` is applied. For each
    selected pair ``(idx1, idx2)`` (currently only 187 and 167) the two graphs
    are drawn and saved, the precomputed Gram matrix is extended with the
    rows/columns of the two graphs, and ``preimage_iam_random_mix`` is run for
    every ``alpha`` in ``alpha_range`` with weights ``[alpha, 1 - alpha]``.
    The first graph returned for each alpha is drawn to
    ``results/preimage_mix/`` and its sum of distances (SOD) to the two graphs
    is computed with ``ged_median``. Statistics are printed, and one summary
    line per pair is prepended to ``results/preimage_mix/nb_updates.txt``.

    Requires ``results/gram_matrix_marg_itr10_pq0.03.gm.npz`` (arrays ``gm``
    and ``gmtime``; ``gm`` must have at least ``len(Gn) + 2`` rows and
    columns) and the ``results/preimage_mix/`` directory to exist.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    nb_graphs = len(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    params_ged = {'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
                  'saveGXL': 'gedlib'}
    # parameters for IAM function
    params_iam = {'c_ei': 1, 'c_er': 1, 'c_es': 1,
                  'ite_max': 50, 'epsilon': 0.001,
                  'removeNodes': True, 'connected': False}

    nb_update_mat_iam = np.full((nb_graphs, nb_graphs), np.inf)
    nb_update_mat_random = np.full((nb_graphs, nb_graphs), np.inf)

    # The Gram matrix file does not depend on the pair being tested, so it is
    # loaded once and the NpzFile is closed right after both arrays are read.
    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
        km_base = gmfile['gm']
        time_km = gmfile['gmtime']

    nb_updates_path = 'results/preimage_mix/nb_updates.txt'

    # test on each pair of graphs.
    # To run on all pairs, iterate instead with:
    #     for idx1 in range(len(Gn) - 1, -1, -1):
    #         for idx2 in range(idx1, -1, -1):
    for idx1 in range(187, 188):
        for idx2 in range(167, 168):
            g1 = Gn[idx1].copy()
            g2 = Gn[idx2].copy()

            # File names follow the pair indices instead of being hard-coded.
            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
            plt.savefig('results/preimage_mix/mutag' + str(idx1) + '.png', format="PNG")
            plt.show()
            plt.clf()
            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
            plt.savefig('results/preimage_mix/mutag' + str(idx2) + '.png', format="PNG")
            plt.show()
            plt.clf()

            # modify mixed gram matrix: rows/columns nb_graphs and
            # nb_graphs + 1 become copies of those of g1 and g2.
            km = km_base.copy()
            km[:nb_graphs, nb_graphs] = km[:nb_graphs, idx1]
            km[:nb_graphs, nb_graphs + 1] = km[:nb_graphs, idx2]
            km[nb_graphs, :nb_graphs] = km[:nb_graphs, idx1]
            km[nb_graphs + 1, :nb_graphs] = km[:nb_graphs, idx2]
            km[nb_graphs, nb_graphs] = km[idx1, idx1]
            km[nb_graphs, nb_graphs + 1] = km[idx1, idx2]
            km[nb_graphs + 1, nb_graphs] = km[idx2, idx1]
            km[nb_graphs + 1, nb_graphs + 1] = km[idx2, idx2]

            time_list = []
            dis_ks_min_list = []
            sod_gs_list = []
            sod_gs_min_list = []
            nb_updated_list_iam = []
            nb_updated_list_random = []
            nb_updated_k_list_iam = []
            nb_updated_k_list_random = []
            g_best = []
            # for each alpha
            for alpha in alpha_range:
                print('\n-------------------------------------------------------\n')
                print('alpha =', alpha)
                time0 = time.time()
                dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
                    nb_updated_k_iam, nb_updated_k_random = \
                    preimage_iam_random_mix(
                        Gn, [g1, g2], [alpha, 1 - alpha],
                        range(nb_graphs, nb_graphs + 2), km, k, r_max,
                        l_max, gkernel, epsilon=epsilon,
                        InitIAMWithAllDk=InitIAMWithAllDk,
                        InitRandomWithAllDk=InitRandomWithAllDk,
                        params_iam=params_iam, params_ged=params_ged)

                # The stored Gram matrix computation time is added to the run time.
                time_total = time.time() - time0 + time_km
                print('time: ', time_total)
                time_list.append(time_total)
                dis_ks_min_list.append(dhat)
                g_best.append(ghat_list)
                nb_updated_list_iam.append(nb_updated_iam)
                nb_updated_list_random.append(nb_updated_random)
                nb_updated_k_list_iam.append(nb_updated_k_iam)
                nb_updated_k_list_random.append(nb_updated_k_random)

            # show best graphs and save them to file.
            for idx, item in enumerate(alpha_range):
                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
                print('one of the possible corresponding pre-images is')
                nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
                        with_labels=True)
                plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
                            + '_alpha' + str(item) + '.png', format="PNG")
                plt.clf()

            # compute the corresponding sod in graph space.
            # g_best[idx] is the list of graphs returned for the idx-th alpha;
            # its first graph is the one evaluated (as in the sibling tests).
            for idx, item in enumerate(alpha_range):
                sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2], **params_ged)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))

            print('\nsods in graph space: ', sod_gs_list)
            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
            print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
            print('\nnumber of updates of the best graph for each alpha by random generation: ',
                  nb_updated_list_random)
            print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
                  nb_updated_k_list_iam)
            print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
                  nb_updated_k_list_random)
            print('\ntimes:', time_list)
            nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
            nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]

            str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
                % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
            # Prepend the new line to the log; a missing log file is treated
            # as empty instead of aborting the experiment.
            try:
                with open(nb_updates_path, 'r') as file:
                    content = file.read()
            except FileNotFoundError:
                content = ''
            with open(nb_updates_path, 'w') as file:
                file.write(str_fw + content)
| ############################################################################### | |||
if __name__ == '__main__':
    # Script entry point. One experiment is enabled at a time; to switch,
    # comment out the active call and uncomment another one.

    # Combination of the two randomly chosen graphs (the same setting as in
    # the random pre-image paper).
#    test_preimage_mix_2combination_all_pairs()

    # Different numbers of median-sets.
#    test_preimage_mix_median_nb()

    # Grid over the numbers of median-sets and the values of k.
    test_preimage_mix_grid_k_median_nb()
| @@ -0,0 +1,402 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Sep 5 15:59:00 2019 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import time | |||
| import random | |||
| #from tqdm import tqdm | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from preimage_random import preimage_random | |||
| from ged import ged_median | |||
| from utils import compute_kernel, get_same_item_indices, remove_edges | |||
| ############################################################################### | |||
| # tests on different values on grid of median-sets and k. | |||
def test_preimage_random_grid_k_median_nb():
    """Run the random pre-image experiment on a grid of median-set sizes and k.

    The MUTAG dataset is loaded, ``remove_edges`` is applied, and only the
    graphs of class 1 are kept. For every size in ``nb_median_range`` a median
    set is sampled (the random seed is reset to 1 before each sampling), the
    precomputed Gram matrix is extended with one row/column per median graph,
    and ``preimage_random`` is run once for every ``k`` in ``k_range``. For
    each run the returned graph is drawn to ``results/preimage_random/`` and
    its sum of distances (SOD) to the median set is computed with
    ``ged_median``. All collected statistics are printed at the end.

    Requires ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``) and the ``results/preimage_random/``
    directory to exist.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The Gram matrix file can be (re)generated with:
    #     time0 = time.time()
    #     km = compute_kernel(Gn, gkernel, True)
    #     time_km = time.time() - time0
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    # It does not depend on the loop variables, so it is loaded once, and the
    # NpzFile is closed as soon as both arrays have been read.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    # Symmetric copy of the stored matrix built from its upper triangle only.
    km_upper = np.triu(km_tmp[:nb_graphs, :nb_graphs])
    km_graphs = km_upper + np.triu(km_upper, 1).T

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Extend the Gram matrix: the median graphs are copies of dataset
        # graphs, so their kernel values are copies of existing rows/columns.
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        km[:nb_graphs, :nb_graphs] = km_graphs
        km_cross = km[:nb_graphs, idx_rdm]
        km[:nb_graphs, nb_graphs:] = km_cross
        km[nb_graphs:, :nb_graphs] = km_cross.T
        km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_rdm, idx_rdm)]

        alpha_range = [1 / nb_median] * nb_median

        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list.append([])
        g_best.append([])

        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat, nb_updated = preimage_random(
                Gn, Gn_median, alpha_range,
                range(nb_graphs, nb_graphs + nb_median), km, k, r_max, l_max,
                gkernel)

            # The stored Gram matrix computation time is added to the run time.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat)
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat)
            print('\nnumber of updates of the best graph: ', nb_updated)
            nb_updated_list[idx_nb].append(nb_updated)

            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                    with_labels=True)
            plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()

            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                    ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
          dis_ks_min_list)
    # This test only calls preimage_random, so the updates are reported as
    # coming from random generation rather than IAM.
    print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ',
          nb_updated_list)
    print('\ntimes:', time_list)
| ############################################################################### | |||
| # tests on different numbers of median-sets. | |||
def _extend_gram_matrix(km_base, nb_graphs, idx_extra):
    """Build a Gram matrix in which the graphs ``idx_extra`` appear twice.

    The leading ``nb_graphs`` x ``nb_graphs`` block is taken from the upper
    triangle of ``km_base`` and mirrored, so the result is symmetric. One
    extra row/column is then appended per entry of ``idx_extra``; it repeats
    the kernel values of the graph with that index.

    Parameters
    ----------
    km_base : ndarray
        Gram matrix covering at least the first ``nb_graphs`` graphs.
    nb_graphs : int
        Number of graphs in the base block.
    idx_extra : list of int
        Indices (all ``< nb_graphs``) of the graphs to duplicate.

    Returns
    -------
    ndarray
        Square matrix of size ``nb_graphs + len(idx_extra)``.
    """
    size = nb_graphs + len(idx_extra)
    km = np.zeros((size, size))
    # Only the upper triangle of the stored matrix is read; mirror it below
    # the diagonal.
    upper = np.triu(km_base[:nb_graphs, :nb_graphs])
    km[:nb_graphs, :nb_graphs] = upper + np.triu(upper, 1).T
    # Kernel values between every original graph and every duplicated graph.
    cross = km[:nb_graphs, idx_extra]
    km[:nb_graphs, nb_graphs:] = cross
    km[nb_graphs:, :nb_graphs] = cross.T
    # Kernel values among the duplicated graphs themselves.
    km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_extra, idx_extra)]
    return km


def test_preimage_random_median_nb():
    """Run the random pre-image search on MUTAG for several median-set sizes.

    For each size in a fixed list, that many graphs are sampled (with a
    fixed seed) among the graphs labelled 1, ``preimage_random`` is run with
    uniform weights over the sample, the graph it returns is drawn to
    ``results/preimage_random/`` and its sum of distances (SOD) to the
    sampled graphs is evaluated with ``ged_median``. Per-size results are
    printed as they are produced and summarised at the end.

    The Gram matrix of the label-1 graphs is not computed here. It is read
    from ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``), which is expected to have been written
    beforehand, e.g. with ``km = compute_kernel(Gn, gkernel, True)`` followed
    by ``np.savez(..., gm=km, gmtime=time_km)``. The stored ``gmtime`` is
    added to every reported running time.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    # Gn = Gn[0:50]
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    # NOTE(review): the termination probability (0.03) is not used in this
    # function; presumably it is baked into the precomputed Gram matrix.
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    k = 5  # k nearest neighbors

    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The stored Gram matrix does not depend on nb_median: read it once and
    # close the archive instead of re-opening it on every iteration.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # Re-seed on every iteration so each sample is reproducible on its own.
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Mixed Gram matrix: all graphs followed by the sampled median graphs.
        km = _extend_gram_matrix(km_tmp, nb_graphs, idx_rdm)

        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(
            Gn, Gn_median, alpha_range,
            range(nb_graphs, nb_graphs + nb_median), km, k, r_max, l_max,
            gkernel)
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        # plt.show()
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs: ',
          nb_updated_list)
    print('\ntimes:', time_list)
| ############################################################################### | |||
| # test on the combination of the two randomly chosen graphs. (the same as in the | |||
| # random pre-image paper.) | |||
def test_random_preimage_2combination():
    """Run the random pre-image search for mixtures of two fixed MUTAG graphs.

    Two graphs (indices 187 and 167) are combined with weights
    ``[alpha, 1 - alpha]`` for eleven values of ``alpha`` in ``[0, 1]``. For
    each ``alpha`` ``preimage_random`` is run; afterwards the graph found for
    every ``alpha`` is drawn, saved under ``results/random_preimage/`` and
    displayed, and the collected distances, update counts and running times
    are printed.

    The Gram matrix is read from
    ``results/gram_matrix_marg_itr10_pq0.03.gm.npz`` (arrays ``gm`` and
    ``gmtime``). It must be at least ``len(Gn) + 2`` square: its rows/columns
    ``len(Gn)`` and ``len(Gn) + 1`` are overwritten with the kernel values of
    the two selected graphs. The stored ``gmtime`` is added to every reported
    running time.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    # Gn = Gn[0:12]
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    # NOTE(review): the termination probability (0.03) is not used in this
    # function; presumably it is baked into the precomputed Gram matrix.
    r_max = 10  # iteration limit for pre-image.
    l_max = 500
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors

    # The two molecules are fixed (they were once drawn with
    # np.random.randint(0, len(Gn), 2)). The seed is still set because it
    # changes NumPy's global random state - presumably relied upon
    # downstream; confirm before removing.
    np.random.seed(1)
    idx_gi = [187, 167]
    g1 = Gn[idx_gi[0]].copy()
    g2 = Gn[idx_gi[1]].copy()

    idx1, idx2 = idx_gi
    nb_graphs = len(Gn)
    # Close the archive once both arrays have been read.
    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']

    # modify mixed gram matrix: the two trailing rows/columns repeat the
    # kernel values of g1 and g2.
    for i in range(nb_graphs):
        km[i, nb_graphs] = km[i, idx1]
        km[i, nb_graphs + 1] = km[i, idx2]
        km[nb_graphs, i] = km[i, idx1]
        km[nb_graphs + 1, i] = km[i, idx2]
    km[nb_graphs, nb_graphs] = km[idx1, idx1]
    km[nb_graphs, nb_graphs + 1] = km[idx1, idx2]
    km[nb_graphs + 1, nb_graphs] = km[idx2, idx1]
    km[nb_graphs + 1, nb_graphs + 1] = km[idx2, idx2]

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(
            Gn, [g1, g2], [alpha, 1 - alpha],
            range(nb_graphs, nb_graphs + 2), km,
            k, r_max, l_max, gkernel)
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for idx, item in enumerate(alpha_range):
        print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
        print('one of the possible corresponding pre-images is')
        nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
                with_labels=True)
        # Save before showing: once a shown figure is closed, a later
        # savefig() would write an empty canvas.
        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
        plt.show()
        plt.clf()
        print(g_best[idx].nodes(data=True))
        print(g_best[idx].edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
| ############################################################################### | |||
if __name__ == '__main__':
    # Script entry point. Exactly one experiment is enabled at a time; the
    # other calls are kept commented out so they can be switched on by hand.

    # Pre-image of the weighted combination of two fixed graphs (the same
    # setting as in the random pre-image paper).
    # test_random_preimage_2combination()

    # Random pre-image search for different numbers of median graphs.
    test_preimage_random_median_nb()

    # Random pre-image search over a grid of median-set sizes and values of k.
    # test_preimage_random_grid_k_median_nb()
| @@ -0,0 +1,109 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Oct 17 19:05:07 2019 | |||
| Useful functions. | |||
| @author: ljia | |||
| """ | |||
| #import networkx as nx | |||
| import multiprocessing | |||
| import numpy as np | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||
| from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||
| from pygraph.kernels.spKernel import spkernel | |||
| import functools | |||
| from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
| from pygraph.kernels.structuralspKernel import structuralspkernel | |||
def remove_edges(Gn):
    """Strip every attribute from every edge of the graphs in ``Gn``.

    Despite its name, this function does not delete any edge: the edges stay
    in place and only their attribute dictionaries are emptied, in place.

    Parameters
    ----------
    Gn : iterable
        Graphs exposing ``edges(data=True)`` that yields
        ``(u, v, attribute_dict)`` triples.
    """
    for graph in Gn:
        for edge in graph.edges(data=True):
            # edge is (u, v, attrs); emptying attrs modifies the graph itself.
            edge[2].clear()
def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Kernel-space distance between one graph and a weighted combination.

    Computes ``sqrt(term1 - term2 + term3)`` with

    * ``term1 = K[g, g]``,
    * ``term2 = 2 * sum_i alpha[i] * K[g, gi[i]]``,
    * ``term3 = sum_{i, j} alpha[i] * alpha[j] * K[gi[i], gi[j]]``.

    Parameters
    ----------
    idx_g : int
        Row/column of the graph in ``Kmatrix``.
    idx_gi : sequence of int
        Rows/columns of the combined graphs, one per entry of ``alpha``.
    alpha : sequence of float
        Weights of the combination.
    Kmatrix : ndarray
        Gram matrix, indexed as ``Kmatrix[i, j]``.
    term3 : float, optional
        Precomputed third term. When ``withterm3`` is false, the double sum
        is added on top of this value.
    withterm3 : bool, optional
        True (default): ``term3`` is supplied by the caller and used as is.
        False: the third term is computed here.

    Returns
    -------
    float
        The distance. Squared distances in ``(-1e-10, 0)`` are treated as
        round-off and give 0; other negative values still yield NaN.
    """
    term1 = Kmatrix[idx_g, idx_g]
    term2 = 0
    for i, a in enumerate(alpha):
        term2 += a * Kmatrix[idx_g, idx_gi[i]]
    term2 *= 2
    if not withterm3:
        for i1, a1 in enumerate(alpha):
            for i2, a2 in enumerate(alpha):
                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    dis_squared = term1 - term2 + term3
    # Floating-point cancellation can leave a tiny negative value where the
    # exact result is 0; same tolerance as kernel_distance_matrix.
    if -1e-10 < dis_squared < 0:
        dis_squared = 0
    return np.sqrt(dis_squared)
def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for a named graph kernel.

    Parameters
    ----------
    Gn : list
        Graphs, handed to the selected kernel function unchanged.
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    ndarray
        Gram matrix with every entry divided by
        ``sqrt(K[i, i] * K[j, j])``. The upper triangle is normalized and
        mirrored onto the lower one.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names.
    """
    n_jobs = multiprocessing.cpu_count()
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10, remove_totters=False,
                                        n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax', compute_method='trie',
                                      n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
                                 {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                                 n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
                                        {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                                        n_jobs=n_jobs, verbose=verbose)
    else:
        # Without this branch an unknown name only surfaced later as an
        # UnboundLocalError on Kmatrix.
        raise ValueError('Unknown graph kernel: ' + repr(graph_kernel))

    # normalization
    # The diagonal is copied first because it is overwritten (with 1) while
    # the loop is still using it.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix
def gram2distances(Kmatrix):
    """Turn a Gram matrix into the matrix of pairwise kernel-space distances.

    Entry ``[i, j]`` of the result is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.

    Parameters
    ----------
    Kmatrix : ndarray
        Square Gram matrix. It is not modified.

    Returns
    -------
    ndarray
        Distance matrix of the same size. Squared distances in
        ``(-1e-10, 0)`` are treated as round-off and give 0; other negative
        values still yield NaN.
    """
    size = len(Kmatrix)
    if size == 0:
        return np.zeros((0, 0))
    gram = np.asarray(Kmatrix, dtype=float)[:size, :size]
    diag = np.diagonal(gram)
    # Broadcasting builds K[i, i] + K[j, j] for every pair at once.
    dis_squared = diag[:, np.newaxis] + diag[np.newaxis, :] - 2 * gram
    # Floating-point cancellation can leave a tiny negative value where the
    # exact result is 0; same tolerance as kernel_distance_matrix.
    round_off = (dis_squared < 0) & (dis_squared > -1e-10)
    dis_squared[round_off] = 0
    return np.sqrt(dis_squared)
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Compute pairwise kernel-space distances between the graphs of ``Gn``.

    Parameters
    ----------
    Gn : list
        Graphs. Only ``len(Gn)`` is used when ``Kmatrix`` is supplied.
    Kmatrix : ndarray, optional
        Gram matrix of ``Gn``. When omitted it is computed with
        ``compute_kernel(Gn, gkernel, True)``.
    gkernel : str, optional
        Kernel name passed to ``compute_kernel``; only used when ``Kmatrix``
        is omitted.

    Returns
    -------
    dis_mat : ndarray
        Symmetric matrix with ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``,
        computed from the upper triangle of the Gram matrix.
    dis_max : float
        Largest entry of ``dis_mat``.
    dis_min : float
        Smallest non-zero entry of ``dis_mat``.
    dis_mean : float
        Mean over all entries of ``dis_mat`` (zeros on the diagonal included).

    Raises
    ------
    ValueError
        If a squared distance is below ``-1e-10``. NumPy also raises a
        ``ValueError`` when no distance is non-zero, because the minimum is
        then taken over an empty selection.
    """
    dis_mat = np.empty((len(Gn), len(Gn)))
    # Identity test: `Kmatrix == None` is element-wise for an ndarray and
    # cannot be used as a condition.
    if Kmatrix is None:
        Kmatrix = compute_kernel(Gn, gkernel, True)
    for i in range(len(Gn)):
        for j in range(i, len(Gn)):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                # Tiny negative values are round-off; anything larger means
                # the Gram matrix is inconsistent.
                if dis > -1e-10:
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]
    dis_max = np.max(dis_mat)
    dis_min = np.min(dis_mat[dis_mat != 0])
    dis_mean = np.mean(dis_mat)
    return dis_mat, dis_max, dis_min, dis_mean
def get_same_item_indices(ls):
    """Group the positions of ``ls`` by the item found at each position.

    Returns a dict that maps every distinct item to the list of indices at
    which it occurs, in increasing order. Keys appear in order of first
    occurrence.
    """
    positions = {}
    for position, value in enumerate(ls):
        positions.setdefault(value, []).append(position)
    return positions