| @@ -8,6 +8,9 @@ Created on Wed Oct 16 14:20:06 2019 | |||||
| import numpy as np | import numpy as np | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| from scipy import optimize | |||||
| import cvxpy as cp | |||||
| import sys | import sys | ||||
| sys.path.insert(0, "../") | sys.path.insert(0, "../") | ||||
| from pygraph.utils.graphfiles import loadDataset | from pygraph.utils.graphfiles import loadDataset | ||||
| @@ -15,12 +18,9 @@ from ged import GED, get_nb_edit_operations | |||||
| from utils import kernel_distance_matrix | from utils import kernel_distance_matrix | ||||
| def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): | def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): | ||||
| c_vi = 1 | |||||
| c_vr = 1 | |||||
| c_vs = 1 | |||||
| c_ei = 1 | |||||
| c_er = 1 | |||||
| c_es = 1 | |||||
| # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them. | |||||
| edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0] | |||||
| idx_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0] | |||||
| # compute distances in feature space. | # compute distances in feature space. | ||||
| dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel) | dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel) | ||||
| @@ -36,14 +36,9 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): | |||||
| for itr in range(itr_max): | for itr in range(itr_max): | ||||
| print('iteration', itr) | print('iteration', itr) | ||||
| ged_all = [] | ged_all = [] | ||||
| n_vi_all = [] | |||||
| n_vr_all = [] | |||||
| n_vs_all = [] | |||||
| n_ei_all = [] | |||||
| n_er_all = [] | |||||
| n_es_all = [] | |||||
| n_edit_operations = [[] for i in range(len(idx_nonzeros))] | |||||
| # compute GEDs and numbers of edit operations. | # compute GEDs and numbers of edit operations. | ||||
| edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] | |||||
| edit_cost_constant = [i for i in edit_costs] | |||||
| edit_cost_list.append(edit_cost_constant) | edit_cost_list.append(edit_cost_constant) | ||||
| for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): | for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): | ||||
| # for i in range(len(Gn)): | # for i in range(len(Gn)): | ||||
| @@ -53,41 +48,58 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): | |||||
| edit_cost_constant=edit_cost_constant, stabilizer='min', | edit_cost_constant=edit_cost_constant, stabilizer='min', | ||||
| repeat=30) | repeat=30) | ||||
| ged_all.append(dis) | ged_all.append(dis) | ||||
| n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(Gn[i], | |||||
| n_eo_tmp = get_nb_edit_operations(Gn[i], | |||||
| Gn[j], pi_forward, pi_backward) | Gn[j], pi_forward, pi_backward) | ||||
| n_vi_all.append(n_vi) | |||||
| n_vr_all.append(n_vr) | |||||
| n_vs_all.append(n_vs) | |||||
| n_ei_all.append(n_ei) | |||||
| n_er_all.append(n_er) | |||||
| n_es_all.append(n_es) | |||||
| for idx, item in enumerate(idx_nonzeros): | |||||
| n_edit_operations[idx].append(n_eo_tmp[item]) | |||||
| residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))) | residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))) | ||||
| residual_list.append(residual) | residual_list.append(residual) | ||||
| # "fit" geds to distances in feature space by tuning edit costs using the | # "fit" geds to distances in feature space by tuning edit costs using the | ||||
| # Least Squares Method. | # Least Squares Method. | ||||
| nb_cost_mat = np.column_stack((np.array(n_vi_all), np.array(n_vr_all), | |||||
| np.array(n_vs_all), np.array(n_ei_all), | |||||
| np.array(n_er_all), np.array(n_es_all))) | |||||
| edit_costs, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | |||||
| rcond=None) | |||||
| for i in range(len(edit_costs)): | |||||
| if edit_costs[i] < 0: | |||||
| if edit_costs[i] > -1e-3: | |||||
| edit_costs[i] = 0 | |||||
| # else: | |||||
| # raise ValueError('The edit cost is negative.') | |||||
| c_vi = edit_costs[0] | |||||
| c_vr = edit_costs[1] | |||||
| c_vs = edit_costs[2] | |||||
| c_ei = edit_costs[3] | |||||
| c_er = edit_costs[4] | |||||
| c_es = edit_costs[5] | |||||
| nb_cost_mat = np.array(n_edit_operations).T | |||||
| edit_costs_new, residual = get_better_costs(nb_cost_mat, dis_k_vec) | |||||
| print(residual) | |||||
| for i in range(len(edit_costs_new)): | |||||
| if edit_costs_new[i] < 0: | |||||
| if edit_costs_new[i] > -1e-6: | |||||
| edit_costs_new[i] = 0 | |||||
| else: | |||||
| raise ValueError('The edit cost is negative.') | |||||
| for idx, item in enumerate(idx_nonzeros): | |||||
| edit_costs[item] = edit_costs_new[idx] | |||||
| return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list | |||||
| return edit_costs, residual_list, edit_cost_list | |||||
| def get_better_costs(nb_cost_mat, dis_k_vec): | |||||
| # # method 1: simple least square method. | |||||
| # edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | |||||
| # rcond=None) | |||||
| # # method 2: least square method with x_i >= 0. | |||||
| # edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) | |||||
| # method 3: solve as a quadratic program with constraints: x_i >= 0, sum(x) = 1. | |||||
| P = np.dot(nb_cost_mat.T, nb_cost_mat) | |||||
| q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) | |||||
| G = -1 * np.identity(nb_cost_mat.shape[1]) | |||||
| h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||||
| A = np.array([1 for i in range(nb_cost_mat.shape[1])]) | |||||
| b = 1 | |||||
| x = cp.Variable(nb_cost_mat.shape[1]) | |||||
| prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), | |||||
| [G@x <= h, | |||||
| A@x == b]) | |||||
| prob.solve() | |||||
| edit_costs_new = x.value | |||||
| residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) | |||||
| # p = program(minimize(norm2(nb_cost_mat*x-dis_k_vec)),[equals(sum(x),1),geq(x,0)]) | |||||
| return edit_costs_new, residual | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -95,9 +107,9 @@ if __name__ == '__main__': | |||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | ||||
| 'extra_params': {}} # node/edge symb | 'extra_params': {}} # node/edge symb | ||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | ||||
| Gn = Gn[0:10] | |||||
| # Gn = Gn[0:10] | |||||
| remove_edges(Gn) | remove_edges(Gn) | ||||
| gkernel = 'marginalizedkernel' | gkernel = 'marginalizedkernel' | ||||
| itr_max = 10 | itr_max = 10 | ||||
| c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list = \ | |||||
| edit_costs, residual_list, edit_cost_list = \ | |||||
| fit_GED_to_kernel_distance(Gn, gkernel, itr_max) | fit_GED_to_kernel_distance(Gn, gkernel, itr_max) | ||||