@@ -20,5 +20,8 @@ pygraph/kernels/*_sym.py
 *.dat
 *.pyc
+preimage/*
+!preimage/*.py
 __pycache__
 ##*#
@@ -0,0 +1,196 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 30 17:07:43 2019

A graph pre-image method combining the iterative pre-image method of
reference [1] and the iterative alternate minimizations (IAM) of reference [2].

@author: ljia
@references:
    [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
    pre-images. In Joint Pattern Recognition Symposium, pages 253-261.
    Springer, 2004.
    [2] Generalized median graph via iterative alternate minimization.
"""
import sys
import multiprocessing
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm

from iam import iam
def gk_iam(Gn, alpha):
    """This function constructs a graph pre-image by the iterative pre-image
    framework in reference [1], algorithm 1, where the step of generating new
    graphs randomly is replaced by the IAM algorithm in reference [2].

    Notes
    -----
    Every time a better graph is acquired, the older one is replaced by it.
    """
    # compute the k nearest neighbors of phi in DN.
    dis_list = []  # distance between g_star and each graph.
    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
                k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
                (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
                k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
        dis_list.append(dtemp)

    # sort
    sort_idx = np.argsort(dis_list)
    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
    g0hat = Gn[sort_idx[0]]  # the nearest neighbor of phi in DN
    if dis_gs[0] == 0:  # the exact pre-image.
        print('The exact pre-image is found in the input dataset.')
        return 0, g0hat
    dhat = dis_gs[0]  # the nearest distance
    Gk = [Gn[ig] for ig in sort_idx[0:k]]  # the k nearest neighbors
    gihat_list = []
    r = 1
    while r < r_max:
        print('r =', r)
        Gs_nearest = Gk + gihat_list
        g_tmp = iam(Gs_nearest)
        # compute the distance between phi and the newly generated graph.
        knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
                                  p_quit=lmbda, n_iteration=20, remove_totters=False,
                                  n_jobs=multiprocessing.cpu_count(), verbose=False)
        dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
               knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
               (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
               k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
        if dnew <= dhat:  # the new distance is smaller
            print('Found a better graph.')
            dhat = dnew
            g_new = g_tmp.copy()  # found a better graph.
            gihat_list = [g_new]
            dis_gs.append(dhat)
            r = 0
        else:
            r += 1

    ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
    return dhat, ghat
def gk_iam_nearest(Gn, alpha):
    """This function constructs a graph pre-image by the iterative pre-image
    framework in reference [1], algorithm 1, where the step of generating new
    graphs randomly is replaced by the IAM algorithm in reference [2].

    Notes
    -----
    Every time a better graph is acquired, its distance in kernel space is
    compared with those of the k nearest graphs, and the k smallest of the
    k+1 distances are kept as the new ones.
    """
    # compute the k nearest neighbors of phi in DN.
    dis_list = []  # distance between g_star and each graph.
    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
                k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
                (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
                k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
        dis_list.append(dtemp)

    # sort
    sort_idx = np.argsort(dis_list)
    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]  # the k smallest distances
    g0hat = Gn[sort_idx[0]]  # the nearest neighbor of phi in DN
    if dis_gs[0] == 0:  # the exact pre-image.
        print('The exact pre-image is found in the input dataset.')
        return 0, g0hat
    dhat = dis_gs[0]  # the nearest distance
    ghat = g0hat
    Gk = [Gn[ig] for ig in sort_idx[0:k]]  # the k nearest neighbors
    Gs_nearest = Gk
    r = 1
    while r < r_max:
        print('r =', r)
        g_tmp = iam(Gs_nearest)
        # compute the distance between phi and the newly generated graph.
        knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
                                  p_quit=lmbda, n_iteration=20, remove_totters=False,
                                  n_jobs=multiprocessing.cpu_count(), verbose=False)
        dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
               knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
               (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
               k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
        if dnew <= dhat:  # the new distance is smaller
            print('Found a better graph.')
            dhat = dnew
            g_new = g_tmp.copy()  # found a better graph.
            ghat = g_tmp.copy()
            dis_gs.append(dhat)  # add the new nearest distance.
            Gs_nearest.append(g_new)  # add the corresponding graph.
            sort_idx = np.argsort(dis_gs)
            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]  # the new k nearest distances.
            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
            r = 0
        else:
            r += 1

    return dhat, ghat
if __name__ == '__main__':
    sys.path.insert(0, "../")
    from pygraph.kernels.marginalizedKernel import marginalizedkernel
    from pygraph.utils.graphfiles import loadDataset

    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
          'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

    lmbda = 0.03  # termination probability
    r_max = 10  # maximum number of consecutive iterations without improvement
    l = 500
    alpha_range = np.linspace(0.1, 0.9, 9)
    k = 5  # k nearest neighbors

    # randomly select two molecules
    np.random.seed(1)
    idx1, idx2 = np.random.randint(0, len(Gn), 2)
    g1 = Gn[idx1]
    g2 = Gn[idx2]
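    # phi = alpha * phi(g1) + (1 - alpha) * phi(g2) is a point in kernel
    # feature space between the two molecules; the pre-image search below
    # looks, for each alpha, for a graph whose image lies closest to it.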
    # compute the kernels needed by the distance expansion.
    k_list = []  # kernel between each graph and itself.
    k_g1_list = []  # kernel between each graph and g1.
    k_g2_list = []  # kernel between each graph and g2.
    for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout):
        ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
                                   p_quit=lmbda, n_iteration=20, remove_totters=False,
                                   n_jobs=multiprocessing.cpu_count(), verbose=False)
        k_list.append(ktemp[0][0, 0])
        k_g1_list.append(ktemp[0][0, 1])
        k_g2_list.append(ktemp[0][0, 2])
    g_best = []
    dis_best = []
    # for each alpha
    for alpha in alpha_range:
        print('alpha =', alpha)
        dhat, ghat = gk_iam_nearest(Gn, alpha)
        dis_best.append(dhat)
        g_best.append(ghat)

    for idx, item in enumerate(alpha_range):
        print('when alpha is', item, 'the shortest distance is', dis_best[idx])
        print('the corresponding pre-image is')
        nx.draw_networkx(g_best[idx])
        plt.show()
@@ -0,0 +1,195 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 26 11:49:12 2019

Iterative alternate minimizations using GED.

@author: ljia
"""
import numpy as np
import random
import networkx as nx
import sys
#from Cython_GedLib_2 import librariesImport, script
import librariesImport, script
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import saveDataset
from pygraph.utils.graphdataset import get_dataset_attributes
def iam(Gn, node_label='atom', edge_label='bond_type'):
    """Compute a generalized median graph of the graphs in Gn by iterative
    alternate minimization (IAM).
    """
    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
    c_er = 1
    c_es = 1
    c_ei = 1

    # phase 1: initialize.
    # compute the set-median.
    dis_min = np.inf
    pi_p = []
    pi_all = []
    for idx1, G_p in enumerate(Gn):
        dist_sum = 0
        pi_all.append([])
        for idx2, G_p_prime in enumerate(Gn):
            dist_tmp, pi_tmp = GED(G_p, G_p_prime)
            pi_all[idx1].append(pi_tmp)
            dist_sum += dist_tmp
        if dist_sum < dis_min:
            dis_min = dist_sum
            G = G_p.copy()
            idx_min = idx1
    # list of node maps (edit operations) from the set-median to each graph.
    pi_p = pi_all[idx_min]
    # phase 2: iteration.
    ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
                                      edge_label=edge_label)
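    # Each iteration alternates two update steps on the current median G:
    # node labels (or attribute vectors) are re-estimated from the nodes that
    # the maps in pi_p align with G, then edge existence and labels are
    # re-decided. The loop below runs a fixed budget of 10 iterations; there
    # is no convergence test.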
    for itr in range(0, 10):
        G_new = G.copy()

        # update vertex labels.
        # @todo: pre-compute h_i0 for each label.
        if not ds_attrs['node_attr_dim']:  # labels are symbolic
            for nd, _ in G.nodes(data=True):
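                # h_i0 counts, over all graphs in Gn, how many map node nd to
                # a node carrying the current label; the most frequent label
                # wins, with ties broken at random below.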
                h_i0_list = []
                label_list = []
                for label in get_node_labels(Gn, node_label):
                    h_i0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p[idx][nd]
                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
                            h_i0 += 1
                    h_i0_list.append(h_i0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
                idx_rdm = random.randint(0, len(idx_max) - 1)
                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
        else:  # labels are non-symbolic
            for nd, _ in G.nodes(data=True):
                Si_norm = 0
                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd]
                    if g.has_node(pi_i):  # @todo: what if no g has the node? phi_i_bar = 0?
                        Si_norm += 1
                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
                phi_i_bar /= Si_norm
                G_new.nodes[nd]['attributes'] = phi_i_bar
        # update edge labels and adjacency matrix.
        if ds_attrs['edge_labeled']:
            for nd1, nd2, _ in G.edges(data=True):
                h_ij0_list = []
                label_list = []
                for label in get_edge_labels(Gn, edge_label):
                    h_ij0 = 0
                    for idx, g in enumerate(Gn):
                        pi_i = pi_p[idx][nd1]
                        pi_j = pi_p[idx][nd2]
                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
                                   g.has_edge(pi_i, pi_j) and
                                   g.edges[pi_i, pi_j][edge_label] == label)
                        h_ij0 += h_ij0_p
                    h_ij0_list.append(h_ij0)
                    label_list.append(label)
                # choose one of the best randomly.
                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
                h_ij0_max = h_ij0_list[idx_max[0]]
                idx_rdm = random.randint(0, len(idx_max) - 1)
                best_label = label_list[idx_max[idx_rdm]]

                # check whether a_ij is 0 or 1.
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd1]
                    pi_j = pi_p[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
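                # Keep the edge (with the best label) iff substituting it in
                # the len(Gn) mapped graphs costs less than removing it; the
                # threshold below appears to balance the substitution and
                # insertion costs against the removal cost (c_er, c_es, c_ei).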
                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                    G_new.edges[nd1, nd2][edge_label] = best_label
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)
        else:  # if edges are unlabeled
            for nd1, nd2, _ in G.edges(data=True):
                sij_norm = 0
                for idx, g in enumerate(Gn):
                    pi_i = pi_p[idx][nd1]
                    pi_j = pi_p[idx][nd2]
                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
                        sij_norm += 1
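                # cost-weighted majority vote: keep the edge iff enough of
                # the mapped graphs contain it.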
                if sij_norm > len(Gn) * c_er / (c_er + c_ei):
                    if not G_new.has_edge(nd1, nd2):
                        G_new.add_edge(nd1, nd2)
                else:
                    if G_new.has_edge(nd1, nd2):
                        G_new.remove_edge(nd1, nd2)

        G = G_new.copy()

    return G
def GED(g1, g2, lib='gedlib'):
    """
    Compute the GED between two graphs. This is a dummy function for now.
    """
    if lib == 'gedlib':
        saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp')
        script.appel()
        script.PyRestartEnv()
        script.PyLoadGXLGraph('ged_tmp/', 'collections/tmp.xml')
        listID = script.PyGetGraphIds()
        script.PySetEditCost("CHEM_1")
        script.PyInitEnv()
        script.PySetMethod("BIPARTITE", "")
        script.PyInitMethod()
        g = listID[0]
        h = listID[1]
        script.PyRunMethod(g, h)
        liste = script.PyGetAllMap(g, h)
        upper = script.PyGetUpperBound(g, h)
        lower = script.PyGetLowerBound(g, h)
        dis = upper + lower
        pi = liste[0]

    return dis, pi
def get_node_labels(Gn, node_label):
    nl = set()
    for G in Gn:
        nl = nl | set(nx.get_node_attributes(G, node_label).values())
    return nl


def get_edge_labels(Gn, edge_label):
    el = set()
    for G in Gn:
        el = el | set(nx.get_edge_attributes(G, edge_label).values())
    return el
if __name__ == '__main__':
    from pygraph.utils.graphfiles import loadDataset

    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
          'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}  # node/edge symb
#    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
#          'extra_params': {}}  # node nsymb
#    ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
#          'extra_params': {}}
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

    iam(Gn)
@@ -0,0 +1,5 @@
from ctypes import *
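# Preload the shared libraries that the compiled GedLib binding (the script
# module) depends on, so importing it works without setting LD_LIBRARY_PATH
# beforehand (see the note at the top of the test script further below).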
lib1 = cdll.LoadLibrary('lib/fann/libdoublefann.so')
lib2 = cdll.LoadLibrary('lib/libsvm.3.22/libsvm.so')
lib3 = cdll.LoadLibrary('lib/nomad/libnomad.so')
lib4 = cdll.LoadLibrary('lib/nomad/libsgtelib.so')
@@ -0,0 +1,5 @@
from ctypes import *

lib1 = cdll.LoadLibrary('Cython_GedLib_2/lib/fann/libdoublefann.so')
lib2 = cdll.LoadLibrary('Cython_GedLib_2/lib/libsvm.3.22/libsvm.so')
lib3 = cdll.LoadLibrary('Cython_GedLib_2/lib/nomad/libnomad.so')
lib4 = cdll.LoadLibrary('Cython_GedLib_2/lib/nomad/libsgtelib.so')
@@ -126,6 +126,7 @@ for alpha in alpha_range:
             dhat = dnew
             gnew = gtemp.copy()
             found = True  # found better graph.
+            r = 0
         if found:
             gihat_list = [gnew]
             dis_gs.append(dhat)
@@ -0,0 +1,26 @@
#from distutils.core import setup
from distutils.extension import Extension
#from Cython.Distutils import build_ext
from distutils.core import setup
from Cython.Build import cythonize

#setup(ext_modules=cythonize("script.pyx"))

extensions = [Extension("script",
                        sources=["script.pyx", "src/essai.cpp"],
                        include_dirs=["include", "include/lsape", "include/Eigen",
                                      "include/nomad", "include/sgtelib",
                                      "include/libsvm.3.22", "include/fann",
                                      "include/boost_1_69_0"],
                        library_dirs=["lib/fann", "lib/gedlib", "lib/libsvm.3.22",
                                      "lib/nomad"],
                        libraries=["doublefann", "sgtelib", "svm", "nomad"],
                        language="c++",
                        extra_compile_args=["-std=c++11"],
                        extra_link_args=["-std=c++11"])]

setup(ext_modules=cythonize(extensions))

#extensions = [Extension("script", sources=["script.pyx", "include/gedlib-master/src/env/ged_env.ipp"], include_dirs=["."], language="c++")]
#setup(name = "script", ext_modules = extensions, cmdclass = {'build_ext':build_ext},)

# Bash command: python setup.py build_ext --inplace
@@ -0,0 +1,57 @@
#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/Cython_GedLib_2/lib/fann/:/export/home/lambertn/Documents/Cython_GedLib_2/lib/libsvm.3.22:/export/home/lambertn/Documents/Cython_GedLib_2/lib/nomad

# So that "import script" finds the libraries GedLib needs.
# Equivalent to setting the LD_LIBRARY_PATH environment variable in a shell;
# this allows running from IDLE and elsewhere without setting the environment
# variable every time. os.environ does not work in this case.
import librariesImport, script

#truc = script.computeEditDistanceOnGXlGraphs('include/gedlib-master/data/datasets/Mutagenicity/data/','collections/MUTA_10.xml',"CHEM_1", "BIPARTITE", "")
#print(truc)
#script.PyRestartEnv()
#script.appel()
def test():
    script.PyRestartEnv()

#    print("List of Edit Cost Options : ")
#    for i in script.listOfEditCostOptions:
#        print(i)
#    print("List of Method Options : ")
#    for j in script.listOfMethodOptions:
#        print(j)

    script.PyLoadGXLGraph('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml')
    listID = script.PyGetGraphIds()
    afficheId = ""
    for i in listID:
        afficheId += str(i) + " "
    print("Number of graphs = " + str(len(listID)) + ", list of Ids = " + afficheId)

    script.PySetEditCost("CHEM_1")
    script.PyInitEnv()
    script.PySetMethod("BIPARTITE", "")
    script.PyInitMethod()

    g = listID[0]
    h = listID[1]
    script.PyRunMethod(g, h)
    liste = script.PyGetAllMap(g, h)
    print("Forward map : ", liste[0], ", Backward map : ", liste[1])
    print("Upper Bound = " + str(script.PyGetUpperBound(g, h)) + ", Lower Bound = "
          + str(script.PyGetLowerBound(g, h)) + ", Runtime = " + str(script.PyGetRuntime(g, h)))

test()
@@ -52,10 +52,10 @@ def get_dataset_attributes(Gn,
         return False if edge_label is None else True

     def get_edge_label_num(Gn):
-        nl = set()
+        el = set()
         for G in Gn:
-            nl = nl | set(nx.get_edge_attributes(G, edge_label).values())
-        return len(nl)
+            el = el | set(nx.get_edge_attributes(G, edge_label).values())
+        return len(el)

     def is_directed(Gn):
         return nx.is_directed(Gn[0])
@@ -22,8 +22,8 @@ def loadCT(filename):
     with open(filename) as f:
         content = f.read().splitlines()
         g = nx.Graph(
-                name=str(content[0]),
-                filename=basename(filename))  # set name of the graph
+                name = str(content[0]),
+                filename = basename(filename))  # set name of the graph
         tmp = content[1].split(" ")
         if tmp[0] == '':
             nb_nodes = int(tmp[1])  # number of the nodes
@@ -84,43 +84,63 @@ def loadGXL(filename):
     return g

-def saveGXL(graph, filename):
-    import xml.etree.ElementTree as ET
-    root_node = ET.Element('gxl')
-    attr = dict()
-    attr['id'] = graph.graph['name']
-    attr['edgeids'] = 'true'
-    attr['edgemode'] = 'undirected'
-    graph_node = ET.SubElement(root_node, 'graph', attrib=attr)
-
-    for v in graph:
-        current_node = ET.SubElement(graph_node, 'node', attrib={'id': str(v)})
-        for attr in graph.nodes[v].keys():
-            cur_attr = ET.SubElement(
-                current_node, 'attr', attrib={'name': attr})
-            cur_value = ET.SubElement(cur_attr,
-                                      graph.nodes[v][attr].__class__.__name__)
-            cur_value.text = graph.nodes[v][attr]
-
-    for v1 in graph:
-        for v2 in graph[v1]:
-            if (v1 < v2):  # Non oriented graphs
-                cur_edge = ET.SubElement(
-                    graph_node,
-                    'edge',
-                    attrib={
-                        'from': str(v1),
-                        'to': str(v2)
-                    })
-                for attr in graph[v1][v2].keys():
-                    cur_attr = ET.SubElement(
-                        cur_edge, 'attr', attrib={'name': attr})
-                    cur_value = ET.SubElement(
-                        cur_attr, graph[v1][v2][attr].__class__.__name__)
-                    cur_value.text = str(graph[v1][v2][attr])
-
-    tree = ET.ElementTree(root_node)
-    tree.write(filename)
+def saveGXL(graph, filename, method='benoit'):
+    if method == 'benoit':
+        import xml.etree.ElementTree as ET
+        root_node = ET.Element('gxl')
+        attr = dict()
+        attr['id'] = str(graph.graph['name'])
+        attr['edgeids'] = 'true'
+        attr['edgemode'] = 'undirected'
+        graph_node = ET.SubElement(root_node, 'graph', attrib=attr)
+
+        for v in graph:
+            current_node = ET.SubElement(graph_node, 'node', attrib={'id': str(v)})
+            for attr in graph.nodes[v].keys():
+                cur_attr = ET.SubElement(
+                    current_node, 'attr', attrib={'name': attr})
+                cur_value = ET.SubElement(cur_attr,
+                                          graph.nodes[v][attr].__class__.__name__)
+                cur_value.text = str(graph.nodes[v][attr])  # cast for ElementTree
+
+        for v1 in graph:
+            for v2 in graph[v1]:
+                if (v1 < v2):  # Non oriented graphs
+                    cur_edge = ET.SubElement(
+                        graph_node,
+                        'edge',
+                        attrib={
+                            'from': str(v1),
+                            'to': str(v2)
+                        })
+                    for attr in graph[v1][v2].keys():
+                        cur_attr = ET.SubElement(
+                            cur_edge, 'attr', attrib={'name': attr})
+                        cur_value = ET.SubElement(
+                            cur_attr, graph[v1][v2][attr].__class__.__name__)
+                        cur_value.text = str(graph[v1][v2][attr])
+
+        tree = ET.ElementTree(root_node)
+        tree.write(filename)
+    elif method == 'gedlib':
+        # reference: https://github.com/dbblumenthal/gedlib/blob/master/data/generate_molecules.py#L22
+        pass
+#        gxl_file = open(filename, 'w')
+#        gxl_file.write("<?xml version=\"1.0\"?>\n")
+#        gxl_file.write("<!DOCTYPE gxl SYSTEM \"http://www.gupro.de/GXL/gxl-1.0.dtd\">\n")
+#        gxl_file.write("<gxl>\n")
+#        gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"false\" edgemode=\"undirected\">\n")
+#        for v in graph:
+#            gxl_file.write("<node id=\"_" + str(v) + "\">\n")
+#            gxl_file.write("<attr name=\"chem\"><int>" + str(self.node_labels[node]) + "</int></attr>\n")
+#            gxl_file.write("</node>\n")
+#        for edge in self.edge_list:
+#            gxl_file.write("<edge from=\"_" + str(edge[0]) + "\" to=\"_" + str(edge[1]) + "\">\n")
+#            gxl_file.write("<attr name=\"valence\"><int>1</int></attr>\n")
+#            gxl_file.write("</edge>\n")
+#        gxl_file.write("</graph>\n")
+#        gxl_file.write("</gxl>\n")
+#        gxl_file.close()

 def loadSDF(filename):
@@ -412,3 +432,33 @@ def loadDataset(filename, filename_y=None, extra_params=None):
 #     print(g.edges(data=True))
     return data, y
+
+def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile'):
+    """Save a list of graphs.
+    """
+    import os
+    dirname_ds = os.path.dirname(filename)
+    if dirname_ds != '':
+        dirname_ds += '/'
+        if not os.path.exists(dirname_ds):
+            os.makedirs(dirname_ds)
+
+    if group == 'xml' and gformat == 'gxl':
+        # the file is closed automatically by the with block.
+        with open(filename + '.xml', 'w') as fgroup:
+            fgroup.write("<?xml version=\"1.0\"?>")
+            fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"https://dbblumenthal.github.io/gedlib/GraphCollection_8dtd_source.html\">")
+            fgroup.write("\n<GraphCollection>")
+            for idx, g in enumerate(Gn):
+                fname_tmp = "graph" + str(idx) + ".gxl"
+                saveGXL(g, dirname_ds + fname_tmp)
+                fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>")
+            fgroup.write("\n</GraphCollection>")
+
+
+if __name__ == '__main__':
+    ds = {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',
+          'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}  # node/edge symb
+    Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+    saveDataset(Gn, y, group='xml', filename='temp/temp')
@@ -420,55 +420,6 @@ def model_selection_for_precomputed_kernel(datafile,
 #            np.save(results_name_pre + 'best_gram_matrix_time.dt',
 #                    best_gram_matrix_time)

-            # print out as table.
-            from collections import OrderedDict
-            from tabulate import tabulate
-            table_dict = {}
-            if model_type == 'regression':
-                for param_in in param_list:
-                    param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])
-            else:
-                for param_in in param_list:
-                    param_in['C'] = '{:.2e}'.format(param_in['C'])
-            table_dict['params'] = [{**param_out, **param_in}
-                for param_in in param_list for param_out in param_list_pre_revised]
-            table_dict['gram_matrix_time'] = [
-                '{:.2f}'.format(gram_matrix_time[index_out])
-                for param_in in param_list
-                for index_out, _ in enumerate(param_list_pre_revised)
-            ]
-            table_dict['valid_perf'] = [
-                '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],
-                                       std_val_scores[index_out][index_in])
-                for index_in, _ in enumerate(param_list)
-                for index_out, _ in enumerate(param_list_pre_revised)
-            ]
-            table_dict['test_perf'] = [
-                '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],
-                                       std_perf_scores[index_out][index_in])
-                for index_in, _ in enumerate(param_list)
-                for index_out, _ in enumerate(param_list_pre_revised)
-            ]
-            table_dict['train_perf'] = [
-                '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],
-                                       std_train_scores[index_out][index_in])
-                for index_in, _ in enumerate(param_list)
-                for index_out, _ in enumerate(param_list_pre_revised)
-            ]
-            keyorder = [
-                'params', 'train_perf', 'valid_perf', 'test_perf',
-                'gram_matrix_time'
-            ]
-            if verbose:
-                print()
-            tb_print = tabulate(
-                OrderedDict(
-                    sorted(table_dict.items(),
-                           key=lambda i: keyorder.index(i[0]))),
-                headers='keys')
-#            print(tb_print)
-            str_fw += 'table of performance v.s. hyper-params:\n\n%s\n\n' % tb_print

     # read gram matrices from file.
     else:
         # Grid of parameters with a discrete number of values for each.
@@ -632,58 +583,16 @@ def model_selection_for_precomputed_kernel(datafile,
 #        str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt)
         str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster)

-        # print out as table.
-        from collections import OrderedDict
-        from tabulate import tabulate
-        table_dict = {}
-        if model_type == 'regression':
-            for param_in in param_list:
-                param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])
-        else:
-            for param_in in param_list:
-                param_in['C'] = '{:.2e}'.format(param_in['C'])
-        table_dict['params'] = [{**param_out, **param_in}
-            for param_in in param_list for param_out in param_list_pre_revised]
-#        table_dict['gram_matrix_time'] = [
-#            '{:.2f}'.format(gram_matrix_time[index_out])
-#            for param_in in param_list
-#            for index_out, _ in enumerate(param_list_pre_revised)
-#        ]
-        table_dict['valid_perf'] = [
-            '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],
-                                   std_val_scores[index_out][index_in])
-            for index_in, _ in enumerate(param_list)
-            for index_out, _ in enumerate(param_list_pre_revised)
-        ]
-        table_dict['test_perf'] = [
-            '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],
-                                   std_perf_scores[index_out][index_in])
-            for index_in, _ in enumerate(param_list)
-            for index_out, _ in enumerate(param_list_pre_revised)
-        ]
-        table_dict['train_perf'] = [
-            '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],
-                                   std_train_scores[index_out][index_in])
-            for index_in, _ in enumerate(param_list)
-            for index_out, _ in enumerate(param_list_pre_revised)
-        ]
-        keyorder = [
-            'params', 'train_perf', 'valid_perf', 'test_perf'
-        ]
-        if verbose:
-            print()
-        tb_print = tabulate(
-            OrderedDict(
-                sorted(table_dict.items(),
-                       key=lambda i: keyorder.index(i[0]))),
-            headers='keys')
-#        print(tb_print)
-        str_fw += 'table of performance v.s. hyper-params:\n\n%s\n\n' % tb_print

     # open file to save all results for this dataset.
     if not os.path.exists(results_dir):
         os.makedirs(results_dir)

+    # print out results as table.
+    str_fw += printResultsInTable(param_list, param_list_pre_revised, average_val_scores,
+                                  std_val_scores, average_perf_scores, std_perf_scores,
+                                  average_train_scores, std_train_scores, gram_matrix_time,
+                                  model_type, verbose)

     # open file to save all results for this dataset.
     if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):
         with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:
@@ -974,4 +883,55 @@ def read_gram_matrices_from_file(results_dir, ds_name):
     gram_matrices = gmfile['gms']  # a list to store gram matrices for all param_grid_precomputed
     param_list_pre_revised = gmfile['params']  # list to store param grids precomputed ignoring the useless ones
     y = gmfile['y'].tolist()
-    return gram_matrices, param_list_pre_revised, y
+    return gram_matrices, param_list_pre_revised, y
+
+
+def printResultsInTable(param_list, param_list_pre_revised, average_val_scores,
+                        std_val_scores, average_perf_scores, std_perf_scores,
+                        average_train_scores, std_train_scores, gram_matrix_time,
+                        model_type, verbose):
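+    """Format cross-validation results as a text table.
+
+    One row per combination of a gram-matrix parameter setting
+    (param_list_pre_revised) and a learner parameter setting (param_list);
+    the columns hold the formatted train/valid/test scores and gram matrix
+    times. Returns the table as a string to append to the results file.
+    """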
+    from collections import OrderedDict
+    from tabulate import tabulate
+
+    table_dict = {}
+    if model_type == 'regression':
+        for param_in in param_list:
+            param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])
+    else:
+        for param_in in param_list:
+            param_in['C'] = '{:.2e}'.format(param_in['C'])
+    table_dict['params'] = [{**param_out, **param_in}
+        for param_in in param_list for param_out in param_list_pre_revised]
+    table_dict['gram_matrix_time'] = [
+        '{:.2f}'.format(gram_matrix_time[index_out])
+        for param_in in param_list
+        for index_out, _ in enumerate(param_list_pre_revised)
+    ]
+    table_dict['valid_perf'] = [
+        '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],
+                               std_val_scores[index_out][index_in])
+        for index_in, _ in enumerate(param_list)
+        for index_out, _ in enumerate(param_list_pre_revised)
+    ]
+    table_dict['test_perf'] = [
+        '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],
+                               std_perf_scores[index_out][index_in])
+        for index_in, _ in enumerate(param_list)
+        for index_out, _ in enumerate(param_list_pre_revised)
+    ]
+    table_dict['train_perf'] = [
+        '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],
+                               std_train_scores[index_out][index_in])
+        for index_in, _ in enumerate(param_list)
+        for index_out, _ in enumerate(param_list_pre_revised)
+    ]
+    keyorder = [
+        'params', 'train_perf', 'valid_perf', 'test_perf',
+        'gram_matrix_time'
+    ]
+    if verbose:
+        print()
+    tb_print = tabulate(OrderedDict(sorted(table_dict.items(),
+                        key=lambda i: keyorder.index(i[0]))), headers='keys')
+#    print(tb_print)
+    return 'table of performance v.s. hyper-params:\n\n%s\n\n' % tb_print