| @@ -0,0 +1,73 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| """compute_distance_in_kernel_space.ipynb | |||||
| Automatically generated by Colaboratory. | |||||
| Original file is located at | |||||
| https://colab.research.google.com/drive/17tZP6IrineQmzo9sRtfZOnHpHx6HnlMA | |||||
| **This script demonstrates how to compute distance in kernel space between the image of a graph and the mean of images of a group of graphs.** | |||||
| --- | |||||
| **0. Install `graphkit-learn`.** | |||||
| """ | |||||
| """**1. Get dataset.**""" | |||||
| from gklearn.utils import Dataset | |||||
| # Predefined dataset name, use dataset "MUTAG". | |||||
| ds_name = 'MUTAG' | |||||
| # Initialize a Dataset. | |||||
| dataset = Dataset() | |||||
| # Load predefined dataset "MUTAG". | |||||
| dataset.load_predefined_dataset(ds_name) | |||||
| len(dataset.graphs) | |||||
| """**2. Compute graph kernel.**""" | |||||
| from gklearn.kernels import PathUpToH | |||||
| import multiprocessing | |||||
| # Initialize parameters for graph kernel computation. | |||||
| kernel_options = {'depth': 3, | |||||
| 'k_func': 'MinMax', | |||||
| 'compute_method': 'trie' | |||||
| } | |||||
| # Initialize graph kernel. | |||||
| graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names. | |||||
| edge_labels=dataset.edge_labels, # list of edge label names. | |||||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation. | |||||
| **kernel_options, # options for computation. | |||||
| ) | |||||
| # Compute Gram matrix. | |||||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||||
| parallel='imap_unordered', # or None. | |||||
| n_jobs=multiprocessing.cpu_count(), # number of parallel jobs. | |||||
| normalize=True, # whether to return normalized Gram matrix. | |||||
| verbose=2 # whether to print out results. | |||||
| ) | |||||
| """**3. Compute distance in kernel space.** | |||||
| Given a dataset $\mathcal{G}_N$, compute the distance in kernel space between the image of $G_1 \in \mathcal{G}_N$ and the mean of images of $\mathcal{G}_k \subset \mathcal{G}_N$. | |||||
| """ | |||||
| from gklearn.preimage.utils import compute_k_dis | |||||
| # Index of $G_1$. | |||||
| idx_1 = 10 | |||||
| # Indices of graphs in $\mathcal{G}_k$. | |||||
| idx_graphs = range(0, 10) | |||||
| # Compute the distance in kernel space. | |||||
| dis_k = compute_k_dis(idx_1, | |||||
| idx_graphs, | |||||
| [1 / len(idx_graphs)] * len(idx_graphs), # weights for images of graphs in $\mathcal{G}_k$; all equal when computing the mean. | |||||
| gram_matrix, # Gram matrix of all graphs. | |||||
| withterm3=False | |||||
| ) | |||||
| print(dis_k) | |||||
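As a cross-check, the same kind of distance can be computed directly from the normalized Gram matrix with NumPy. This is a minimal sketch reusing `gram_matrix`, `idx_1` and `idx_graphs` from above; it keeps the third term, so it may differ from `dis_k` when `withterm3=False`:

```python
import numpy as np

idx = list(idx_graphs)
w = np.full(len(idx), 1 / len(idx))  # uniform weights over the group (the mean of images).

term1 = gram_matrix[idx_1, idx_1]              # k(G_1, G_1)
term2 = 2 * w @ gram_matrix[idx, idx_1]        # 2 * sum_i w_i k(G_1, G_i)
term3 = w @ gram_matrix[np.ix_(idx, idx)] @ w  # sum_{i,j} w_i w_j k(G_i, G_j)

dis_manual = np.sqrt(max(term1 - term2 + term3, 0))  # clamp tiny negative values from rounding.
print(dis_manual)
```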
| @@ -0,0 +1,87 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| """compute_graph_kernel.ipynb | |||||
| Automatically generated by Colaboratory. | |||||
| Original file is located at | |||||
| https://colab.research.google.com/drive/17Q2QCl9CAtDweGF8LiWnWoN2laeJqT0u | |||||
| **This script demonstrates how to compute a graph kernel.** | |||||
| --- | |||||
| **0. Install `graphkit-learn`.** | |||||
| """ | |||||
| """**1. Get dataset.**""" | |||||
| from gklearn.utils import Dataset | |||||
| # Predefined dataset name, use dataset "MUTAG". | |||||
| ds_name = 'MUTAG' | |||||
| # Initialize a Dataset. | |||||
| dataset = Dataset() | |||||
| # Load predefined dataset "MUTAG". | |||||
| dataset.load_predefined_dataset(ds_name) | |||||
| len(dataset.graphs) | |||||
| """**2. Compute graph kernel.**""" | |||||
| from gklearn.kernels import PathUpToH | |||||
| # Initialize parameters for graph kernel computation. | |||||
| kernel_options = {'depth': 3, | |||||
| 'k_func': 'MinMax', | |||||
| 'compute_method': 'trie' | |||||
| } | |||||
| # Initialize graph kernel. | |||||
| graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names. | |||||
| edge_labels=dataset.edge_labels, # list of edge label names. | |||||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation. | |||||
| **kernel_options, # options for computation. | |||||
| ) | |||||
| print('done.') | |||||
| import multiprocessing | |||||
| import matplotlib.pyplot as plt | |||||
| # Compute Gram matrix. | |||||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||||
| parallel='imap_unordered', # or None. | |||||
| n_jobs=multiprocessing.cpu_count(), # number of parallel jobs. | |||||
| normalize=True, # whether to return normalized Gram matrix. | |||||
| verbose=2 # whether to print out results. | |||||
| ) | |||||
| # Print results. | |||||
| print() | |||||
| print(gram_matrix) | |||||
| print(run_time) | |||||
| plt.imshow(gram_matrix) | |||||
| import multiprocessing | |||||
| # Compute graph kernels between a graph and a list of graphs. | |||||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs, # a list of graphs. | |||||
| dataset.graphs[0], # a single graph. | |||||
| parallel='imap_unordered', # or None. | |||||
| n_jobs=multiprocessing.cpu_count(), # number of parallel jobs. | |||||
| verbose=2 # whether to print out results. | |||||
| ) | |||||
| # Print results. | |||||
| print() | |||||
| print(kernel_list) | |||||
| print(run_time) | |||||
| import multiprocessing | |||||
| # Compute a graph kernel between two graphs. | |||||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], # a single graph. | |||||
| dataset.graphs[1], # another single graph. | |||||
| verbose=2 # whether to print out results. | |||||
| ) | |||||
| # Print results. | |||||
| print() | |||||
| print(kernel) | |||||
| print(run_time) | |||||
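Since the normalized Gram matrix is a valid kernel matrix, it can be fed straight into kernel methods. Below is a minimal classification sketch using scikit-learn's precomputed-kernel SVM; it assumes `dataset.targets` holds the MUTAG class labels and is not part of the original notebook:

```python
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

y = np.array(dataset.targets)  # class labels (assumed to be provided by the Dataset object).
idx_train, idx_test = train_test_split(np.arange(len(y)), test_size=0.3, random_state=0)

# With a precomputed kernel, rows index the evaluated samples and columns the training samples.
K_train = gram_matrix[np.ix_(idx_train, idx_train)]
K_test = gram_matrix[np.ix_(idx_test, idx_train)]

clf = SVC(kernel='precomputed').fit(K_train, y[idx_train])
print('test accuracy:', clf.score(K_test, y[idx_test]))
```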
| @@ -0,0 +1,115 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| """example_median_preimege_generator.ipynb | |||||
| Automatically generated by Colaboratory. | |||||
| Original file is located at | |||||
| https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK | |||||
| **This script demonstrates how to generate a graph preimage using Boria's method.** | |||||
| --- | |||||
| """ | |||||
| """**1. Get dataset.**""" | |||||
| from gklearn.utils import Dataset, split_dataset_by_target | |||||
| # Predefined dataset name, use dataset "MAO". | |||||
| ds_name = 'MAO' | |||||
| # The node/edge labels that will not be used in the computation. | |||||
| irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} | |||||
| # Initialize a Dataset. | |||||
| dataset_all = Dataset() | |||||
| # Load predefined dataset "MAO". | |||||
| dataset_all.load_predefined_dataset(ds_name) | |||||
| # Remove irrelevant labels. | |||||
| dataset_all.remove_labels(**irrelevant_labels) | |||||
| # Split the whole dataset according to the classification targets. | |||||
| datasets = split_dataset_by_target(dataset_all) | |||||
| # Get the first class of graphs, whose median preimage will be computed. | |||||
| dataset = datasets[0] | |||||
| len(dataset.graphs) | |||||
| """**2. Set parameters.**""" | |||||
| import multiprocessing | |||||
| # Parameters for MedianPreimageGenerator (our method). | |||||
| mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting. | |||||
| 'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs. | |||||
| 'ds_name': ds_name, # name of the dataset. | |||||
| 'parallel': True, # whether the parallel scheme is to be used. | |||||
| 'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit. | |||||
| 'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit. | |||||
| 'max_itrs_without_update': 3, # If the edit costs are not updated for more than this number of iterations, the optimization stops. | |||||
| 'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number. | |||||
| 'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number. | |||||
| 'verbose': 2 # whether to print out results. | |||||
| } | |||||
| # Parameters for graph kernel computation. | |||||
| kernel_options = {'name': 'PathUpToH', # use path kernel up to length h. | |||||
| 'depth': 9, | |||||
| 'k_func': 'MinMax', | |||||
| 'compute_method': 'trie', | |||||
| 'parallel': 'imap_unordered', # or None | |||||
| 'n_jobs': multiprocessing.cpu_count(), | |||||
| 'normalize': True, # whether to use normalized Gram matrix to optimize edit costs. | |||||
| 'verbose': 2 # whether to print out results. | |||||
| } | |||||
| # Parameters for GED computation. | |||||
| ged_options = {'method': 'IPFP', # use the IPFP heuristic. | |||||
| 'initialization_method': 'RANDOM', # or 'NODE', etc. | |||||
| 'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP. | |||||
| 'edit_cost': 'CONSTANT', # use CONSTANT cost. | |||||
| 'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by euclidean distance. | |||||
| 'ratio_runs_from_initial_solutions': 1, | |||||
| 'threads': multiprocessing.cpu_count(), # parallel threads. Does not work if mpg_options['parallel'] = False. | |||||
| 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' | |||||
| } | |||||
| # Parameters for MedianGraphEstimator (Boria's method). | |||||
| mge_options = {'init_type': 'MEDOID', # how to initialize the median (i.e., compute the set median). "MEDOID" uses the graph with the smallest SOD. | |||||
| 'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'. | |||||
| 'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit. | |||||
| 'verbose': 2, # whether to print out results. | |||||
| 'refine': False # whether to refine the final SODs or not. | |||||
| } | |||||
| print('done.') | |||||
| """**3. Run median preimage generator.**""" | |||||
| from gklearn.preimage import MedianPreimageGenerator | |||||
| # Create median preimage generator instance. | |||||
| mpg = MedianPreimageGenerator() | |||||
| # Add dataset. | |||||
| mpg.dataset = dataset | |||||
| # Set parameters. | |||||
| mpg.set_options(**mpg_options.copy()) | |||||
| mpg.kernel_options = kernel_options.copy() | |||||
| mpg.ged_options = ged_options.copy() | |||||
| mpg.mge_options = mge_options.copy() | |||||
| # Run. | |||||
| mpg.run() | |||||
| """**4. Get results.**""" | |||||
| # Get results. | |||||
| import pprint | |||||
| pp = pprint.PrettyPrinter(indent=4) # pretty print | |||||
| results = mpg.get_results() | |||||
| pp.pprint(results) | |||||
| # Draw generated graphs. | |||||
| def draw_graph(graph): | |||||
| import matplotlib.pyplot as plt | |||||
| import networkx as nx | |||||
| plt.figure() | |||||
| pos = nx.spring_layout(graph) | |||||
| nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True) | |||||
| plt.show() | |||||
| plt.clf() | |||||
| plt.close() | |||||
| draw_graph(mpg.set_median) | |||||
| draw_graph(mpg.gen_median) | |||||
| @@ -8,8 +8,9 @@ __author__ = "Linlin Jia" | |||||
| __date__ = "November 2018" | __date__ = "November 2018" | ||||
| from gklearn.kernels.graph_kernel import GraphKernel | from gklearn.kernels.graph_kernel import GraphKernel | ||||
| from gklearn.kernels.structural_sp import StructuralSP | |||||
| from gklearn.kernels.marginalized import Marginalized | |||||
| from gklearn.kernels.shortest_path import ShortestPath | from gklearn.kernels.shortest_path import ShortestPath | ||||
| from gklearn.kernels.structural_sp import StructuralSP | |||||
| from gklearn.kernels.path_up_to_h import PathUpToH | from gklearn.kernels.path_up_to_h import PathUpToH | ||||
| from gklearn.kernels.treelet import Treelet | from gklearn.kernels.treelet import Treelet | ||||
| from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree | from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree | ||||
| @@ -0,0 +1,338 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Wed Jun 3 22:22:57 2020 | |||||
| @author: ljia | |||||
| @references: | |||||
| [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between | |||||
| labeled graphs. In Proceedings of the 20th International Conference on | |||||
| Machine Learning, Washington, DC, United States, 2003. | |||||
| [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and | |||||
| Jean-Philippe Vert. Extensions of marginalized graph kernels. In | |||||
| Proceedings of the twenty-first international conference on Machine | |||||
| learning, page 70. ACM, 2004. | |||||
| """ | |||||
| import sys | |||||
| from multiprocessing import Pool | |||||
| from tqdm import tqdm | |||||
| import numpy as np | |||||
| import networkx as nx | |||||
| from gklearn.utils import SpecialLabel | |||||
| from gklearn.utils.kernels import deltakernel | |||||
| from gklearn.utils.parallel import parallel_gm, parallel_me | |||||
| from gklearn.utils.utils import untotterTransformation | |||||
| from gklearn.kernels import GraphKernel | |||||
| class Marginalized(GraphKernel): | |||||
| def __init__(self, **kwargs): | |||||
| GraphKernel.__init__(self) | |||||
| self.__node_labels = kwargs.get('node_labels', []) | |||||
| self.__edge_labels = kwargs.get('edge_labels', []) | |||||
| self.__p_quit = kwargs.get('p_quit', 0.5) | |||||
| self.__n_iteration = kwargs.get('n_iteration', 10) | |||||
| self.__remove_totters = kwargs.get('remove_totters', False) | |||||
| self.__ds_infos = kwargs.get('ds_infos', {}) | |||||
| self.__n_iteration = int(self.__n_iteration) | |||||
| def _compute_gm_series(self): | |||||
| self.__add_dummy_labels(self._graphs) | |||||
| if self.__remove_totters: | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout) | |||||
| else: | |||||
| iterator = self._graphs | |||||
| # @todo: this may not work. | |||||
| self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator] | |||||
| # compute Gram matrix. | |||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| from itertools import combinations_with_replacement | |||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = itr | |||||
| for i, j in iterator: | |||||
| kernel = self.__kernel_do(self._graphs[i], self._graphs[j]) | |||||
| gram_matrix[i][j] = kernel | |||||
| gram_matrix[j][i] = kernel # @todo: no directed graph considered? | |||||
| return gram_matrix | |||||
| def _compute_gm_imap_unordered(self): | |||||
| self.__add_dummy_labels(self._graphs) | |||||
| if self.__remove_totters: | |||||
| pool = Pool(self._n_jobs) | |||||
| itr = range(0, len(self._graphs)) | |||||
| if len(self._graphs) < 100 * self._n_jobs: | |||||
| chunksize = int(len(self._graphs) / self._n_jobs) + 1 | |||||
| else: | |||||
| chunksize = 100 | |||||
| remove_fun = self._wrapper_untotter | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize), | |||||
| desc='removing tottering', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(remove_fun, itr, chunksize) | |||||
| for i, g in iterator: | |||||
| self._graphs[i] = g | |||||
| pool.close() | |||||
| pool.join() | |||||
| # compute Gram matrix. | |||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| def init_worker(gn_toshare): | |||||
| global G_gn | |||||
| G_gn = gn_toshare | |||||
| do_fun = self._wrapper_kernel_do | |||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| return gram_matrix | |||||
| def _compute_kernel_list_series(self, g1, g_list): | |||||
| self.__add_dummy_labels(g_list + [g1]) | |||||
| if self.__remove_totters: | |||||
| g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work. | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout) | |||||
| else: | |||||
| iterator = g_list | |||||
| # @todo: this may not work. | |||||
| g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator] | |||||
| # compute kernel list. | |||||
| kernel_list = [None] * len(g_list) | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = range(len(g_list)) | |||||
| for i in iterator: | |||||
| kernel = self.__kernel_do(g1, g_list[i]) | |||||
| kernel_list[i] = kernel | |||||
| return kernel_list | |||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||||
| self.__add_dummy_labels(g_list + [g1]) | |||||
| if self.__remove_totters: | |||||
| g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work. | |||||
| pool = Pool(self._n_jobs) | |||||
| itr = range(0, len(g_list)) | |||||
| if len(g_list) < 100 * self._n_jobs: | |||||
| chunksize = int(len(g_list) / self._n_jobs) + 1 | |||||
| else: | |||||
| chunksize = 100 | |||||
| remove_fun = self._wrapper_untotter | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize), | |||||
| desc='removing tottering', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(remove_fun, itr, chunksize) | |||||
| for i, g in iterator: | |||||
| g_list[i] = g | |||||
| pool.close() | |||||
| pool.join() | |||||
| # compute kernel list. | |||||
| kernel_list = [None] * len(g_list) | |||||
| def init_worker(g1_toshare, g_list_toshare): | |||||
| global G_g1, G_g_list | |||||
| G_g1 = g1_toshare | |||||
| G_g_list = g_list_toshare | |||||
| do_fun = self._wrapper_kernel_list_do | |||||
| def func_assign(result, var_to_assign): | |||||
| var_to_assign[result[0]] = result[1] | |||||
| itr = range(len(g_list)) | |||||
| len_itr = len(g_list) | |||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||||
| init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | |||||
| n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||||
| return kernel_list | |||||
| def _wrapper_kernel_list_do(self, itr): | |||||
| return itr, self.__kernel_do(G_g1, G_g_list[itr]) | |||||
| def _compute_single_kernel_series(self, g1, g2): | |||||
| self.__add_dummy_labels([g1] + [g2]) | |||||
| if self.__remove_totters: | |||||
| g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work. | |||||
| g2 = untotterTransformation(g2, self.__node_label, self.__edge_label) | |||||
| kernel = self.__kernel_do(g1, g2) | |||||
| return kernel | |||||
| def __kernel_do(self, g1, g2): | |||||
| """Calculate marginalized graph kernel between 2 graphs. | |||||
| Parameters | |||||
| ---------- | |||||
| g1, g2 : NetworkX graphs | |||||
| 2 graphs between which the kernel is calculated. | |||||
| Return | |||||
| ------ | |||||
| kernel : float | |||||
| Marginalized kernel between 2 graphs. | |||||
| """ | |||||
| # init parameters | |||||
| kernel = 0 | |||||
| num_nodes_G1 = nx.number_of_nodes(g1) | |||||
| num_nodes_G2 = nx.number_of_nodes(g2) | |||||
| # the initial probability distribution in the random walks generating step | |||||
| # (uniform distribution over |G|) | |||||
| p_init_G1 = 1 / num_nodes_G1 | |||||
| p_init_G2 = 1 / num_nodes_G2 | |||||
| q = self.__p_quit * self.__p_quit | |||||
| r1 = q | |||||
| # # initial R_inf | |||||
| # # matrix to save all the R_inf for all pairs of nodes | |||||
| # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | |||||
| # | |||||
| # # calculate R_inf with a simple interative method | |||||
| # for i in range(1, n_iteration): | |||||
| # R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) | |||||
| # R_inf_new.fill(r1) | |||||
| # | |||||
| # # calculate R_inf for each pair of nodes | |||||
| # for node1 in g1.nodes(data=True): | |||||
| # neighbor_n1 = g1[node1[0]] | |||||
| # # the transition probability distribution in the random walks | |||||
| # # generating step (uniform distribution over the vertices adjacent | |||||
| # # to the current vertex) | |||||
| # if len(neighbor_n1) > 0: | |||||
| # p_trans_n1 = (1 - p_quit) / len(neighbor_n1) | |||||
| # for node2 in g2.nodes(data=True): | |||||
| # neighbor_n2 = g2[node2[0]] | |||||
| # if len(neighbor_n2) > 0: | |||||
| # p_trans_n2 = (1 - p_quit) / len(neighbor_n2) | |||||
| # | |||||
| # for neighbor1 in neighbor_n1: | |||||
| # for neighbor2 in neighbor_n2: | |||||
| # t = p_trans_n1 * p_trans_n2 * \ | |||||
| # deltakernel(g1.node[neighbor1][node_label], | |||||
| # g2.node[neighbor2][node_label]) * \ | |||||
| # deltakernel( | |||||
| # neighbor_n1[neighbor1][edge_label], | |||||
| # neighbor_n2[neighbor2][edge_label]) | |||||
| # | |||||
| # R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][ | |||||
| # neighbor2] # ref [1] equation (8) | |||||
| # R_inf[:] = R_inf_new | |||||
| # | |||||
| # # add elements of R_inf up and calculate kernel | |||||
| # for node1 in g1.nodes(data=True): | |||||
| # for node2 in g2.nodes(data=True): | |||||
| # s = p_init_G1 * p_init_G2 * deltakernel( | |||||
| # node1[1][node_label], node2[1][node_label]) | |||||
| # kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6) | |||||
| R_inf = {} # dict to save all the R_inf for all pairs of nodes | |||||
| # initial R_inf, the 1st iteration. | |||||
| for node1 in g1.nodes(): | |||||
| for node2 in g2.nodes(): | |||||
| # R_inf[(node1[0], node2[0])] = r1 | |||||
| if len(g1[node1]) > 0: | |||||
| if len(g2[node2]) > 0: | |||||
| R_inf[(node1, node2)] = r1 | |||||
| else: | |||||
| R_inf[(node1, node2)] = self.__p_quit | |||||
| else: | |||||
| if len(g2[node2]) > 0: | |||||
| R_inf[(node1, node2)] = self.__p_quit | |||||
| else: | |||||
| R_inf[(node1, node2)] = 1 | |||||
| # compute all transition probabilities first. | |||||
| t_dict = {} | |||||
| if self.__n_iteration > 1: | |||||
| for node1 in g1.nodes(): | |||||
| neighbor_n1 = g1[node1] | |||||
| # the transition probability distribution in the random walks | |||||
| # generating step (uniform distribution over the vertices adjacent | |||||
| # to the current vertex) | |||||
| if len(neighbor_n1) > 0: | |||||
| p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1) | |||||
| for node2 in g2.nodes(): | |||||
| neighbor_n2 = g2[node2] | |||||
| if len(neighbor_n2) > 0: | |||||
| p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2) | |||||
| for neighbor1 in neighbor_n1: | |||||
| for neighbor2 in neighbor_n2: | |||||
| t_dict[(node1, node2, neighbor1, neighbor2)] = \ | |||||
| p_trans_n1 * p_trans_n2 * \ | |||||
| deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \ | |||||
| deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels)) | |||||
| # calculate R_inf with a simple iterative method | |||||
| for i in range(2, self.__n_iteration + 1): | |||||
| R_inf_old = R_inf.copy() | |||||
| # calculate R_inf for each pair of nodes | |||||
| for node1 in g1.nodes(): | |||||
| neighbor_n1 = g1[node1] | |||||
| # the transition probability distribution in the random walks | |||||
| # generating step (uniform distribution over the vertices adjacent | |||||
| # to the current vertex) | |||||
| if len(neighbor_n1) > 0: | |||||
| for node2 in g2.nodes(): | |||||
| neighbor_n2 = g2[node2] | |||||
| if len(neighbor_n2) > 0: | |||||
| R_inf[(node1, node2)] = r1 | |||||
| for neighbor1 in neighbor_n1: | |||||
| for neighbor2 in neighbor_n2: | |||||
| R_inf[(node1, node2)] += \ | |||||
| (t_dict[(node1, node2, neighbor1, neighbor2)] * \ | |||||
| R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | |||||
| # add elements of R_inf up and calculate kernel | |||||
| for (n1, n2), value in R_inf.items(): | |||||
| s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels)) | |||||
| kernel += s * value # ref [1] equation (6) | |||||
| return kernel | |||||
| def _wrapper_kernel_do(self, itr): | |||||
| i = itr[0] | |||||
| j = itr[1] | |||||
| return i, j, self.__kernel_do(G_gn[i], G_gn[j]) | |||||
| def _wrapper_untotter(self, i): | |||||
| return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label) # @todo: this may not work. | |||||
| def __add_dummy_labels(self, Gn): | |||||
| if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.__node_labels = [SpecialLabel.DUMMY] | |||||
| if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.__edge_labels = [SpecialLabel.DUMMY] | |||||
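A minimal usage sketch of this class, mirroring the `PathUpToH` example earlier in this set of changes (assuming a predefined dataset loaded as in those notebooks):

```python
import multiprocessing
from gklearn.utils import Dataset
from gklearn.kernels import Marginalized

dataset = Dataset()
dataset.load_predefined_dataset('MUTAG')

# Initialize the marginalized kernel with the constructor options defined above.
graph_kernel = Marginalized(node_labels=dataset.node_labels,
                            edge_labels=dataset.edge_labels,
                            ds_infos=dataset.get_dataset_infos(keys=['directed']),
                            p_quit=0.5,  # termination probability of the random walks.
                            n_iteration=10,  # number of fixed-point iterations.
                            remove_totters=False)

# Compute the (normalized) Gram matrix over the whole dataset.
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
                                             parallel='imap_unordered',  # or None.
                                             n_jobs=multiprocessing.cpu_count(),
                                             normalize=True,
                                             verbose=2)
```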
| @@ -195,7 +195,7 @@ class Treelet(GraphKernel): | |||||
| Return | Return | ||||
| ------ | ------ | ||||
| kernel : float | kernel : float | ||||
| Treelet Kernel between 2 graphs. | |||||
| Treelet kernel between 2 graphs. | |||||
| """ | """ | ||||
| keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | ||||
| vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | ||||
| @@ -12,4 +12,6 @@ __date__ = "March 2020" | |||||
| from gklearn.preimage.preimage_generator import PreimageGenerator | from gklearn.preimage.preimage_generator import PreimageGenerator | ||||
| from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | ||||
| from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator | |||||
| from gklearn.preimage.kernel_knn_cv import kernel_knn_cv | from gklearn.preimage.kernel_knn_cv import kernel_knn_cv | ||||
| from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class | |||||
| @@ -0,0 +1,262 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Mon Jun 1 11:37:57 2020 | |||||
| @author: ljia | |||||
| """ | |||||
| import multiprocessing | |||||
| import numpy as np | |||||
| import networkx as nx | |||||
| import os | |||||
| from gklearn.utils.graphfiles import saveGXL | |||||
| from gklearn.preimage import RandomPreimageGenerator | |||||
| from gklearn.utils import Dataset | |||||
| dir_root = '../results/xp_random_preimage_generation/' | |||||
| def xp_random_preimage_generation(kernel_name): | |||||
| """ | |||||
| Experiment similar to the one in Bakir's paper. A test to check whether the RandomPreimageGenerator class works correctly. | |||||
| Returns | |||||
| ------- | |||||
| None. | |||||
| """ | |||||
| alpha1_list = np.linspace(0, 1, 11) | |||||
| k_dis_datasets = [] | |||||
| k_dis_preimages = [] | |||||
| preimages = [] | |||||
| bests_from_dataset = [] | |||||
| for alpha1 in alpha1_list: | |||||
| print('alpha1 =', alpha1, ':\n') | |||||
| # set parameters. | |||||
| ds_name = 'MUTAG' | |||||
| rpg_options = {'k': 5, | |||||
| 'r_max': 10, # | |||||
| 'l': 500, | |||||
| 'alphas': None, | |||||
| 'parallel': True, | |||||
| 'verbose': 2} | |||||
| if kernel_name == 'PathUpToH': | |||||
| kernel_options = {'name': 'PathUpToH', | |||||
| 'depth': 2, # | |||||
| 'k_func': 'MinMax', # | |||||
| 'compute_method': 'trie', | |||||
| 'parallel': 'imap_unordered', | |||||
| # 'parallel': None, | |||||
| 'n_jobs': multiprocessing.cpu_count(), | |||||
| 'normalize': True, | |||||
| 'verbose': 0} | |||||
| elif kernel_name == 'Marginalized': | |||||
| kernel_options = {'name': 'Marginalized', | |||||
| 'p_quit': 0.8, # | |||||
| 'n_iteration': 7, # | |||||
| 'remove_totters': False, | |||||
| 'parallel': 'imap_unordered', | |||||
| # 'parallel': None, | |||||
| 'n_jobs': multiprocessing.cpu_count(), | |||||
| 'normalize': True, | |||||
| 'verbose': 0} | |||||
| edge_required = True | |||||
| irrelevant_labels = {'edge_labels': ['label_0']} | |||||
| cut_range = None | |||||
| # create/get Gram matrix. | |||||
| dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' | |||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||||
| gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | |||||
| if gmfile_exist: | |||||
| gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. | |||||
| gram_matrix_unnorm = gmfile['gram_matrix_unnorm'] | |||||
| time_precompute_gm = gmfile['run_time'] | |||||
| # 1. get dataset. | |||||
| print('1. getting dataset...') | |||||
| dataset_all = Dataset() | |||||
| dataset_all.load_predefined_dataset(ds_name) | |||||
| dataset_all.trim_dataset(edge_required=edge_required) | |||||
| if irrelevant_labels is not None: | |||||
| dataset_all.remove_labels(**irrelevant_labels) | |||||
| if cut_range is not None: | |||||
| dataset_all.cut_graphs(cut_range) | |||||
| # # add two "random" graphs. | |||||
| # g1 = nx.Graph() | |||||
| # g1.add_nodes_from(range(0, 16), label_0='0') | |||||
| # g1.add_nodes_from(range(16, 25), label_0='1') | |||||
| # g1.add_node(25, label_0='2') | |||||
| # g1.add_nodes_from([26, 27], label_0='3') | |||||
| # g1.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (5, 0), (4, 9), (12, 3), (10, 13), (13, 14), (14, 15), (15, 8), (0, 16), (1, 17), (2, 18), (12, 19), (11, 20), (13, 21), (15, 22), (7, 23), (6, 24), (14, 25), (25, 26), (25, 27)]) | |||||
| # g2 = nx.Graph() | |||||
| # g2.add_nodes_from(range(0, 12), label_0='0') | |||||
| # g2.add_nodes_from(range(12, 19), label_0='1') | |||||
| # g2.add_nodes_from([19, 20, 21], label_0='2') | |||||
| # g2.add_nodes_from([22, 23], label_0='3') | |||||
| # g2.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 19), (19, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 20), (20, 7), (5, 0), (4, 8), (0, 12), (1, 13), (2, 14), (9, 15), (10, 16), (11, 17), (6, 18), (3, 21), (21, 22), (21, 23)]) | |||||
| # dataset_all.load_graphs([g1, g2] + dataset_all.graphs, targets=None) | |||||
| # 2. initialize rpg and setting parameters. | |||||
| print('2. initializing rpg and setting parameters...') | |||||
| # nb_graphs = len(dataset_all.graphs) - 2 | |||||
| # rpg_options['alphas'] = [alpha1, 1 - alpha1] + [0] * nb_graphs | |||||
| nb_graphs = len(dataset_all.graphs) | |||||
| alphas = [0] * nb_graphs | |||||
| alphas[1] = alpha1 | |||||
| alphas[6] = 1 - alpha1 | |||||
| rpg_options['alphas'] = alphas | |||||
| if gmfile_exist: | |||||
| rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm | |||||
| rpg_options['runtime_precompute_gm'] = time_precompute_gm | |||||
| rpg = RandomPreimageGenerator() | |||||
| rpg.dataset = dataset_all | |||||
| rpg.set_options(**rpg_options.copy()) | |||||
| rpg.kernel_options = kernel_options.copy() | |||||
| # 3. compute preimage. | |||||
| print('3. computing preimage...') | |||||
| rpg.run() | |||||
| results = rpg.get_results() | |||||
| k_dis_datasets.append(results['k_dis_dataset']) | |||||
| k_dis_preimages.append(results['k_dis_preimage']) | |||||
| bests_from_dataset.append(rpg.best_from_dataset) | |||||
| preimages.append(rpg.preimage) | |||||
| # 4. save results. | |||||
| # write Gram matrices to file. | |||||
| if not gmfile_exist: | |||||
| np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=rpg.gram_matrix_unnorm, run_time=results['runtime_precompute_gm']) | |||||
| # save graphs. | |||||
| fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3] | |||||
| saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default', | |||||
| node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels, | |||||
| node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs) | |||||
| fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3] | |||||
| saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default', | |||||
| node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels, | |||||
| node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs) | |||||
| # draw graphs. | |||||
| __draw_graph(rpg.best_from_dataset, fn_best_dataset) | |||||
| __draw_graph(rpg.preimage, fn_preimage) | |||||
| # save distances. | |||||
| np.savez(dir_save + 'distances.' + ds_name + '.' + kernel_options['name'], k_dis_datasets=k_dis_datasets, k_dis_preimages=k_dis_preimages) | |||||
| # plot results figure. | |||||
| __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save) | |||||
| print('\ncomplete.\n') | |||||
| return k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages | |||||
| def __draw_graph(graph, file_prefix): | |||||
| # import matplotlib | |||||
| # matplotlib.use('agg') | |||||
| import matplotlib.pyplot as plt | |||||
| plt.figure() | |||||
| pos = nx.spring_layout(graph) | |||||
| nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'label_0'), font_color='w', width=3, with_labels=True) | |||||
| plt.savefig(file_prefix + '.eps', format='eps', dpi=300) | |||||
| # plt.show() | |||||
| plt.clf() | |||||
| plt.close() | |||||
| def __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save): | |||||
| import matplotlib.pyplot as plt | |||||
| fig, ax = plt.subplots(1, 1, figsize=(7, 4.5)) | |||||
| ind = np.arange(len(alpha1_list)) # the x locations for the groups | |||||
| width = 0.35 # the width of the bars: can also be len(x) sequence | |||||
| ax.bar(ind, k_dis_preimages, width, label='Reconstructed pre-image', zorder=3, color='#133AAC') | |||||
| ax.set_xlabel(r'$\alpha \in [0,1]$') | |||||
| ax.set_ylabel(r'$d(g_i,g^\star(\alpha))$') | |||||
| #ax.set_title('Runtime of the shortest path kernel on all datasets') | |||||
| plt.xticks(ind, [str(i)[0:3] for i in alpha1_list]) | |||||
| #ax.set_yticks(np.logspace(-16, -3, num=20, base=10)) | |||||
| #ax.set_ylim(bottom=1e-15) | |||||
| ax.grid(axis='y', zorder=0) | |||||
| ax.spines['top'].set_visible(False) | |||||
| ax.spines['bottom'].set_visible(False) | |||||
| ax.spines['left'].set_visible(False) | |||||
| ax.spines['right'].set_visible(False) | |||||
| ax.xaxis.set_ticks_position('none') | |||||
| ax.plot(ind, k_dis_datasets, 'b.-', label=r'Nearest neighbor in $D_N$', color='orange', zorder=4) | |||||
| ax.yaxis.set_ticks_position('none') | |||||
| fig.subplots_adjust(bottom=.2) | |||||
| fig.legend(loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6) | |||||
| plt.savefig(dir_save + 'distances in kernel space.eps', format='eps', dpi=300, | |||||
| transparent=True, bbox_inches='tight') | |||||
| plt.show() | |||||
| plt.clf() | |||||
| plt.close() | |||||
| if __name__ == '__main__': | |||||
| # kernel_name = 'PathUpToH' | |||||
| kernel_name = 'Marginalized' | |||||
| k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages = xp_random_preimage_generation(kernel_name) | |||||
| # # save graphs. | |||||
| # dir_save = dir_root + 'MUTAG.PathUpToH/' | |||||
| # for i, alpha1 in enumerate(np.linspace(0, 1, 11)): | |||||
| # fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3] | |||||
| # saveGXL(bests_from_dataset[i], fn_best_dataset + '.gxl', method='default', | |||||
| # node_labels=['label_0'], edge_labels=[], | |||||
| # node_attrs=[], edge_attrs=[]) | |||||
| # fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3] | |||||
| # saveGXL(preimages[i], fn_preimage + '.gxl', method='default', | |||||
| # node_labels=['label_0'], edge_labels=[], | |||||
| # node_attrs=[], edge_attrs=[]) | |||||
| # # draw graphs. | |||||
| # dir_save = dir_root + 'MUTAG.PathUpToH/' | |||||
| # for i, alpha1 in enumerate(np.linspace(0, 1, 11)): | |||||
| # fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3] | |||||
| # __draw_graph(bests_from_dataset[i], fn_best_dataset) | |||||
| # fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3] | |||||
| # __draw_graph(preimages[i], fn_preimage) | |||||
| # # plot results figure. | |||||
| # alpha1_list = np.linspace(0, 1, 11) | |||||
| # dir_save = dir_root + 'MUTAG.PathUpToH/' | |||||
| # __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save) | |||||
| # k_dis_datasets = [0.0, | |||||
| # 0.08882515554098754, | |||||
| # 0.17765031108197632, | |||||
| # 0.2664754666229643, | |||||
| # 0.35530062216395264, | |||||
| # 0.44412577770494066, | |||||
| # 0.35530062216395236, | |||||
| # 0.2664754666229643, | |||||
| # 0.17765031108197632, | |||||
| # 0.08882515554098878, | |||||
| # 0.0] | |||||
| # k_dis_preimages = [0.0, | |||||
| # 0.08882515554098754, | |||||
| # 0.17765031108197632, | |||||
| # 0.2664754666229643, | |||||
| # 0.35530062216395264, | |||||
| # 0.44412577770494066, | |||||
| # 0.35530062216395236, | |||||
| # 0.2664754666229643, | |||||
| # 0.17765031108197632, | |||||
| # 0.08882515554098878, | |||||
| # 0.0] | |||||
| @@ -0,0 +1,176 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Fri Jun 12 10:30:17 2020 | |||||
| @author: ljia | |||||
| This script constructs simple preimages to test preimage methods and find bugs and shortcomings in them. | |||||
| """ | |||||
| def xp_simple_preimage(): | |||||
| import numpy as np | |||||
| """**1. Get dataset.**""" | |||||
| from gklearn.utils import Dataset, split_dataset_by_target | |||||
| # Predefined dataset name, use dataset "MAO". | |||||
| ds_name = 'MAO' | |||||
| # The node/edge labels that will not be used in the computation. | |||||
| irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} | |||||
| # Initialize a Dataset. | |||||
| dataset_all = Dataset() | |||||
| # Load predefined dataset "MAO". | |||||
| dataset_all.load_predefined_dataset(ds_name) | |||||
| # Remove irrelevant labels. | |||||
| dataset_all.remove_labels(**irrelevant_labels) | |||||
| # Split the whole dataset according to the classification targets. | |||||
| datasets = split_dataset_by_target(dataset_all) | |||||
| # Get the first class of graphs, whose median preimage will be computed. | |||||
| dataset = datasets[0] | |||||
| len(dataset.graphs) | |||||
| """**2. Set parameters.**""" | |||||
| import multiprocessing | |||||
| # Parameters for MedianPreimageGenerator (our method). | |||||
| mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting. | |||||
| 'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs. | |||||
| 'ds_name': ds_name, # name of the dataset. | |||||
| 'parallel': True, # whether the parallel scheme is to be used. | |||||
| 'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit. | |||||
| 'max_itrs': 10, # maximum iteration limit to optimize edit costs. If set to 0 then no limit. | |||||
| 'max_itrs_without_update': 3, # If the edit costs are not updated for more than this number of iterations, the optimization stops. | |||||
| 'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number. | |||||
| 'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number. | |||||
| 'verbose': 2 # whether to print out results. | |||||
| } | |||||
| # Parameters for graph kernel computation. | |||||
| kernel_options = {'name': 'PathUpToH', # use path kernel up to length h. | |||||
| 'depth': 9, | |||||
| 'k_func': 'MinMax', | |||||
| 'compute_method': 'trie', | |||||
| 'parallel': 'imap_unordered', # or None | |||||
| 'n_jobs': multiprocessing.cpu_count(), | |||||
| 'normalize': True, # whether to use normalized Gram matrix to optimize edit costs. | |||||
| 'verbose': 2 # whether to print out results. | |||||
| } | |||||
| # Parameters for GED computation. | |||||
| ged_options = {'method': 'IPFP', # use the IPFP heuristic. | |||||
| 'initialization_method': 'RANDOM', # or 'NODE', etc. | |||||
| 'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP. | |||||
| 'edit_cost': 'CONSTANT', # use CONSTANT cost. | |||||
| 'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by euclidean distance. | |||||
| 'ratio_runs_from_initial_solutions': 1, | |||||
| 'threads': multiprocessing.cpu_count(), # parallel threads. Does not work if mpg_options['parallel'] = False. | |||||
| 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' | |||||
| } | |||||
| # Parameters for MedianGraphEstimator (Boria's method). | |||||
| mge_options = {'init_type': 'MEDOID', # how to initialize the median (i.e., compute the set median). "MEDOID" uses the graph with the smallest SOD. | |||||
| 'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'. | |||||
| 'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit. | |||||
| 'verbose': 2, # whether to print out results. | |||||
| 'refine': False # whether to refine the final SODs or not. | |||||
| } | |||||
| print('done.') | |||||
| """**3. Compute the Gram matrix and distance matrix.**""" | |||||
| from gklearn.utils.utils import get_graph_kernel_by_name | |||||
| # Get a graph kernel instance. | |||||
| graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||||
| node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs, | |||||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
| kernel_options=kernel_options) | |||||
| # Compute Gram matrix. | |||||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options) | |||||
| # Compute distance matrix. | |||||
| from gklearn.utils import compute_distance_matrix | |||||
| dis_mat, _, _, _ = compute_distance_matrix(gram_matrix) | |||||
| print('done.') | |||||
| """**4. Find the candidate graph.**""" | |||||
| from gklearn.preimage.utils import compute_k_dis | |||||
| # Number of the nearest neighbors. | |||||
| k_neighbors = 10 | |||||
| # For each graph G in dataset, compute the distance between its image \Phi(G) and the mean of its neighbors' images. | |||||
| dis_min = np.inf # the minimum distance found so far between \Phi(G) and the mean of its neighbors' images. | |||||
| for idx, G in enumerate(dataset.graphs): | |||||
| # Find the k nearest neighbors of G. | |||||
| dis_list = dis_mat[idx] # distances between \Phi(G) and the images of all graphs. | |||||
| idx_sort = np.argsort(dis_list) # sort distances and get the sorted indices. | |||||
| idx_nearest = idx_sort[1:k_neighbors+1] # indices of the k-nearest neighbors. | |||||
| dis_k_nearest = [dis_list[i] for i in idx_nearest] # the k nearest distances, excluding the zero distance of G to itself. | |||||
| G_k_nearest = [dataset.graphs[i] for i in idx_nearest] # k-nearest neighbors. | |||||
| # Compute the distance between \Phi(G) and the mean of its neighbors. | |||||
| dis_tmp = compute_k_dis(idx, # the index of G in Gram matrix. | |||||
| idx_nearest, # the indices of the neighbors | |||||
| [1 / k_neighbors] * k_neighbors, # coefficients for neighbors. | |||||
| gram_matrix, | |||||
| withterm3=False) | |||||
| # Check if the new distance is smaller. | |||||
| if dis_tmp < dis_min: | |||||
| dis_min = dis_tmp | |||||
| G_cand = G | |||||
| G_neighbors = G_k_nearest | |||||
| print('The minimum distance is', dis_min) | |||||
| """**5. Run median preimage generator.**""" | |||||
| from gklearn.preimage import MedianPreimageGenerator | |||||
| # Set the dataset as the k-nearest neighbors. | |||||
| dataset.load_graphs(G_neighbors) | |||||
| # Create median preimage generator instance. | |||||
| mpg = MedianPreimageGenerator() | |||||
| # Add dataset. | |||||
| mpg.dataset = dataset | |||||
| # Set parameters. | |||||
| mpg.set_options(**mpg_options.copy()) | |||||
| mpg.kernel_options = kernel_options.copy() | |||||
| mpg.ged_options = ged_options.copy() | |||||
| mpg.mge_options = mge_options.copy() | |||||
| # Run. | |||||
| mpg.run() | |||||
| """**4. Get results.**""" | |||||
| # Get results. | |||||
| import pprint | |||||
| pp = pprint.PrettyPrinter(indent=4) # pretty print | |||||
| results = mpg.get_results() | |||||
| pp.pprint(results) | |||||
| draw_graph(mpg.set_median) | |||||
| draw_graph(mpg.gen_median) | |||||
| draw_graph(G_cand) | |||||
| # Draw generated graphs. | |||||
| def draw_graph(graph): | |||||
| import matplotlib.pyplot as plt | |||||
| import networkx as nx | |||||
| plt.figure() | |||||
| pos = nx.spring_layout(graph) | |||||
| nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True) | |||||
| plt.show() | |||||
| plt.clf() | |||||
| plt.close() | |||||
| if __name__ == '__main__': | |||||
| xp_simple_preimage() | |||||
| @@ -0,0 +1,188 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Mon Jun 1 17:02:51 2020 | |||||
| @author: ljia | |||||
| """ | |||||
| import numpy as np | |||||
| from gklearn.utils import Dataset | |||||
| import csv | |||||
| import os | |||||
| import os.path | |||||
| from gklearn.preimage import RandomPreimageGenerator | |||||
| from gklearn.utils import split_dataset_by_target | |||||
| from gklearn.utils.graphfiles import saveGXL | |||||
| def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=True, save_preimages=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): | |||||
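"""Generate a random preimage for each classification target of the dataset `ds_name`.

The dataset is split by target and a RandomPreimageGenerator is run on each class with
`rpg_options` and `kernel_options`. Per-class and averaged results are appended to CSV files in
`dir_save` when `save_results` is True, the best graph from the dataset and the generated
preimage are saved as GXL files when `save_preimages` is True, and the unnormalized Gram matrices
are cached in or reloaded from a `.gm.npz` file depending on `load_gm`.
"""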
| # 1. get dataset. | |||||
| print('1. getting dataset...') | |||||
| dataset_all = Dataset() | |||||
| dataset_all.load_predefined_dataset(ds_name) | |||||
| dataset_all.trim_dataset(edge_required=edge_required) | |||||
| if irrelevant_labels is not None: | |||||
| dataset_all.remove_labels(**irrelevant_labels) | |||||
| if cut_range is not None: | |||||
| dataset_all.cut_graphs(cut_range) | |||||
| datasets = split_dataset_by_target(dataset_all) | |||||
| if save_results: | |||||
| # create result files. | |||||
| print('creating output files...') | |||||
| fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save) | |||||
| dis_k_dataset_list = [] | |||||
| dis_k_preimage_list = [] | |||||
| time_precompute_gm_list = [] | |||||
| time_generate_list = [] | |||||
| time_total_list = [] | |||||
| itrs_list = [] | |||||
| num_updates_list = [] | |||||
| if load_gm == 'auto': | |||||
| gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||||
| gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | |||||
| if gmfile_exist: | |||||
| gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. | |||||
| gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']] | |||||
| time_precompute_gm_list = gmfile['run_time_list'].tolist() | |||||
| else: | |||||
| gram_matrix_unnorm_list = [] | |||||
| time_precompute_gm_list = [] | |||||
| elif not load_gm: | |||||
| gram_matrix_unnorm_list = [] | |||||
| time_precompute_gm_list = [] | |||||
| else: | |||||
| gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||||
| gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. | |||||
| gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']] | |||||
| time_precompute_gm_list = gmfile['run_time_list'].tolist() | |||||
| print('start generating preimages for each target class...') | |||||
| idx_offset = 0 | |||||
| for idx, dataset in enumerate(datasets): | |||||
| target = dataset.targets[0] | |||||
| print('\ntarget =', target, '\n') | |||||
| # if target != 1: | |||||
| # continue | |||||
| num_graphs = len(dataset.graphs) | |||||
| if num_graphs < 2: | |||||
| print('\nnumber of graphs = ', num_graphs, ', skip.\n') | |||||
| idx_offset += 1 | |||||
| continue | |||||
| # 2. set parameters. | |||||
| print('2. initializing rpg and setting parameters...') | |||||
| if load_gm: | |||||
| if gmfile_exist: | |||||
| rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset] | |||||
| rpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset] | |||||
| rpg = RandomPreimageGenerator() | |||||
| rpg.dataset = dataset | |||||
| rpg.set_options(**rpg_options.copy()) | |||||
| rpg.kernel_options = kernel_options.copy() | |||||
| # 3. compute preimage. | |||||
| print('3. computing preimage...') | |||||
| rpg.run() | |||||
| results = rpg.get_results() | |||||
| # 4. save results (and preimages). | |||||
| print('4. saving results (and preimages)...') | |||||
| # write result detail. | |||||
| if save_results: | |||||
| print('writing results to files...') | |||||
| f_detail = open(dir_save + fn_output_detail, 'a') | |||||
| csv.writer(f_detail).writerow([ds_name, kernel_options['name'], | |||||
| num_graphs, target, 1, | |||||
| results['k_dis_dataset'], results['k_dis_preimage'], | |||||
| results['runtime_precompute_gm'], | |||||
| results['runtime_generate_preimage'], results['runtime_total'], | |||||
| results['itrs'], results['num_updates']]) | |||||
| f_detail.close() | |||||
| # compute result summary. | |||||
| dis_k_dataset_list.append(results['k_dis_dataset']) | |||||
| dis_k_preimage_list.append(results['k_dis_preimage']) | |||||
| time_precompute_gm_list.append(results['runtime_precompute_gm']) | |||||
| time_generate_list.append(results['runtime_generate_preimage']) | |||||
| time_total_list.append(results['runtime_total']) | |||||
| itrs_list.append(results['itrs']) | |||||
| num_updates_list.append(results['num_updates']) | |||||
| # write result summary for each target. | |||||
| f_summary = open(dir_save + fn_output_summary, 'a') | |||||
| csv.writer(f_summary).writerow([ds_name, kernel_options['name'], | |||||
| num_graphs, target, | |||||
| results['k_dis_dataset'], results['k_dis_preimage'], | |||||
| results['runtime_precompute_gm'], | |||||
| results['runtime_generate_preimage'], results['runtime_total'], | |||||
| results['itrs'], results['num_updates']]) | |||||
| f_summary.close() | |||||
| # save the preimage graphs. | |||||
| if save_preimages: | |||||
| if not os.path.exists(dir_save + 'preimages/'): | |||||
| os.makedirs(dir_save + 'preimages/') | |||||
| print('Saving preimages to files...') | |||||
| fn_best_dataset = dir_save + 'preimages/g_best_dataset.' + 'nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1) | |||||
| saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default', | |||||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||||
| node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs) | |||||
| fn_preimage = dir_save + 'preimages/g_preimage.' + 'nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1) | |||||
| saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default', | |||||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||||
| node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs) | |||||
| if (load_gm == 'auto' and not gmfile_exist) or not load_gm: | |||||
| gram_matrix_unnorm_list.append(rpg.gram_matrix_unnorm) | |||||
| # write overall result summary averaged over all classes. | |||||
| if save_results: | |||||
| dis_k_dataset_mean = np.mean(dis_k_dataset_list) | |||||
| dis_k_preimage_mean = np.mean(dis_k_preimage_list) | |||||
| time_precompute_gm_mean = np.mean(time_precompute_gm_list) | |||||
| time_generate_mean = np.mean(time_generate_list) | |||||
| time_total_mean = np.mean(time_total_list) | |||||
| itrs_mean = np.mean(itrs_list) | |||||
| num_updates_mean = np.mean(num_updates_list) | |||||
| f_summary = open(dir_save + fn_output_summary, 'a') | |||||
| csv.writer(f_summary).writerow([ds_name, kernel_options['name'], | |||||
| num_graphs, 'all', | |||||
| dis_k_dataset_mean, dis_k_preimage_mean, | |||||
| time_precompute_gm_mean, | |||||
| time_generate_mean, time_total_mean, itrs_mean, | |||||
| num_updates_mean]) | |||||
| f_summary.close() | |||||
| # write Gram matrices to file. | |||||
| if (load_gm == 'auto' and not gmfile_exist) or not load_gm: | |||||
| np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | |||||
| print('\ncomplete.\n') | |||||
| def __init_output_file_preimage(ds_name, gkernel, dir_output): | |||||
| if not os.path.exists(dir_output): | |||||
| os.makedirs(dir_output) | |||||
| fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | |||||
| f_detail = open(dir_output + fn_output_detail, 'a') | |||||
| csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'num graphs', | |||||
| 'target', 'repeat', 'dis_k best from dataset', 'dis_k preimage', | |||||
| 'time precompute gm', 'time generate preimage', 'time total', | |||||
| 'itrs', 'num updates']) | |||||
| f_detail.close() | |||||
| fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv' | |||||
| f_summary = open(dir_output + fn_output_summary, 'a') | |||||
| csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'num graphs', | |||||
| 'target', 'dis_k best from dataset', 'dis_k preimage', | |||||
| 'time precompute gm', 'time generate preimage', 'time total', | |||||
| 'itrs', 'num updates']) | |||||
| f_summary.close() | |||||
| return fn_output_detail, fn_output_summary | |||||
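A minimal driver sketch for this function. The option values are copied from `xp_random_preimage_generation` earlier in this set of changes and the output directory is a hypothetical placeholder; treat both as illustrative assumptions rather than recommended settings:

```python
import multiprocessing
from gklearn.preimage import generate_random_preimages_by_class

kernel_options = {'name': 'PathUpToH',
                  'depth': 2,
                  'k_func': 'MinMax',
                  'compute_method': 'trie',
                  'parallel': 'imap_unordered',
                  'n_jobs': multiprocessing.cpu_count(),
                  'normalize': True,
                  'verbose': 0}
rpg_options = {'k': 5,
               'r_max': 10,
               'l': 500,
               'alphas': None,
               'parallel': True,
               'verbose': 2}

generate_random_preimages_by_class('MUTAG', rpg_options, kernel_options,
                                    save_results=True, save_preimages=True,
                                    load_gm='auto',
                                    dir_save='../results/random_preimages/',  # hypothetical output directory.
                                    irrelevant_labels={'edge_labels': ['label_0']},
                                    edge_required=True)
```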
| @@ -19,7 +19,7 @@ from gklearn.ged.median import constant_node_costs,mge_options_to_string | |||||
| from gklearn.gedlib import librariesImport, gedlibpy | from gklearn.gedlib import librariesImport, gedlibpy | ||||
| from gklearn.utils import Timer | from gklearn.utils import Timer | ||||
| from gklearn.utils.utils import get_graph_kernel_by_name | from gklearn.utils.utils import get_graph_kernel_by_name | ||||
| # from gklearn.utils.dataset import Dataset | |||||
| class MedianPreimageGenerator(PreimageGenerator): | class MedianPreimageGenerator(PreimageGenerator): | ||||
| @@ -127,8 +127,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| # 3. compute set median and gen median using optimized edit costs. | # 3. compute set median and gen median using optimized edit costs. | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| print('\nstart computing set median and gen median using optimized edit costs...\n') | print('\nstart computing set median and gen median using optimized edit costs...\n') | ||||
| # group_fnames = [Gn[g].graph['filename'] for g in group_min] | |||||
| self.__generate_preimage_iam() | |||||
| self.__gmg_bcu() | |||||
| end_generate_preimage = time.time() | end_generate_preimage = time.time() | ||||
| self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec | self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec | ||||
| self.__runtime_total = end_generate_preimage - start | self.__runtime_total = end_generate_preimage - start | ||||
| @@ -140,19 +139,13 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| # 4. compute kernel distances to the true median. | # 4. compute kernel distances to the true median. | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| print('\nstart computing distances to true median....\n') | print('\nstart computing distances to true median....\n') | ||||
| # Gn_median = [Gn[g].copy() for g in group_min] | |||||
| self.__compute_distances_to_true_median() | self.__compute_distances_to_true_median() | ||||
| # dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min = | |||||
| # idx_dis_k_gi_min = group_min[idx_dis_k_gi_min] | |||||
| # print('index min dis_k_gi:', idx_dis_k_gi_min) | |||||
| # print('sod_sm:', sod_sm) | |||||
| # print('sod_gm:', sod_gm) | |||||
| # 5. print out results. | # 5. print out results. | ||||
| if self._verbose: | if self._verbose: | ||||
| print() | print() | ||||
| print('================================================================================') | print('================================================================================') | ||||
| print('Finished generalization of preimages.') | |||||
| print('Finished generation of preimages.') | |||||
| print('--------------------------------------------------------------------------------') | print('--------------------------------------------------------------------------------') | ||||
| print('The optimized edit cost constants:', self.__edit_cost_constants) | print('The optimized edit cost constants:', self.__edit_cost_constants) | ||||
| print('SOD of the set median:', self.__sod_set_median) | print('SOD of the set median:', self.__sod_set_median) | ||||
| @@ -169,11 +162,6 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| print('Is optimization of edit costs converged:', self.__converged) | print('Is optimization of edit costs converged:', self.__converged) | ||||
| print('================================================================================') | print('================================================================================') | ||||
| print() | print() | ||||
| # collect return values. | |||||
| # return (sod_sm, sod_gm), \ | |||||
| # (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), \ | |||||
| # (time_fitting, time_generating) | |||||
| def get_results(self): | def get_results(self): | ||||
| @@ -203,20 +191,22 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| """ | """ | ||||
| if self.__fit_method == 'random': # random | if self.__fit_method == 'random': # random | ||||
| if self.__ged_options['edit_cost'] == 'LETTER': | if self.__ged_options['edit_cost'] == 'LETTER': | ||||
| self.__edit_cost_constants = random.sample(range(1, 10), 3) | |||||
| self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants] | |||||
| self.__edit_cost_constants = random.sample(range(1, 1000), 3) | |||||
| self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] | |||||
| elif self.__ged_options['edit_cost'] == 'LETTER2': | elif self.__ged_options['edit_cost'] == 'LETTER2': | ||||
| random.seed(time.time()) | random.seed(time.time()) | ||||
| self.__edit_cost_constants = random.sample(range(1, 10), 5) | |||||
| # self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants] | |||||
| self.__edit_cost_constants = random.sample(range(1, 1000), 5) | |||||
| self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] | |||||
| elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': | elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': | ||||
| self.__edit_cost_constants = random.sample(range(1, 10), 6) | |||||
| self.__edit_cost_constants = random.sample(range(1, 1000), 6) | |||||
| self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] | |||||
| if self._dataset.node_attrs == []: | if self._dataset.node_attrs == []: | ||||
| self.__edit_cost_constants[2] = 0 | self.__edit_cost_constants[2] = 0 | ||||
| if self._dataset.edge_attrs == []: | if self._dataset.edge_attrs == []: | ||||
| self.__edit_cost_constants[5] = 0 | self.__edit_cost_constants[5] = 0 | ||||
| else: | else: | ||||
| self.__edit_cost_constants = random.sample(range(1, 10), 6) | |||||
| self.__edit_cost_constants = random.sample(range(1, 1000), 6) | |||||
| self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] | |||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| print('edit cost constants used:', self.__edit_cost_constants) | print('edit cost constants used:', self.__edit_cost_constants) | ||||
| elif self.__fit_method == 'expert': # expert | elif self.__fit_method == 'expert': # expert | ||||
| @@ -861,7 +851,15 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| print() | print() | ||||
| def __generate_preimage_iam(self): | |||||
| def __gmg_bcu(self): | |||||
| """ | |||||
| The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). | |||||
| Returns | |||||
| ------- | |||||
| None. | |||||
| """ | |||||
| # Set up the ged environment. | # Set up the ged environment. | ||||
| ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private variable. | ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private variable. | ||||
| # gedlibpy.restart_env() | # gedlibpy.restart_env() | ||||
| @@ -910,24 +908,24 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| # compute distance in kernel space for set median. | # compute distance in kernel space for set median. | ||||
| kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) | kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) | ||||
| kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) | kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) | ||||
| kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||||
| if self._kernel_options['normalize']: | |||||
| kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||||
| kernel_sm = 1 | |||||
| # @todo: not correct kernel value | # @todo: not correct kernel value | ||||
| gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | ||||
| gram_with_sm = np.concatenate((np.array([[1] + kernels_to_sm]).T, gram_with_sm), axis=1) | |||||
| gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) | |||||
| self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | ||||
| [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | ||||
| gram_with_sm, withterm3=False) | gram_with_sm, withterm3=False) | ||||
| # print(gen_median.nodes(data=True)) | |||||
| # print(gen_median.edges(data=True)) | |||||
| # print(set_median.nodes(data=True)) | |||||
| # print(set_median.edges(data=True)) | |||||
| # compute distance in kernel space for generalized median. | # compute distance in kernel space for generalized median. | ||||
| kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) | kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) | ||||
| kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) | kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) | ||||
| kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||||
| if self._kernel_options['normalize']: | |||||
| kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||||
| kernel_gm = 1 | |||||
| gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | ||||
| gram_with_gm = np.concatenate((np.array([[1] + kernels_to_gm]).T, gram_with_gm), axis=1) | |||||
| gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) | |||||
| self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | ||||
| [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | ||||
| gram_with_gm, withterm3=False) | gram_with_gm, withterm3=False) | ||||
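# Illustration only (not part of the patch): how the bordered Gram matrices
# gram_with_sm / gram_with_gm above are assembled. Given an n x n Gram matrix K,
# the kernels k_to_g between a new graph g and the n dataset graphs, and the
# self kernel k_gg, the new graph is placed at row/column 0 so that
# compute_k_dis(0, range(1, n + 1), ...) measures its distance to the mean of
# the dataset images.
import numpy as np

K = np.array([[1.0, 0.5],
              [0.5, 1.0]])              # toy 2 x 2 Gram matrix of the dataset.
k_to_g = [0.8, 0.6]                     # toy kernels between g and the 2 graphs.
k_gg = 1.0                              # toy self kernel of g (1 after normalization).
K_bordered = np.concatenate((np.array([k_to_g]), K), axis=0)
K_bordered = np.concatenate((np.array([[k_gg] + k_to_g]).T, K_bordered), axis=1)
print(K_bordered)                       # 3 x 3 matrix with g at index 0.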
| @@ -0,0 +1,389 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Fri May 29 14:29:52 2020 | |||||
| @author: ljia | |||||
| """ | |||||
| import numpy as np | |||||
| import time | |||||
| import sys | |||||
| from tqdm import tqdm | |||||
| import multiprocessing | |||||
| import networkx as nx | |||||
| from multiprocessing import Pool | |||||
| from functools import partial | |||||
| from gklearn.preimage import PreimageGenerator | |||||
| from gklearn.preimage.utils import compute_k_dis | |||||
| from gklearn.utils import Timer | |||||
| from gklearn.utils.utils import get_graph_kernel_by_name | |||||
| # from gklearn.utils.dataset import Dataset | |||||
| class RandomPreimageGenerator(PreimageGenerator): | |||||
| def __init__(self, dataset=None): | |||||
| PreimageGenerator.__init__(self, dataset=dataset) | |||||
| # arguments to set. | |||||
| self.__k = 5 # number of nearest neighbors of phi in D_N. | |||||
| self.__r_max = 10 # maximum number of iterations. | |||||
| self.__l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}. | |||||
| self.__alphas = None # weights of linear combinations of points in kernel space. | |||||
| self.__parallel = True | |||||
| self.__n_jobs = multiprocessing.cpu_count() | |||||
| self.__time_limit_in_sec = 0 | |||||
| self.__max_itrs = 20 | |||||
| # values to compute. | |||||
| self.__runtime_generate_preimage = None | |||||
| self.__runtime_total = None | |||||
| self.__preimage = None | |||||
| self.__best_from_dataset = None | |||||
| self.__k_dis_preimage = None | |||||
| self.__k_dis_dataset = None | |||||
| self.__itrs = 0 | |||||
| self.__converged = False # @todo | |||||
| self.__num_updates = 0 | |||||
| # values that can be set or to be computed. | |||||
| self.__gram_matrix_unnorm = None | |||||
| self.__runtime_precompute_gm = None | |||||
| def set_options(self, **kwargs): | |||||
| self._kernel_options = kwargs.get('kernel_options', {}) | |||||
| self._graph_kernel = kwargs.get('graph_kernel', None) | |||||
| self._verbose = kwargs.get('verbose', 2) | |||||
| self.__k = kwargs.get('k', 5) | |||||
| self.__r_max = kwargs.get('r_max', 10) | |||||
| self.__l = kwargs.get('l', 500) | |||||
| self.__alphas = kwargs.get('alphas', None) | |||||
| self.__parallel = kwargs.get('parallel', True) | |||||
| self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) | |||||
| self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||||
| self.__max_itrs = kwargs.get('max_itrs', 20) | |||||
| self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||||
| self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||||
| def run(self): | |||||
| self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'], | |||||
| node_labels=self._dataset.node_labels, | |||||
| edge_labels=self._dataset.edge_labels, | |||||
| node_attrs=self._dataset.node_attrs, | |||||
| edge_attrs=self._dataset.edge_attrs, | |||||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
| kernel_options=self._kernel_options) | |||||
| # record start time. | |||||
| start = time.time() | |||||
| # 1. precompute gram matrix. | |||||
| if self.__gram_matrix_unnorm is None: | |||||
| gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) | |||||
| self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||||
| end_precompute_gm = time.time() | |||||
| self.__runtime_precompute_gm = end_precompute_gm - start | |||||
| else: | |||||
| if self.__runtime_precompute_gm is None: | |||||
| raise Exception('Parameter "runtime_precompute_gm" must be given when using a pre-computed Gram matrix.') | |||||
| self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm | |||||
| if self._kernel_options['normalize']: | |||||
| self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) | |||||
| else: | |||||
| self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) | |||||
| end_precompute_gm = time.time() | |||||
| start -= self.__runtime_precompute_gm | |||||
| # 2. compute k nearest neighbors of phi in D_N. | |||||
| if self._verbose >= 2: | |||||
| print('\nstart computing k nearest neighbors of phi in D_N...\n') | |||||
| D_N = self._dataset.graphs | |||||
| if self.__alphas is None: | |||||
| self.__alphas = [1 / len(D_N)] * len(D_N) | |||||
| k_dis_list = [] # distance between g_star and each graph. | |||||
| term3 = 0 | |||||
| for i1, a1 in enumerate(self.__alphas): | |||||
| for i2, a2 in enumerate(self.__alphas): | |||||
| term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2] | |||||
| for idx in range(len(D_N)): | |||||
| k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) | |||||
| # sort. | |||||
| sort_idx = np.argsort(k_dis_list) | |||||
| dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances. | |||||
| nb_best = len(np.argwhere(np.array(dis_gs) == dis_gs[0]).flatten().tolist()) # count ties for the shortest distance; convert to an array so the elementwise comparison works. | |||||
| g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N | |||||
| self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple. | |||||
| self.__k_dis_dataset = dis_gs[0] | |||||
| if self.__k_dis_dataset == 0: # get the exact pre-image. | |||||
| end_generate_preimage = time.time() | |||||
| self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||||
| self.__runtime_total = end_generate_preimage - start | |||||
| self.__preimage = self.__best_from_dataset.copy() | |||||
| self.__k_dis_preimage = self.__k_dis_dataset | |||||
| if self._verbose: | |||||
| print() | |||||
| print('=============================================================================') | |||||
| print('The exact pre-image is found in the input dataset.') | |||||
| print('-----------------------------------------------------------------------------') | |||||
| print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset) | |||||
| print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) | |||||
| print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||||
| print('Total time:', self.__runtime_total) | |||||
| print('=============================================================================') | |||||
| print() | |||||
| return | |||||
| dhat = dis_gs[0] # the nearest distance | |||||
| Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors | |||||
| Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk] | |||||
| # 3. start iterations. | |||||
| if self._verbose >= 2: | |||||
| print('starting iterations...') | |||||
| gihat_list = [] | |||||
| dihat_list = [] | |||||
| r = 0 | |||||
| dis_of_each_itr = [dhat] | |||||
| if self.__parallel: | |||||
| self._kernel_options['parallel'] = None | |||||
| self.__itrs = 0 | |||||
| self.__num_updates = 0 | |||||
| timer = Timer(self.__time_limit_in_sec) | |||||
| while not self.__termination_criterion_met(timer, self.__itrs, r): | |||||
| print('\n- r =', r) | |||||
| found = False | |||||
| dis_bests = dis_gs + dihat_list | |||||
| # compute numbers of edges to be inserted/deleted. | |||||
| # @todo: what if the log is negative? how to choose alpha (scalar)? | |||||
| fdgs_list = np.array(dis_bests) | |||||
| if np.min(fdgs_list) < 1: # in case the log is negative. | |||||
| fdgs_list /= np.min(fdgs_list) | |||||
| fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))] | |||||
| if np.min(fdgs_list) < 1: # in case the log is smaller than 1. | |||||
| fdgs_list = np.array(fdgs_list) + 1 | |||||
| # expand the number of modifications to increase the possibility. | |||||
| nb_vpairs_list = [nx.number_of_nodes(g) * (nx.number_of_nodes(g) - 1) for g in (Gs_nearest + gihat_list)] | |||||
| nb_vpairs_min = np.min(nb_vpairs_list) | |||||
| idx_fdgs_max = np.argmax(fdgs_list) | |||||
| fdgs_max_old = fdgs_list[idx_fdgs_max] | |||||
| fdgs_max = fdgs_max_old | |||||
| nb_modif = 1 | |||||
| for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)): | |||||
| nb_modif *= nb / (fdgs_max - idx) | |||||
| while fdgs_max < nb_vpairs_min and nb_modif < self.__l: | |||||
| fdgs_max += 1 | |||||
| nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max | |||||
| nb_increase = int(fdgs_max - fdgs_max_old) | |||||
| if nb_increase > 0: | |||||
| fdgs_list = [fdgs + 1 for fdgs in fdgs_list] # fdgs_list may be a plain list here, so increment elementwise instead of fdgs_list += 1. | |||||
| for ig, gs in enumerate(Gs_nearest + gihat_list): | |||||
| if self._verbose >= 2: | |||||
| print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) | |||||
| gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) | |||||
| if found: | |||||
| r = 0 | |||||
| gihat_list = [gnew] | |||||
| dihat_list = [dhat] | |||||
| else: | |||||
| r += 1 | |||||
| dis_of_each_itr.append(dhat) | |||||
| self.__itrs += 1 | |||||
| if self._verbose >= 2: | |||||
| print('Total number of iterations is', self.__itrs, '.') | |||||
| print('The preimage is updated', self.__num_updates, 'times.') | |||||
| print('The shortest distances for previous iterations are', dis_of_each_itr, '.') | |||||
| # get results and print. | |||||
| end_generate_preimage = time.time() | |||||
| self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||||
| self.__runtime_total = end_generate_preimage - start | |||||
| self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||||
| self.__k_dis_preimage = dhat | |||||
| if self._verbose: | |||||
| print() | |||||
| print('=============================================================================') | |||||
| print('Finished generation of preimages.') | |||||
| print('-----------------------------------------------------------------------------') | |||||
| print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset) | |||||
| print('Distance in kernel space for the preimage:', self.__k_dis_preimage) | |||||
| print('Total number of iterations for optimizing:', self.__itrs) | |||||
| print('Total number of preimage updates:', self.__num_updates) | |||||
| print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) | |||||
| print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||||
| print('Total time:', self.__runtime_total) | |||||
| print('=============================================================================') | |||||
| print() | |||||
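# Illustration only (not from the original file): the selection of the k nearest
# neighbors of phi performed in step 2 of run(), on toy distances. Ties with the
# smallest distance are counted so the best graph(s) from the dataset can be kept.
import numpy as np

k = 3
k_dis_list = [0.7, 0.2, 0.2, 0.9, 0.5]                  # toy distances to phi.
sort_idx = np.argsort(k_dis_list)                        # graph indices sorted by distance.
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:k]]    # the k shortest distances.
nb_best = len(np.argwhere(np.array(dis_gs) == dis_gs[0]).flatten().tolist())
print(sort_idx[0:k], dis_gs, nb_best)                    # [1 2 4] [0.2, 0.2, 0.5] 2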
| def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): | |||||
| if self.__parallel: | |||||
| gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) | |||||
| else: | |||||
| gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) | |||||
| return gnew, dhat, found | |||||
| def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): | |||||
| gnew = None | |||||
| updated = False | |||||
| for trial in range(0, self.__l): | |||||
| if self._verbose >= 2: | |||||
| print('---', trial + 1, 'trial out of', self.__l) | |||||
| gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) | |||||
| # get the better graph preimage. | |||||
| if dnew <= dhat: # @todo: the new distance is smaller or also equal? | |||||
| if dhat - dnew > 1e-6: | |||||
| if self._verbose >= 2: | |||||
| print('trial =', str(trial)) | |||||
| print('\nI am smaller!') | |||||
| print('index (as in D_k U {gihat}) =', str(ig)) | |||||
| print('distance:', dhat, '->', dnew) | |||||
| updated = True | |||||
| else: | |||||
| if self._verbose >= 2: | |||||
| print('I am equal!') | |||||
| dhat = dnew | |||||
| gnew = gtemp.copy() | |||||
| found = True # found better or equally good graph. | |||||
| if updated: | |||||
| self.__num_updates += 1 | |||||
| return gnew, dhat, found | |||||
| def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): | |||||
| gnew = None | |||||
| len_itr = self.__l | |||||
| gnew_list = [None] * len_itr | |||||
| dnew_list = [None] * len_itr | |||||
| itr = range(0, len_itr) | |||||
| n_jobs = multiprocessing.cpu_count() | |||||
| if len_itr < 100 * n_jobs: | |||||
| chunksize = int(len_itr / n_jobs) + 1 | |||||
| else: | |||||
| chunksize = 100 | |||||
| do_fun = partial(self._generate_graph_parallel, g_init, fdgs, term3) | |||||
| pool = Pool(processes=n_jobs) | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize), | |||||
| desc='Generating l graphs', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(do_fun, itr, chunksize) | |||||
| for idx, gnew, dnew in iterator: | |||||
| gnew_list[idx] = gnew | |||||
| dnew_list[idx] = dnew | |||||
| pool.close() | |||||
| pool.join() | |||||
| # check if get the better graph preimage. | |||||
| idx_min = np.argmin(dnew_list) | |||||
| dnew = dnew_list[idx_min] | |||||
| if dnew <= dhat: # @todo: the new distance is smaller or also equal? | |||||
| if dhat - dnew > 1e-6: # @todo: use a proportion and watch out for 0. | |||||
| if self._verbose >= 2: | |||||
| print('I am smaller!') | |||||
| print('index (as in D_k U {gihat}) =', str(ig)) | |||||
| print('distance:', dhat, '->', dnew, '\n') | |||||
| self.__num_updates += 1 | |||||
| else: | |||||
| if self._verbose >= 2: | |||||
| print('I am equal!') | |||||
| dhat = dnew | |||||
| gnew = gnew_list[idx_min] | |||||
| found = True # found better graph. | |||||
| return gnew, dhat, found | |||||
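# Standalone sketch (assumptions only, not gklearn API) of the parallel pattern
# used in __generate_l_graphs_parallel: bind the fixed arguments with
# functools.partial, let each worker return its trial index, and write results
# back by index so the unordered iterator cannot scramble them.
from functools import partial
from multiprocessing import Pool

def _one_trial(base, scale, trial):
    return trial, base + scale * trial       # placeholder per-trial computation.

if __name__ == '__main__':
    n_trials = 8
    results = [None] * n_trials
    do_fun = partial(_one_trial, 10.0, 0.5)  # fixed args bound, trial index left free.
    with Pool(processes=2) as pool:
        for idx, value in pool.imap_unordered(do_fun, range(n_trials), 2):
            results[idx] = value
    print(results)                           # results[i] corresponds to trial i.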
| def _generate_graph_parallel(self, g_init, fdgs, term3, itr): | |||||
| trial = itr | |||||
| gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) | |||||
| return trial, gtemp, dnew | |||||
| def __do_trial(self, g_init, fdgs, term3, trial): | |||||
| # add and delete edges. | |||||
| gtemp = g_init.copy() | |||||
| seed = (trial + int(time.time())) % (2 ** 32 - 1) | |||||
| rdm_state = np.random.RandomState(seed=seed) | |||||
| # which edges to change. | |||||
| # @todo: should we use just half of the adjacency matrix for undirected graphs? | |||||
| nb_vpairs = nx.number_of_nodes(g_init) * (nx.number_of_nodes(g_init) - 1) | |||||
| # @todo: what if fdgs is bigger than nb_vpairs? | |||||
| idx_change = rdm_state.randint(0, high=nb_vpairs, size=(fdgs if | |||||
| fdgs < nb_vpairs else nb_vpairs)) | |||||
| # print(idx_change) | |||||
| for item in idx_change: | |||||
| node1 = int(item / (nx.number_of_nodes(g_init) - 1)) | |||||
| node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1)) | |||||
| if node2 >= node1: # skip the self pair. | |||||
| node2 += 1 | |||||
| # @todo: is the randomness correct? | |||||
| if not gtemp.has_edge(node1, node2): | |||||
| gtemp.add_edge(node1, node2) | |||||
| else: | |||||
| gtemp.remove_edge(node1, node2) | |||||
| # compute new distances. | |||||
| kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) | |||||
| kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) | |||||
| if self._kernel_options['normalize']: | |||||
| kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize | |||||
| kernel_gtmp = 1 | |||||
| # @todo: not correct kernel value | |||||
| gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||||
| gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) | |||||
| dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) | |||||
| return gtemp, dnew | |||||
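# Illustration only: how __do_trial maps a flat index in [0, n * (n - 1)) to an
# ordered node pair (node1, node2) with node1 != node2, for a graph with n nodes.
n = 4
pairs = []
for item in range(n * (n - 1)):
    node1 = int(item / (n - 1))
    node2 = item - node1 * (n - 1)
    if node2 >= node1:  # skip the self pair.
        node2 += 1
    pairs.append((node1, node2))
print(pairs)  # all 12 ordered pairs of distinct nodes for n = 4.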
| def get_results(self): | |||||
| results = {} | |||||
| results['runtime_precompute_gm'] = self.__runtime_precompute_gm | |||||
| results['runtime_generate_preimage'] = self.__runtime_generate_preimage | |||||
| results['runtime_total'] = self.__runtime_total | |||||
| results['k_dis_dataset'] = self.__k_dis_dataset | |||||
| results['k_dis_preimage'] = self.__k_dis_preimage | |||||
| results['itrs'] = self.__itrs | |||||
| results['num_updates'] = self.__num_updates | |||||
| return results | |||||
| def __termination_criterion_met(self, timer, itr, r): | |||||
| if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): | |||||
| # if self.__state == AlgorithmState.TERMINATED: | |||||
| # self.__state = AlgorithmState.INITIALIZED | |||||
| return True | |||||
| return (r >= self.__r_max if self.__r_max >= 0 else False) | |||||
| # return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) | |||||
| @property | |||||
| def preimage(self): | |||||
| return self.__preimage | |||||
| @property | |||||
| def best_from_dataset(self): | |||||
| return self.__best_from_dataset | |||||
| @property | |||||
| def gram_matrix_unnorm(self): | |||||
| return self.__gram_matrix_unnorm | |||||
| @gram_matrix_unnorm.setter | |||||
| def gram_matrix_unnorm(self, value): | |||||
| self.__gram_matrix_unnorm = value | |||||
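# A hedged usage sketch of RandomPreimageGenerator (not part of the original
# file). `dataset` is assumed to be an already loaded gklearn Dataset object;
# the kernel name and the option values below are illustrative assumptions.
rpg = RandomPreimageGenerator(dataset=dataset)
rpg.set_options(kernel_options={'name': 'ShortestPath',  # any kernel known to get_graph_kernel_by_name.
                                'parallel': 'imap_unordered',
                                'n_jobs': multiprocessing.cpu_count(),
                                'normalize': True,
                                'verbose': 0},
                k=5,                 # nearest neighbors of phi kept from the dataset.
                r_max=10,            # iteration budget without an improvement.
                l=500,               # graphs generated per candidate per iteration.
                alphas=None,         # None: equal weights over the dataset graphs.
                parallel=True,
                verbose=2)
rpg.run()
results = rpg.get_results()
print(results['k_dis_dataset'], results['k_dis_preimage'])
preimage = rpg.preimage              # the generated pre-image graph.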
| @@ -256,7 +256,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| if (load_gm == 'auto' and not gmfile_exist) or not load_gm: | if (load_gm == 'auto' and not gmfile_exist) or not load_gm: | ||||
| gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm) | gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm) | ||||
| # write result summary for each letter. | |||||
| # write result summary for each class. | |||||
| if save_results: | if save_results: | ||||
| sod_sm_mean = np.mean(sod_sm_list) | sod_sm_mean = np.mean(sod_sm_list) | ||||
| sod_gm_mean = np.mean(sod_gm_list) | sod_gm_mean = np.mean(sod_gm_list) | ||||
| @@ -387,15 +387,15 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||||
| return np.sqrt(term1 - term2 + term3) | return np.sqrt(term1 - term2 + term3) | ||||
| def compute_k_dis(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||||
| def compute_k_dis(idx_g, idx_gi, alphas, Kmatrix, term3=0, withterm3=True): | |||||
| term1 = Kmatrix[idx_g, idx_g] | term1 = Kmatrix[idx_g, idx_g] | ||||
| term2 = 0 | term2 = 0 | ||||
| for i, a in enumerate(alpha): | |||||
| for i, a in enumerate(alphas): | |||||
| term2 += a * Kmatrix[idx_g, idx_gi[i]] | term2 += a * Kmatrix[idx_g, idx_gi[i]] | ||||
| term2 *= 2 | term2 *= 2 | ||||
| if withterm3 == False: | if withterm3 == False: | ||||
| for i1, a1 in enumerate(alpha): | |||||
| for i2, a2 in enumerate(alpha): | |||||
| for i1, a1 in enumerate(alphas): | |||||
| for i2, a2 in enumerate(alphas): | |||||
| term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | ||||
| return np.sqrt(term1 - term2 + term3) | return np.sqrt(term1 - term2 + term3) | ||||
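# Illustration only (not part of the patch): compute_k_dis evaluates the
# kernel-space distance
#   d(phi(g), sum_i alpha_i * phi(g_i))
#     = sqrt( k(g, g) - 2 * sum_i alpha_i * k(g, g_i)
#             + sum_i sum_j alpha_i * alpha_j * k(g_i, g_j) ).
# A direct NumPy check on a toy Gram matrix:
import numpy as np

K = np.array([[1.0, 0.6, 0.3],
              [0.6, 1.0, 0.5],
              [0.3, 0.5, 1.0]])
alphas = np.array([0.5, 0.5])            # equal weights for graphs 1 and 2.
idx_g, idx_gi = 0, [1, 2]
term1 = K[idx_g, idx_g]
term2 = 2 * alphas @ K[idx_g, idx_gi]
term3 = alphas @ K[np.ix_(idx_gi, idx_gi)] @ alphas
print(np.sqrt(term1 - term2 + term3))    # equals compute_k_dis(idx_g, idx_gi, alphas, K, withterm3=False).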
| @@ -300,7 +300,13 @@ def get_edge_labels(Gn, edge_label): | |||||
| def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): | def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): | ||||
| if name == 'ShortestPath': | |||||
| if name == 'Marginalized': | |||||
| from gklearn.kernels import Marginalized | |||||
| graph_kernel = Marginalized(node_labels=node_labels, | |||||
| edge_labels=edge_labels, | |||||
| ds_infos=ds_infos, | |||||
| **kernel_options) | |||||
| elif name == 'ShortestPath': | |||||
| from gklearn.kernels import ShortestPath | from gklearn.kernels import ShortestPath | ||||
| graph_kernel = ShortestPath(node_labels=node_labels, | graph_kernel = ShortestPath(node_labels=node_labels, | ||||
| node_attrs=node_attrs, | node_attrs=node_attrs, | ||||