| @@ -0,0 +1,73 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| """compute_distance_in_kernel_space.ipynb | |||||
| Automatically generated by Colaboratory. | |||||
| Original file is located at | |||||
| https://colab.research.google.com/drive/17tZP6IrineQmzo9sRtfZOnHpHx6HnlMA | |||||
| **This script demonstrates how to compute distance in kernel space between the image of a graph and the mean of images of a group of graphs.** | |||||
| --- | |||||
| **0. Install `graphkit-learn`** (e.g. run `pip install graphkit-learn` before executing this script). | |||||
| """ | |||||
| """**1. Get dataset.**""" | |||||
from gklearn.utils import Dataset

# Name of the predefined dataset to load.
ds_name = 'MUTAG'

# Create an empty Dataset, then fill it with the predefined "MUTAG" graphs.
dataset = Dataset()
dataset.load_predefined_dataset(ds_name)

# Report how many graphs were loaded. A bare `len(...)` expression only shows
# its value in a notebook/REPL; in a script it must be printed explicitly.
print(len(dataset.graphs))
| """**2. Compute graph kernel.**""" | |||||
import multiprocessing

from gklearn.kernels import PathUpToH

# Initialize the kernel-specific parameters; they are unpacked into the
# PathUpToH constructor below.
kernel_options = {
    'depth': 3,
    'k_func': 'MinMax',
    'compute_method': 'trie',
}

# Initialize the graph kernel from the dataset's label names, the dataset
# information required for computation, and the options above.
graph_kernel = PathUpToH(
    node_labels=dataset.node_labels,  # list of node label names.
    edge_labels=dataset.edge_labels,  # list of edge label names.
    ds_infos=dataset.get_dataset_infos(keys=['directed']),
    **kernel_options,
)

# Compute the Gram matrix over every graph in the dataset, using one job per
# CPU reported by the OS.
# NOTE(review): this parallel call runs at module top level; on platforms that
# spawn worker processes an `if __name__ == '__main__':` guard is usually
# required -- confirm on the target platform.
gram_matrix, run_time = graph_kernel.compute(
    dataset.graphs,
    verbose=2,  # whether to print out results.
    normalize=True,  # whether to return normalized Gram matrix.
    n_jobs=multiprocessing.cpu_count(),  # number of parallel jobs.
    parallel='imap_unordered',  # or None.
)
# Raw string: the LaTeX below contains backslashes (\mathcal, \in, \subset)
# that would otherwise be parsed as invalid escape sequences.
r"""**3. Compute distance in kernel space.**

Given a dataset $\mathcal{G}_N$, compute the distance in kernel space between the image of $G_1 \in \mathcal{G}_N$ and the mean of images of $\mathcal{G}_k \subset \mathcal{G}_N$.
"""
from gklearn.preimage.utils import compute_k_dis

# Index of $G_1$ in the dataset (and therefore in the Gram matrix).
idx_1 = 10

# Indices of the graphs in $\mathcal{G}_k$.
idx_graphs = range(0, 10)

# Weights for the images of the graphs in $\mathcal{G}_k$; all equal when
# computing the mean.
weights = [1 / len(idx_graphs)] * len(idx_graphs)

# Compute the distance in kernel space.
# NOTE(review): `withterm3=False` presumably leaves a term out of the distance
# formula -- confirm against `compute_k_dis` that this is the intended value.
dis_k = compute_k_dis(
    idx_1,
    idx_graphs,
    weights,
    gram_matrix,  # Gram matrix of all graphs.
    withterm3=False,
)
print(dis_k)