@@ -107,7 +107,7 @@ def xp_synthesied_graphs_dataset_size():
 	# Run and save.
 	import pickle
 	import os
-	save_dir = 'outputs/'
+	save_dir = 'outputs/synthesized_graphs_N/'
 	if not os.path.exists(save_dir):
 		os.makedirs(save_dir)
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Sep 21 10:34:26 2020
+
+@author: ljia
+"""
+from utils import Graph_Kernel_List, compute_graph_kernel
+
+
+def generate_graphs(num_nodes):
+	from gklearn.utils.graph_synthesizer import GraphSynthesizer
+	gsyzer = GraphSynthesizer()
+	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=num_nodes, num_edges=int(num_nodes*2), num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
+	return graphs
+
+
+def xp_synthesied_graphs_num_nodes():
+	# Run and save.
+	import pickle
+	import os
+	save_dir = 'outputs/synthesized_graphs_num_nodes/'
+	if not os.path.exists(save_dir):
+		os.makedirs(save_dir)
+
+	run_times = {}
+
+	for kernel_name in Graph_Kernel_List:
+		print()
+		print('Kernel:', kernel_name)
+
+		run_times[kernel_name] = []
+		for num_nodes in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
+			print()
+			print('Number of nodes:', num_nodes)
+
+			# Generate graphs.
+			graphs = generate_graphs(num_nodes)
+
+			# Compute Gram matrix.
+			gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
+			run_times[kernel_name].append(run_time)
+
+			# Checkpoint the timings collected so far after each kernel/size pair.
+			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_nodes) + '.pkl', 'wb'))
+
+	# Save all.
+	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
+
+	return
+
+
+if __name__ == '__main__':
+	xp_synthesied_graphs_num_nodes()
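Reviewer note (not part of the diff): the script checkpoints `run_times` after every kernel/size pair and once more at the end, so partial results survive an interrupted run. A minimal sketch for reading the timings back, assuming the script above has run; the path and the dict layout follow its save calls:

    import pickle

    with open('outputs/synthesized_graphs_num_nodes/run_times.pkl', 'rb') as f:
        run_times = pickle.load(f)  # dict: kernel name -> run times, one per graph size

    for kernel_name, times in run_times.items():
        print(kernel_name, ['%.3g' % t for t in times])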
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Sep 22 11:33:28 2020
+
+@author: ljia
+"""
+
+
+Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'StructuralSP', 'CommonWalk']
+# Graph_Kernel_List = ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree']
+
+
+def compute_graph_kernel(graphs, kernel_name):
+	import multiprocessing
+
+	if kernel_name == 'CommonWalk':
+		from gklearn.kernels.commonWalkKernel import commonwalkkernel
+		estimator = commonwalkkernel
+		params = {'compute_method': 'geo', 'weight': 0.1}
+
+	elif kernel_name == 'Marginalized':
+		from gklearn.kernels.marginalizedKernel import marginalizedkernel
+		estimator = marginalizedkernel
+		params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False}
+
+	elif kernel_name == 'SylvesterEquation':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		params = {'compute_method': 'sylvester', 'weight': 0.1}
+
+	elif kernel_name == 'ConjugateGradient':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'compute_method': 'conjugate', 'weight': 0.1, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'FixedPoint':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'SpectralDecomposition':
+		from gklearn.kernels.randomWalkKernel import randomwalkkernel
+		estimator = randomwalkkernel
+		params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1}
+
+	elif kernel_name == 'ShortestPath':
+		from gklearn.kernels.spKernel import spkernel
+		estimator = spkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'node_kernels': sub_kernel}
+
+	elif kernel_name == 'StructuralSP':
+		from gklearn.kernels.structuralspKernel import structuralspkernel
+		estimator = structuralspkernel
+		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+		import functools
+		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+		params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}
+
+	elif kernel_name == 'PathUpToH':
+		from gklearn.kernels.untilHPathKernel import untilhpathkernel
+		estimator = untilhpathkernel
+		params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'}
+
+	elif kernel_name == 'Treelet':
+		from gklearn.kernels.treeletKernel import treeletkernel
+		estimator = treeletkernel
+		from gklearn.utils.kernels import polynomialkernel
+		import functools
+		sub_kernel = functools.partial(polynomialkernel, d=4, c=1e+8)
+		params = {'sub_kernel': sub_kernel}
+
+	elif kernel_name == 'WLSubtree':
+		from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
+		estimator = weisfeilerlehmankernel
+		params = {'base_kernel': 'subtree', 'height': 5}
+
+	params['n_jobs'] = multiprocessing.cpu_count()
+	params['verbose'] = True
+	results = estimator(graphs, **params)
+
+	return results[0], results[1]
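Reviewer note (not part of the diff): a minimal sketch tying the two new files together, assuming this module is importable as `utils` (matching the `from utils import ...` line in the first script) and that gklearn and its dependencies are installed:

    from gklearn.utils.graph_synthesizer import GraphSynthesizer
    from utils import compute_graph_kernel

    gsyzer = GraphSynthesizer()
    graphs = gsyzer.unified_graphs(num_graphs=10, num_nodes=20, num_edges=40,
                                   num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
    gram_matrix, run_time = compute_graph_kernel(graphs, 'WLSubtree')
    print(run_time)  # wall-clock time reported by the estimator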
@@ -17,33 +17,37 @@ class GraphSynthesizer(object):
 		pass
 
-	def unified_graphs(self, num_graphs=1000, num_nodes=100, num_edges=196, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
+	def random_graph(self, num_nodes, num_edges, num_node_labels=0, num_edge_labels=0, seed=None, directed=False, max_num_edges=None, all_edges=None):
+		g = nx.Graph()
+		if num_node_labels > 0:
+			# Draw all node labels at once instead of resampling inside the loop.
+			node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
+			for i in range(0, num_nodes):
+				g.add_node(str(i), node_label=node_labels[i])
+		else:
+			for i in range(0, num_nodes):
+				g.add_node(str(i))
+
+		if num_edge_labels > 0:
+			edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
+			# Enumerate so that edge_labels is indexed by 0..num_edges-1, not by the
+			# sampled position in all_edges (which can exceed len(edge_labels)).
+			for idx, i in enumerate(random.sample(range(0, max_num_edges), num_edges)):
+				node1, node2 = all_edges[i]
+				g.add_edge(node1, node2, edge_label=edge_labels[idx])
+		else:
+			for i in random.sample(range(0, max_num_edges), num_edges):
+				node1, node2 = all_edges[i]
+				g.add_edge(node1, node2)
+		return g
+
+	def unified_graphs(self, num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
 		max_num_edges = int((num_nodes - 1) * num_nodes / 2)
 		if num_edges > max_num_edges:
 			raise Exception('Too many edges.')
 		all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)] # @todo: optimize. No directed graphs.
 
 		graphs = []
-		for idx in range(0, num_graphs):
-			g = nx.Graph()
-			if num_node_labels > 0:
-				for i in range(0, num_nodes):
-					node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
-					g.add_node(str(i), node_label=node_labels[i])
-			else:
-				for i in range(0, num_nodes):
-					g.add_node(str(i))
-			if num_edge_labels > 0:
-				edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
-				for i in random.sample(range(0, max_num_edges), num_edges):
-					node1, node2 = all_edges[i]
-					g.add_edge(node1, node2, edge_label=edge_labels[i])
-			else:
-				for i in random.sample(range(0, max_num_edges), num_edges):
-					node1, node2 = all_edges[i]
-					g.add_edge(node1, node2)
-			graphs.append(g)
+		for idx in range(0, num_graphs):
+			graphs.append(self.random_graph(num_nodes, num_edges, num_node_labels=num_node_labels, num_edge_labels=num_edge_labels, seed=seed, directed=directed, max_num_edges=max_num_edges, all_edges=all_edges))
+
 		return graphs
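Reviewer note (not part of the diff): the refactoring above extracts the per-graph construction into `random_graph()`, so `unified_graphs()` computes `max_num_edges` and `all_edges` once and reuses them across all `num_graphs` graphs. A sketch of calling the extracted helper directly, passing the same precomputed arguments `unified_graphs()` now supplies:

    from gklearn.utils.graph_synthesizer import GraphSynthesizer

    num_nodes, num_edges = 10, 20
    max_num_edges = int((num_nodes - 1) * num_nodes / 2)
    all_edges = [(i, j) for i in range(num_nodes) for j in range(i + 1, num_nodes)]

    g = GraphSynthesizer().random_graph(num_nodes, num_edges,
                                        max_num_edges=max_num_edges, all_edges=all_edges)
    print(g.number_of_nodes(), g.number_of_edges())  # 10 20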