| @@ -0,0 +1,299 @@ | |||
| """Tests of graph kernels. | |||
| """ | |||
| import pytest | |||
| import multiprocessing | |||
| def chooseDataset(ds_name): | |||
| """Choose dataset according to name. | |||
| """ | |||
| from gklearn.utils import Dataset | |||
| dataset = Dataset() | |||
| # no node labels (and no edge labels). | |||
| if ds_name == 'Alkane': | |||
| dataset.load_predefined_dataset(ds_name) | |||
| dataset.trim_dataset(edge_required=False) | |||
| irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} | |||
| dataset.remove_labels(**irrelevant_labels) | |||
| # node symbolic labels. | |||
| elif ds_name == 'Acyclic': | |||
| dataset.load_predefined_dataset(ds_name) | |||
| dataset.trim_dataset(edge_required=False) | |||
| irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} | |||
| dataset.remove_labels(**irrelevant_labels) | |||
| # node non-symbolic labels. | |||
| elif ds_name == 'Letter-med': | |||
| dataset.load_predefined_dataset(ds_name) | |||
| dataset.trim_dataset(edge_required=False) | |||
| # node symbolic and non-symbolic labels (and edge symbolic labels). | |||
| elif ds_name == 'AIDS': | |||
| dataset.load_predefined_dataset(ds_name) | |||
| dataset.trim_dataset(edge_required=False) | |||
| # edge non-symbolic labels (no node labels). | |||
| elif ds_name == 'Fingerprint_edge': | |||
| dataset.load_predefined_dataset('Fingerprint') | |||
| dataset.trim_dataset(edge_required=True) | |||
| irrelevant_labels = {'edge_attrs': ['orient', 'angle']} | |||
| dataset.remove_labels(**irrelevant_labels) | |||
| # edge non-symbolic labels (and node non-symbolic labels). | |||
| elif ds_name == 'Fingerprint': | |||
| dataset.load_predefined_dataset(ds_name) | |||
| dataset.trim_dataset(edge_required=True) | |||
| # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels). | |||
| elif ds_name == 'Cuneiform': | |||
| dataset.load_predefined_dataset(ds_name) | |||
| dataset.trim_dataset(edge_required=True) | |||
| dataset.cut_graphs(range(0, 3)) | |||
| return dataset | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
| @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')]) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_CommonWalk(ds_name, parallel, weight, compute_method): | |||
| """Test common walk kernel. | |||
| """ | |||
| from gklearn.kernels import CommonWalk | |||
| import networkx as nx | |||
| dataset = chooseDataset(ds_name) | |||
| dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1]) | |||
| try: | |||
| graph_kernel = CommonWalk(node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| weight=weight, | |||
| compute_method=compute_method) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
| @pytest.mark.parametrize('remove_totters', [False]) #[True, False]) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_Marginalized(ds_name, parallel, remove_totters): | |||
| """Test marginalized kernel. | |||
| """ | |||
| from gklearn.kernels import Marginalized | |||
| dataset = chooseDataset(ds_name) | |||
| try: | |||
| graph_kernel = Marginalized(node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| p_quit=0.5, | |||
| n_iteration=2, | |||
| remove_totters=remove_totters) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| # @pytest.mark.parametrize( | |||
| # 'compute_method,ds_name,sub_kernel', | |||
| # [ | |||
| # # ('sylvester', 'Alkane', None), | |||
| # # ('conjugate', 'Alkane', None), | |||
| # # ('conjugate', 'AIDS', None), | |||
| # # ('fp', 'Alkane', None), | |||
| # # ('fp', 'AIDS', None), | |||
| # ('spectral', 'Alkane', 'exp'), | |||
| # ('spectral', 'Alkane', 'geo'), | |||
| # ] | |||
| # ) | |||
| # #@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| # def test_randomwalkkernel(ds_name, compute_method, sub_kernel): | |||
| # """Test random walk kernel kernel. | |||
| # """ | |||
| # from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
| # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
| # import functools | |||
| # Gn, y = chooseDataset(ds_name) | |||
| # mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
| # sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}] | |||
| # try: | |||
| # Kmatrix, run_time, idx = randomwalkkernel(Gn, | |||
| # compute_method=compute_method, | |||
| # weight=1e-3, | |||
| # p=None, | |||
| # q=None, | |||
| # edge_weight=None, | |||
| # node_kernels=sub_kernels, | |||
| # edge_kernels=sub_kernels, | |||
| # node_label='atom', | |||
| # edge_label='bond_type', | |||
| # sub_kernel=sub_kernel, | |||
| # # parallel=parallel, | |||
| # n_jobs=multiprocessing.cpu_count(), | |||
| # verbose=True) | |||
| # except Exception as exception: | |||
| # assert False, exception | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_ShortestPath(ds_name, parallel): | |||
| """Test shortest path kernel. | |||
| """ | |||
| from gklearn.kernels import ShortestPath | |||
| from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
| import functools | |||
| dataset = chooseDataset(ds_name) | |||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
| sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
| try: | |||
| graph_kernel = ShortestPath(node_labels=dataset.node_labels, | |||
| node_attrs=dataset.node_attrs, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| node_kernels=sub_kernels) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| #@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform']) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_StructuralSP(ds_name, parallel): | |||
| """Test structural shortest path kernel. | |||
| """ | |||
| from gklearn.kernels import StructuralSP | |||
| from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
| import functools | |||
| dataset = chooseDataset(ds_name) | |||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
| sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
| try: | |||
| graph_kernel = StructuralSP(node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| node_attrs=dataset.node_attrs, | |||
| edge_attrs=dataset.edge_attrs, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| node_kernels=sub_kernels, | |||
| edge_kernels=sub_kernels) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| #@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None]) | |||
| @pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto']) | |||
| @pytest.mark.parametrize('compute_method', ['trie', 'naive']) | |||
| def test_PathUpToH(ds_name, parallel, k_func, compute_method): | |||
| """Test path kernel up to length $h$. | |||
| """ | |||
| from gklearn.kernels import PathUpToH | |||
| dataset = chooseDataset(ds_name) | |||
| try: | |||
| graph_kernel = PathUpToH(node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| depth=2, k_func=k_func, compute_method=compute_method) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_Treelet(ds_name, parallel): | |||
| """Test treelet kernel. | |||
| """ | |||
| from gklearn.kernels import Treelet | |||
| from gklearn.utils.kernels import polynomialkernel | |||
| import functools | |||
| dataset = chooseDataset(ds_name) | |||
| pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | |||
| try: | |||
| graph_kernel = Treelet(node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| sub_kernel=pkernel) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| @pytest.mark.parametrize('ds_name', ['Acyclic']) | |||
| #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) | |||
| # @pytest.mark.parametrize('base_kernel', ['subtree']) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_WLSubtree(ds_name, parallel): | |||
| """Test Weisfeiler-Lehman subtree kernel. | |||
| """ | |||
| from gklearn.kernels import WLSubtree | |||
| dataset = chooseDataset(ds_name) | |||
| try: | |||
| graph_kernel = WLSubtree(node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| height=2) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| if __name__ == "__main__": | |||
| # test_spkernel('Alkane', 'imap_unordered') | |||
| test_StructuralSP('Fingerprint_edge', 'imap_unordered') | |||