| @@ -0,0 +1,28 @@ | |||||
| environment: | |||||
| matrix: | |||||
| - PYTHON: "C:\\Python35" | |||||
| - PYTHON: "C:\\Python35-x64" | |||||
| - PYTHON: "C:\\Python36" | |||||
| - PYTHON: "C:\\Python36-x64" | |||||
| - PYTHON: "C:\\Python37" | |||||
| - PYTHON: "C:\\Python37-x64" | |||||
| - PYTHON: "C:\\Python38" | |||||
| - PYTHON: "C:\\Python38-x64" | |||||
| # skip_commits: | |||||
| # files: | |||||
| # - "*.yml" | |||||
| # - "*.rst" | |||||
| # - "LICENSE" | |||||
| install: | |||||
| - "%PYTHON%\\python.exe -m pip install -U pip" | |||||
| - "%PYTHON%\\python.exe -m pip install -U pytest" | |||||
| - "%PYTHON%\\python.exe -m pip install -r requirements.txt" | |||||
| - "%PYTHON%\\python.exe -m pip install wheel" | |||||
| build: off | |||||
| test_script: | |||||
| - "%PYTHON%\\python.exe setup.py bdist_wheel" | |||||
| - "%PYTHON%\\python.exe -m pytest -v gklearn/tests/" | |||||
| @@ -1,5 +1,6 @@ | |||||
| # graphkit-learn | # graphkit-learn | ||||
| [](https://travis-ci.org/jajupmochi/graphkit-learn) | [](https://travis-ci.org/jajupmochi/graphkit-learn) | ||||
| [](https://ci.appveyor.com/project/jajupmochi/graphkit-learn) | |||||
| [](https://codecov.io/gh/jajupmochi/graphkit-learn) | [](https://codecov.io/gh/jajupmochi/graphkit-learn) | ||||
| [](https://graphkit-learn.readthedocs.io/en/master/?badge=master) | [](https://graphkit-learn.readthedocs.io/en/master/?badge=master) | ||||
| [](https://badge.fury.io/py/graphkit-learn) | [](https://badge.fury.io/py/graphkit-learn) | ||||
| @@ -1 +1,2 @@ | |||||
| from gklearn.ged.env.common_types import AlgorithmState | |||||
| from gklearn.ged.env.common_types import AlgorithmState | |||||
| from gklearn.ged.env.node_map import NodeMap | |||||
| @@ -0,0 +1,80 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Wed Apr 22 11:31:26 2020 | |||||
| @author: ljia | |||||
| """ | |||||
| import numpy as np | |||||
class NodeMap(object):
    """Node assignment between the nodes of a source graph g and a target graph h.

    Two lists are kept in sync: the forward map (indexed by source node ID,
    holding the assigned target node) and the backward map (indexed by target
    node ID, holding the assigned source node). ``np.inf`` is the placeholder
    stored in every entry that has not been assigned, and is also the initial
    value of the induced cost.
    """

    def __init__(self, num_nodes_g, num_nodes_h):
        """Create a node map whose entries are all ``np.inf``.

        Parameters
        ----------
        num_nodes_g : int
            Number of entries of the forward map (source nodes).
        num_nodes_h : int
            Number of entries of the backward map (target nodes).
        """
        self.__forward_map = [np.inf] * num_nodes_g
        self.__backward_map = [np.inf] * num_nodes_h
        self.__induced_cost = np.inf

    def num_source_nodes(self):
        """Return the number of entries of the forward map."""
        return len(self.__forward_map)

    def num_target_nodes(self):
        """Return the number of entries of the backward map."""
        return len(self.__backward_map)

    def image(self, node):
        """Return the forward-map entry stored for source node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not a valid index of the forward map. Negative IDs
            are rejected as well, so they cannot silently wrap around to the
            end of the list.
        """
        if not 0 <= node < len(self.__forward_map):
            raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')
        return self.__forward_map[node]

    def pre_image(self, node):
        """Return the backward-map entry stored for target node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not a valid index of the backward map. Negative IDs
            are rejected as well, so they cannot silently wrap around to the
            end of the list.
        """
        if not 0 <= node < len(self.__backward_map):
            raise Exception('The node with ID ' + str(node) + ' is not contained in the target nodes of the node map.')
        return self.__backward_map[node]

    def get_forward_map(self):
        """Return the internal forward map (the list itself, not a copy)."""
        return self.__forward_map

    def get_backward_map(self):
        """Return the internal backward map (the list itself, not a copy)."""
        return self.__backward_map

    def as_relation(self, relation):
        """Overwrite ``relation`` in place with the assignments as tuples.

        ``relation`` is cleared first. Then one ``(i, k)`` tuple is appended
        for every source node ``i`` whose forward entry ``k`` is not
        ``np.inf``, followed by one ``(np.inf, k)`` tuple for every target
        node ``k`` whose backward entry is ``np.inf``.

        Parameters
        ----------
        relation : list
            Output list; its previous content is discarded.
        """
        relation.clear()
        # NOTE(review): source nodes whose forward entry is np.inf are left
        # out, while target nodes whose backward entry is np.inf are emitted
        # as (np.inf, k). Confirm this asymmetry is intended.
        for i, k in enumerate(self.__forward_map):
            if k != np.inf:
                relation.append((i, k))
        for k, i in enumerate(self.__backward_map):
            if i == np.inf:
                relation.append((i, k))

    def add_assignment(self, i, k):
        """Record the assignment of source node ``i`` to target node ``k``.

        If ``i`` is not ``np.inf``, the forward entry of ``i`` is set to
        ``k``; if ``k`` is not ``np.inf``, the backward entry of ``k`` is set
        to ``i``. Both IDs are validated before anything is written, so a
        failing call leaves the node map unchanged.

        Raises
        ------
        Exception
            If ``i`` (resp. ``k``) is not ``np.inf`` and is not a valid index
            of the forward (resp. backward) map.
        """
        has_source = i != np.inf
        has_target = k != np.inf
        if has_source and not 0 <= i < len(self.__forward_map):
            raise Exception('The node with ID ' + str(i) + ' is not contained in the source nodes of the node map.')
        if has_target and not 0 <= k < len(self.__backward_map):
            raise Exception('The node with ID ' + str(k) + ' is not contained in the target nodes of the node map.')
        if has_source:
            self.__forward_map[i] = k
        if has_target:
            self.__backward_map[k] = i

    def set_induced_cost(self, induced_cost):
        """Store ``induced_cost`` as the cost associated with this node map."""
        self.__induced_cost = induced_cost

    def induced_cost(self):
        """Return the stored induced cost (``np.inf`` until one is set)."""
        return self.__induced_cost
| @@ -7,11 +7,10 @@ Created on Mon Mar 16 17:26:40 2020 | |||||
| """ | """ | ||||
| def test_median_graph_estimator(): | def test_median_graph_estimator(): | ||||
| from gklearn.utils.graphfiles import loadDataset | |||||
| from gklearn.utils import load_dataset | |||||
| from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | ||||
| from gklearn.gedlib import librariesImport, gedlibpy | from gklearn.gedlib import librariesImport, gedlibpy | ||||
| from gklearn.preimage.utils import get_same_item_indices | from gklearn.preimage.utils import get_same_item_indices | ||||
| from gklearn.preimage.ged import convertGraph | |||||
| import multiprocessing | import multiprocessing | ||||
| # estimator parameters. | # estimator parameters. | ||||
| @@ -22,17 +21,17 @@ def test_median_graph_estimator(): | |||||
| # algorithm parameters. | # algorithm parameters. | ||||
| algo = 'IPFP' | algo = 'IPFP' | ||||
| initial_solutions = 40 | |||||
| algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1' | |||||
| initial_solutions = 1 | |||||
| algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||||
| edit_cost_name = 'LETTER2' | edit_cost_name = 'LETTER2' | ||||
| edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] | edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] | ||||
| ds_name = 'COIL-DEL' | |||||
| ds_name = 'Letter_high' | |||||
| # Load dataset. | # Load dataset. | ||||
| # dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' | # dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' | ||||
| dataset = '../../../datasets/Letter-high/Letter-high_A.txt' | dataset = '../../../datasets/Letter-high/Letter-high_A.txt' | ||||
| Gn, y_all = loadDataset(dataset) | |||||
| Gn, y_all, label_names = load_dataset(dataset) | |||||
| y_idx = get_same_item_indices(y_all) | y_idx = get_same_item_indices(y_all) | ||||
| for i, (y, values) in enumerate(y_idx.items()): | for i, (y, values) in enumerate(y_idx.items()): | ||||
| Gn_i = [Gn[val] for val in values] | Gn_i = [Gn[val] for val in values] | ||||
| @@ -43,7 +42,7 @@ def test_median_graph_estimator(): | |||||
| # gedlibpy.restart_env() | # gedlibpy.restart_env() | ||||
| ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | ||||
| for G in Gn_i: | for G in Gn_i: | ||||
| ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '') | |||||
| ged_env.add_nx_graph(G, '') | |||||
| graph_ids = ged_env.get_all_graph_ids() | graph_ids = ged_env.get_all_graph_ids() | ||||
| set_median_id = ged_env.add_graph('set_median') | set_median_id = ged_env.add_graph('set_median') | ||||
| gen_median_id = ged_env.add_graph('gen_median') | gen_median_id = ged_env.add_graph('gen_median') | ||||
| @@ -54,11 +53,89 @@ def test_median_graph_estimator(): | |||||
| mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | ||||
| mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | ||||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE'# @todo: std::to_string(rng()) | |||||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO '# @todo: std::to_string(rng()) | |||||
| # Select the GED algorithm. | # Select the GED algorithm. | ||||
| algo_options = '--threads ' + str(threads) + algo_options_suffix | algo_options = '--threads ' + str(threads) + algo_options_suffix | ||||
| mge.set_options(mge_options) | mge.set_options(mge_options) | ||||
| mge.set_label_names(node_labels=label_names['node_labels'], | |||||
| edge_labels=label_names['edge_labels'], | |||||
| node_attrs=label_names['node_attrs'], | |||||
| edge_attrs=label_names['edge_attrs']) | |||||
| mge.set_init_method(algo, algo_options) | |||||
| mge.set_descent_method(algo, algo_options) | |||||
| # Run the estimator. | |||||
| mge.run(graph_ids, set_median_id, gen_median_id) | |||||
| # Get SODs. | |||||
| sod_sm = mge.get_sum_of_distances('initialized') | |||||
| sod_gm = mge.get_sum_of_distances('converged') | |||||
| print('sod_sm, sod_gm: ', sod_sm, sod_gm) | |||||
| # Get median graphs. | |||||
| set_median = ged_env.get_nx_graph(set_median_id) | |||||
| gen_median = ged_env.get_nx_graph(gen_median_id) | |||||
| return set_median, gen_median | |||||
| def test_median_graph_estimator_symb(): | |||||
| from gklearn.utils import load_dataset | |||||
| from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | |||||
| from gklearn.gedlib import librariesImport, gedlibpy | |||||
| from gklearn.preimage.utils import get_same_item_indices | |||||
| import multiprocessing | |||||
| # estimator parameters. | |||||
| init_type = 'MEDOID' | |||||
| num_inits = 1 | |||||
| threads = multiprocessing.cpu_count() | |||||
| time_limit = 60000 | |||||
| # algorithm parameters. | |||||
| algo = 'IPFP' | |||||
| initial_solutions = 1 | |||||
| algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||||
| edit_cost_name = 'CONSTANT' | |||||
| edit_cost_constants = [4, 4, 2, 1, 1, 1] | |||||
| ds_name = 'MUTAG' | |||||
| # Load dataset. | |||||
| dataset = '../../../datasets/MUTAG/MUTAG_A.txt' | |||||
| Gn, y_all, label_names = load_dataset(dataset) | |||||
| y_idx = get_same_item_indices(y_all) | |||||
| for i, (y, values) in enumerate(y_idx.items()): | |||||
| Gn_i = [Gn[val] for val in values] | |||||
| break | |||||
| Gn_i = Gn_i[0:10] | |||||
| # Set up the environment. | |||||
| ged_env = gedlibpy.GEDEnv() | |||||
| # gedlibpy.restart_env() | |||||
| ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | |||||
| for G in Gn_i: | |||||
| ged_env.add_nx_graph(G, '') | |||||
| graph_ids = ged_env.get_all_graph_ids() | |||||
| set_median_id = ged_env.add_graph('set_median') | |||||
| gen_median_id = ged_env.add_graph('gen_median') | |||||
| ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES') | |||||
| # Set up the estimator. | |||||
| mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name)) | |||||
| mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | |||||
| mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | |||||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE'# @todo: std::to_string(rng()) | |||||
| # Select the GED algorithm. | |||||
| algo_options = '--threads ' + str(threads) + algo_options_suffix | |||||
| mge.set_options(mge_options) | |||||
| mge.set_label_names(node_labels=label_names['node_labels'], | |||||
| edge_labels=label_names['edge_labels'], | |||||
| node_attrs=label_names['node_attrs'], | |||||
| edge_attrs=label_names['edge_attrs']) | |||||
| mge.set_init_method(algo, algo_options) | mge.set_init_method(algo, algo_options) | ||||
| mge.set_descent_method(algo, algo_options) | mge.set_descent_method(algo, algo_options) | ||||
| @@ -78,4 +155,5 @@ def test_median_graph_estimator(): | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| set_median, gen_median = test_median_graph_estimator() | |||||
| set_median, gen_median = test_median_graph_estimator() | |||||
| # set_median, gen_median = test_median_graph_estimator_symb() | |||||
| @@ -30,6 +30,8 @@ def mge_options_to_string(options): | |||||
| opt_str += '--randomness ' + str(val) + ' ' | opt_str += '--randomness ' + str(val) + ' ' | ||||
| elif key == 'verbose': | elif key == 'verbose': | ||||
| opt_str += '--stdout ' + str(val) + ' ' | opt_str += '--stdout ' + str(val) + ' ' | ||||
| elif key == 'update_order': | |||||
| opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' ' | |||||
| elif key == 'refine': | elif key == 'refine': | ||||
| opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' | opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' | ||||
| elif key == 'time_limit': | elif key == 'time_limit': | ||||
| @@ -35,8 +35,8 @@ from libcpp.pair cimport pair | |||||
| from libcpp.list cimport list | from libcpp.list cimport list | ||||
| #Long unsigned int equivalent | #Long unsigned int equivalent | ||||
| cimport numpy as np | |||||
| ctypedef np.npy_uint32 UINT32_t | |||||
| cimport numpy as cnp | |||||
| ctypedef cnp.npy_uint32 UINT32_t | |||||
| from cpython cimport array | from cpython cimport array | ||||
| @@ -76,14 +76,14 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||||
| void runMethod(size_t g, size_t h) except + | void runMethod(size_t g, size_t h) except + | ||||
| double getUpperBound(size_t g, size_t h) except + | double getUpperBound(size_t g, size_t h) except + | ||||
| double getLowerBound(size_t g, size_t h) except + | double getLowerBound(size_t g, size_t h) except + | ||||
| vector[np.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||||
| vector[np.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||||
| vector[cnp.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||||
| vector[cnp.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||||
| size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + | size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + | ||||
| size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + | size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + | ||||
| double getInducedCost(size_t g, size_t h) except + | double getInducedCost(size_t g, size_t h) except + | ||||
| vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + | vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + | ||||
| vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + | vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + | ||||
| vector[vector[np.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||||
| vector[vector[cnp.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||||
| double getRuntime(size_t g, size_t h) except + | double getRuntime(size_t g, size_t h) except + | ||||
| bool quasimetricCosts() except + | bool quasimetricCosts() except + | ||||
| vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + | vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + | ||||
| @@ -105,14 +105,16 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||||
| map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + | map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + | ||||
| string getInitType() except + | string getInitType() except + | ||||
| # double getNodeCost(size_t label1, size_t label2) except + | # double getNodeCost(size_t label1, size_t label2) except + | ||||
| void computeInducedCost(size_t g_id, size_t h_id) except + | |||||
| double computeInducedCost(size_t g_id, size_t h_id, vector[pair[size_t,size_t]]) except + | |||||
| ############################# | ############################# | ||||
| ##CYTHON WRAPPER INTERFACES## | ##CYTHON WRAPPER INTERFACES## | ||||
| ############################# | ############################# | ||||
| import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from gklearn.ged.env import NodeMap | |||||
| # import librariesImport | # import librariesImport | ||||
| from ctypes import * | from ctypes import * | ||||
| @@ -726,13 +728,30 @@ cdef class GEDEnv: | |||||
| :type g: size_t | :type g: size_t | ||||
| :type h: size_t | :type h: size_t | ||||
| :return: The Node Map between the two selected graph. | :return: The Node Map between the two selected graph. | ||||
| :rtype: list[tuple(size_t, size_t)] | |||||
| :rtype: gklearn.ged.env.NodeMap. | |||||
| .. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() | .. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() | ||||
| .. warning:: run_method() between the same two graph must be called before this function. | .. warning:: run_method() between the same two graph must be called before this function. | ||||
| .. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example. | .. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example. | ||||
| """ | """ | ||||
| return self.c_env.getNodeMap(g, h) | |||||
| map_as_relation = self.c_env.getNodeMap(g, h) | |||||
| induced_cost = self.c_env.getInducedCost(g, h) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. | |||||
| source_map = [item.first if item.first < len(map_as_relation) else np.inf for item in map_as_relation] # item.first < len(map_as_relation) is not exactly correct. | |||||
| # print(source_map) | |||||
| target_map = [item.second if item.second < len(map_as_relation) else np.inf for item in map_as_relation] | |||||
| # print(target_map) | |||||
| num_node_source = len([item for item in source_map if item != np.inf]) | |||||
| # print(num_node_source) | |||||
| num_node_target = len([item for item in target_map if item != np.inf]) | |||||
| # print(num_node_target) | |||||
| node_map = NodeMap(num_node_source, num_node_target) | |||||
| # print(node_map.get_forward_map(), node_map.get_backward_map()) | |||||
| for i in range(len(source_map)): | |||||
| node_map.add_assignment(source_map[i], target_map[i]) | |||||
| node_map.set_induced_cost(induced_cost) | |||||
| return node_map | |||||
| def get_assignment_matrix(self, g, h) : | def get_assignment_matrix(self, g, h) : | ||||
| @@ -1320,7 +1339,7 @@ cdef class GEDEnv: | |||||
| return graph_id | return graph_id | ||||
| def compute_induced_cost(self, g_id, h_id): | |||||
| def compute_induced_cost(self, g_id, h_id, node_map): | |||||
| """ | """ | ||||
| Computes the edit cost between two graphs induced by a node map. | Computes the edit cost between two graphs induced by a node map. | ||||
| @@ -1330,19 +1349,25 @@ cdef class GEDEnv: | |||||
| ID of input graph. | ID of input graph. | ||||
| h_id : int | h_id : int | ||||
| ID of input graph. | ID of input graph. | ||||
| node_map: gklearn.ged.env.NodeMap. | |||||
| The NodeMap instance whose induced cost will be computed and re-assigned. | |||||
| Returns | Returns | ||||
| ------- | ------- | ||||
| None. | |||||
| Notes | |||||
| ----- | |||||
| The induced edit cost of the node map between `g_id` and `h_id` is implictly computed and stored in `GEDEnv::node_maps_`. | |||||
| """ | |||||
| cost = 0.0 | |||||
| self.c_env.computeInducedCost(g_id, h_id) | |||||
| None. | |||||
| """ | |||||
| relation = [] | |||||
| node_map.as_relation(relation) | |||||
| # print(relation) | |||||
| dummy_node = get_dummy_node() | |||||
| # print(dummy_node) | |||||
| for i, val in enumerate(relation): | |||||
| val1 = dummy_node if val[0] == np.inf else val[0] | |||||
| val2 = dummy_node if val[1] == np.inf else val[1] | |||||
| relation[i] = tuple((val1, val2)) | |||||
| # print(relation) | |||||
| induced_cost = self.c_env.computeInducedCost(g_id, h_id, relation) | |||||
| node_map.set_induced_cost(induced_cost) | |||||
| ##################################################################### | ##################################################################### | ||||
| @@ -475,8 +475,9 @@ public: | |||||
| * @brief Computes the edit cost between two graphs induced by a node map. | * @brief Computes the edit cost between two graphs induced by a node map. | ||||
| * @param[in] g_id ID of input graph. | * @param[in] g_id ID of input graph. | ||||
| * @param[in] h_id ID of input graph. | * @param[in] h_id ID of input graph. | ||||
| * @return Computed induced cost. | |||||
| */ | */ | ||||
| void computeInducedCost(std::size_t g_id, std::size_t h_id) const; | |||||
| double computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const; | |||||
| // /*! | // /*! | ||||
| // * @brief Returns node relabeling, insertion, or deletion cost. | // * @brief Returns node relabeling, insertion, or deletion cost. | ||||
| @@ -492,7 +493,7 @@ public: | |||||
| private: | private: | ||||
| ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> env; // environment variable | |||||
| ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> * env_; // environment variable | |||||
| bool initialized; // initialization boolean (because env has one but not accessible) | bool initialized; // initialization boolean (because env has one but not accessible) | ||||
| @@ -277,11 +277,16 @@ std::string toStringVectorInt(std::vector<unsigned long int> vector) { | |||||
| PyGEDEnv::PyGEDEnv () { | PyGEDEnv::PyGEDEnv () { | ||||
| this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
| env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
| this->initialized = false; | this->initialized = false; | ||||
| } | } | ||||
| PyGEDEnv::~PyGEDEnv () {} | |||||
| PyGEDEnv::~PyGEDEnv () { | |||||
| if (env_ != NULL) { | |||||
| delete env_; | |||||
| env_ = NULL; | |||||
| } | |||||
| } | |||||
| // bool initialized = false; //Initialization boolean (because Env has one but not accessible). | // bool initialized = false; //Initialization boolean (because Env has one but not accessible). | ||||
| @@ -290,64 +295,68 @@ bool PyGEDEnv::isInitialized() { | |||||
| } | } | ||||
| void PyGEDEnv::restartEnv() { | void PyGEDEnv::restartEnv() { | ||||
| this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
| if (env_ != NULL) { | |||||
| delete env_; | |||||
| env_ = NULL; | |||||
| } | |||||
| env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
| initialized = false; | initialized = false; | ||||
| } | } | ||||
| void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { | void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { | ||||
| std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(this->env.load_gxl_graph(pathFolder, pathXML, | |||||
| std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(env_->load_gxl_graph(pathFolder, pathXML, | |||||
| (node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | (node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | ||||
| (edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | (edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | ||||
| std::unordered_set<std::string>(), std::unordered_set<std::string>())); | std::unordered_set<std::string>(), std::unordered_set<std::string>())); | ||||
| } | } | ||||
| std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { | std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { | ||||
| return this->env.graph_ids(); | |||||
| return env_->graph_ids(); | |||||
| } | } | ||||
| std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { | std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { | ||||
| std::vector<std::size_t> listID; | std::vector<std::size_t> listID; | ||||
| for (std::size_t i = this->env.graph_ids().first; i != this->env.graph_ids().second; i++) { | |||||
| for (std::size_t i = env_->graph_ids().first; i != env_->graph_ids().second; i++) { | |||||
| listID.push_back(i); | listID.push_back(i); | ||||
| } | } | ||||
| return listID; | return listID; | ||||
| } | } | ||||
| const std::string PyGEDEnv::getGraphClass(std::size_t id) const { | const std::string PyGEDEnv::getGraphClass(std::size_t id) const { | ||||
| return this->env.get_graph_class(id); | |||||
| return env_->get_graph_class(id); | |||||
| } | } | ||||
| const std::string PyGEDEnv::getGraphName(std::size_t id) const { | const std::string PyGEDEnv::getGraphName(std::size_t id) const { | ||||
| return this->env.get_graph_name(id); | |||||
| return env_->get_graph_name(id); | |||||
| } | } | ||||
| std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { | std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { | ||||
| ged::GEDGraph::GraphID newId = this->env.add_graph(graph_name, graph_class); | |||||
| ged::GEDGraph::GraphID newId = env_->add_graph(graph_name, graph_class); | |||||
| initialized = false; | initialized = false; | ||||
| return std::stoi(std::to_string(newId)); | return std::stoi(std::to_string(newId)); | ||||
| } | } | ||||
| void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { | void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { | ||||
| this->env.add_node(graphId, nodeId, nodeLabel); | |||||
| env_->add_node(graphId, nodeId, nodeLabel); | |||||
| initialized = false; | initialized = false; | ||||
| } | } | ||||
| /*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { | /*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { | ||||
| this->env.add_edge(graphId, tail, head, edgeLabel); | |||||
| env_->add_edge(graphId, tail, head, edgeLabel); | |||||
| }*/ | }*/ | ||||
| void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { | void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { | ||||
| this->env.add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||||
| env_->add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||||
| initialized = false; | initialized = false; | ||||
| } | } | ||||
| void PyGEDEnv::clearGraph(std::size_t graphId) { | void PyGEDEnv::clearGraph(std::size_t graphId) { | ||||
| this->env.clear_graph(graphId); | |||||
| env_->clear_graph(graphId); | |||||
| initialized = false; | initialized = false; | ||||
| } | } | ||||
| ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { | ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { | ||||
| return this->env.get_graph(graphId); | |||||
| return env_->get_graph(graphId); | |||||
| } | } | ||||
| std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { | std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { | ||||
| @@ -379,71 +388,71 @@ std::vector<std::vector<std::size_t>> PyGEDEnv::getGraphAdjacenceMatrix(std::siz | |||||
| } | } | ||||
| void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { | void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { | ||||
| this->env.set_edit_costs(translateEditCost(editCost), editCostConstants); | |||||
| env_->set_edit_costs(translateEditCost(editCost), editCostConstants); | |||||
| } | } | ||||
| void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { | void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { | ||||
| //this->env.set_edit_costs(Your EditCost Class(editCostConstants)); | |||||
| //env_->set_edit_costs(Your EditCost Class(editCostConstants)); | |||||
| } | } | ||||
| // void PyGEDEnv::initEnv() { | // void PyGEDEnv::initEnv() { | ||||
| // this->env.init(); | |||||
| // env_->init(); | |||||
| // initialized = true; | // initialized = true; | ||||
| // } | // } | ||||
| void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { | void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { | ||||
| this->env.init(translateInitOptions(initOption), print_to_stdout); | |||||
| env_->init(translateInitOptions(initOption), print_to_stdout); | |||||
| initialized = true; | initialized = true; | ||||
| } | } | ||||
| void PyGEDEnv::setMethod(std::string method, const std::string & options) { | void PyGEDEnv::setMethod(std::string method, const std::string & options) { | ||||
| this->env.set_method(translateMethod(method), options); | |||||
| env_->set_method(translateMethod(method), options); | |||||
| } | } | ||||
| void PyGEDEnv::initMethod() { | void PyGEDEnv::initMethod() { | ||||
| this->env.init_method(); | |||||
| env_->init_method(); | |||||
| } | } | ||||
| double PyGEDEnv::getInitime() const { | double PyGEDEnv::getInitime() const { | ||||
| return this->env.get_init_time(); | |||||
| return env_->get_init_time(); | |||||
| } | } | ||||
| void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { | void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { | ||||
| this->env.run_method(g, h); | |||||
| env_->run_method(g, h); | |||||
| } | } | ||||
| double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { | double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { | ||||
| return this->env.get_upper_bound(g, h); | |||||
| return env_->get_upper_bound(g, h); | |||||
| } | } | ||||
| double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { | double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { | ||||
| return this->env.get_lower_bound(g, h); | |||||
| return env_->get_lower_bound(g, h); | |||||
| } | } | ||||
| std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { | std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { | ||||
| return this->env.get_node_map(g, h).get_forward_map(); | |||||
| return env_->get_node_map(g, h).get_forward_map(); | |||||
| } | } | ||||
| std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { | std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { | ||||
| return this->env.get_node_map(g, h).get_backward_map(); | |||||
| return env_->get_node_map(g, h).get_backward_map(); | |||||
| } | } | ||||
| std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | ||||
| return this->env.get_node_map(g, h).image(nodeId); | |||||
| return env_->get_node_map(g, h).image(nodeId); | |||||
| } | } | ||||
| std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | ||||
| return this->env.get_node_map(g, h).pre_image(nodeId); | |||||
| return env_->get_node_map(g, h).pre_image(nodeId); | |||||
| } | } | ||||
| double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { | double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { | ||||
| return this->env.get_node_map(g, h).induced_cost(); | |||||
| return env_->get_node_map(g, h).induced_cost(); | |||||
| } | } | ||||
| std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { | std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { | ||||
| std::vector<pair<std::size_t, std::size_t>> res; | std::vector<pair<std::size_t, std::size_t>> res; | ||||
| std::vector<ged::NodeMap::Assignment> relation; | std::vector<ged::NodeMap::Assignment> relation; | ||||
| this->env.get_node_map(g, h).as_relation(relation); | |||||
| env_->get_node_map(g, h).as_relation(relation); | |||||
| for (const auto & assignment : relation) { | for (const auto & assignment : relation) { | ||||
| res.push_back(std::make_pair(assignment.first, assignment.second)); | res.push_back(std::make_pair(assignment.first, assignment.second)); | ||||
| } | } | ||||
| @@ -493,11 +502,11 @@ std::vector<std::vector<unsigned long int>> PyGEDEnv::getAllMap(std::size_t g, s | |||||
| } | } | ||||
| double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { | double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { | ||||
| return this->env.get_runtime(g, h); | |||||
| return env_->get_runtime(g, h); | |||||
| } | } | ||||
| bool PyGEDEnv::quasimetricCosts() const { | bool PyGEDEnv::quasimetricCosts() const { | ||||
| return this->env.quasimetric_costs(); | |||||
| return env_->quasimetric_costs(); | |||||
| } | } | ||||
| std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { | std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { | ||||
| @@ -542,73 +551,99 @@ std::vector<std::vector<double>> PyGEDEnv::hungarianLSAPE(std::vector<std::vecto | |||||
| } | } | ||||
| std::size_t PyGEDEnv::getNumNodeLabels() const { | std::size_t PyGEDEnv::getNumNodeLabels() const { | ||||
| return this->env.num_node_labels(); | |||||
| return env_->num_node_labels(); | |||||
| } | } | ||||
| std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { | std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { | ||||
| return this->env.get_node_label(label_id); | |||||
| return env_->get_node_label(label_id); | |||||
| } | } | ||||
| std::size_t PyGEDEnv::getNumEdgeLabels() const { | std::size_t PyGEDEnv::getNumEdgeLabels() const { | ||||
| return this->env.num_edge_labels(); | |||||
| return env_->num_edge_labels(); | |||||
| } | } | ||||
| std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { | std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { | ||||
| return this->env.get_edge_label(label_id); | |||||
| return env_->get_edge_label(label_id); | |||||
| } | } | ||||
| // std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { | // std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { | ||||
| // return this->env.get_num_nodes(graph_id); | |||||
| // return env_->get_num_nodes(graph_id); | |||||
| // } | // } | ||||
| double PyGEDEnv::getAvgNumNodes() const { | double PyGEDEnv::getAvgNumNodes() const { | ||||
| return this->env.get_avg_num_nodes(); | |||||
| return env_->get_avg_num_nodes(); | |||||
| } | } | ||||
| double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { | double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { | ||||
| return this->env.node_rel_cost(node_label_1, node_label_2); | |||||
| return env_->node_rel_cost(node_label_1, node_label_2); | |||||
| } | } | ||||
| double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { | double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { | ||||
| return this->env.node_del_cost(node_label); | |||||
| return env_->node_del_cost(node_label); | |||||
| } | } | ||||
| double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { | double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { | ||||
| return this->env.node_ins_cost(node_label); | |||||
| return env_->node_ins_cost(node_label); | |||||
| } | } | ||||
| std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { | std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { | ||||
| return this->env.median_node_label(node_labels); | |||||
| return env_->median_node_label(node_labels); | |||||
| } | } | ||||
| double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { | double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { | ||||
| return this->env.edge_rel_cost(edge_label_1, edge_label_2); | |||||
| return env_->edge_rel_cost(edge_label_1, edge_label_2); | |||||
| } | } | ||||
| double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { | double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { | ||||
| return this->env.edge_del_cost(edge_label); | |||||
| return env_->edge_del_cost(edge_label); | |||||
| } | } | ||||
| double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { | double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { | ||||
| return this->env.edge_ins_cost(edge_label); | |||||
| return env_->edge_ins_cost(edge_label); | |||||
| } | } | ||||
| std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { | std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { | ||||
| return this->env.median_edge_label(edge_labels); | |||||
| return env_->median_edge_label(edge_labels); | |||||
| } | } | ||||
| std::string PyGEDEnv::getInitType() const { | std::string PyGEDEnv::getInitType() const { | ||||
| return initOptionsToString(this->env.get_init_type()); | |||||
| return initOptionsToString(env_->get_init_type()); | |||||
| } | } | ||||
| void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||||
| ged::NodeMap node_map = this->env.get_node_map(g_id, h_id); | |||||
| this->env.compute_induced_cost(g_id, h_id, node_map); | |||||
| double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const { | |||||
| ged::NodeMap node_map = ged::NodeMap(env_->get_num_nodes(g_id), env_->get_num_nodes(h_id)); | |||||
| for (const auto & assignment : relation) { | |||||
| node_map.add_assignment(assignment.first, assignment.second); | |||||
| // std::cout << assignment.first << assignment.second << endl; | |||||
| } | |||||
| const std::vector<ged::GEDGraph::NodeID> forward_map = node_map.get_forward_map(); | |||||
| for (std::size_t i{0}; i < node_map.num_source_nodes(); i++) { | |||||
| if (forward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||||
| node_map.add_assignment(i, ged::GEDGraph::dummy_node()); | |||||
| } | |||||
| } | |||||
| const std::vector<ged::GEDGraph::NodeID> backward_map = node_map.get_backward_map(); | |||||
| for (std::size_t i{0}; i < node_map.num_target_nodes(); i++) { | |||||
| if (backward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||||
| node_map.add_assignment(ged::GEDGraph::dummy_node(), i); | |||||
| } | |||||
| } | |||||
| // for (auto & map : node_map.get_forward_map()) { | |||||
| // std::cout << map << ", "; | |||||
| // } | |||||
| // std::cout << endl; | |||||
| // for (auto & map : node_map.get_backward_map()) { | |||||
| // std::cout << map << ", "; | |||||
| // } | |||||
| env_->compute_induced_cost(g_id, h_id, node_map); | |||||
| return node_map.induced_cost(); | |||||
| } | } | ||||
| // double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { | // double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { | ||||
| // return this->env.ged_data_node_cost(label1, label2); | |||||
| // return env_->ged_data_node_cost(label1, label2); | |||||
| // } | // } | ||||
| @@ -630,7 +665,7 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||||
| /*loadGXLGraph(pathFolder, pathXML); | /*loadGXLGraph(pathFolder, pathXML); | ||||
| std::vector<std::size_t> graph_ids = getAllGraphIds(); | std::vector<std::size_t> graph_ids = getAllGraphIds(); | ||||
| std::size_t median_id = this->env.add_graph("median", ""); | |||||
| std::size_t median_id = env_->add_graph("median", ""); | |||||
| initEnv(initOption); | initEnv(initOption); | ||||
| @@ -640,10 +675,10 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||||
| median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); | median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); | ||||
| median_estimator.run(graph_ids, median_id); | median_estimator.run(graph_ids, median_id); | ||||
| std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); | std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); | ||||
| this->env.save_as_gxl_graph(median_id, gxl_file_name);*/ | |||||
| env_->save_as_gxl_graph(median_id, gxl_file_name);*/ | |||||
| /*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); | /*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); | ||||
| save_letter_graph_as_tikz_file(this->env.get_graph(median_id), tikz_file_name);*/ | |||||
| save_letter_graph_as_tikz_file(env_->get_graph(median_id), tikz_file_name);*/ | |||||
| //} | //} | ||||
| } | } | ||||
| @@ -12,4 +12,4 @@ from gklearn.kernels.structural_sp import StructuralSP | |||||
| from gklearn.kernels.shortest_path import ShortestPath | from gklearn.kernels.shortest_path import ShortestPath | ||||
| from gklearn.kernels.path_up_to_h import PathUpToH | from gklearn.kernels.path_up_to_h import PathUpToH | ||||
| from gklearn.kernels.treelet import Treelet | from gklearn.kernels.treelet import Treelet | ||||
| from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman | |||||
| from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree | |||||
| @@ -18,6 +18,7 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| from functools import partial | from functools import partial | ||||
| from gklearn.utils import SpecialLabel | |||||
| from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
| from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
| from gklearn.utils import Trie | from gklearn.utils import Trie | ||||
| @@ -582,11 +583,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
| def __add_dummy_labels(self, Gn): | def __add_dummy_labels(self, Gn): | ||||
| if self.__k_func is not None: | if self.__k_func is not None: | ||||
| if len(self.__node_labels) == 0: | |||||
| for G in Gn: | |||||
| nx.set_node_attributes(G, '0', 'dummy') | |||||
| self.__node_labels.append('dummy') | |||||
| if len(self.__edge_labels) == 0: | |||||
| for G in Gn: | |||||
| nx.set_edge_attributes(G, '0', 'dummy') | |||||
| self.__edge_labels.append('dummy') | |||||
| if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.__node_labels = [SpecialLabel.DUMMY] | |||||
| if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.__edge_labels = [SpecialLabel.DUMMY] | |||||
| @@ -18,6 +18,7 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| from itertools import chain | from itertools import chain | ||||
| from gklearn.utils import SpecialLabel | |||||
| from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
| from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | ||||
| from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
| @@ -495,11 +496,11 @@ class Treelet(GraphKernel): | |||||
| def __add_dummy_labels(self, Gn): | def __add_dummy_labels(self, Gn): | ||||
| if len(self.__node_labels) == 0: | |||||
| for G in Gn: | |||||
| nx.set_node_attributes(G, '0', 'dummy') | |||||
| self.__node_labels.append('dummy') | |||||
| if len(self.__edge_labels) == 0: | |||||
| for G in Gn: | |||||
| nx.set_edge_attributes(G, '0', 'dummy') | |||||
| self.__edge_labels.append('dummy') | |||||
| if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.__node_labels = [SpecialLabel.DUMMY] | |||||
| if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.__edge_labels = [SpecialLabel.DUMMY] | |||||
| @@ -16,6 +16,7 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| from functools import partial | from functools import partial | ||||
| from gklearn.utils import SpecialLabel | |||||
| from gklearn.utils.parallel import parallel_gm | from gklearn.utils.parallel import parallel_gm | ||||
| from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
| @@ -32,6 +33,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| if self._verbose >= 2: | |||||
| import warnings | |||||
| warnings.warn('A part of the computation is parallelized.') | |||||
| self.__add_dummy_node_labels(self._graphs) | self.__add_dummy_node_labels(self._graphs) | ||||
| # for WL subtree kernel | # for WL subtree kernel | ||||
| @@ -55,11 +60,16 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.') | |||||
| import warnings | |||||
| warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.') | |||||
| return self._compute_gm_series() | return self._compute_gm_series() | ||||
| def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | ||||
| if self._verbose >= 2: | |||||
| import warnings | |||||
| warnings.warn('A part of the computation is parallelized.') | |||||
| self.__add_dummy_node_labels(g_list + [g1]) | self.__add_dummy_node_labels(g_list + [g1]) | ||||
| # for WL subtree kernel | # for WL subtree kernel | ||||
| @@ -83,8 +93,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.') | |||||
| return self._compute_gm_imap_unordered() | |||||
| import warnings | |||||
| warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.') | |||||
| return self._compute_kernel_list_series(g1, g_list) | |||||
| def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
| @@ -459,7 +470,14 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
| def __add_dummy_node_labels(self, Gn): | def __add_dummy_node_labels(self, Gn): | ||||
| if len(self.__node_labels) == 0: | |||||
| for G in Gn: | |||||
| nx.set_node_attributes(G, '0', 'dummy') | |||||
| self.__node_labels.append('dummy') | |||||
| if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.__node_labels = [SpecialLabel.DUMMY] | |||||
| class WLSubtree(WeisfeilerLehman): | |||||
| def __init__(self, **kwargs): | |||||
| kwargs['base_kernel'] = 'subtree' | |||||
| super().__init__(**kwargs) | |||||
| @@ -18,6 +18,7 @@ from gklearn.ged.median import MedianGraphEstimator | |||||
| from gklearn.ged.median import constant_node_costs,mge_options_to_string | from gklearn.ged.median import constant_node_costs,mge_options_to_string | ||||
| from gklearn.gedlib import librariesImport, gedlibpy | from gklearn.gedlib import librariesImport, gedlibpy | ||||
| from gklearn.utils import Timer | from gklearn.utils import Timer | ||||
| from gklearn.utils.utils import get_graph_kernel_by_name | |||||
| # from gklearn.utils.dataset import Dataset | # from gklearn.utils.dataset import Dataset | ||||
| class MedianPreimageGenerator(PreimageGenerator): | class MedianPreimageGenerator(PreimageGenerator): | ||||
| @@ -81,7 +82,13 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| def run(self): | def run(self): | ||||
| self.__set_graph_kernel_by_name() | |||||
| self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'], | |||||
| node_labels=self._dataset.node_labels, | |||||
| edge_labels=self._dataset.edge_labels, | |||||
| node_attrs=self._dataset.node_attrs, | |||||
| edge_attrs=self._dataset.edge_attrs, | |||||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
| kernel_options=self._kernel_options) | |||||
| # record start time. | # record start time. | ||||
| start = time.time() | start = time.time() | ||||
| @@ -180,6 +187,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| results['itrs'] = self.__itrs | results['itrs'] = self.__itrs | ||||
| results['converged'] = self.__converged | results['converged'] = self.__converged | ||||
| results['num_updates_ecc'] = self.__num_updates_ecc | results['num_updates_ecc'] = self.__num_updates_ecc | ||||
| results['mge'] = {} | |||||
| results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() | |||||
| results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() | |||||
| results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() | |||||
| return results | return results | ||||
| @@ -653,27 +664,27 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| ged_env.init(init_option=self.__ged_options['init_option']) | ged_env.init(init_option=self.__ged_options['init_option']) | ||||
| # Set up the median graph estimator. | # Set up the median graph estimator. | ||||
| mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||||
| mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
| self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||||
| self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
| options = self.__mge_options.copy() | options = self.__mge_options.copy() | ||||
| if not 'seed' in options: | if not 'seed' in options: | ||||
| options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | ||||
| # Select the GED algorithm. | # Select the GED algorithm. | ||||
| mge.set_options(mge_options_to_string(options)) | |||||
| mge.set_label_names(node_labels=self._dataset.node_labels, | |||||
| self.__mge.set_options(mge_options_to_string(options)) | |||||
| self.__mge.set_label_names(node_labels=self._dataset.node_labels, | |||||
| edge_labels=self._dataset.edge_labels, | edge_labels=self._dataset.edge_labels, | ||||
| node_attrs=self._dataset.node_attrs, | node_attrs=self._dataset.node_attrs, | ||||
| edge_attrs=self._dataset.edge_attrs) | edge_attrs=self._dataset.edge_attrs) | ||||
| mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
| mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
| self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
| self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
| # Run the estimator. | # Run the estimator. | ||||
| mge.run(graph_ids, set_median_id, gen_median_id) | |||||
| self.__mge.run(graph_ids, set_median_id, gen_median_id) | |||||
| # Get SODs. | # Get SODs. | ||||
| self.__sod_set_median = mge.get_sum_of_distances('initialized') | |||||
| self.__sod_gen_median = mge.get_sum_of_distances('converged') | |||||
| self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') | |||||
| self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') | |||||
| # Get median graphs. | # Get median graphs. | ||||
| self.__set_median = ged_env.get_nx_graph(set_median_id) | self.__set_median = ged_env.get_nx_graph(set_median_id) | ||||
| @@ -722,43 +733,6 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
| print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | ||||
| print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | ||||
| print('distance in kernel space for each graph in median set:', k_dis_median_set) | print('distance in kernel space for each graph in median set:', k_dis_median_set) | ||||
| def __set_graph_kernel_by_name(self): | |||||
| if self._kernel_options['name'] == 'ShortestPath': | |||||
| from gklearn.kernels import ShortestPath | |||||
| self._graph_kernel = ShortestPath(node_labels=self._dataset.node_labels, | |||||
| node_attrs=self._dataset.node_attrs, | |||||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
| **self._kernel_options) | |||||
| elif self._kernel_options['name'] == 'StructuralSP': | |||||
| from gklearn.kernels import StructuralSP | |||||
| self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels, | |||||
| edge_labels=self._dataset.edge_labels, | |||||
| node_attrs=self._dataset.node_attrs, | |||||
| edge_attrs=self._dataset.edge_attrs, | |||||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
| **self._kernel_options) | |||||
| elif self._kernel_options['name'] == 'PathUpToH': | |||||
| from gklearn.kernels import PathUpToH | |||||
| self._graph_kernel = PathUpToH(node_labels=self._dataset.node_labels, | |||||
| edge_labels=self._dataset.edge_labels, | |||||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
| **self._kernel_options) | |||||
| elif self._kernel_options['name'] == 'Treelet': | |||||
| from gklearn.kernels import Treelet | |||||
| self._graph_kernel = Treelet(node_labels=self._dataset.node_labels, | |||||
| edge_labels=self._dataset.edge_labels, | |||||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
| **self._kernel_options) | |||||
| elif self._kernel_options['name'] == 'WeisfeilerLehman': | |||||
| from gklearn.kernels import WeisfeilerLehman | |||||
| self._graph_kernel = WeisfeilerLehman(node_labels=self._dataset.node_labels, | |||||
| edge_labels=self._dataset.edge_labels, | |||||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
| **self._kernel_options) | |||||
| else: | |||||
| raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WeisfeilerLehman".') | |||||
| # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | ||||
| @@ -25,7 +25,7 @@ import networkx as nx | |||||
| import os | import os | ||||
| def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False): | |||||
| def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): | |||||
| import os.path | import os.path | ||||
| from gklearn.preimage import MedianPreimageGenerator | from gklearn.preimage import MedianPreimageGenerator | ||||
| from gklearn.utils import split_dataset_by_target | from gklearn.utils import split_dataset_by_target | ||||
| @@ -38,7 +38,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| dataset_all.trim_dataset(edge_required=edge_required) | dataset_all.trim_dataset(edge_required=edge_required) | ||||
| if irrelevant_labels is not None: | if irrelevant_labels is not None: | ||||
| dataset_all.remove_labels(**irrelevant_labels) | dataset_all.remove_labels(**irrelevant_labels) | ||||
| # dataset_all.cut_graphs(range(0, 10)) | |||||
| if cut_range is not None: | |||||
| dataset_all.cut_graphs(cut_range) | |||||
| datasets = split_dataset_by_target(dataset_all) | datasets = split_dataset_by_target(dataset_all) | ||||
| if save_results: | if save_results: | ||||
| @@ -57,6 +58,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| itrs_list = [] | itrs_list = [] | ||||
| converged_list = [] | converged_list = [] | ||||
| num_updates_ecc_list = [] | num_updates_ecc_list = [] | ||||
| mge_decrease_order_list = [] | |||||
| mge_increase_order_list = [] | |||||
| mge_converged_order_list = [] | |||||
| nb_sod_sm2gm = [0, 0, 0] | nb_sod_sm2gm = [0, 0, 0] | ||||
| nb_dis_k_sm2gm = [0, 0, 0] | nb_dis_k_sm2gm = [0, 0, 0] | ||||
| nb_dis_k_gi2sm = [0, 0, 0] | nb_dis_k_gi2sm = [0, 0, 0] | ||||
| @@ -148,7 +152,10 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| results['runtime_precompute_gm'], results['runtime_optimize_ec'], | results['runtime_precompute_gm'], results['runtime_optimize_ec'], | ||||
| results['runtime_generate_preimage'], results['runtime_total'], | results['runtime_generate_preimage'], results['runtime_total'], | ||||
| results['itrs'], results['converged'], | results['itrs'], results['converged'], | ||||
| results['num_updates_ecc']]) | |||||
| results['num_updates_ecc'], | |||||
| results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||||
| results['mge']['num_increase_order'] > 0, | |||||
| results['mge']['num_converged_descents'] > 0]) | |||||
| f_detail.close() | f_detail.close() | ||||
| # compute result summary. | # compute result summary. | ||||
| @@ -164,6 +171,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| itrs_list.append(results['itrs']) | itrs_list.append(results['itrs']) | ||||
| converged_list.append(results['converged']) | converged_list.append(results['converged']) | ||||
| num_updates_ecc_list.append(results['num_updates_ecc']) | num_updates_ecc_list.append(results['num_updates_ecc']) | ||||
| mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0) | |||||
| mge_increase_order_list.append(results['mge']['num_increase_order'] > 0) | |||||
| mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0) | |||||
| # # SOD SM -> GM | # # SOD SM -> GM | ||||
| if results['sod_set_median'] > results['sod_gen_median']: | if results['sod_set_median'] > results['sod_gen_median']: | ||||
| nb_sod_sm2gm[0] += 1 | nb_sod_sm2gm[0] += 1 | ||||
| @@ -210,7 +220,11 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| results['runtime_precompute_gm'], results['runtime_optimize_ec'], | results['runtime_precompute_gm'], results['runtime_optimize_ec'], | ||||
| results['runtime_generate_preimage'], results['runtime_total'], | results['runtime_generate_preimage'], results['runtime_total'], | ||||
| results['itrs'], results['converged'], | results['itrs'], results['converged'], | ||||
| results['num_updates_ecc'], nb_sod_sm2gm, | |||||
| results['num_updates_ecc'], | |||||
| results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||||
| results['mge']['num_increase_order'] > 0, | |||||
| results['mge']['num_converged_descents'] > 0, | |||||
| nb_sod_sm2gm, | |||||
| nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) | nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) | ||||
| f_summary.close() | f_summary.close() | ||||
| @@ -256,6 +270,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| itrs_mean = np.mean(itrs_list) | itrs_mean = np.mean(itrs_list) | ||||
| num_converged = np.sum(converged_list) | num_converged = np.sum(converged_list) | ||||
| num_updates_ecc_mean = np.mean(num_updates_ecc_list) | num_updates_ecc_mean = np.mean(num_updates_ecc_list) | ||||
| num_mge_decrease_order = np.sum(mge_decrease_order_list) | |||||
| num_mge_increase_order = np.sum(mge_increase_order_list) | |||||
| num_mge_converged = np.sum(mge_converged_order_list) | |||||
| sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) | sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) | ||||
| dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) | dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) | ||||
| dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) | dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) | ||||
| @@ -270,7 +287,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
| dis_k_gi2sm_mean, dis_k_gi2gm_mean, | dis_k_gi2sm_mean, dis_k_gi2gm_mean, | ||||
| time_precompute_gm_mean, time_optimize_ec_mean, | time_precompute_gm_mean, time_optimize_ec_mean, | ||||
| time_generate_mean, time_total_mean, itrs_mean, | time_generate_mean, time_total_mean, itrs_mean, | ||||
| num_converged, num_updates_ecc_mean]) | |||||
| num_converged, num_updates_ecc_mean, | |||||
| num_mge_decrease_order, num_mge_increase_order, | |||||
| num_mge_converged]) | |||||
| f_summary.close() | f_summary.close() | ||||
| # save total pairwise kernel distances. | # save total pairwise kernel distances. | ||||
| @@ -300,7 +319,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||||
| 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | ||||
| 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', | 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', | ||||
| 'time optimize ec', 'time generate preimage', 'time total', | 'time optimize ec', 'time generate preimage', 'time total', | ||||
| 'itrs', 'converged', 'num updates ecc']) | |||||
| 'itrs', 'converged', 'num updates ecc', 'mge decrease order', | |||||
| 'mge increase order', 'mge converged']) | |||||
| f_detail.close() | f_detail.close() | ||||
| # fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | # fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | ||||
| @@ -312,7 +332,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||||
| 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | ||||
| 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', | 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', | ||||
| 'time generate preimage', 'time total', 'itrs', 'num converged', | 'time generate preimage', 'time total', 'itrs', 'num converged', | ||||
| 'num updates ecc', '# SOD SM -> GM', '# dis_k SM -> GM', | |||||
| 'num updates ecc', 'mge num decrease order', 'mge num increase order', | |||||
| 'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM', | |||||
| '# dis_k gi -> SM', '# dis_k gi -> GM']) | '# dis_k gi -> SM', '# dis_k gi -> GM']) | ||||
| # 'repeats better SOD SM -> GM', | # 'repeats better SOD SM -> GM', | ||||
| # 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | # 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | ||||
| @@ -418,6 +439,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel=' | |||||
| Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | ||||
| height=4, base_kernel='subtree', parallel=None, | height=4, base_kernel='subtree', parallel=None, | ||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| else: | |||||
| raise Exception('The graph kernel "', graph_kernel, '" is not defined.') | |||||
| # normalization | # normalization | ||||
| Kmatrix_diag = Kmatrix.diagonal().copy() | Kmatrix_diag = Kmatrix.diagonal().copy() | ||||
| @@ -260,20 +260,20 @@ def test_Treelet(ds_name, parallel): | |||||
| @pytest.mark.parametrize('ds_name', ['Acyclic']) | @pytest.mark.parametrize('ds_name', ['Acyclic']) | ||||
| #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) | #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) | ||||
| @pytest.mark.parametrize('base_kernel', ['subtree']) | |||||
| # @pytest.mark.parametrize('base_kernel', ['subtree']) | |||||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | ||||
| def test_WeisfeilerLehman(ds_name, parallel, base_kernel): | |||||
| """Test Weisfeiler-Lehman kernel. | |||||
| def test_WLSubtree(ds_name, parallel): | |||||
| """Test Weisfeiler-Lehman subtree kernel. | |||||
| """ | """ | ||||
| from gklearn.kernels import WeisfeilerLehman | |||||
| from gklearn.kernels import WLSubtree | |||||
| dataset = chooseDataset(ds_name) | dataset = chooseDataset(ds_name) | ||||
| try: | try: | ||||
| graph_kernel = WeisfeilerLehman(node_labels=dataset.node_labels, | |||||
| graph_kernel = WLSubtree(node_labels=dataset.node_labels, | |||||
| edge_labels=dataset.edge_labels, | edge_labels=dataset.edge_labels, | ||||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | ds_infos=dataset.get_dataset_infos(keys=['directed']), | ||||
| height=2, base_kernel=base_kernel) | |||||
| height=2) | |||||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | ||||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | ||||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | ||||
| @@ -20,4 +20,5 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||||
| from gklearn.utils.timer import Timer | from gklearn.utils.timer import Timer | ||||
| from gklearn.utils.utils import get_graph_kernel_by_name | from gklearn.utils.utils import get_graph_kernel_by_name | ||||
| from gklearn.utils.utils import compute_gram_matrices_by_class | from gklearn.utils.utils import compute_gram_matrices_by_class | ||||
| from gklearn.utils.utils import SpecialLabel | |||||
| from gklearn.utils.trie import Trie | from gklearn.utils.trie import Trie | ||||
| @@ -56,13 +56,14 @@ class Dataset(object): | |||||
| self.__node_attrs = label_names['node_attrs'] | self.__node_attrs = label_names['node_attrs'] | ||||
| self.__edge_labels = label_names['edge_labels'] | self.__edge_labels = label_names['edge_labels'] | ||||
| self.__edge_attrs = label_names['edge_attrs'] | self.__edge_attrs = label_names['edge_attrs'] | ||||
| self.clean_labels() | |||||
| def load_graphs(self, graphs, targets=None): | def load_graphs(self, graphs, targets=None): | ||||
| # this has to be followed by set_labels(). | # this has to be followed by set_labels(). | ||||
| self.__graphs = graphs | self.__graphs = graphs | ||||
| self.__targets = targets | self.__targets = targets | ||||
| # self.set_labels_attrs() | |||||
| # self.set_labels_attrs() # @todo | |||||
| def load_predefined_dataset(self, ds_name): | def load_predefined_dataset(self, ds_name): | ||||
| @@ -89,6 +90,9 @@ class Dataset(object): | |||||
| elif ds_name == 'Cuneiform': | elif ds_name == 'Cuneiform': | ||||
| ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
| elif ds_name == 'DD': | |||||
| ds_file = current_path + '../../datasets/DD/DD_A.txt' | |||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
| elif ds_name == 'Fingerprint': | elif ds_name == 'Fingerprint': | ||||
| ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
| @@ -113,6 +117,9 @@ class Dataset(object): | |||||
| elif ds_name == 'MUTAG': | elif ds_name == 'MUTAG': | ||||
| ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | ||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
| elif ds_name == 'PAH': | |||||
| ds_file = current_path + '../../datasets/PAH/dataset.ds' | |||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
| elif ds_name == 'SYNTHETIC': | elif ds_name == 'SYNTHETIC': | ||||
| pass | pass | ||||
| elif ds_name == 'SYNTHETICnew': | elif ds_name == 'SYNTHETICnew': | ||||
| @@ -120,11 +127,14 @@ class Dataset(object): | |||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
| elif ds_name == 'Synthie': | elif ds_name == 'Synthie': | ||||
| pass | pass | ||||
| else: | |||||
| raise Exception('The dataset name "', ds_name, '" is not pre-defined.') | |||||
| self.__node_labels = label_names['node_labels'] | self.__node_labels = label_names['node_labels'] | ||||
| self.__node_attrs = label_names['node_attrs'] | self.__node_attrs = label_names['node_attrs'] | ||||
| self.__edge_labels = label_names['edge_labels'] | self.__edge_labels = label_names['edge_labels'] | ||||
| self.__edge_attrs = label_names['edge_attrs'] | self.__edge_attrs = label_names['edge_attrs'] | ||||
| self.clean_labels() | |||||
| def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | ||||
| @@ -138,27 +148,27 @@ class Dataset(object): | |||||
| # @todo: remove labels which have only one possible values. | # @todo: remove labels which have only one possible values. | ||||
| if node_labels is None: | if node_labels is None: | ||||
| self.__node_labels = self.__graphs[0].graph['node_labels'] | self.__node_labels = self.__graphs[0].graph['node_labels'] | ||||
| # # graphs are considered node unlabeled if all nodes have the same label. | |||||
| # infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||||
| # # graphs are considered node unlabeled if all nodes have the same label. | |||||
| # infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||||
| if node_attrs is None: | if node_attrs is None: | ||||
| self.__node_attrs = self.__graphs[0].graph['node_attrs'] | self.__node_attrs = self.__graphs[0].graph['node_attrs'] | ||||
| # for G in Gn: | |||||
| # for n in G.nodes(data=True): | |||||
| # if 'attributes' in n[1]: | |||||
| # return len(n[1]['attributes']) | |||||
| # return 0 | |||||
| # for G in Gn: | |||||
| # for n in G.nodes(data=True): | |||||
| # if 'attributes' in n[1]: | |||||
| # return len(n[1]['attributes']) | |||||
| # return 0 | |||||
| if edge_labels is None: | if edge_labels is None: | ||||
| self.__edge_labels = self.__graphs[0].graph['edge_labels'] | self.__edge_labels = self.__graphs[0].graph['edge_labels'] | ||||
| # # graphs are considered edge unlabeled if all edges have the same label. | |||||
| # infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||||
| # # graphs are considered edge unlabeled if all edges have the same label. | |||||
| # infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||||
| if edge_attrs is None: | if edge_attrs is None: | ||||
| self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | ||||
| # for G in Gn: | |||||
| # if nx.number_of_edges(G) > 0: | |||||
| # for e in G.edges(data=True): | |||||
| # if 'attributes' in e[2]: | |||||
| # return len(e[2]['attributes']) | |||||
| # return 0 | |||||
| # for G in Gn: | |||||
| # if nx.number_of_edges(G) > 0: | |||||
| # for e in G.edges(data=True): | |||||
| # if 'attributes' in e[2]: | |||||
| # return len(e[2]['attributes']) | |||||
| # return 0 | |||||
| def get_dataset_infos(self, keys=None): | def get_dataset_infos(self, keys=None): | ||||
| @@ -323,7 +333,7 @@ class Dataset(object): | |||||
| if self.__node_label_nums is None: | if self.__node_label_nums is None: | ||||
| self.__node_label_nums = {} | self.__node_label_nums = {} | ||||
| for node_label in self.__node_labels: | for node_label in self.__node_labels: | ||||
| self.__node_label_nums[node_label] = self.get_node_label_num(node_label) | |||||
| self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) | |||||
| infos['node_label_nums'] = self.__node_label_nums | infos['node_label_nums'] = self.__node_label_nums | ||||
| if 'edge_label_dim' in keys: | if 'edge_label_dim' in keys: | ||||
| @@ -335,7 +345,7 @@ class Dataset(object): | |||||
| if self.__edge_label_nums is None: | if self.__edge_label_nums is None: | ||||
| self.__edge_label_nums = {} | self.__edge_label_nums = {} | ||||
| for edge_label in self.__edge_labels: | for edge_label in self.__edge_labels: | ||||
| self.__edge_label_nums[edge_label] = self.get_edge_label_num(edge_label) | |||||
| self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) | |||||
| infos['edge_label_nums'] = self.__edge_label_nums | infos['edge_label_nums'] = self.__edge_label_nums | ||||
| if 'directed' in keys or 'substructures' in keys: | if 'directed' in keys or 'substructures' in keys: | ||||
| @@ -411,33 +421,95 @@ class Dataset(object): | |||||
| def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | ||||
| node_labels = [item for item in node_labels if item in self.__node_labels] | |||||
| edge_labels = [item for item in edge_labels if item in self.__edge_labels] | |||||
| node_attrs = [item for item in node_attrs if item in self.__node_attrs] | |||||
| edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs] | |||||
| for g in self.__graphs: | for g in self.__graphs: | ||||
| for nd in g.nodes(): | for nd in g.nodes(): | ||||
| for nl in node_labels: | for nl in node_labels: | ||||
| del g.nodes[nd][nl] | |||||
| del g.nodes[nd][nl] | |||||
| for na in node_attrs: | for na in node_attrs: | ||||
| del g.nodes[nd][na] | del g.nodes[nd][na] | ||||
| for ed in g.edges(): | for ed in g.edges(): | ||||
| for el in edge_labels: | for el in edge_labels: | ||||
| del g.edges[ed][el] | |||||
| del g.edges[ed][el] | |||||
| for ea in edge_attrs: | for ea in edge_attrs: | ||||
| del g.edges[ed][ea] | |||||
| del g.edges[ed][ea] | |||||
| if len(node_labels) > 0: | if len(node_labels) > 0: | ||||
| self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||||
| self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||||
| if len(edge_labels) > 0: | if len(edge_labels) > 0: | ||||
| self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||||
| self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||||
| if len(node_attrs) > 0: | if len(node_attrs) > 0: | ||||
| self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||||
| self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||||
| if len(edge_attrs) > 0: | if len(edge_attrs) > 0: | ||||
| self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||||
| self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||||
def clean_labels(self):
    """Drop label and attribute names that take fewer than two values.

    For every name listed in the node labels, edge labels, node attributes
    and edge attributes of the dataset, the values stored under that name
    are collected over all graphs. A name with fewer than two distinct
    values is removed from every node (or edge) of every graph and from the
    corresponding name list; the remaining names keep their original order.
    """

    def prune(names, get_attributes, data_view):
        # Return the names that take at least two distinct values and strip
        # every other name from the graphs' node/edge data.
        kept = []
        for name in names:
            values = set()
            for G in self.__graphs:
                values.update(get_attributes(G, name).values())
                if len(values) > 1:
                    # Two distinct values are enough; skip remaining graphs.
                    break
            if len(values) > 1:
                kept.append(name)
                continue
            for G in self.__graphs:
                for *_, attrs in data_view(G):
                    # pop() rather than del: the name may be missing on some
                    # (or all) items, which must not raise KeyError.
                    attrs.pop(name, None)
        return kept

    def node_data(G):
        return G.nodes(data=True)

    def edge_data(G):
        return G.edges(data=True)

    self.__node_labels = prune(self.__node_labels, nx.get_node_attributes, node_data)
    self.__edge_labels = prune(self.__edge_labels, nx.get_edge_attributes, edge_data)
    self.__node_attrs = prune(self.__node_attrs, nx.get_node_attributes, node_data)
    self.__edge_attrs = prune(self.__edge_attrs, nx.get_edge_attributes, edge_data)
| def cut_graphs(self, range_): | def cut_graphs(self, range_): | ||||
| self.__graphs = [self.__graphs[i] for i in range_] | self.__graphs = [self.__graphs[i] for i in range_] | ||||
| if self.__targets is not None: | if self.__targets is not None: | ||||
| self.__targets = [self.__targets[i] for i in range_] | self.__targets = [self.__targets[i] for i in range_] | ||||
| # @todo | |||||
| # self.set_labels_attrs() | |||||
| self.clean_labels() | |||||
| def trim_dataset(self, edge_required=False): | def trim_dataset(self, edge_required=False): | ||||
| @@ -448,8 +520,7 @@ class Dataset(object): | |||||
| idx = [p[0] for p in trimed_pairs] | idx = [p[0] for p in trimed_pairs] | ||||
| self.__graphs = [p[1] for p in trimed_pairs] | self.__graphs = [p[1] for p in trimed_pairs] | ||||
| self.__targets = [self.__targets[i] for i in idx] | self.__targets = [self.__targets[i] for i in idx] | ||||
| # @todo | |||||
| # self.set_labels_attrs() | |||||
| self.clean_labels() | |||||
| def __get_dataset_size(self): | def __get_dataset_size(self): | ||||
| @@ -652,4 +723,5 @@ def split_dataset_by_target(dataset): | |||||
| sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | ||||
| sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | ||||
| datasets.append(sub_dataset) | datasets.append(sub_dataset) | ||||
| # @todo: clean_labels? | |||||
| return datasets | return datasets | ||||
| @@ -63,7 +63,7 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs): | |||||
| return data, y, label_names | return data, y, label_names | ||||
| def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None): | |||||
| def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', **kwargs): | |||||
| """Save list of graphs. | """Save list of graphs. | ||||
| """ | """ | ||||
| import os | import os | ||||
| @@ -73,22 +73,22 @@ def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=Non | |||||
| if not os.path.exists(dirname_ds) : | if not os.path.exists(dirname_ds) : | ||||
| os.makedirs(dirname_ds) | os.makedirs(dirname_ds) | ||||
| if xparams is not None and 'graph_dir' in xparams: | |||||
| graph_dir = xparams['graph_dir'] + '/' | |||||
| if 'graph_dir' in kwargs: | |||||
| graph_dir = kwargs['graph_dir'] + '/' | |||||
| if not os.path.exists(graph_dir): | if not os.path.exists(graph_dir): | ||||
| os.makedirs(graph_dir) | os.makedirs(graph_dir) | ||||
| del kwargs['graph_dir'] | |||||
| else: | else: | ||||
| graph_dir = dirname_ds | graph_dir = dirname_ds | ||||
| if group == 'xml' and gformat == 'gxl': | if group == 'xml' and gformat == 'gxl': | ||||
| kwargs = {'method': xparams['method']} if xparams is not None else {} | |||||
| with open(filename + '.xml', 'w') as fgroup: | with open(filename + '.xml', 'w') as fgroup: | ||||
| fgroup.write("<?xml version=\"1.0\"?>") | fgroup.write("<?xml version=\"1.0\"?>") | ||||
| fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") | fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") | ||||
| fgroup.write("\n<GraphCollection>") | fgroup.write("\n<GraphCollection>") | ||||
| for idx, g in enumerate(Gn): | for idx, g in enumerate(Gn): | ||||
| fname_tmp = "graph" + str(idx) + ".gxl" | fname_tmp = "graph" + str(idx) + ".gxl" | ||||
| saveGXL(g, graph_dir + fname_tmp, **kwargs) | |||||
| save_gxl(g, graph_dir + fname_tmp, **kwargs) | |||||
| fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | ||||
| fgroup.write("\n</GraphCollection>") | fgroup.write("\n</GraphCollection>") | ||||
| fgroup.close() | fgroup.close() | ||||
| @@ -226,7 +226,7 @@ def load_gxl(filename): # @todo: directed graphs. | |||||
| return g, label_names | return g, label_names | ||||
| def saveGXL(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||||
| def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||||
| if method == 'default': | if method == 'default': | ||||
| gxl_file = open(filename, 'w') | gxl_file = open(filename, 'w') | ||||
| gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | ||||
| @@ -1,6 +1,7 @@ | |||||
| import networkx as nx | import networkx as nx | ||||
| import numpy as np | import numpy as np | ||||
| from copy import deepcopy | from copy import deepcopy | ||||
| from enum import Enum, auto | |||||
| #from itertools import product | #from itertools import product | ||||
| # from tqdm import tqdm | # from tqdm import tqdm | ||||
| @@ -299,21 +300,59 @@ def get_edge_labels(Gn, edge_label): | |||||
| def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): | def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): | ||||
| if name == 'structuralspkernel': | |||||
| if name == 'ShortestPath': | |||||
| from gklearn.kernels import ShortestPath | |||||
| graph_kernel = ShortestPath(node_labels=node_labels, | |||||
| node_attrs=node_attrs, | |||||
| ds_infos=ds_infos, | |||||
| **kernel_options) | |||||
| elif name == 'StructuralSP': | |||||
| from gklearn.kernels import StructuralSP | from gklearn.kernels import StructuralSP | ||||
| graph_kernel = StructuralSP(node_labels=node_labels, edge_labels=edge_labels, | |||||
| node_attrs=node_attrs, edge_attrs=edge_attrs, | |||||
| ds_infos=ds_infos, **kernel_options) | |||||
| graph_kernel = StructuralSP(node_labels=node_labels, | |||||
| edge_labels=edge_labels, | |||||
| node_attrs=node_attrs, | |||||
| edge_attrs=edge_attrs, | |||||
| ds_infos=ds_infos, | |||||
| **kernel_options) | |||||
| elif name == 'PathUpToH': | |||||
| from gklearn.kernels import PathUpToH | |||||
| graph_kernel = PathUpToH(node_labels=node_labels, | |||||
| edge_labels=edge_labels, | |||||
| ds_infos=ds_infos, | |||||
| **kernel_options) | |||||
| elif name == 'Treelet': | |||||
| from gklearn.kernels import Treelet | |||||
| graph_kernel = Treelet(node_labels=node_labels, | |||||
| edge_labels=edge_labels, | |||||
| ds_infos=ds_infos, | |||||
| **kernel_options) | |||||
| elif name == 'WLSubtree': | |||||
| from gklearn.kernels import WLSubtree | |||||
| graph_kernel = WLSubtree(node_labels=node_labels, | |||||
| edge_labels=edge_labels, | |||||
| ds_infos=ds_infos, | |||||
| **kernel_options) | |||||
| elif name == 'WeisfeilerLehman': | |||||
| from gklearn.kernels import WeisfeilerLehman | |||||
| graph_kernel = WeisfeilerLehman(node_labels=node_labels, | |||||
| edge_labels=edge_labels, | |||||
| ds_infos=ds_infos, | |||||
| **kernel_options) | |||||
| else: | |||||
| raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".') | |||||
| return graph_kernel | return graph_kernel | ||||
| def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None): | |||||
| def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False): | |||||
| import os | |||||
| from gklearn.utils import Dataset, split_dataset_by_target | from gklearn.utils import Dataset, split_dataset_by_target | ||||
| # 1. get dataset. | # 1. get dataset. | ||||
| print('1. getting dataset...') | print('1. getting dataset...') | ||||
| dataset_all = Dataset() | dataset_all = Dataset() | ||||
| dataset_all.load_predefined_dataset(ds_name) | dataset_all.load_predefined_dataset(ds_name) | ||||
| dataset_all.trim_dataset(edge_required=edge_required) | |||||
| if not irrelevant_labels is None: | if not irrelevant_labels is None: | ||||
| dataset_all.remove_labels(**irrelevant_labels) | dataset_all.remove_labels(**irrelevant_labels) | ||||
| # dataset_all.cut_graphs(range(0, 10)) | # dataset_all.cut_graphs(range(0, 10)) | ||||
| @@ -349,6 +388,8 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d | |||||
| print() | print() | ||||
| print('4. saving results...') | print('4. saving results...') | ||||
| if save_results: | if save_results: | ||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | ||||
| print('\ncomplete.') | print('\ncomplete.') | ||||
| @@ -424,4 +465,10 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||||
| attributes = [] | attributes = [] | ||||
| for ed, attrs in G.edges(data=True): | for ed, attrs in G.edges(data=True): | ||||
| attributes.append(tuple(attrs[aname] for aname in attr_names)) | attributes.append(tuple(attrs[aname] for aname in attr_names)) | ||||
| return attributes | |||||
| return attributes | |||||
class SpecialLabel(Enum):
    """Enumeration of special labels.

    Members can be used wherever a label with a reserved meaning is needed.
    """
    # ``auto`` must be called: assigning the bare ``auto`` class would make
    # the member's value the class object itself instead of a generated value.
    DUMMY = auto()  # The dummy label.