| @@ -0,0 +1,28 @@ | |||
| environment: | |||
| matrix: | |||
| - PYTHON: "C:\\Python35" | |||
| - PYTHON: "C:\\Python35-x64" | |||
| - PYTHON: "C:\\Python36" | |||
| - PYTHON: "C:\\Python36-x64" | |||
| - PYTHON: "C:\\Python37" | |||
| - PYTHON: "C:\\Python37-x64" | |||
| - PYTHON: "C:\\Python38" | |||
| - PYTHON: "C:\\Python38-x64" | |||
| # skip_commits: | |||
| # files: | |||
| # - "*.yml" | |||
| # - "*.rst" | |||
| # - "LICENSE" | |||
| install: | |||
| - "%PYTHON%\\python.exe -m pip install -U pip" | |||
| - "%PYTHON%\\python.exe -m pip install -U pytest" | |||
| - "%PYTHON%\\python.exe -m pip install -r requirements.txt" | |||
| - "%PYTHON%\\python.exe -m pip install wheel" | |||
| build: off | |||
| test_script: | |||
| - "%PYTHON%\\python.exe setup.py bdist_wheel" | |||
| - "%PYTHON%\\python.exe -m pytest -v gklearn/tests/" | |||
| @@ -1,5 +1,6 @@ | |||
| # graphkit-learn | |||
| [Travis CI build status](https://travis-ci.org/jajupmochi/graphkit-learn) | |||
| [AppVeyor build status](https://ci.appveyor.com/project/jajupmochi/graphkit-learn) | |||
| [Codecov coverage](https://codecov.io/gh/jajupmochi/graphkit-learn) | |||
| [Documentation status](https://graphkit-learn.readthedocs.io/en/master/?badge=master) | |||
| [PyPI version](https://badge.fury.io/py/graphkit-learn) | |||
| @@ -1 +1,2 @@ | |||
| from gklearn.ged.env.common_types import AlgorithmState | |||
| from gklearn.ged.env.common_types import AlgorithmState | |||
| from gklearn.ged.env.node_map import NodeMap | |||
| @@ -0,0 +1,80 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Wed Apr 22 11:31:26 2020 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
class NodeMap(object):
    """Node map between a source graph ``g`` and a target graph ``h``.

    The map is stored as two lists: the forward map holds, for every source
    node index, the target node it is assigned to; the backward map holds,
    for every target node index, the source node assigned to it. ``np.inf``
    is the placeholder used for "no node" in both lists and is also the
    initial value of the induced cost.
    """

    def __init__(self, num_nodes_g, num_nodes_h):
        """Create a map with every entry set to ``np.inf``.

        Parameters
        ----------
        num_nodes_g : int
            Number of entries of the forward map (source nodes).
        num_nodes_h : int
            Number of entries of the backward map (target nodes).
        """
        self.__forward_map = [np.inf] * num_nodes_g
        self.__backward_map = [np.inf] * num_nodes_h
        self.__induced_cost = np.inf

    def num_source_nodes(self):
        """Return the length of the forward map."""
        return len(self.__forward_map)

    def num_target_nodes(self):
        """Return the length of the backward map."""
        return len(self.__backward_map)

    def image(self, node):
        """Return the forward-map entry stored for source node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not smaller than the number of source nodes.
        """
        if node < len(self.__forward_map):
            return self.__forward_map[node]
        # Build ONE message string; passing the pieces as separate
        # arguments would make str(exception) render a tuple.
        raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')

    def pre_image(self, node):
        """Return the backward-map entry stored for target node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not smaller than the number of target nodes.
        """
        if node < len(self.__backward_map):
            return self.__backward_map[node]
        raise Exception('The node with ID ' + str(node) + ' is not contained in the target nodes of the node map.')

    def get_forward_map(self):
        """Return the internal forward-map list (not a copy)."""
        return self.__forward_map

    def get_backward_map(self):
        """Return the internal backward-map list (not a copy)."""
        return self.__backward_map

    def as_relation(self, relation):
        """Fill ``relation`` in place with the map as ``(i, k)`` tuples.

        ``relation`` is cleared first. It then receives one ``(i, k)`` pair
        for every forward-map entry ``k`` that is not ``np.inf``, followed by
        one ``(np.inf, k)`` pair for every backward-map entry that is
        ``np.inf``.

        Parameters
        ----------
        relation : list
            Output list; its previous content is discarded.
        """
        relation.clear()
        for i, k in enumerate(self.__forward_map):
            if k != np.inf:
                relation.append((i, k))
        for k, i in enumerate(self.__backward_map):
            if i == np.inf:
                relation.append((i, k))

    def add_assignment(self, i, k):
        """Record that source node ``i`` is assigned to target node ``k``.

        Either side may be ``np.inf``; the corresponding list is then left
        untouched.

        Raises
        ------
        Exception
            If ``i`` (resp. ``k``) is not ``np.inf`` and is not smaller than
            the number of source (resp. target) nodes.
        """
        if i != np.inf:
            if i < len(self.__forward_map):
                self.__forward_map[i] = k
            else:
                raise Exception('The node with ID ' + str(i) + ' is not contained in the source nodes of the node map.')
        if k != np.inf:
            if k < len(self.__backward_map):
                self.__backward_map[k] = i
            else:
                raise Exception('The node with ID ' + str(k) + ' is not contained in the target nodes of the node map.')

    def set_induced_cost(self, induced_cost):
        """Store ``induced_cost`` as the cost induced by this node map."""
        self.__induced_cost = induced_cost

    def induced_cost(self):
        """Return the stored induced cost (``np.inf`` until one is set)."""
        return self.__induced_cost
| @@ -7,11 +7,10 @@ Created on Mon Mar 16 17:26:40 2020 | |||
| """ | |||
| def test_median_graph_estimator(): | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from gklearn.utils import load_dataset | |||
| from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | |||
| from gklearn.gedlib import librariesImport, gedlibpy | |||
| from gklearn.preimage.utils import get_same_item_indices | |||
| from gklearn.preimage.ged import convertGraph | |||
| import multiprocessing | |||
| # estimator parameters. | |||
| @@ -22,17 +21,17 @@ def test_median_graph_estimator(): | |||
| # algorithm parameters. | |||
| algo = 'IPFP' | |||
| initial_solutions = 40 | |||
| algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1' | |||
| initial_solutions = 1 | |||
| algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||
| edit_cost_name = 'LETTER2' | |||
| edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] | |||
| ds_name = 'COIL-DEL' | |||
| ds_name = 'Letter_high' | |||
| # Load dataset. | |||
| # dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' | |||
| dataset = '../../../datasets/Letter-high/Letter-high_A.txt' | |||
| Gn, y_all = loadDataset(dataset) | |||
| Gn, y_all, label_names = load_dataset(dataset) | |||
| y_idx = get_same_item_indices(y_all) | |||
| for i, (y, values) in enumerate(y_idx.items()): | |||
| Gn_i = [Gn[val] for val in values] | |||
| @@ -43,7 +42,7 @@ def test_median_graph_estimator(): | |||
| # gedlibpy.restart_env() | |||
| ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | |||
| for G in Gn_i: | |||
| ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '') | |||
| ged_env.add_nx_graph(G, '') | |||
| graph_ids = ged_env.get_all_graph_ids() | |||
| set_median_id = ged_env.add_graph('set_median') | |||
| gen_median_id = ged_env.add_graph('gen_median') | |||
| @@ -54,11 +53,89 @@ def test_median_graph_estimator(): | |||
| mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | |||
| mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | |||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE'# @todo: std::to_string(rng()) | |||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO '# @todo: std::to_string(rng()) | |||
| # Select the GED algorithm. | |||
| algo_options = '--threads ' + str(threads) + algo_options_suffix | |||
| mge.set_options(mge_options) | |||
| mge.set_label_names(node_labels=label_names['node_labels'], | |||
| edge_labels=label_names['edge_labels'], | |||
| node_attrs=label_names['node_attrs'], | |||
| edge_attrs=label_names['edge_attrs']) | |||
| mge.set_init_method(algo, algo_options) | |||
| mge.set_descent_method(algo, algo_options) | |||
| # Run the estimator. | |||
| mge.run(graph_ids, set_median_id, gen_median_id) | |||
| # Get SODs. | |||
| sod_sm = mge.get_sum_of_distances('initialized') | |||
| sod_gm = mge.get_sum_of_distances('converged') | |||
| print('sod_sm, sod_gm: ', sod_sm, sod_gm) | |||
| # Get median graphs. | |||
| set_median = ged_env.get_nx_graph(set_median_id) | |||
| gen_median = ged_env.get_nx_graph(gen_median_id) | |||
| return set_median, gen_median | |||
| def test_median_graph_estimator_symb(): | |||
| from gklearn.utils import load_dataset | |||
| from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | |||
| from gklearn.gedlib import librariesImport, gedlibpy | |||
| from gklearn.preimage.utils import get_same_item_indices | |||
| import multiprocessing | |||
| # estimator parameters. | |||
| init_type = 'MEDOID' | |||
| num_inits = 1 | |||
| threads = multiprocessing.cpu_count() | |||
| time_limit = 60000 | |||
| # algorithm parameters. | |||
| algo = 'IPFP' | |||
| initial_solutions = 1 | |||
| algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||
| edit_cost_name = 'CONSTANT' | |||
| edit_cost_constants = [4, 4, 2, 1, 1, 1] | |||
| ds_name = 'MUTAG' | |||
| # Load dataset. | |||
| dataset = '../../../datasets/MUTAG/MUTAG_A.txt' | |||
| Gn, y_all, label_names = load_dataset(dataset) | |||
| y_idx = get_same_item_indices(y_all) | |||
| for i, (y, values) in enumerate(y_idx.items()): | |||
| Gn_i = [Gn[val] for val in values] | |||
| break | |||
| Gn_i = Gn_i[0:10] | |||
| # Set up the environment. | |||
| ged_env = gedlibpy.GEDEnv() | |||
| # gedlibpy.restart_env() | |||
| ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | |||
| for G in Gn_i: | |||
| ged_env.add_nx_graph(G, '') | |||
| graph_ids = ged_env.get_all_graph_ids() | |||
| set_median_id = ged_env.add_graph('set_median') | |||
| gen_median_id = ged_env.add_graph('gen_median') | |||
| ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES') | |||
| # Set up the estimator. | |||
| mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name)) | |||
| mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | |||
| mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | |||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE'# @todo: std::to_string(rng()) | |||
| # Select the GED algorithm. | |||
| algo_options = '--threads ' + str(threads) + algo_options_suffix | |||
| mge.set_options(mge_options) | |||
| mge.set_label_names(node_labels=label_names['node_labels'], | |||
| edge_labels=label_names['edge_labels'], | |||
| node_attrs=label_names['node_attrs'], | |||
| edge_attrs=label_names['edge_attrs']) | |||
| mge.set_init_method(algo, algo_options) | |||
| mge.set_descent_method(algo, algo_options) | |||
| @@ -78,4 +155,5 @@ def test_median_graph_estimator(): | |||
| if __name__ == '__main__': | |||
| set_median, gen_median = test_median_graph_estimator() | |||
| set_median, gen_median = test_median_graph_estimator() | |||
| # set_median, gen_median = test_median_graph_estimator_symb() | |||
| @@ -30,6 +30,8 @@ def mge_options_to_string(options): | |||
| opt_str += '--randomness ' + str(val) + ' ' | |||
| elif key == 'verbose': | |||
| opt_str += '--stdout ' + str(val) + ' ' | |||
| elif key == 'update_order': | |||
| opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' ' | |||
| elif key == 'refine': | |||
| opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' | |||
| elif key == 'time_limit': | |||
| @@ -35,8 +35,8 @@ from libcpp.pair cimport pair | |||
| from libcpp.list cimport list | |||
| #Long unsigned int equivalent | |||
| cimport numpy as np | |||
| ctypedef np.npy_uint32 UINT32_t | |||
| cimport numpy as cnp | |||
| ctypedef cnp.npy_uint32 UINT32_t | |||
| from cpython cimport array | |||
| @@ -76,14 +76,14 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||
| void runMethod(size_t g, size_t h) except + | |||
| double getUpperBound(size_t g, size_t h) except + | |||
| double getLowerBound(size_t g, size_t h) except + | |||
| vector[np.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||
| vector[np.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||
| vector[cnp.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||
| vector[cnp.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||
| size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + | |||
| size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + | |||
| double getInducedCost(size_t g, size_t h) except + | |||
| vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + | |||
| vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + | |||
| vector[vector[np.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||
| vector[vector[cnp.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||
| double getRuntime(size_t g, size_t h) except + | |||
| bool quasimetricCosts() except + | |||
| vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + | |||
| @@ -105,14 +105,16 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||
| map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + | |||
| string getInitType() except + | |||
| # double getNodeCost(size_t label1, size_t label2) except + | |||
| void computeInducedCost(size_t g_id, size_t h_id) except + | |||
| double computeInducedCost(size_t g_id, size_t h_id, vector[pair[size_t,size_t]]) except + | |||
| ############################# | |||
| ##CYTHON WRAPPER INTERFACES## | |||
| ############################# | |||
| import numpy as np | |||
| import networkx as nx | |||
| from gklearn.ged.env import NodeMap | |||
| # import librariesImport | |||
| from ctypes import * | |||
| @@ -726,13 +728,30 @@ cdef class GEDEnv: | |||
| :type g: size_t | |||
| :type h: size_t | |||
| :return: The Node Map between the two selected graph. | |||
| :rtype: list[tuple(size_t, size_t)] | |||
| :rtype: gklearn.ged.env.NodeMap. | |||
| .. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() | |||
| .. warning:: run_method() between the same two graph must be called before this function. | |||
| .. note:: This function creates data, so use it only if necessary; however, this example helps to understand how the assignment works. | |||
| """ | |||
| return self.c_env.getNodeMap(g, h) | |||
| map_as_relation = self.c_env.getNodeMap(g, h) | |||
| induced_cost = self.c_env.getInducedCost(g, h) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. | |||
| source_map = [item.first if item.first < len(map_as_relation) else np.inf for item in map_as_relation] # item.first < len(map_as_relation) is not exactly correct. | |||
| # print(source_map) | |||
| target_map = [item.second if item.second < len(map_as_relation) else np.inf for item in map_as_relation] | |||
| # print(target_map) | |||
| num_node_source = len([item for item in source_map if item != np.inf]) | |||
| # print(num_node_source) | |||
| num_node_target = len([item for item in target_map if item != np.inf]) | |||
| # print(num_node_target) | |||
| node_map = NodeMap(num_node_source, num_node_target) | |||
| # print(node_map.get_forward_map(), node_map.get_backward_map()) | |||
| for i in range(len(source_map)): | |||
| node_map.add_assignment(source_map[i], target_map[i]) | |||
| node_map.set_induced_cost(induced_cost) | |||
| return node_map | |||
| def get_assignment_matrix(self, g, h) : | |||
| @@ -1320,7 +1339,7 @@ cdef class GEDEnv: | |||
| return graph_id | |||
| def compute_induced_cost(self, g_id, h_id): | |||
| def compute_induced_cost(self, g_id, h_id, node_map): | |||
| """ | |||
| Computes the edit cost between two graphs induced by a node map. | |||
| @@ -1330,19 +1349,25 @@ cdef class GEDEnv: | |||
| ID of input graph. | |||
| h_id : int | |||
| ID of input graph. | |||
| node_map: gklearn.ged.env.NodeMap. | |||
| The NodeMap instance whose induced cost will be computed and re-assigned. | |||
| Returns | |||
| ------- | |||
| None. | |||
| Notes | |||
| ----- | |||
| The induced edit cost of the node map between `g_id` and `h_id` is implictly computed and stored in `GEDEnv::node_maps_`. | |||
| """ | |||
| cost = 0.0 | |||
| self.c_env.computeInducedCost(g_id, h_id) | |||
| None. | |||
| """ | |||
| relation = [] | |||
| node_map.as_relation(relation) | |||
| # print(relation) | |||
| dummy_node = get_dummy_node() | |||
| # print(dummy_node) | |||
| for i, val in enumerate(relation): | |||
| val1 = dummy_node if val[0] == np.inf else val[0] | |||
| val2 = dummy_node if val[1] == np.inf else val[1] | |||
| relation[i] = tuple((val1, val2)) | |||
| # print(relation) | |||
| induced_cost = self.c_env.computeInducedCost(g_id, h_id, relation) | |||
| node_map.set_induced_cost(induced_cost) | |||
| ##################################################################### | |||
| @@ -475,8 +475,9 @@ public: | |||
| * @brief Computes the edit cost between two graphs induced by a node map. | |||
| * @param[in] g_id ID of input graph. | |||
| * @param[in] h_id ID of input graph. | |||
| * @return Computed induced cost. | |||
| */ | |||
| void computeInducedCost(std::size_t g_id, std::size_t h_id) const; | |||
| double computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const; | |||
| // /*! | |||
| // * @brief Returns node relabeling, insertion, or deletion cost. | |||
| @@ -492,7 +493,7 @@ public: | |||
| private: | |||
| ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> env; // environment variable | |||
| ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> * env_; // environment variable | |||
| bool initialized; // initialization boolean (because env has one but not accessible) | |||
| @@ -277,11 +277,16 @@ std::string toStringVectorInt(std::vector<unsigned long int> vector) { | |||
| PyGEDEnv::PyGEDEnv () { | |||
| this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
| env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
| this->initialized = false; | |||
| } | |||
| PyGEDEnv::~PyGEDEnv () {} | |||
| PyGEDEnv::~PyGEDEnv () { | |||
| if (env_ != NULL) { | |||
| delete env_; | |||
| env_ = NULL; | |||
| } | |||
| } | |||
| // bool initialized = false; //Initialization boolean (because Env has one but not accessible). | |||
| @@ -290,64 +295,68 @@ bool PyGEDEnv::isInitialized() { | |||
| } | |||
| void PyGEDEnv::restartEnv() { | |||
| this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
| if (env_ != NULL) { | |||
| delete env_; | |||
| env_ = NULL; | |||
| } | |||
| env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
| initialized = false; | |||
| } | |||
| void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { | |||
| std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(this->env.load_gxl_graph(pathFolder, pathXML, | |||
| std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(env_->load_gxl_graph(pathFolder, pathXML, | |||
| (node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | |||
| (edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | |||
| std::unordered_set<std::string>(), std::unordered_set<std::string>())); | |||
| } | |||
| std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { | |||
| return this->env.graph_ids(); | |||
| return env_->graph_ids(); | |||
| } | |||
| std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { | |||
| std::vector<std::size_t> listID; | |||
| for (std::size_t i = this->env.graph_ids().first; i != this->env.graph_ids().second; i++) { | |||
| for (std::size_t i = env_->graph_ids().first; i != env_->graph_ids().second; i++) { | |||
| listID.push_back(i); | |||
| } | |||
| return listID; | |||
| } | |||
| const std::string PyGEDEnv::getGraphClass(std::size_t id) const { | |||
| return this->env.get_graph_class(id); | |||
| return env_->get_graph_class(id); | |||
| } | |||
| const std::string PyGEDEnv::getGraphName(std::size_t id) const { | |||
| return this->env.get_graph_name(id); | |||
| return env_->get_graph_name(id); | |||
| } | |||
| std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { | |||
| ged::GEDGraph::GraphID newId = this->env.add_graph(graph_name, graph_class); | |||
| ged::GEDGraph::GraphID newId = env_->add_graph(graph_name, graph_class); | |||
| initialized = false; | |||
| return std::stoi(std::to_string(newId)); | |||
| } | |||
| void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { | |||
| this->env.add_node(graphId, nodeId, nodeLabel); | |||
| env_->add_node(graphId, nodeId, nodeLabel); | |||
| initialized = false; | |||
| } | |||
| /*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { | |||
| this->env.add_edge(graphId, tail, head, edgeLabel); | |||
| env_->add_edge(graphId, tail, head, edgeLabel); | |||
| }*/ | |||
| void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { | |||
| this->env.add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||
| env_->add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||
| initialized = false; | |||
| } | |||
| void PyGEDEnv::clearGraph(std::size_t graphId) { | |||
| this->env.clear_graph(graphId); | |||
| env_->clear_graph(graphId); | |||
| initialized = false; | |||
| } | |||
| ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { | |||
| return this->env.get_graph(graphId); | |||
| return env_->get_graph(graphId); | |||
| } | |||
| std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { | |||
| @@ -379,71 +388,71 @@ std::vector<std::vector<std::size_t>> PyGEDEnv::getGraphAdjacenceMatrix(std::siz | |||
| } | |||
| void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { | |||
| this->env.set_edit_costs(translateEditCost(editCost), editCostConstants); | |||
| env_->set_edit_costs(translateEditCost(editCost), editCostConstants); | |||
| } | |||
| void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { | |||
| //this->env.set_edit_costs(Your EditCost Class(editCostConstants)); | |||
| //env_->set_edit_costs(Your EditCost Class(editCostConstants)); | |||
| } | |||
| // void PyGEDEnv::initEnv() { | |||
| // this->env.init(); | |||
| // env_->init(); | |||
| // initialized = true; | |||
| // } | |||
| void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { | |||
| this->env.init(translateInitOptions(initOption), print_to_stdout); | |||
| env_->init(translateInitOptions(initOption), print_to_stdout); | |||
| initialized = true; | |||
| } | |||
| void PyGEDEnv::setMethod(std::string method, const std::string & options) { | |||
| this->env.set_method(translateMethod(method), options); | |||
| env_->set_method(translateMethod(method), options); | |||
| } | |||
| void PyGEDEnv::initMethod() { | |||
| this->env.init_method(); | |||
| env_->init_method(); | |||
| } | |||
| double PyGEDEnv::getInitime() const { | |||
| return this->env.get_init_time(); | |||
| return env_->get_init_time(); | |||
| } | |||
| void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { | |||
| this->env.run_method(g, h); | |||
| env_->run_method(g, h); | |||
| } | |||
| double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { | |||
| return this->env.get_upper_bound(g, h); | |||
| return env_->get_upper_bound(g, h); | |||
| } | |||
| double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { | |||
| return this->env.get_lower_bound(g, h); | |||
| return env_->get_lower_bound(g, h); | |||
| } | |||
| std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { | |||
| return this->env.get_node_map(g, h).get_forward_map(); | |||
| return env_->get_node_map(g, h).get_forward_map(); | |||
| } | |||
| std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { | |||
| return this->env.get_node_map(g, h).get_backward_map(); | |||
| return env_->get_node_map(g, h).get_backward_map(); | |||
| } | |||
| std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | |||
| return this->env.get_node_map(g, h).image(nodeId); | |||
| return env_->get_node_map(g, h).image(nodeId); | |||
| } | |||
| std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | |||
| return this->env.get_node_map(g, h).pre_image(nodeId); | |||
| return env_->get_node_map(g, h).pre_image(nodeId); | |||
| } | |||
| double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { | |||
| return this->env.get_node_map(g, h).induced_cost(); | |||
| return env_->get_node_map(g, h).induced_cost(); | |||
| } | |||
| std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { | |||
| std::vector<pair<std::size_t, std::size_t>> res; | |||
| std::vector<ged::NodeMap::Assignment> relation; | |||
| this->env.get_node_map(g, h).as_relation(relation); | |||
| env_->get_node_map(g, h).as_relation(relation); | |||
| for (const auto & assignment : relation) { | |||
| res.push_back(std::make_pair(assignment.first, assignment.second)); | |||
| } | |||
| @@ -493,11 +502,11 @@ std::vector<std::vector<unsigned long int>> PyGEDEnv::getAllMap(std::size_t g, s | |||
| } | |||
| double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { | |||
| return this->env.get_runtime(g, h); | |||
| return env_->get_runtime(g, h); | |||
| } | |||
| bool PyGEDEnv::quasimetricCosts() const { | |||
| return this->env.quasimetric_costs(); | |||
| return env_->quasimetric_costs(); | |||
| } | |||
| std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { | |||
| @@ -542,73 +551,99 @@ std::vector<std::vector<double>> PyGEDEnv::hungarianLSAPE(std::vector<std::vecto | |||
| } | |||
| std::size_t PyGEDEnv::getNumNodeLabels() const { | |||
| return this->env.num_node_labels(); | |||
| return env_->num_node_labels(); | |||
| } | |||
| std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { | |||
| return this->env.get_node_label(label_id); | |||
| return env_->get_node_label(label_id); | |||
| } | |||
| std::size_t PyGEDEnv::getNumEdgeLabels() const { | |||
| return this->env.num_edge_labels(); | |||
| return env_->num_edge_labels(); | |||
| } | |||
| std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { | |||
| return this->env.get_edge_label(label_id); | |||
| return env_->get_edge_label(label_id); | |||
| } | |||
| // std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { | |||
| // return this->env.get_num_nodes(graph_id); | |||
| // return env_->get_num_nodes(graph_id); | |||
| // } | |||
| double PyGEDEnv::getAvgNumNodes() const { | |||
| return this->env.get_avg_num_nodes(); | |||
| return env_->get_avg_num_nodes(); | |||
| } | |||
| double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { | |||
| return this->env.node_rel_cost(node_label_1, node_label_2); | |||
| return env_->node_rel_cost(node_label_1, node_label_2); | |||
| } | |||
| double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { | |||
| return this->env.node_del_cost(node_label); | |||
| return env_->node_del_cost(node_label); | |||
| } | |||
| double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { | |||
| return this->env.node_ins_cost(node_label); | |||
| return env_->node_ins_cost(node_label); | |||
| } | |||
| std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { | |||
| return this->env.median_node_label(node_labels); | |||
| return env_->median_node_label(node_labels); | |||
| } | |||
| double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { | |||
| return this->env.edge_rel_cost(edge_label_1, edge_label_2); | |||
| return env_->edge_rel_cost(edge_label_1, edge_label_2); | |||
| } | |||
| double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { | |||
| return this->env.edge_del_cost(edge_label); | |||
| return env_->edge_del_cost(edge_label); | |||
| } | |||
| double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { | |||
| return this->env.edge_ins_cost(edge_label); | |||
| return env_->edge_ins_cost(edge_label); | |||
| } | |||
| std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { | |||
| return this->env.median_edge_label(edge_labels); | |||
| return env_->median_edge_label(edge_labels); | |||
| } | |||
| std::string PyGEDEnv::getInitType() const { | |||
| return initOptionsToString(this->env.get_init_type()); | |||
| return initOptionsToString(env_->get_init_type()); | |||
| } | |||
| void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
| ged::NodeMap node_map = this->env.get_node_map(g_id, h_id); | |||
| this->env.compute_induced_cost(g_id, h_id, node_map); | |||
| double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const { | |||
| ged::NodeMap node_map = ged::NodeMap(env_->get_num_nodes(g_id), env_->get_num_nodes(h_id)); | |||
| for (const auto & assignment : relation) { | |||
| node_map.add_assignment(assignment.first, assignment.second); | |||
| // std::cout << assignment.first << assignment.second << endl; | |||
| } | |||
| const std::vector<ged::GEDGraph::NodeID> forward_map = node_map.get_forward_map(); | |||
| for (std::size_t i{0}; i < node_map.num_source_nodes(); i++) { | |||
| if (forward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||
| node_map.add_assignment(i, ged::GEDGraph::dummy_node()); | |||
| } | |||
| } | |||
| const std::vector<ged::GEDGraph::NodeID> backward_map = node_map.get_backward_map(); | |||
| for (std::size_t i{0}; i < node_map.num_target_nodes(); i++) { | |||
| if (backward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||
| node_map.add_assignment(ged::GEDGraph::dummy_node(), i); | |||
| } | |||
| } | |||
| // for (auto & map : node_map.get_forward_map()) { | |||
| // std::cout << map << ", "; | |||
| // } | |||
| // std::cout << endl; | |||
| // for (auto & map : node_map.get_backward_map()) { | |||
| // std::cout << map << ", "; | |||
| // } | |||
| env_->compute_induced_cost(g_id, h_id, node_map); | |||
| return node_map.induced_cost(); | |||
| } | |||
| // double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { | |||
| // return this->env.ged_data_node_cost(label1, label2); | |||
| // return env_->ged_data_node_cost(label1, label2); | |||
| // } | |||
| @@ -630,7 +665,7 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
| /*loadGXLGraph(pathFolder, pathXML); | |||
| std::vector<std::size_t> graph_ids = getAllGraphIds(); | |||
| std::size_t median_id = this->env.add_graph("median", ""); | |||
| std::size_t median_id = env_->add_graph("median", ""); | |||
| initEnv(initOption); | |||
| @@ -640,10 +675,10 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
| median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); | |||
| median_estimator.run(graph_ids, median_id); | |||
| std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); | |||
| this->env.save_as_gxl_graph(median_id, gxl_file_name);*/ | |||
| env_->save_as_gxl_graph(median_id, gxl_file_name);*/ | |||
| /*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); | |||
| save_letter_graph_as_tikz_file(this->env.get_graph(median_id), tikz_file_name);*/ | |||
| save_letter_graph_as_tikz_file(env_->get_graph(median_id), tikz_file_name);*/ | |||
| //} | |||
| } | |||
| @@ -12,4 +12,4 @@ from gklearn.kernels.structural_sp import StructuralSP | |||
| from gklearn.kernels.shortest_path import ShortestPath | |||
| from gklearn.kernels.path_up_to_h import PathUpToH | |||
| from gklearn.kernels.treelet import Treelet | |||
| from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman | |||
| from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree | |||
| @@ -18,6 +18,7 @@ import numpy as np | |||
| import networkx as nx | |||
| from collections import Counter | |||
| from functools import partial | |||
| from gklearn.utils import SpecialLabel | |||
| from gklearn.utils.parallel import parallel_gm, parallel_me | |||
| from gklearn.kernels import GraphKernel | |||
| from gklearn.utils import Trie | |||
| @@ -582,11 +583,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
| def __add_dummy_labels(self, Gn): | |||
| if self.__k_func is not None: | |||
| if len(self.__node_labels) == 0: | |||
| for G in Gn: | |||
| nx.set_node_attributes(G, '0', 'dummy') | |||
| self.__node_labels.append('dummy') | |||
| if len(self.__edge_labels) == 0: | |||
| for G in Gn: | |||
| nx.set_edge_attributes(G, '0', 'dummy') | |||
| self.__edge_labels.append('dummy') | |||
| if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
| for i in range(len(Gn)): | |||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
| self.__node_labels = [SpecialLabel.DUMMY] | |||
| if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
| for i in range(len(Gn)): | |||
| nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
| self.__edge_labels = [SpecialLabel.DUMMY] | |||
| @@ -18,6 +18,7 @@ import numpy as np | |||
| import networkx as nx | |||
| from collections import Counter | |||
| from itertools import chain | |||
| from gklearn.utils import SpecialLabel | |||
| from gklearn.utils.parallel import parallel_gm, parallel_me | |||
| from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | |||
| from gklearn.kernels import GraphKernel | |||
| @@ -495,11 +496,11 @@ class Treelet(GraphKernel): | |||
| def __add_dummy_labels(self, Gn): | |||
| if len(self.__node_labels) == 0: | |||
| for G in Gn: | |||
| nx.set_node_attributes(G, '0', 'dummy') | |||
| self.__node_labels.append('dummy') | |||
| if len(self.__edge_labels) == 0: | |||
| for G in Gn: | |||
| nx.set_edge_attributes(G, '0', 'dummy') | |||
| self.__edge_labels.append('dummy') | |||
| if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
| for i in range(len(Gn)): | |||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
| self.__node_labels = [SpecialLabel.DUMMY] | |||
| if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
| for i in range(len(Gn)): | |||
| nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
| self.__edge_labels = [SpecialLabel.DUMMY] | |||
| @@ -16,6 +16,7 @@ import numpy as np | |||
| import networkx as nx | |||
| from collections import Counter | |||
| from functools import partial | |||
| from gklearn.utils import SpecialLabel | |||
| from gklearn.utils.parallel import parallel_gm | |||
| from gklearn.kernels import GraphKernel | |||
| @@ -32,6 +33,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
| def _compute_gm_series(self): | |||
| if self._verbose >= 2: | |||
| import warnings | |||
| warnings.warn('A part of the computation is parallelized.') | |||
| self.__add_dummy_node_labels(self._graphs) | |||
| # for WL subtree kernel | |||
| @@ -55,11 +60,16 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
| def _compute_gm_imap_unordered(self): | |||
| if self._verbose >= 2: | |||
| raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
| import warnings | |||
| warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
| return self._compute_gm_series() | |||
| def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | |||
| if self._verbose >= 2: | |||
| import warnings | |||
| warnings.warn('A part of the computation is parallelized.') | |||
| self.__add_dummy_node_labels(g_list + [g1]) | |||
| # for WL subtree kernel | |||
| @@ -83,8 +93,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
| if self._verbose >= 2: | |||
| raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
| return self._compute_gm_imap_unordered() | |||
| import warnings | |||
| warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
| return self._compute_kernel_list_series(g1, g_list) | |||
| def _wrapper_kernel_list_do(self, itr): | |||
| @@ -459,7 +470,14 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
| def __add_dummy_node_labels(self, Gn): | |||
| if len(self.__node_labels) == 0: | |||
| for G in Gn: | |||
| nx.set_node_attributes(G, '0', 'dummy') | |||
| self.__node_labels.append('dummy') | |||
| if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
| for i in range(len(Gn)): | |||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
| self.__node_labels = [SpecialLabel.DUMMY] | |||
class WLSubtree(WeisfeilerLehman):
    """Weisfeiler-Lehman kernel with the base kernel pinned to ``'subtree'``.

    All keyword arguments are forwarded to the parent constructor; a
    ``base_kernel`` entry supplied by the caller is overwritten.
    """

    def __init__(self, **kwargs):
        # Merge instead of assigning in place; the parent sees the same mapping
        # the original built, with 'base_kernel' forced to 'subtree'.
        forwarded = dict(kwargs, base_kernel='subtree')
        super().__init__(**forwarded)
| @@ -18,6 +18,7 @@ from gklearn.ged.median import MedianGraphEstimator | |||
| from gklearn.ged.median import constant_node_costs,mge_options_to_string | |||
| from gklearn.gedlib import librariesImport, gedlibpy | |||
| from gklearn.utils import Timer | |||
| from gklearn.utils.utils import get_graph_kernel_by_name | |||
| # from gklearn.utils.dataset import Dataset | |||
| class MedianPreimageGenerator(PreimageGenerator): | |||
| @@ -81,7 +82,13 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| def run(self): | |||
| self.__set_graph_kernel_by_name() | |||
| self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'], | |||
| node_labels=self._dataset.node_labels, | |||
| edge_labels=self._dataset.edge_labels, | |||
| node_attrs=self._dataset.node_attrs, | |||
| edge_attrs=self._dataset.edge_attrs, | |||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
| kernel_options=self._kernel_options) | |||
| # record start time. | |||
| start = time.time() | |||
| @@ -180,6 +187,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| results['itrs'] = self.__itrs | |||
| results['converged'] = self.__converged | |||
| results['num_updates_ecc'] = self.__num_updates_ecc | |||
| results['mge'] = {} | |||
| results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() | |||
| results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() | |||
| results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() | |||
| return results | |||
| @@ -653,27 +664,27 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| ged_env.init(init_option=self.__ged_options['init_option']) | |||
| # Set up the median graph estimator. | |||
| mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
| mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
| self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
| self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
| options = self.__mge_options.copy() | |||
| if not 'seed' in options: | |||
| options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | |||
| # Select the GED algorithm. | |||
| mge.set_options(mge_options_to_string(options)) | |||
| mge.set_label_names(node_labels=self._dataset.node_labels, | |||
| self.__mge.set_options(mge_options_to_string(options)) | |||
| self.__mge.set_label_names(node_labels=self._dataset.node_labels, | |||
| edge_labels=self._dataset.edge_labels, | |||
| node_attrs=self._dataset.node_attrs, | |||
| edge_attrs=self._dataset.edge_attrs) | |||
| mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
| mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
| self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
| self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
| # Run the estimator. | |||
| mge.run(graph_ids, set_median_id, gen_median_id) | |||
| self.__mge.run(graph_ids, set_median_id, gen_median_id) | |||
| # Get SODs. | |||
| self.__sod_set_median = mge.get_sum_of_distances('initialized') | |||
| self.__sod_gen_median = mge.get_sum_of_distances('converged') | |||
| self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') | |||
| self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') | |||
| # Get median graphs. | |||
| self.__set_median = ged_env.get_nx_graph(set_median_id) | |||
| @@ -722,43 +733,6 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | |||
| print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | |||
| print('distance in kernel space for each graph in median set:', k_dis_median_set) | |||
| def __set_graph_kernel_by_name(self): | |||
| if self._kernel_options['name'] == 'ShortestPath': | |||
| from gklearn.kernels import ShortestPath | |||
| self._graph_kernel = ShortestPath(node_labels=self._dataset.node_labels, | |||
| node_attrs=self._dataset.node_attrs, | |||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
| **self._kernel_options) | |||
| elif self._kernel_options['name'] == 'StructuralSP': | |||
| from gklearn.kernels import StructuralSP | |||
| self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels, | |||
| edge_labels=self._dataset.edge_labels, | |||
| node_attrs=self._dataset.node_attrs, | |||
| edge_attrs=self._dataset.edge_attrs, | |||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
| **self._kernel_options) | |||
| elif self._kernel_options['name'] == 'PathUpToH': | |||
| from gklearn.kernels import PathUpToH | |||
| self._graph_kernel = PathUpToH(node_labels=self._dataset.node_labels, | |||
| edge_labels=self._dataset.edge_labels, | |||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
| **self._kernel_options) | |||
| elif self._kernel_options['name'] == 'Treelet': | |||
| from gklearn.kernels import Treelet | |||
| self._graph_kernel = Treelet(node_labels=self._dataset.node_labels, | |||
| edge_labels=self._dataset.edge_labels, | |||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
| **self._kernel_options) | |||
| elif self._kernel_options['name'] == 'WeisfeilerLehman': | |||
| from gklearn.kernels import WeisfeilerLehman | |||
| self._graph_kernel = WeisfeilerLehman(node_labels=self._dataset.node_labels, | |||
| edge_labels=self._dataset.edge_labels, | |||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
| **self._kernel_options) | |||
| else: | |||
| raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WeisfeilerLehman".') | |||
| # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
| @@ -25,7 +25,7 @@ import networkx as nx | |||
| import os | |||
| def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False): | |||
| def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): | |||
| import os.path | |||
| from gklearn.preimage import MedianPreimageGenerator | |||
| from gklearn.utils import split_dataset_by_target | |||
| @@ -38,7 +38,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| dataset_all.trim_dataset(edge_required=edge_required) | |||
| if irrelevant_labels is not None: | |||
| dataset_all.remove_labels(**irrelevant_labels) | |||
| # dataset_all.cut_graphs(range(0, 10)) | |||
| if cut_range is not None: | |||
| dataset_all.cut_graphs(cut_range) | |||
| datasets = split_dataset_by_target(dataset_all) | |||
| if save_results: | |||
| @@ -57,6 +58,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| itrs_list = [] | |||
| converged_list = [] | |||
| num_updates_ecc_list = [] | |||
| mge_decrease_order_list = [] | |||
| mge_increase_order_list = [] | |||
| mge_converged_order_list = [] | |||
| nb_sod_sm2gm = [0, 0, 0] | |||
| nb_dis_k_sm2gm = [0, 0, 0] | |||
| nb_dis_k_gi2sm = [0, 0, 0] | |||
| @@ -148,7 +152,10 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| results['runtime_precompute_gm'], results['runtime_optimize_ec'], | |||
| results['runtime_generate_preimage'], results['runtime_total'], | |||
| results['itrs'], results['converged'], | |||
| results['num_updates_ecc']]) | |||
| results['num_updates_ecc'], | |||
| results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||
| results['mge']['num_increase_order'] > 0, | |||
| results['mge']['num_converged_descents'] > 0]) | |||
| f_detail.close() | |||
| # compute result summary. | |||
| @@ -164,6 +171,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| itrs_list.append(results['itrs']) | |||
| converged_list.append(results['converged']) | |||
| num_updates_ecc_list.append(results['num_updates_ecc']) | |||
| mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0) | |||
| mge_increase_order_list.append(results['mge']['num_increase_order'] > 0) | |||
| mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0) | |||
| # # SOD SM -> GM | |||
| if results['sod_set_median'] > results['sod_gen_median']: | |||
| nb_sod_sm2gm[0] += 1 | |||
| @@ -210,7 +220,11 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| results['runtime_precompute_gm'], results['runtime_optimize_ec'], | |||
| results['runtime_generate_preimage'], results['runtime_total'], | |||
| results['itrs'], results['converged'], | |||
| results['num_updates_ecc'], nb_sod_sm2gm, | |||
| results['num_updates_ecc'], | |||
| results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||
| results['mge']['num_increase_order'] > 0, | |||
| results['mge']['num_converged_descents'] > 0, | |||
| nb_sod_sm2gm, | |||
| nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) | |||
| f_summary.close() | |||
| @@ -256,6 +270,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| itrs_mean = np.mean(itrs_list) | |||
| num_converged = np.sum(converged_list) | |||
| num_updates_ecc_mean = np.mean(num_updates_ecc_list) | |||
| num_mge_decrease_order = np.sum(mge_decrease_order_list) | |||
| num_mge_increase_order = np.sum(mge_increase_order_list) | |||
| num_mge_converged = np.sum(mge_converged_order_list) | |||
| sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) | |||
| dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) | |||
| dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) | |||
| @@ -270,7 +287,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| dis_k_gi2sm_mean, dis_k_gi2gm_mean, | |||
| time_precompute_gm_mean, time_optimize_ec_mean, | |||
| time_generate_mean, time_total_mean, itrs_mean, | |||
| num_converged, num_updates_ecc_mean]) | |||
| num_converged, num_updates_ecc_mean, | |||
| num_mge_decrease_order, num_mge_increase_order, | |||
| num_mge_converged]) | |||
| f_summary.close() | |||
| # save total pairwise kernel distances. | |||
| @@ -300,7 +319,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
| 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
| 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', | |||
| 'time optimize ec', 'time generate preimage', 'time total', | |||
| 'itrs', 'converged', 'num updates ecc']) | |||
| 'itrs', 'converged', 'num updates ecc', 'mge decrease order', | |||
| 'mge increase order', 'mge converged']) | |||
| f_detail.close() | |||
| # fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | |||
| @@ -312,7 +332,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
| 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
| 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', | |||
| 'time generate preimage', 'time total', 'itrs', 'num converged', | |||
| 'num updates ecc', '# SOD SM -> GM', '# dis_k SM -> GM', | |||
| 'num updates ecc', 'mge num decrease order', 'mge num increase order', | |||
| 'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM', | |||
| '# dis_k gi -> SM', '# dis_k gi -> GM']) | |||
| # 'repeats better SOD SM -> GM', | |||
| # 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | |||
| @@ -418,6 +439,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel=' | |||
| Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | |||
| height=4, base_kernel='subtree', parallel=None, | |||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
| else: | |||
| raise Exception('The graph kernel "', graph_kernel, '" is not defined.') | |||
| # normalization | |||
| Kmatrix_diag = Kmatrix.diagonal().copy() | |||
| @@ -260,20 +260,20 @@ def test_Treelet(ds_name, parallel): | |||
| @pytest.mark.parametrize('ds_name', ['Acyclic']) | |||
| #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) | |||
| @pytest.mark.parametrize('base_kernel', ['subtree']) | |||
| # @pytest.mark.parametrize('base_kernel', ['subtree']) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_WeisfeilerLehman(ds_name, parallel, base_kernel): | |||
| """Test Weisfeiler-Lehman kernel. | |||
| def test_WLSubtree(ds_name, parallel): | |||
| """Test Weisfeiler-Lehman subtree kernel. | |||
| """ | |||
| from gklearn.kernels import WeisfeilerLehman | |||
| from gklearn.kernels import WLSubtree | |||
| dataset = chooseDataset(ds_name) | |||
| try: | |||
| graph_kernel = WeisfeilerLehman(node_labels=dataset.node_labels, | |||
| graph_kernel = WLSubtree(node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| height=2, base_kernel=base_kernel) | |||
| height=2) | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
| kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
| @@ -20,4 +20,5 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||
| from gklearn.utils.timer import Timer | |||
| from gklearn.utils.utils import get_graph_kernel_by_name | |||
| from gklearn.utils.utils import compute_gram_matrices_by_class | |||
| from gklearn.utils.utils import SpecialLabel | |||
| from gklearn.utils.trie import Trie | |||
| @@ -56,13 +56,14 @@ class Dataset(object): | |||
| self.__node_attrs = label_names['node_attrs'] | |||
| self.__edge_labels = label_names['edge_labels'] | |||
| self.__edge_attrs = label_names['edge_attrs'] | |||
| self.clean_labels() | |||
| def load_graphs(self, graphs, targets=None): | |||
| # this has to be followed by set_labels(). | |||
| self.__graphs = graphs | |||
| self.__targets = targets | |||
| # self.set_labels_attrs() | |||
| # self.set_labels_attrs() # @todo | |||
| def load_predefined_dataset(self, ds_name): | |||
| @@ -89,6 +90,9 @@ class Dataset(object): | |||
| elif ds_name == 'Cuneiform': | |||
| ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| elif ds_name == 'DD': | |||
| ds_file = current_path + '../../datasets/DD/DD_A.txt' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| elif ds_name == 'Fingerprint': | |||
| ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| @@ -113,6 +117,9 @@ class Dataset(object): | |||
| elif ds_name == 'MUTAG': | |||
| ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| elif ds_name == 'PAH': | |||
| ds_file = current_path + '../../datasets/PAH/dataset.ds' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| elif ds_name == 'SYNTHETIC': | |||
| pass | |||
| elif ds_name == 'SYNTHETICnew': | |||
| @@ -120,11 +127,14 @@ class Dataset(object): | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| elif ds_name == 'Synthie': | |||
| pass | |||
| else: | |||
| raise Exception('The dataset name "', ds_name, '" is not pre-defined.') | |||
| self.__node_labels = label_names['node_labels'] | |||
| self.__node_attrs = label_names['node_attrs'] | |||
| self.__edge_labels = label_names['edge_labels'] | |||
| self.__edge_attrs = label_names['edge_attrs'] | |||
| self.clean_labels() | |||
| def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | |||
| @@ -138,27 +148,27 @@ class Dataset(object): | |||
| # @todo: remove labels which have only one possible value. | |||
| if node_labels is None: | |||
| self.__node_labels = self.__graphs[0].graph['node_labels'] | |||
| # # graphs are considered node unlabeled if all nodes have the same label. | |||
| # infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||
| # # graphs are considered node unlabeled if all nodes have the same label. | |||
| # infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||
| if node_attrs is None: | |||
| self.__node_attrs = self.__graphs[0].graph['node_attrs'] | |||
| # for G in Gn: | |||
| # for n in G.nodes(data=True): | |||
| # if 'attributes' in n[1]: | |||
| # return len(n[1]['attributes']) | |||
| # return 0 | |||
| # for G in Gn: | |||
| # for n in G.nodes(data=True): | |||
| # if 'attributes' in n[1]: | |||
| # return len(n[1]['attributes']) | |||
| # return 0 | |||
| if edge_labels is None: | |||
| self.__edge_labels = self.__graphs[0].graph['edge_labels'] | |||
| # # graphs are considered edge unlabeled if all edges have the same label. | |||
| # infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||
| # # graphs are considered edge unlabeled if all edges have the same label. | |||
| # infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||
| if edge_attrs is None: | |||
| self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | |||
| # for G in Gn: | |||
| # if nx.number_of_edges(G) > 0: | |||
| # for e in G.edges(data=True): | |||
| # if 'attributes' in e[2]: | |||
| # return len(e[2]['attributes']) | |||
| # return 0 | |||
| # for G in Gn: | |||
| # if nx.number_of_edges(G) > 0: | |||
| # for e in G.edges(data=True): | |||
| # if 'attributes' in e[2]: | |||
| # return len(e[2]['attributes']) | |||
| # return 0 | |||
| def get_dataset_infos(self, keys=None): | |||
| @@ -323,7 +333,7 @@ class Dataset(object): | |||
| if self.__node_label_nums is None: | |||
| self.__node_label_nums = {} | |||
| for node_label in self.__node_labels: | |||
| self.__node_label_nums[node_label] = self.get_node_label_num(node_label) | |||
| self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) | |||
| infos['node_label_nums'] = self.__node_label_nums | |||
| if 'edge_label_dim' in keys: | |||
| @@ -335,7 +345,7 @@ class Dataset(object): | |||
| if self.__edge_label_nums is None: | |||
| self.__edge_label_nums = {} | |||
| for edge_label in self.__edge_labels: | |||
| self.__edge_label_nums[edge_label] = self.get_edge_label_num(edge_label) | |||
| self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) | |||
| infos['edge_label_nums'] = self.__edge_label_nums | |||
| if 'directed' in keys or 'substructures' in keys: | |||
| @@ -411,33 +421,95 @@ class Dataset(object): | |||
| def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
| node_labels = [item for item in node_labels if item in self.__node_labels] | |||
| edge_labels = [item for item in edge_labels if item in self.__edge_labels] | |||
| node_attrs = [item for item in node_attrs if item in self.__node_attrs] | |||
| edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs] | |||
| for g in self.__graphs: | |||
| for nd in g.nodes(): | |||
| for nl in node_labels: | |||
| del g.nodes[nd][nl] | |||
| del g.nodes[nd][nl] | |||
| for na in node_attrs: | |||
| del g.nodes[nd][na] | |||
| for ed in g.edges(): | |||
| for el in edge_labels: | |||
| del g.edges[ed][el] | |||
| del g.edges[ed][el] | |||
| for ea in edge_attrs: | |||
| del g.edges[ed][ea] | |||
| del g.edges[ed][ea] | |||
| if len(node_labels) > 0: | |||
| self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
| self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
| if len(edge_labels) > 0: | |||
| self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
| self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
| if len(node_attrs) > 0: | |||
| self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
| self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
| if len(edge_attrs) > 0: | |||
| self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
| self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
def clean_labels(self):
    """Drop label and attribute names that carry fewer than two distinct values.

    The node-label, edge-label, node-attribute and edge-attribute name lists
    are processed in that order. For every name, the values found across all
    graphs are collected; as soon as two distinct values are seen the name is
    kept. Otherwise the name is removed from its list and the corresponding
    entry is stripped from every node (or edge) of every graph.

    Nodes or edges that do not carry the entry are skipped silently instead
    of raising ``KeyError``.
    """
    def node_dicts(G):
        # Attribute dictionaries of all nodes of G.
        return (G.nodes[nd] for nd in G.nodes())

    def edge_dicts(G):
        # Attribute dictionaries of all edges of G.
        return (G.edges[ed] for ed in G.edges())

    def prune(names, collect, attr_dicts):
        # Return the names worth keeping; strip the others from the graphs.
        kept = []
        for name in names:
            seen = set()
            for G in self.__graphs:
                seen |= set(collect(G, name).values())
                if len(seen) > 1:
                    kept.append(name)
                    break
            else:
                # Never reached two distinct values: the name is uninformative.
                for G in self.__graphs:
                    for attrs in attr_dicts(G):
                        # pop() tolerates elements that lack the entry.
                        attrs.pop(name, None)
        return kept

    self.__node_labels = prune(self.__node_labels, nx.get_node_attributes, node_dicts)
    self.__edge_labels = prune(self.__edge_labels, nx.get_edge_attributes, edge_dicts)
    self.__node_attrs = prune(self.__node_attrs, nx.get_node_attributes, node_dicts)
    self.__edge_attrs = prune(self.__edge_attrs, nx.get_edge_attributes, edge_dicts)
def cut_graphs(self, range_):
    """Keep only the graphs (and their targets) at the given positions.

    Parameters
    ----------
    range_ : iterable of int
        Indices into the current graph list, in the order the graphs should
        appear afterwards. A one-shot iterator is accepted.
    """
    # Materialise once: the indices are walked twice (graphs, then targets),
    # so a generator would be exhausted before the targets are cut.
    indices = list(range_)
    self.__graphs = [self.__graphs[i] for i in indices]
    if self.__targets is not None:
        self.__targets = [self.__targets[i] for i in indices]
    # Re-evaluate the label/attribute name lists on the reduced graph set.
    self.clean_labels()
| def trim_dataset(self, edge_required=False): | |||
| @@ -448,8 +520,7 @@ class Dataset(object): | |||
| idx = [p[0] for p in trimed_pairs] | |||
| self.__graphs = [p[1] for p in trimed_pairs] | |||
| self.__targets = [self.__targets[i] for i in idx] | |||
| # @todo | |||
| # self.set_labels_attrs() | |||
| self.clean_labels() | |||
| def __get_dataset_size(self): | |||
| @@ -652,4 +723,5 @@ def split_dataset_by_target(dataset): | |||
| sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | |||
| sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | |||
| datasets.append(sub_dataset) | |||
| # @todo: clean_labels? | |||
| return datasets | |||
| @@ -63,7 +63,7 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs): | |||
| return data, y, label_names | |||
| def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None): | |||
| def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', **kwargs): | |||
| """Save list of graphs. | |||
| """ | |||
| import os | |||
| @@ -73,22 +73,22 @@ def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=Non | |||
| if not os.path.exists(dirname_ds) : | |||
| os.makedirs(dirname_ds) | |||
| if xparams is not None and 'graph_dir' in xparams: | |||
| graph_dir = xparams['graph_dir'] + '/' | |||
| if 'graph_dir' in kwargs: | |||
| graph_dir = kwargs['graph_dir'] + '/' | |||
| if not os.path.exists(graph_dir): | |||
| os.makedirs(graph_dir) | |||
| del kwargs['graph_dir'] | |||
| else: | |||
| graph_dir = dirname_ds | |||
| if group == 'xml' and gformat == 'gxl': | |||
| kwargs = {'method': xparams['method']} if xparams is not None else {} | |||
| with open(filename + '.xml', 'w') as fgroup: | |||
| fgroup.write("<?xml version=\"1.0\"?>") | |||
| fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") | |||
| fgroup.write("\n<GraphCollection>") | |||
| for idx, g in enumerate(Gn): | |||
| fname_tmp = "graph" + str(idx) + ".gxl" | |||
| saveGXL(g, graph_dir + fname_tmp, **kwargs) | |||
| save_gxl(g, graph_dir + fname_tmp, **kwargs) | |||
| fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | |||
| fgroup.write("\n</GraphCollection>") | |||
| fgroup.close() | |||
| @@ -226,7 +226,7 @@ def load_gxl(filename): # @todo: directed graphs. | |||
| return g, label_names | |||
| def saveGXL(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
| def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
| if method == 'default': | |||
| gxl_file = open(filename, 'w') | |||
| gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | |||
| @@ -1,6 +1,7 @@ | |||
| import networkx as nx | |||
| import numpy as np | |||
| from copy import deepcopy | |||
| from enum import Enum, auto | |||
| #from itertools import product | |||
| # from tqdm import tqdm | |||
| @@ -299,21 +300,59 @@ def get_edge_labels(Gn, edge_label): | |||
def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options=None):
    """Construct the graph kernel object registered under ``name``.

    Parameters
    ----------
    name : str
        Kernel to build. One of "ShortestPath", "StructuralSP",
        "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".
    node_labels, edge_labels, node_attrs, edge_attrs :
        Forwarded unchanged to the kernel constructor; only the ones a given
        kernel is called with below are passed on.
        NOTE(review): presumably lists of label/attribute names -- confirm
        against the kernel classes.
    ds_infos :
        Forwarded unchanged to every kernel constructor.
    kernel_options : dict, optional
        Extra keyword arguments unpacked into the kernel constructor.
        ``None`` (the default) means no extra arguments.

    Returns
    -------
    The newly constructed kernel instance.

    Raises
    ------
    Exception
        If ``name`` is not one of the supported kernel names.
    """
    # Use None instead of a shared mutable ``{}`` default.
    if kernel_options is None:
        kernel_options = {}

    # Kernel classes are imported lazily so that only the requested one is
    # loaded, and an unknown name fails without importing anything.
    if name == 'ShortestPath':
        from gklearn.kernels import ShortestPath
        graph_kernel = ShortestPath(node_labels=node_labels,
                                    node_attrs=node_attrs,
                                    ds_infos=ds_infos,
                                    **kernel_options)
    elif name == 'StructuralSP':
        from gklearn.kernels import StructuralSP
        graph_kernel = StructuralSP(node_labels=node_labels,
                                    edge_labels=edge_labels,
                                    node_attrs=node_attrs,
                                    edge_attrs=edge_attrs,
                                    ds_infos=ds_infos,
                                    **kernel_options)
    elif name == 'PathUpToH':
        from gklearn.kernels import PathUpToH
        graph_kernel = PathUpToH(node_labels=node_labels,
                                 edge_labels=edge_labels,
                                 ds_infos=ds_infos,
                                 **kernel_options)
    elif name == 'Treelet':
        from gklearn.kernels import Treelet
        graph_kernel = Treelet(node_labels=node_labels,
                               edge_labels=edge_labels,
                               ds_infos=ds_infos,
                               **kernel_options)
    elif name == 'WLSubtree':
        from gklearn.kernels import WLSubtree
        graph_kernel = WLSubtree(node_labels=node_labels,
                                 edge_labels=edge_labels,
                                 ds_infos=ds_infos,
                                 **kernel_options)
    elif name == 'WeisfeilerLehman':
        from gklearn.kernels import WeisfeilerLehman
        graph_kernel = WeisfeilerLehman(node_labels=node_labels,
                                        edge_labels=edge_labels,
                                        ds_infos=ds_infos,
                                        **kernel_options)
    else:
        raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".')

    return graph_kernel
| def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None): | |||
| def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False): | |||
| import os | |||
| from gklearn.utils import Dataset, split_dataset_by_target | |||
| # 1. get dataset. | |||
| print('1. getting dataset...') | |||
| dataset_all = Dataset() | |||
| dataset_all.load_predefined_dataset(ds_name) | |||
| dataset_all.trim_dataset(edge_required=edge_required) | |||
| if not irrelevant_labels is None: | |||
| dataset_all.remove_labels(**irrelevant_labels) | |||
| # dataset_all.cut_graphs(range(0, 10)) | |||
| @@ -349,6 +388,8 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d | |||
| print() | |||
| print('4. saving results...') | |||
| if save_results: | |||
| if not os.path.exists(dir_save): | |||
| os.makedirs(dir_save) | |||
| np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | |||
| print('\ncomplete.') | |||
| @@ -424,4 +465,10 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||
| attributes = [] | |||
| for ed, attrs in G.edges(data=True): | |||
| attributes.append(tuple(attrs[aname] for aname in attr_names)) | |||
| return attributes | |||
| return attributes | |||
class SpecialLabel(Enum):
    """Enumeration of special labels.

    Can be used to define special labels.
    """
    # ``auto`` has to be *called*: a bare ``auto`` would store the helper
    # class itself as the member's value instead of a generated integer.
    DUMMY = auto()  # The dummy label.