| @@ -1,2 +1,4 @@ | |||
| [run] | |||
| omit = gklearn/tests/* | |||
| omit = | |||
| gklearn/tests/* | |||
| gklearn/examples/* | |||
| @@ -0,0 +1,58 @@ | |||
| # -*- coding: utf-8 -*- | |||
| """compute_graph_edit_distance.ipynb | |||
| Automatically generated by Colaboratory. | |||
| Original file is located at | |||
| https://colab.research.google.com/drive/1Wfgn7WVuyOQQgwOvdUQBz0BzEVdp0YM3 | |||
| **This script demonstrates how to compute a graph edit distance.** | |||
| --- | |||
| **0. Install `graphkit-learn`.** | |||
| """ | |||
"""**1. Get dataset.**"""

from gklearn.utils import Dataset

# Name of the predefined dataset to load.
ds_name = 'MUTAG'

# Create an empty Dataset and fill it with the predefined "MUTAG" graphs.
dataset = Dataset()
dataset.load_predefined_dataset(ds_name)

# The distance is computed between the first two graphs of the dataset.
graph1, graph2 = dataset.graphs[0], dataset.graphs[1]
print(graph1, graph2)

"""**2. Compute graph edit distance.**"""

from gklearn.ged.env import GEDEnv

# Create the GED environment and choose the edit cost model.
ged_env = GEDEnv()
ged_env.set_edit_cost(
    'CONSTANT',  # GED cost type.
    edit_cost_constants=[3, 3, 1, 3, 3, 1],  # Edit costs.
)

# Register both graphs (with an empty class label), then fetch their IDs.
for graph in (graph1, graph2):
    ged_env.add_nx_graph(graph, '')
listID = ged_env.get_all_graph_ids()

# Initialize the environment once all graphs have been added.
ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')

# Choose and configure the GED method.
options = {
    'initialization_method': 'RANDOM',  # or 'NODE', etc.
    'threads': 1,  # Parallel threads.
}
ged_env.set_method('BIPARTITE', options)
ged_env.init_method()

# Run the method on the two graphs and collect the results.
ged_env.run_method(listID[0], listID[1])
pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # Forward map.
pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # Backward map.
dis = ged_env.get_upper_bound(listID[0], listID[1])  # GED between the two graphs.

for result in (pi_forward, pi_backward, dis):
    print(result)
| @@ -0,0 +1,2 @@ | |||
| from gklearn.ged.edit_costs.edit_cost import EditCost | |||
| from gklearn.ged.edit_costs.constant import Constant | |||
| @@ -0,0 +1,50 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Wed Jun 17 17:52:23 2020 | |||
| @author: ljia | |||
| """ | |||
| from gklearn.ged.edit_costs import EditCost | |||
class Constant(EditCost):
    """Edit cost model whose six costs are fixed numbers.

    Insertion and deletion costs do not depend on the label. A relabeling
    cost is charged only when the two labels differ.
    """

    def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1,
                 edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
        """Store the six constants; each of them defaults to 1."""
        self.__node_ins_cost, self.__node_del_cost, self.__node_rel_cost = (
            node_ins_cost, node_del_cost, node_rel_cost)
        self.__edge_ins_cost, self.__edge_del_cost, self.__edge_rel_cost = (
            edge_ins_cost, edge_del_cost, edge_rel_cost)

    def node_ins_cost_fun(self, node_label):
        """Return the node insertion cost; ``node_label`` is not inspected."""
        return self.__node_ins_cost

    def node_del_cost_fun(self, node_label):
        """Return the node deletion cost; ``node_label`` is not inspected."""
        return self.__node_del_cost

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Return the node relabeling cost, or 0 when both labels compare equal."""
        return self.__node_rel_cost if node_label_1 != node_label_2 else 0

    def edge_ins_cost_fun(self, edge_label):
        """Return the edge insertion cost; ``edge_label`` is not inspected."""
        return self.__edge_ins_cost

    def edge_del_cost_fun(self, edge_label):
        """Return the edge deletion cost; ``edge_label`` is not inspected."""
        return self.__edge_del_cost

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Return the edge relabeling cost, or 0 when both labels compare equal."""
        return self.__edge_rel_cost if edge_label_1 != edge_label_2 else 0
| @@ -0,0 +1,88 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Wed Jun 17 17:49:24 2020 | |||
| @author: ljia | |||
| """ | |||
class EditCost(object):
    """Base class of the edit cost models.

    Every cost function of this base class returns 0; derived classes are
    expected to override them with their own costs.
    """

    def __init__(self):
        """Create the cost model; the base class keeps no state."""

    def node_ins_cost_fun(self, node_label):
        """Cost of inserting a node.

        :param node_label: Label of the inserted node.
        :return: 0 in this base class.
        """
        return 0

    def node_del_cost_fun(self, node_label):
        """Cost of deleting a node.

        :param node_label: Label of the deleted node.
        :return: 0 in this base class.
        """
        return 0

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Cost of changing a node's label.

        :param node_label_1: Label before the change.
        :param node_label_2: Label after the change.
        :return: 0 in this base class.
        """
        return 0

    def edge_ins_cost_fun(self, edge_label):
        """Cost of inserting an edge.

        :param edge_label: Label of the inserted edge.
        :return: 0 in this base class.
        """
        return 0

    def edge_del_cost_fun(self, edge_label):
        """Cost of deleting an edge.

        :param edge_label: Label of the deleted edge.
        :return: 0 in this base class.
        """
        return 0

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Cost of changing an edge's label.

        :param edge_label_1: Label before the change.
        :param edge_label_2: Label after the change.
        :return: 0 in this base class.
        """
        return 0
| @@ -1,2 +1,4 @@ | |||
| from gklearn.ged.env.common_types import AlgorithmState | |||
| from gklearn.ged.env.common_types import Options, OptionsStringMap, AlgorithmState | |||
| from gklearn.ged.env.ged_data import GEDData | |||
| from gklearn.ged.env.ged_env import GEDEnv | |||
| from gklearn.ged.env.node_map import NodeMap | |||
| @@ -8,11 +8,152 @@ Created on Thu Mar 19 18:17:38 2020 | |||
| from enum import Enum, unique | |||
class Options(object):
    """Namespace for the enumerations used to configure a GED environment."""

    @unique
    class GEDMethod(Enum):
        """Available GED computation methods."""

        # The first four members sit inside an `#ifdef GUROBI` guard in the
        # C++ code this enum was ported from.
        F1 = 1
        F2 = 2
        COMPACT_MIP = 3
        BLP_NO_EDGE_LABELS = 4
        # End of the GUROBI-guarded members.
        BRANCH = 5
        BRANCH_FAST = 6
        BRANCH_TIGHT = 7
        BRANCH_UNIFORM = 8
        BRANCH_COMPACT = 9
        PARTITION = 10
        HYBRID = 11
        RING = 12
        ANCHOR_AWARE_GED = 13
        WALKS = 14
        IPFP = 15
        BIPARTITE = 16
        SUBGRAPH = 17
        NODE = 18
        RING_ML = 19
        BIPARTITE_ML = 20
        REFINE = 21
        BP_BEAM = 22
        SIMULATED_ANNEALING = 23
        HED = 24
        STAR = 25

    @unique
    class EditCosts(Enum):
        """Available edit cost models."""

        CHEM_1 = 1
        CHEM_2 = 2
        CMU = 3
        GREC_1 = 4
        GREC_2 = 5
        PROTEIN = 6
        FINGERPRINT = 7
        LETTER = 8
        LETTER2 = 9
        NON_SYMBOLIC = 10
        CONSTANT = 11

    @unique
    class InitType(Enum):
        """Initialization type of the environment.

        Eager initialization pre-computes all edit costs when the environment
        is initialized; lazy initialization computes them at runtime. With
        shuffled copies, a shuffled copy of every graph is created and used
        when a graph is compared with itself, so that the graph is compared
        to an isomorphic but non-identical graph. Without shuffled copies,
        comparing a graph ID with itself compares the graph to itself.
        """

        LAZY_WITHOUT_SHUFFLED_COPIES = 1  # Lazy, no shuffled copies.
        EAGER_WITHOUT_SHUFFLED_COPIES = 2  # Eager, no shuffled copies.
        LAZY_WITH_SHUFFLED_COPIES = 3  # Lazy, with shuffled copies.
        EAGER_WITH_SHUFFLED_COPIES = 4  # Eager, with shuffled copies.

    @unique
    class AlgorithmState(Enum):
        """State an algorithm can be in."""

        CALLED = 1  # The algorithm has been called.
        INITIALIZED = 2  # The algorithm has been initialized.
        CONVERGED = 3  # The algorithm has converged.
        TERMINATED = 4  # The algorithm has terminated.
class OptionsStringMap(object):
    """Maps option names (strings) to the matching ``Options`` enum members.

    Each map is derived from its enum, so every member is reachable under
    its own name and the maps cannot drift from the enum definitions. The
    previous hand-written ``InitType`` map listed
    ``"LAZY_WITH_SHUFFLED_COPIES"`` twice and had no entry for
    ``"EAGER_WITH_SHUFFLED_COPIES"``.
    """

    # Available computation methods, keyed by member name.
    GEDMethod = {member.name: member for member in Options.GEDMethod}

    # Available edit cost functions, keyed by member name.
    EditCosts = {member.name: member for member in Options.EditCosts}

    # Available initialization types of the environment, keyed by member name.
    InitType = {member.name: member for member in Options.InitType}
@unique
class AlgorithmState(Enum):
    """State an algorithm can be in."""

    CALLED = 1  # The algorithm has been called.
    INITIALIZED = 2  # The algorithm has been initialized.
    CONVERGED = 3  # The algorithm has converged.
    TERMINATED = 4  # The algorithm has terminated.
| """can be used to specify the state of an algorithm. | |||
| """ | |||
| CALLED = 1 # The algorithm has been called. | |||
| INITIALIZED = 2 # The algorithm has been initialized. | |||
| CONVERGED = 3 # The algorithm has converged. | |||
| TERMINATED = 4 # The algorithm has terminated. | |||
| @@ -0,0 +1,181 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Wed Jun 17 15:05:01 2020 | |||
| @author: ljia | |||
| """ | |||
| from gklearn.ged.env import Options, OptionsStringMap | |||
| from gklearn.ged.edit_costs import Constant | |||
| from gklearn.utils import SpecialLabel, dummy_node | |||
class GEDData(object):
    """Holds the graphs and the edit cost model of a GED environment.

    The attributes are plain containers; ``GEDEnv`` reads and fills them
    directly.
    """

    def __init__(self):
        self._graphs = []
        self._graph_names = []
        self._graph_classes = []
        self._num_graphs_without_shuffled_copies = 0
        self._strings_to_internal_node_ids = []
        self._internal_node_ids_to_strings = []
        self._edit_cost = None
        # Pre-computed cost matrices for eager initialization. Nothing in
        # this class builds them; they stay None until some other code does.
        self._node_costs = None
        self._edge_costs = None
        self._node_labels = []
        self._edge_labels = []
        self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
        self._delete_edit_cost = True
        self._max_num_nodes = 0
        self._max_num_edges = 0

    def num_graphs(self):
        """Return the number of graphs stored in the instance."""
        return len(self._graphs)

    def shuffled_graph_copies_available(self):
        """Return True if the initialization type is one with shuffled copies."""
        return self._init_type in (
            Options.InitType.EAGER_WITH_SHUFFLED_COPIES,
            Options.InitType.LAZY_WITH_SHUFFLED_COPIES,
        )

    def node_cost(self, label1, label2):
        """Return the node relabeling, insertion, or deletion cost.

        :param label1: First node label.
        :param label2: Second node label.
        :return: 0 if the labels are equal, the insertion cost of ``label2``
            if ``label1`` is the dummy label, the deletion cost of ``label1``
            if ``label2`` is the dummy label, and the relabeling cost
            otherwise. Under eager initialization the value is read from
            the pre-computed node cost matrix when that matrix exists.
        """
        # Only index the matrix if it has been built; otherwise compute the
        # cost on the fly instead of failing on ``None[...]``.
        if self._eager_init() and self._node_costs is not None:  # TODO: check if correct.
            return self._node_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:  # TODO: check dummy.
            return self._edit_cost.node_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:  # TODO: check dummy.
            return self._edit_cost.node_del_cost_fun(label1)
        return self._edit_cost.node_rel_cost_fun(label1, label2)

    def edge_cost(self, label1, label2):
        """Return the edge relabeling, insertion, or deletion cost.

        :param label1: First edge label.
        :param label2: Second edge label.
        :return: 0 if the labels are equal, the insertion cost of ``label2``
            if ``label1`` is the dummy label, the deletion cost of ``label1``
            if ``label2`` is the dummy label, and the relabeling cost
            otherwise. Under eager initialization the value is read from
            the pre-computed edge cost matrix when that matrix exists.
        """
        # Edge costs come from the edge matrix (the node matrix was used
        # here before), and only if that matrix has been built.
        if self._eager_init() and self._edge_costs is not None:  # TODO: check if correct.
            return self._edge_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_del_cost_fun(label1)
        return self._edit_cost.edge_rel_cost_fun(label1, label2)

    def compute_induced_cost(self, g, h, node_map):
        """Compute the edit cost between two graphs induced by a node map.

        The result is not returned; it is stored on ``node_map`` through
        ``set_induced_cost``.

        :param g: Input graph.
        :param h: Input graph.
        :param node_map: Node map whose induced edit cost is computed.
        """
        cost = 0

        # Nodes of g: relabeled, or deleted when mapped to the dummy node.
        for node in g.nodes():
            image = node_map.image(node)
            label2 = (SpecialLabel.DUMMY if image == dummy_node() else h.nodes[image]['label'])
            cost += self.node_cost(g.nodes[node]['label'], label2)
        # Nodes of h without a pre-image in g are insertions.
        for node in h.nodes():
            pre_image = node_map.pre_image(node)
            if pre_image == dummy_node():
                cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label'])

        # Edges of g: relabeled, or deleted when h has no edge between the images.
        for (n1, n2) in g.edges():
            image1 = node_map.image(n1)
            image2 = node_map.image(n2)
            label2 = (h.edges[(image2, image1)]['label'] if h.has_edge(image2, image1) else SpecialLabel.DUMMY)
            cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2)
        # Edges of h without a counterpart between the pre-images are insertions.
        for (n1, n2) in h.edges():
            if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)):
                cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label'])

        node_map.set_induced_cost(cost)

    def _set_edit_cost(self, edit_cost, edit_cost_constants):
        """Select the edit cost model.

        :param edit_cost: An ``Options.EditCosts`` member or its name.
        :param edit_cost_constants: Constants passed to the constructor of
            the selected cost class; empty (or None) selects its defaults.
        :raises KeyError: If ``edit_cost`` is a string that names no edit cost.
        :raises Exception: If the number of constants does not fit the
            selected edit costs.
        :raises NotImplementedError: If the selected edit costs have no
            implementation here; only ``CONSTANT`` has one.
        """
        if self._delete_edit_cost:
            self._edit_cost = None

        if isinstance(edit_cost, str):
            edit_cost = OptionsStringMap.EditCosts[edit_cost]
        constants = [] if edit_cost_constants is None else list(edit_cost_constants)

        # Number of constants each cost model accepts (besides none at all).
        num_constants = {
            Options.EditCosts.CHEM_1: 4,
            Options.EditCosts.LETTER: 3,
            Options.EditCosts.LETTER2: 5,
            Options.EditCosts.NON_SYMBOLIC: 6,
            Options.EditCosts.CONSTANT: 6,
        }
        expected = num_constants.get(edit_cost)
        if expected is not None and len(constants) not in (0, expected):
            raise Exception(
                'Wrong number of constants for selected edit costs '
                'Options::EditCosts::%s. Expected: %d or 0; actual: %d.'
                % (edit_cost.name, expected, len(constants)))

        # Only Constant is imported by this module; fail with a clear error
        # for the other models instead of a NameError or a silent None.
        if edit_cost != Options.EditCosts.CONSTANT:
            raise NotImplementedError(
                'The edit costs %r are not implemented.' % (edit_cost,))
        self._edit_cost = Constant(*constants)

        self._delete_edit_cost = True

    def _eager_init(self):
        """Return True if the initialization type is an eager one."""
        return self._init_type in (
            Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES,
            Options.InitType.EAGER_WITH_SHUFFLED_COPIES,
        )
| @@ -0,0 +1,369 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Wed Jun 17 12:02:36 2020 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import networkx as nx | |||
| from gklearn.ged.env import Options, OptionsStringMap | |||
| from gklearn.ged.env import GEDData | |||
| class GEDEnv(object): | |||
def __init__(self):
    """Create an empty, uninitialized environment with no method selected."""
    # Graph storage and edit costs.
    self.__ged_data = GEDData()
    self.__new_graph_ids = []
    self.__initialized = False
    # Per graph: mapping between user node IDs and internal node IDs.
    self.__original_to_internal_node_ids = []
    self.__internal_to_original_node_ids = []
    # Results of the method runs.
    self.__lower_bounds = {}
    self.__upper_bounds = {}
    self.__runtimes = {}
    self.__node_maps = {}
    # No GED method has been selected yet.
    self.__ged_method = None
def set_edit_cost(self, edit_cost, edit_cost_constants=None):
    """Set the edit costs to one of the predefined edit costs.

    :param edit_cost: The predefined edit costs to select.
    :param edit_cost_constants: Constants passed to the constructor of the
        selected edit cost class. ``None`` (the default) stands for an
        empty list.
    """
    # A fresh list per call; a mutable default would be shared between calls.
    if edit_cost_constants is None:
        edit_cost_constants = []
    self.__ged_data._set_edit_cost(edit_cost, edit_cost_constants)
def add_graph(self, graph_name='', graph_class=''):
    """Add a new, empty graph to the environment.

    The environment is marked as uninitialized; call ``init()`` afterwards.

    :param graph_name: Name of the added graph. Empty if not specified.
    :param graph_class: Class of the added graph. Empty if not specified.
    :return: The ID of the newly added graph.
    """
    # TODO: graphs are not uninitialized.
    data = self.__ged_data
    self.__initialized = False

    # The next free ID is the current number of graphs without shuffled copies.
    graph_id = data._num_graphs_without_shuffled_copies
    data._num_graphs_without_shuffled_copies = graph_id + 1
    self.__new_graph_ids.append(graph_id)

    # One new entry per graph in every per-graph container.
    data._graphs.append(nx.Graph())
    data._graph_names.append(graph_name)
    data._graph_classes.append(graph_class)
    self.__original_to_internal_node_ids.append({})
    self.__internal_to_original_node_ids.append({})
    data._strings_to_internal_node_ids.append({})
    data._internal_node_ids_to_strings.append({})
    return graph_id
def add_node(self, graph_id, node_id, node_label):
    """Add a labeled node to a graph of the environment.

    The node is stored under an internal ID equal to the number of nodes the
    graph had before the call; the mappings between ``node_id`` and that
    internal ID are recorded in both directions.

    :param graph_id: ID of a graph that has been added to the environment.
    :param node_id: The user-specific ID of the node to add.
    :param node_label: The label of the node to add.
    """
    # TODO: check ids.
    self.__initialized = False
    data = self.__ged_data
    graph = data._graphs[graph_id]

    internal_node_id = nx.number_of_nodes(graph)
    graph.add_node(internal_node_id, label=node_label)

    self.__original_to_internal_node_ids[graph_id][node_id] = internal_node_id
    self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id
    node_name = str(node_id)
    data._strings_to_internal_node_ids[graph_id][node_name] = internal_node_id
    data._internal_node_ids_to_strings[graph_id][internal_node_id] = node_name
    # TODO: node_label_to_id_
def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True):
    """Add a labeled edge to a graph of the environment.

    :param graph_id: ID of a graph that has been added to the environment.
    :param nd_from: The user-specific ID of the tail of the edge.
    :param nd_to: The user-specific ID of the head of the edge.
    :param edge_label: The label of the edge to add.
    :param ignore_duplicates: If False, an exception is raised when the
        edge already exists. If True, the edge is passed on to the graph
        without a check.
    :raises KeyError: If ``nd_from`` or ``nd_to`` was not added to the graph.
    :raises Exception: If ``ignore_duplicates`` is False and the edge exists.
    """
    # TODO: check everything.
    self.__initialized = False
    graph = self.__ged_data._graphs[graph_id]
    node_ids = self.__original_to_internal_node_ids[graph_id]
    tail = node_ids[nd_from]
    head = node_ids[nd_to]
    # NOTE(review): with ignore_duplicates=True the graph's own add_edge
    # decides what happens to an existing edge - confirm that is intended.
    if not ignore_duplicates and graph.has_edge(tail, head):
        raise Exception(
            'An edge between the nodes %s and %s already exists in the graph with ID %s.'
            % (nd_from, nd_to, graph_id))
    graph.add_edge(tail, head, label=edge_label)
    # TODO: edge_id and label_id, edge_label_to_id_.
def add_nx_graph(self, g, classe, ignore_duplicates=True):
    """Add a networkx graph to the environment.

    Every node and every edge is added with a label built from its
    attributes: the tuple of ``(key, value)`` pairs sorted by key.

    :param g: The graph to add.
    :param classe: The class of the graph.
    :param ignore_duplicates: Passed on to ``add_edge`` for every edge.
        True by default.
    :type g: networkx.Graph
    :type ignore_duplicates: bool
    :return: The ID of the newly added graph.
    """
    def as_label(attributes):
        # TODO: attribute keys that mix int and str cannot be sorted.
        return tuple(sorted(attributes.items(), key=lambda kv: kv[0]))

    # TODO: check if the graph name already exists.
    graph_id = self.add_graph(g.name, classe)
    for node in g.nodes:
        self.add_node(graph_id, node, as_label(g.nodes[node]))
    for edge in g.edges:
        tail, head = edge[0], edge[1]
        self.add_edge(graph_id, tail, head, as_label(g.edges[(tail, head)]), ignore_duplicates)
    return graph_id
def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False):
    """Initialize the environment after graphs have been added.

    Does nothing if the environment is already initialized.

    :param init_type: An ``Options.InitType`` member or its name.
    :param print_to_stdout: Accepted but not used by this method.
    :raises KeyError: If ``init_type`` is a string that names no
        initialization type.
    :raises Exception: If no edit costs have been selected.
    """
    if isinstance(init_type, str):
        init_type = OptionsStringMap.InitType[init_type]

    data = self.__ged_data

    # Fail early if no edit costs have been selected.
    if data._edit_cost is None:
        raise Exception('No edit costs have been selected. Call set_edit_cost() before calling init().')

    # Nothing to do if the environment is already initialized.
    if self.__initialized:
        return

    # Set initialization type.
    data._init_type = init_type

    # TODO: construct shuffled graph copies if necessary.

    # TODO: setup_adjacency_matrix, don't know if necessary.
    # ``default=0`` keeps an environment without graphs from raising.
    data._max_num_nodes = max((nx.number_of_nodes(g) for g in data._graphs), default=0)
    data._max_num_edges = max((nx.number_of_edges(g) for g in data._graphs), default=0)

    # Initialize cost matrices if necessary.
    if data._eager_init():
        # TODO: init_cost_matrices_: update the node and edge cost matrices
        # if new node or edge labels have been added to the environment.
        pass

    # Mark environment as initialized.
    self.__initialized = True
    self.__new_graph_ids.clear()
def set_method(self, method, options=''):
    """Set the GED method to be used by ``run_method()``.

    Only ``BIPARTITE`` has an implementation that this method can import;
    selecting any other method raises ``NotImplementedError``.

    :param method: An ``Options.GEDMethod`` member or its name.
    :param options: Options for the selected method, passed on to its
        ``set_options``.
    :raises KeyError: If ``method`` is a string that names no GED method.
    :raises NotImplementedError: If the selected method is not implemented.
    """
    # Reset instead of ``del``: the attribute must still exist if the
    # selection below fails, so run_method() can report "no method set".
    self.__ged_method = None

    if isinstance(method, str):
        method = OptionsStringMap.GEDMethod[method]

    if method == Options.GEDMethod.BIPARTITE:
        from gklearn.ged.methods import Bipartite
        ged_method = Bipartite(self.__ged_data)
    else:
        # The other methods have no class available here; fail clearly
        # instead of with a NameError.
        raise NotImplementedError('The GED method %r is not implemented.' % (method,))

    ged_method.set_options(options)
    self.__ged_method = ged_method
def run_method(self, g_id, h_id):
    """Run the GED method selected by set_method() on two graphs.

    The lower bound, upper bound, runtime and node map reported by the
    method after the run are stored under the key ``(g_id, h_id)``.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :raises Exception: if an ID is not smaller than the number of graphs,
        if the environment is uninitialized, or if no method has been set.
    """
    num_graphs = self.__ged_data.num_graphs()
    for graph_id in (g_id, h_id):
        if graph_id >= num_graphs:
            # Build one message string; passing several positional
            # arguments to Exception would render as a tuple.
            raise Exception('The graph with ID ' + str(graph_id) + ' has not been added to the environment.')
    if not self.__initialized:
        raise Exception('The environment is uninitialized. Call init() after adding all graphs to the environment.')
    if self.__ged_method is None:
        raise Exception('No method has been set. Call set_method() before calling run().')

    # Call selected GEDMethod and store results.
    if self.__ged_data.shuffled_graph_copies_available() and (g_id == h_id):
        # A graph compared with itself is run against its shuffled copy.
        # @todo: why shuffle?
        self.__ged_method.run(g_id, self.__ged_data.id_shuffled_graph_copy(h_id))
    else:
        self.__ged_method.run(g_id, h_id)
    pair = (g_id, h_id)
    self.__lower_bounds[pair] = self.__ged_method.get_lower_bound()
    self.__upper_bounds[pair] = self.__ged_method.get_upper_bound()
    self.__runtimes[pair] = self.__ged_method.get_runtime()
    self.__node_maps[pair] = self.__ged_method.get_node_map()
def init_method(self):
    """Initialize the GED method previously selected with set_method().

    :raises Exception: if the environment has not been initialized or if
        no method has been set.
    """
    if self.__initialized:
        if self.__ged_method is not None:
            self.__ged_method.init()
            return
        raise Exception('No method has been set. Call set_method() before calling init_method().')
    raise Exception('The environment is uninitialized. Call init() before calling init_method().')
def get_upper_bound(self, g_id, h_id):
    """Return the upper bound stored for the graph pair ``(g_id, h_id)``.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Upper bound stored by the last call to run_method() with
        arguments g_id and h_id.
    :raises Exception: if no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__upper_bounds:
        return self.__upper_bounds[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_upper_bound(' + ids + ').')
def get_lower_bound(self, g_id, h_id):
    """Return the lower bound stored for the graph pair ``(g_id, h_id)``.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Lower bound stored by the last call to run_method() with
        arguments g_id and h_id.
    :raises Exception: if no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__lower_bounds:
        return self.__lower_bounds[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_lower_bound(' + ids + ').')
def get_runtime(self, g_id, h_id):
    """Return the runtime stored for the graph pair ``(g_id, h_id)``.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Runtime stored by the last call to run_method() with
        arguments g_id and h_id.
    :raises Exception: if no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__runtimes:
        return self.__runtimes[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_runtime(' + ids + ').')
def get_init_time(self):
    """Return the initialization time reported by the current GED method.

    :return: Whatever ``get_init_time()`` of the method set via
        set_method() returns.
    """
    method = self.__ged_method
    return method.get_init_time()
def get_node_map(self, g_id, h_id):
    """Return the node map stored for the graph pair ``(g_id, h_id)``.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Node map stored by the last call to run_method() with
        arguments g_id and h_id.
    :raises Exception: if no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__node_maps:
        return self.__node_maps[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_node_map(' + ids + ').')
def get_forward_map(self, g_id, h_id):
    """Return the forward map of the node map computed for two graphs.

    :param g_id: The ID of the first compared graph.
    :param h_id: The ID of the second compared graph.
    :return: The ``forward_map`` attribute of the node map returned by
        get_node_map() for ``(g_id, h_id)``.

    .. seealso:: run_method(), get_upper_bound(), get_lower_bound(), get_backward_map(), get_runtime(), get_node_map()
    .. warning:: run_method() between the same two graphs must be called before this function.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.forward_map
def get_backward_map(self, g_id, h_id):
    """Return the backward map of the node map computed for two graphs.

    :param g_id: The ID of the first compared graph.
    :param h_id: The ID of the second compared graph.
    :return: The ``backward_map`` attribute of the node map returned by
        get_node_map() for ``(g_id, h_id)``.

    .. seealso:: run_method(), get_upper_bound(), get_lower_bound(), get_forward_map(), get_runtime(), get_node_map()
    .. warning:: run_method() between the same two graphs must be called before this function.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.backward_map
def get_all_graph_ids(self):
    """Return the list ``[0, ..., n - 1]`` of graph IDs.

    ``n`` is read from the ``_num_graphs_without_shuffled_copies``
    attribute of the environment's GED data.
    """
    num_graphs = self.__ged_data._num_graphs_without_shuffled_copies
    return list(range(num_graphs))
| @@ -6,15 +6,27 @@ Created on Wed Apr 22 11:31:26 2020 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| from gklearn.utils import dummy_node, undefined_node | |||
| class NodeMap(object): | |||
def __init__(self, num_nodes_g, num_nodes_h):
    """Create a node map in which every node is still unassigned.

    :param num_nodes_g: Number of source nodes (length of the forward map).
    :param num_nodes_h: Number of target nodes (length of the backward map).
    """
    # Every entry starts as the undefined-node sentinel. The earlier
    # np.inf fill was overwritten immediately and has been dropped.
    self.__forward_map = [undefined_node()] * num_nodes_g
    self.__backward_map = [undefined_node()] * num_nodes_h
    self.__induced_cost = np.inf
def clear(self):
    """Reset every entry of both maps to the undefined node.

    The lengths of the forward and backward maps are kept; the lists
    themselves are replaced by new ones.
    """
    num_source = len(self.__forward_map)
    self.__forward_map = [undefined_node() for _ in range(num_source)]
    num_target = len(self.__backward_map)
    self.__backward_map = [undefined_node() for _ in range(num_target)]
def num_source_nodes(self):
    """Return the number of source nodes, i.e. the length of the forward map."""
    forward = self.__forward_map
    return len(forward)
| @@ -28,7 +40,7 @@ class NodeMap(object): | |||
| return self.__forward_map[node] | |||
| else: | |||
| raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') | |||
| return np.inf | |||
| return undefined_node() | |||
| def pre_image(self, node): | |||
| @@ -36,28 +48,28 @@ class NodeMap(object): | |||
| return self.__backward_map[node] | |||
| else: | |||
| raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.') | |||
| return np.inf | |||
| return undefined_node() | |||
def as_relation(self, relation):
    """Write the node map into ``relation`` as a list of ``(i, k)`` pairs.

    ``relation`` is emptied first. It then receives one pair per source
    node whose image is not the undefined node, followed by one pair per
    target node whose pre-image is the dummy node.

    :param relation: List that is cleared and filled in place.
    """
    relation.clear()
    # Only the undefined_node()/dummy_node() sentinels are compared; the
    # stale np.inf comparisons they replaced are dropped.
    for i, k in enumerate(self.__forward_map):
        if k != undefined_node():
            relation.append((i, k))
    for k, i in enumerate(self.__backward_map):
        if i == dummy_node():
            relation.append((i, k))
| def add_assignment(self, i, k): | |||
| if i != np.inf: | |||
| if i != dummy_node(): | |||
| if i < len(self.__forward_map): | |||
| self.__forward_map[i] = k | |||
| else: | |||
| raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.') | |||
| if k != np.inf: | |||
| if k != dummy_node(): | |||
| if k < len(self.__backward_map): | |||
| self.__backward_map[k] = i | |||
| else: | |||
| @@ -0,0 +1,3 @@ | |||
| from gklearn.ged.methods.ged_method import GEDMethod | |||
| from gklearn.ged.methods.lsape_based_method import LSAPEBasedMethod | |||
| from gklearn.ged.methods.bipartite import Bipartite | |||
| @@ -0,0 +1,117 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Jun 18 16:09:29 2020 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import networkx as nx | |||
| from gklearn.ged.methods import LSAPEBasedMethod | |||
| from gklearn.ged.util import LSAPESolver | |||
| from gklearn.utils import SpecialLabel | |||
class Bipartite(LSAPEBasedMethod):
    """LSAPE-based GED method using node costs plus incident-edge costs.

    Each entry of the master LSAPE instance is the cost of substituting,
    deleting or inserting a node, including a cost for the edges incident
    to it. Lower-bound computation is switched off for this method.
    """

    def __init__(self, ged_data):
        """Initialize the base method and disable the lower bound.

        :param ged_data: GED data object providing ``node_cost`` and
            ``edge_cost``.
        """
        super().__init__(ged_data)
        self._compute_lower_bound = False

    ###########################################################################
    # Inherited member functions from LSAPEBasedMethod.
    ###########################################################################

    def _lsape_populate_instance(self, g, h, master_problem):
        """Fill the substitution, deletion and insertion entries.

        For n nodes in ``g`` and m nodes in ``h``, entry ``[i, k]`` with
        ``i < n`` and ``k < m`` receives the substitution cost, entry
        ``[i, m + i]`` the deletion cost of node ``i`` and entry
        ``[n + k, k]`` the insertion cost of node ``k``. All other entries
        are left untouched.

        :param g: Input graph; nodes are addressed as ``0..n-1``
            (presumably integer node IDs -- verify against the caller).
        :param h: Input graph; nodes are addressed as ``0..m-1``.
        :param master_problem: Matrix indexed as ``[row, col]``, modified
            in place.
        """
        # #ifdef _OPENMP
        num_g, num_h = nx.number_of_nodes(g), nx.number_of_nodes(h)
        for row in range(num_g):
            for col in range(num_h):
                master_problem[row, col] = self._compute_substitution_cost(g, h, row, col)
        for row in range(num_g):
            master_problem[row, num_h + row] = self._compute_deletion_cost(g, row)
        for col in range(num_h):
            master_problem[num_g + col, col] = self._compute_insertion_cost(h, col)

    ###########################################################################
    # Helper member functions.
    ###########################################################################

    def _compute_substitution_cost(self, g, h, u, v):
        """Return the cost of substituting node ``u`` of ``g`` by ``v`` of ``h``.

        The node substitution cost is increased by the minimal cost of an
        LSAPE subproblem built from the labels of the edges incident to
        ``u`` and ``v``.

        :param g: Graph containing ``u``.
        :param h: Graph containing ``v``.
        :param u: Node of ``g``.
        :param v: Node of ``h``.
        :return: Node substitution cost plus minimal subproblem cost.
        """
        # Collect node substitution costs.
        cost = self._ged_data.node_cost(g.nodes[u]['label'], h.nodes[v]['label'])

        # Initialize subproblem: infinite everywhere except the
        # dummy-to-dummy block, which costs nothing.
        d1, d2 = g.degree[u], h.degree[v]
        subproblem = np.full((d1 + d2, d1 + d2), np.inf)
        subproblem[d1:, d2:] = 0

        edge_cost = self._ged_data.edge_cost
        labels_u = [attrs['label'] for attrs in g[u].values()]  # edges at u
        labels_v = [attrs['label'] for attrs in h[v].values()]  # edges at v

        # Collect edge deletion costs.
        # @todo: should directed graphs be considered?
        for i, label in enumerate(labels_u):
            subproblem[i, d2 + i] = edge_cost(label, SpecialLabel.DUMMY)

        # Collect edge insertion costs.
        # @todo: should directed graphs be considered?
        for j, label in enumerate(labels_v):
            subproblem[d1 + j, j] = edge_cost(SpecialLabel.DUMMY, label)

        # Collect edge relabelling costs.
        for i, label_u in enumerate(labels_u):
            for j, label_v in enumerate(labels_v):
                subproblem[i, j] = edge_cost(label_u, label_v)

        # Solve subproblem.
        subproblem_solver = LSAPESolver(subproblem)
        subproblem_solver.set_model(self._lsape_model)
        subproblem_solver.solve()

        # Update and return overall substitution cost.
        cost += subproblem_solver.minimal_cost()
        return cost

    def _compute_deletion_cost(self, g, v):
        """Return the cost of deleting node ``v`` of ``g`` with its edges.

        :param g: Graph containing ``v``.
        :param v: Node of ``g``.
        :return: Node deletion cost plus the deletion cost of every edge
            listed in ``g[v]``.
        """
        # Collect node deletion cost.
        cost = self._ged_data.node_cost(g.nodes[v]['label'], SpecialLabel.DUMMY)

        # Collect edge deletion costs.
        for attrs in g[v].values():
            cost += self._ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)

        # Return overall deletion cost.
        return cost

    def _compute_insertion_cost(self, g, v):
        """Return the cost of inserting node ``v`` of ``g`` with its edges.

        :param g: Graph containing ``v``.
        :param v: Node of ``g``.
        :return: Node insertion cost plus the insertion cost of every edge
            listed in ``g[v]``.
        """
        # Collect node insertion cost.
        cost = self._ged_data.node_cost(SpecialLabel.DUMMY, g.nodes[v]['label'])

        # Collect edge insertion costs.
        for attrs in g[v].values():
            cost += self._ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])

        # Return overall insertion cost.
        return cost
| @@ -0,0 +1,195 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Jun 18 15:52:35 2020 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import time | |||
| import networkx as nx | |||
class GEDMethod(object):
    """Base class of graph edit distance (GED) methods.

    It handles option parsing, timing and storage of the latest results;
    derived classes supply the behavior through the ``_ged_*`` hooks.
    """

    def __init__(self, ged_data):
        """Store the GED data and reset all results.

        :param ged_data: Object whose ``_graphs`` attribute is indexed by
            graph ID in run().
        """
        self._initialized = False
        self._ged_data = ged_data
        self._options = None
        self._lower_bound = 0
        self._upper_bound = np.inf
        self._node_map = [0, 0]  # @todo: placeholder until run() stores a node map.
        self._runtime = None
        self._init_time = None

    def init(self):
        """Initialize the method with the options given to set_options().

        The duration of the ``_ged_init()`` call is recorded and available
        through get_init_time().
        """
        start = time.time()
        self._ged_init()
        end = time.time()
        self._init_time = end - start
        self._initialized = True

    def set_options(self, options):
        """Reset all options to their defaults, then apply ``options``.

        The method is marked as uninitialized afterwards.

        :param options: Mapping from option name to option value.
        :raises Exception: if ``_ged_parse_option()`` rejects an option.
        """
        self._ged_set_default_options()
        for key, val in options.items():
            if not self._ged_parse_option(key, val):
                # One formatted string; several positional arguments would
                # make the message render as a tuple.
                # @todo: _ged_valid_options_string() is not implemented.
                raise Exception('Invalid option "' + str(key) + '". Usage: options = "' + self._ged_valid_options_string() + '".')
        self._initialized = False

    def run(self, g_id, h_id):
        """Run the method on two graphs of the GED data and store the results.

        Lower bound, upper bound and runtime are always updated; the node
        map is updated only if at least one node map was produced.

        :param g_id: ID of input graph.
        :param h_id: ID of input graph.
        """
        start = time.time()
        result = self.run_as_util(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id])
        end = time.time()
        self._lower_bound = result['lower_bound']
        self._upper_bound = result['upper_bound']
        if len(result['node_maps']) > 0:
            self._node_map = result['node_maps'][0]
        self._runtime = end - start

    def run_as_util(self, g, h):
        """Run the method on two graphs given directly.

        :param g: Input graph.
        :param h: Input graph.
        :return: Result of ``_ged_run(g, h)``; run() reads the keys
            ``'lower_bound'``, ``'upper_bound'`` and ``'node_maps'`` from it.
        """
        # Compute optimal solution and return if at least one of the two graphs is empty.
        if nx.number_of_nodes(g) == 0 or nx.number_of_nodes(h) == 0:
            # @todo: the empty-graph shortcut is not implemented; a notice
            # is printed and the method is run regardless.
            print('This is not implemented.')

        # Run the method.
        return self._ged_run(g, h)

    def get_upper_bound(self):
        """Return the upper bound stored by the last call to run().

        :return: The stored upper bound; ``np.inf`` before any run.
        """
        return self._upper_bound

    def get_lower_bound(self):
        """Return the lower bound stored by the last call to run().

        :return: The stored lower bound; ``0`` before any run.
        """
        return self._lower_bound

    def get_runtime(self):
        """Return the runtime of the last call to run().

        :return: Runtime in seconds, or ``None`` before any run.
        """
        return self._runtime

    def get_init_time(self):
        """Return the runtime of the last call to init().

        :return: Initialization time in seconds, or ``None`` before init().
        """
        return self._init_time

    def get_node_map(self):
        """Return the node map stored by the last call to run().

        :return: The stored node map; the placeholder ``[0, 0]`` if no run
            has stored one yet.
        """
        return self._node_map

    def _ged_init(self):
        """Initialize the method.

        Must be overridden by derived classes that require initialization.
        """
        pass

    def _ged_parse_option(self, option, arg):
        """Parse one option.

        Must be overridden by derived classes that have options.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: ``True`` if ``option`` is a valid option name for the
            method and ``False`` otherwise; this base version accepts none.
        """
        return False

    def _ged_run(self, g, h):
        """Run the method on two graphs.

        Must be overridden by derived classes.

        :param g: Input graph.
        :param h: Input graph.
        :return: Result dictionary; this base version returns an empty one.
        """
        return {}

    def _ged_valid_options_string(self):
        """Return a string describing all valid options.

        Must be overridden by derived classes that have options.

        :return: Description of the valid options; empty in this base class.
        """
        return ''

    def _ged_set_default_options(self):
        """Set all options to default values.

        Must be overridden by derived classes that have options.
        """
        pass
| @@ -0,0 +1,254 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Thu Jun 18 16:01:24 2020 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| import networkx as nx | |||
| from gklearn.ged.methods import GEDMethod | |||
| from gklearn.ged.util import LSAPESolver, misc | |||
| from gklearn.ged.env import NodeMap | |||
class LSAPEBasedMethod(GEDMethod):
    """Base class of GED methods that solve an LSAPE instance.

    Derived classes fill the instance in ``_lsape_populate_instance()``;
    this class solves it, turns the solutions into node maps and derives
    the bounds from them.
    """

    def __init__(self, ged_data):
        """Initialize the method with its default options.

        :param ged_data: GED data object shared with the environment.
        """
        super().__init__(ged_data)
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._compute_lower_bound = True
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._centralities = {}
        self._max_num_solutions = 1

    def populate_instance_and_run_as_util(self, g, h):
        """Build the LSAPE instance for two graphs, solve it and collect results.

        :param g: Input graph.
        :param h: Input graph.
        :return: Dictionary with the keys ``'node_maps'`` (list of node
            maps), ``'lower_bound'`` and ``'upper_bound'``.
        """
        result = {'node_maps': [], 'lower_bound': 0, 'upper_bound': np.inf}

        # Populate the LSAPE instance and set up the solver.
        num_g, num_h = nx.number_of_nodes(g), nx.number_of_nodes(h)
        lsape_instance = np.full((num_g + num_h, num_g + num_h), np.inf)
        self.populate_instance(g, h, lsape_instance)
        lsape_solver = LSAPESolver(lsape_instance)

        # Solve the LSAPE instance.
        if self._solve_optimally:
            lsape_solver.set_model(self._lsape_model)
        else:
            lsape_solver.set_greedy_method(self._greedy_method)
        lsape_solver.solve(self._max_num_solutions)

        # Compute and store lower and upper bound.
        if self._compute_lower_bound and self._solve_optimally:
            result['lower_bound'] = lsape_solver.minimal_cost() * self._lsape_lower_bound_scaling_factor(g, h)  # @todo: test

        for solution_id in range(lsape_solver.num_solutions()):
            node_map = NodeMap(num_g, num_h)
            result['node_maps'].append(node_map)
            misc.construct_node_map_from_solver(lsape_solver, node_map, solution_id)
            self._ged_data.compute_induced_cost(g, h, node_map)

        # Add centralities and reoptimize.
        if self._centrality_weight > 0 and self._centrality_method != 'NODE':
            print('This is not implemented.')  # @todo

        # Sort the node maps and set the upper bound.
        if len(result['node_maps']) > 1 or len(result['node_maps']) > self._max_num_solutions:
            print('This is not implemented.')  # @todo

        if len(result['node_maps']) == 0:
            result['upper_bound'] = np.inf
        else:
            result['upper_bound'] = result['node_maps'][0].induced_cost()

        return result

    def populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance for two graphs.

        After the derived class has filled its entries, the block
        ``[n:, m:]`` (n and m being the node counts of ``g`` and ``h``)
        is set to zero.

        :param g: Input graph.
        :param h: Input graph.
        :param lsape_instance: Matrix that is filled in place.
        """
        if not self._initialized:
            # @todo: runtime initialization of an uninitialized method is
            # not implemented.
            pass
        self._lsape_populate_instance(g, h, lsape_instance)
        lsape_instance[nx.number_of_nodes(g):, nx.number_of_nodes(h):] = 0

    ###########################################################################
    # Member functions inherited from GEDMethod.
    ###########################################################################

    def _ged_init(self):
        """Run the pre-graph hook, initialize every graph, then run ``_lsape_init()``."""
        self._lsape_pre_graph_init(False)
        for graph in self._ged_data._graphs:
            self._init_graph(graph)
        self._lsape_init()

    def _ged_run(self, g, h):
        """Run the method by delegating to populate_instance_and_run_as_util().

        :param g: Input graph.
        :param h: Input graph.
        :return: The result dictionary of populate_instance_and_run_as_util().
        """
        return self.populate_instance_and_run_as_util(g, h)

    def _ged_parse_option(self, option, arg):
        """Store one option and report it as valid.

        Options shared by all LSAPE-based methods are stored directly;
        any other option is passed on to ``_lsape_parse_option()``.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: Always ``True``. @todo: this is not in the C++ code, which
            rejects unknown options.
        """
        # Options whose argument is stored unchanged. @todo: validate them.
        plain_options = {
            'threads': '_num_threads',
            'lsape_model': '_lsape_model',
            'greedy_method': '_greedy_method',
            'optimal': '_solve_optimally',
            'centrality_method': '_centrality_method',
            'centrality_weight': '_centrality_weight',
        }
        if option in plain_options:
            setattr(self, plain_options[option], arg)
        elif option == 'max_num_solutions':
            # 'ALL' is encoded as -1.
            self._max_num_solutions = -1 if arg == 'ALL' else arg
        else:
            self._lsape_parse_option(option, arg)
        return True

    def _ged_set_default_options(self):
        """Reset the options shared by all LSAPE-based methods to their defaults."""
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._max_num_solutions = 1

    ###########################################################################
    # Private helper member functions.
    ###########################################################################

    def _init_graph(self, graph):
        """Initialize the per-graph data for one graph.

        :param graph: Graph to initialize.
        """
        if self._centrality_method != 'NODE':
            self._init_centralities(graph)  # @todo: not defined in this class.
        self._lsape_init_graph(graph)

    ###########################################################################
    # Virtual member functions to be overridden by derived classes.
    ###########################################################################

    def _lsape_init(self):
        """Initialize the method after the per-graph initialization.

        Must be overridden by derived classes that require custom
        initialization.
        """
        pass

    def _lsape_parse_option(self, option, arg):
        """Parse one option that is not shared by all LSAPE-based methods.

        Must be overridden by derived classes that have such options.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: ``True`` if ``option`` is a valid option name for the
            method and ``False`` otherwise; this base version accepts none.
        """
        return False

    def _lsape_set_default_options(self):
        """Set the options not shared by all LSAPE-based methods to defaults.

        Must be overridden by derived classes that have such options.
        """
        pass

    def _lsape_populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance.

        Must be overridden by derived classes.

        :param g: Input graph.
        :param h: Input graph.
        :param lsape_instance: Matrix of size (n + m) x (n + m), where n
            and m are the numbers of nodes in ``g`` and ``h``, as allocated
            by populate_instance_and_run_as_util(); filled in place.
        """
        pass

    def _lsape_init_graph(self, graph):
        """Initialize custom per-graph data for one graph.

        Must be overridden by derived classes that need such data.

        :param graph: Graph to initialize.
        """
        pass

    def _lsape_pre_graph_init(self, called_at_runtime):
        """Initialize the method before the per-graph initialization.

        Must be overridden by derived classes that require it.

        :param called_at_runtime: ``True`` if called at runtime and
            ``False`` if called during initialization.
        """
        pass

    def _lsape_lower_bound_scaling_factor(self, g, h):
        """Return the factor by which the LSAPE cost is scaled into a lower bound.

        populate_instance_and_run_as_util() calls this hook whenever a
        lower bound is computed, so a default must exist. Derived classes
        may override it.

        :param g: Input graph.
        :param h: Input graph.
        :return: ``1``, i.e. the LSAPE cost is used unscaled.
        """
        return 1
| @@ -1 +1,3 @@ | |||
| from gklearn.ged.util.lsape_solver import LSAPESolver | |||
| from gklearn.ged.util.util import compute_geds, ged_options_to_string | |||
| from gklearn.ged.util.util import compute_geds_cml | |||
| @@ -0,0 +1,121 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Mon Jun 22 15:37:36 2020 | |||
| @author: ljia | |||
| """ | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
| class LSAPESolver(object): | |||
| def __init__(self, cost_matrix=None): | |||
| """ | |||
| /*! | |||
| * @brief Constructs solver for LSAPE problem instance. | |||
| * @param[in] cost_matrix Pointer to the LSAPE problem instance that should be solved. | |||
| */ | |||
| """ | |||
| self.__cost_matrix = cost_matrix | |||
| self.__model = 'ECBP' | |||
| self.__greedy_method = 'BASIC' | |||
| self.__solve_optimally = True | |||
| self.__minimal_cost = 0 | |||
| self.__row_to_col_assignments = [] | |||
| self.__col_to_row_assignments = [] | |||
| self.__dual_var_rows = [] # @todo | |||
| self.__dual_var_cols = [] # @todo | |||
def clear_solution(self):
    """Clear a previously computed solution.

    The minimal cost becomes 0, both assignment lists are emptied in
    place and left holding one empty solution slot, and the dual
    variables are reset to empty lists.
    """
    self.__minimal_cost = 0
    # Slice assignment keeps the existing list objects, as clear() + append() did.
    self.__row_to_col_assignments[:] = [[]]  # @todo
    self.__col_to_row_assignments[:] = [[]]
    self.__dual_var_rows, self.__dual_var_cols = [], []  # @todo
def set_model(self, model):
    """Select the model for optimal solving and switch to optimal solving.

    :param model: The model that should be used.
    """
    self.__model = model
    self.__solve_optimally = True
def solve(self, num_solutions=1):
    """Solve the LSAPE problem instance.

    Any previous solution is cleared first. With optimal solving enabled
    the cost matrix is handed to ``linear_sum_assignment`` and the result
    is stored as solution 0; otherwise only a notice is printed.

    :param num_solutions: The maximal number of solutions that should be
        computed. Values above 1 are currently ignored (@todo).
    """
    self.clear_solution()
    if not self.__solve_optimally:
        print('here is non op.')  # @todo: greedy.
        return

    # @todo: only hungarianLSAPE ('ECBP') can be used.
    _, assigned_cols = linear_sum_assignment(self.__cost_matrix)
    self.__row_to_col_assignments[0] = assigned_cols
    # For a square cost matrix assigned_cols is a permutation, so argsort
    # yields its inverse. @todo: might be slow, can use the row indices.
    self.__col_to_row_assignments[0] = np.argsort(assigned_cols)
    self.__compute_cost_from_assignments()
    if num_solutions > 1:
        pass  # @todo: computing several solutions is not implemented.
| def minimal_cost(self): | |||
| """ | |||
| /*! | |||
| * @brief Returns the cost of the computed solutions. | |||
| * @return Cost of computed solutions. | |||
| */ | |||
| """ | |||
| return self.__minimal_cost | |||
| def get_assigned_col(self, row, solution_id=0): | |||
| """ | |||
| /*! | |||
| * @brief Returns the assigned column. | |||
| * @param[in] row Row whose assigned column should be returned. | |||
| * @param[in] solution_id ID of the solution where the assignment should be looked up. | |||
| * @returns Column to which @p row is assigned to in solution with ID @p solution_id or ged::undefined() if @p row is not assigned to any column. | |||
| */ | |||
| """ | |||
| return self.__row_to_col_assignments[solution_id][row] | |||
| def get_assigned_row(self, col, solution_id=0): | |||
| """ | |||
| /*! | |||
| * @brief Returns the assigned row. | |||
| * @param[in] col Column whose assigned row should be returned. | |||
| * @param[in] solution_id ID of the solution where the assignment should be looked up. | |||
| * @returns Row to which @p col is assigned to in solution with ID @p solution_id or ged::undefined() if @p col is not assigned to any row. | |||
| */ | |||
| """ | |||
| return self.__col_to_row_assignments[solution_id][col] | |||
| def num_solutions(self): | |||
| """ | |||
| /*! | |||
| * @brief Returns the number of solutions. | |||
| * @returns Actual number of solutions computed by solve(). Might be smaller than @p num_solutions. | |||
| */ | |||
| """ | |||
| return len(self.__row_to_col_assignments) | |||
| def __compute_cost_from_assignments(self): # @todo | |||
| self.__minimal_cost = np.sum(self.__cost_matrix[range(0, len(self.__row_to_col_assignments[0])), self.__row_to_col_assignments[0]]) | |||
| @@ -5,6 +5,27 @@ Created on Thu Mar 19 18:13:56 2020 | |||
| @author: ljia | |||
| """ | |||
| from gklearn.utils import dummy_node | |||
def construct_node_map_from_solver(solver, node_map, solution_id):
    """Rebuild a node map from one solution of an LSAPE solver.

    ``node_map`` is cleared first and then filled in place; nothing is
    returned.

    Parameters
    ----------
    solver
        Object providing ``get_assigned_col(row, solution_id)`` and
        ``get_assigned_row(col, solution_id)``.
    node_map
        Object providing ``clear()``, ``num_source_nodes()``,
        ``num_target_nodes()`` and ``add_assignment(source, target)``.
    solution_id
        ID of the solver solution to read the assignments from.
    """
    node_map.clear()
    n_source = node_map.num_source_nodes()
    n_target = node_map.num_target_nodes()

    # Deletions and substitutions: a source node whose assigned column lies
    # outside the target nodes is mapped to the dummy node.
    for source in range(0, n_source):
        target = solver.get_assigned_col(source, solution_id)
        node_map.add_assignment(
            source, dummy_node() if target >= n_target else target)

    # Insertions: a target node whose assigned row lies outside the source
    # nodes is mapped from the dummy node.
    for target in range(0, n_target):
        assigned_row = solver.get_assigned_row(target, solution_id)
        if assigned_row >= n_source:
            node_map.add_assignment(dummy_node(), target)
| def options_string_to_options_map(options_string): | |||
| """Transforms an options string into an options map. | |||
| @@ -13,6 +13,7 @@ from functools import partial | |||
| import sys | |||
| from tqdm import tqdm | |||
| import networkx as nx | |||
| from gklearn.ged.env import GEDEnv | |||
| from gklearn.gedlib import librariesImport, gedlibpy | |||
| @@ -22,7 +23,7 @@ def compute_ged(g1, g2, options): | |||
| ged_env.add_nx_graph(g1, '') | |||
| ged_env.add_nx_graph(g2, '') | |||
| listID = ged_env.get_all_graph_ids() | |||
| ged_env.init() | |||
| ged_env.init(init_type=options['init_option']) | |||
| ged_env.set_method(options['method'], ged_options_to_string(options)) | |||
| ged_env.init_method() | |||
| @@ -46,6 +47,82 @@ def compute_ged(g1, g2, options): | |||
| return dis, pi_forward, pi_backward | |||
def compute_geds_cml(graphs, options=None, sort=True, parallel=False, verbose=True):
    """Compute the pairwise graph edit distances of ``graphs`` with ``GEDEnv``.

    Parameters
    ----------
    graphs : list
        Graphs to compare; each one is added to the GED environment with
        ``add_nx_graph``.
    options : dict
        GED settings. The keys read here are ``'edit_cost'``,
        ``'edit_cost_constants'``, ``'init_option'``, ``'method'``,
        ``'node_labels'``, ``'edge_labels'``, ``'node_attrs'`` and
        ``'edge_attrs'``; the whole mapping is also forwarded to
        ``GEDEnv.set_method``. The caller's mapping is never modified.
    sort : bool, optional
        In the sequential path, when True, each pair is computed from the
        graph with fewer nodes to the graph with more nodes. In the
        parallel path the flag is forwarded to the worker function.
    parallel : bool, optional
        Whether to distribute the pairs over a process pool. In that case
        the GED method itself is run with ``'threads'`` set to 1.
    verbose : bool, optional
        Whether to show a ``tqdm`` progress bar on stdout.

    Returns
    -------
    ged_vec : list
        Distances of all pairs ``(i, j)`` with ``i < j``, ordered by ``i``
        and then ``j``.
    ged_mat : numpy.ndarray
        Symmetric ``len(graphs) x len(graphs)`` distance matrix with a zero
        diagonal.
    n_edit_operations : list
        Per-pair numbers of edit operations, in the same order as
        ``ged_vec``.
    """
    # Work on a private shallow copy: 'threads' may be overwritten below and
    # that must not leak into the caller's dict (or into a shared default).
    options = {} if options is None else dict(options)
    n_graphs = len(graphs)

    # initialize ged env.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init(init_type=options['init_option'])
    if parallel:
        # The parallelism comes from the process pool below.
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # compute ged.
    neo_options = {'edit_cost': options['edit_cost'],
                   'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
                   'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
    ged_mat = np.zeros((n_graphs, n_graphs))
    if parallel:
        len_itr = n_graphs * (n_graphs - 1) // 2
        ged_vec = [0] * len_itr
        n_edit_operations = [0] * len_itr
        itr = combinations(range(0, n_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = len_itr // n_jobs + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            # Publish the shared objects as module globals in each worker.
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
        try:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
            if verbose:
                iterator = tqdm(iterator, desc='computing GEDs', file=sys.stdout)
            for i, j, dis, n_eo_tmp in iterator:
                # Position of the pair (i, j), i < j, in the flattened
                # upper triangle; (i + 1) * (i + 2) is always even.
                idx_itr = n_graphs * i + j - (i + 1) * (i + 2) // 2
                ged_vec[idx_itr] = dis
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_edit_operations[idx_itr] = n_eo_tmp
            pool.close()
        except BaseException:
            # Do not leave worker processes behind when a task fails or the
            # run is interrupted; join() requires a closed or terminated pool.
            pool.terminate()
            raise
        finally:
            pool.join()

    else:
        ged_vec = []
        n_edit_operations = []
        if verbose:
            iterator = tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout)
        else:
            iterator = range(n_graphs)
        for i in iterator:
            for j in range(i + 1, n_graphs):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    # Computed in the reverse direction, so the two maps
                    # swap roles.
                    dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
| def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True): | |||
| # initialize ged env. | |||
| ged_env = gedlibpy.GEDEnv() | |||
| @@ -13,5 +13,6 @@ __date__ = "March 2020" | |||
| from gklearn.preimage.preimage_generator import PreimageGenerator | |||
| from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | |||
| from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator | |||
| from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML | |||
| from gklearn.preimage.kernel_knn_cv import kernel_knn_cv | |||
| from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class | |||
| @@ -0,0 +1,57 @@ | |||
| """Tests of GEDEnv. | |||
| """ | |||
def test_GEDEnv():
    """Test GEDEnv by computing a graph edit distance between two graphs
    of the predefined dataset "MUTAG".
    """
    # 1. Get dataset.
    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'

    # Initialize a Dataset.
    dataset = Dataset()
    # Load predefined dataset "MUTAG".
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    # 2. Compute graph edit distance.
    try:
        from gklearn.ged.env import GEDEnv

        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                              edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
                              )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
        options = {'initialization_method': 'RANDOM',  # or 'NODE', etc.
                   'threads': 1  # parallel threads.
                   }
        ged_env.set_method('BIPARTITE',  # GED method.
                           options  # options for GED method.
                           )
        ged_env.init_method()  # initialize GED method.

        ged_env.run_method(listID[0], listID[1])  # run.

        pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
        # GED between two graphs; called only to make sure it runs.
        ged_env.get_upper_bound(listID[0], listID[1])

        import networkx as nx
        # Two separate asserts: in `assert a, b` the expression after the
        # comma is only the failure message and is never checked.
        assert len(pi_forward) == nx.number_of_nodes(graph1)
        assert len(pi_backward) == nx.number_of_nodes(graph2)

    except Exception as exception:
        assert False, exception


if __name__ == "__main__":
    test_GEDEnv()
| @@ -68,4 +68,7 @@ def test_median_preimage_generator(): | |||
| print('\n-------------------------------------') | |||
| print('fit method:', fit_method, '\n') | |||
| mpg_options['fit_method'] = fit_method | |||
| generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4)) | |||
| try: | |||
| generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4)) | |||
| except Exception as exception: | |||
| assert False, exception | |||
| @@ -20,7 +20,7 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||
| from gklearn.utils.timer import Timer | |||
| from gklearn.utils.utils import get_graph_kernel_by_name | |||
| from gklearn.utils.utils import compute_gram_matrices_by_class | |||
| from gklearn.utils.utils import SpecialLabel | |||
| from gklearn.utils.utils import SpecialLabel, dummy_node, undefined_node, dummy_edge | |||
| from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix | |||
| from gklearn.utils.trie import Trie | |||
| from gklearn.utils.knn import knn_cv, knn_classification | |||
| @@ -472,14 +472,6 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||
| for ed, attrs in G.edges(data=True): | |||
| attributes.append(tuple(attrs[aname] for aname in attr_names)) | |||
| return attributes | |||
@unique
class SpecialLabel(Enum):
    """Enumeration that can be used to define special labels.

    The ``@unique`` decorator guarantees that no two members share a value.
    """
    DUMMY = 1  # The dummy label.
    # The value is written out explicitly instead of using enum.auto,
    # because enum.auto does not exist in Python 3.5.
| def normalize_gram_matrix(gram_matrix): | |||
| @@ -506,4 +498,44 @@ def compute_distance_matrix(gram_matrix): | |||
| dis_max = np.max(np.max(dis_mat)) | |||
| dis_min = np.min(np.min(dis_mat[dis_mat != 0])) | |||
| dis_mean = np.mean(np.mean(dis_mat)) | |||
| return dis_mat, dis_max, dis_min, dis_mean | |||
| return dis_mat, dis_max, dis_min, dis_mean | |||
def dummy_node():
    """Return the ID of the dummy node.

    Returns
    -------
    float
        ``np.inf``, the value used as ID of the dummy node.
    """
    # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
    dummy_id = np.inf
    return dummy_id
def undefined_node():
    """Return the ID of an undefined node.

    Returns
    -------
    float
        ``np.inf``, the value used as ID of an undefined node.
    """
    undefined_id = np.inf
    return undefined_id
def dummy_edge():
    """Return the ID of the dummy edge.

    Returns
    -------
    float
        ``np.inf``, the value used as ID of the dummy edge.
    """
    dummy_id = np.inf
    return dummy_id
@unique
class SpecialLabel(Enum):
    """Enumeration that can be used to define special labels.

    The ``@unique`` decorator guarantees that no two members share a value.
    """
    DUMMY = 1  # The dummy label.
    # The value is written out explicitly instead of using enum.auto,
    # because enum.auto does not exist in Python 3.5.