| @@ -47,6 +47,18 @@ class NodeMap(object): | |||||
| return self.__backward_map | return self.__backward_map | ||||
def as_relation(self, relation):
    """Export this node map into *relation* as a list of (i, k) pairs.

    *relation* is cleared first. Substituted/deleted median nodes contribute
    (i, image-of-i); inserted target nodes (those whose pre-image is np.inf)
    contribute (np.inf, k).
    """
    relation.clear()
    # Forward direction: every median node i with a defined image k.
    for i, k in enumerate(self.__forward_map):
        if k != np.inf:
            relation.append((i, k))
    # Backward direction: target nodes k that are inserted (no pre-image).
    for k, i in enumerate(self.__backward_map):
        if i == np.inf:
            relation.append((i, k))
| def add_assignment(self, i, k): | def add_assignment(self, i, k): | ||||
| if i != np.inf: | if i != np.inf: | ||||
| if i < len(self.__forward_map): | if i < len(self.__forward_map): | ||||
| @@ -491,7 +491,7 @@ class MedianGraphEstimator(object): | |||||
| # Refine the sum of distances and the node maps for the converged median. | # Refine the sum of distances and the node maps for the converged median. | ||||
| self.__converged_sum_of_distances = self.__sum_of_distances | self.__converged_sum_of_distances = self.__sum_of_distances | ||||
| if self.__refine: | if self.__refine: | ||||
| self.__improve_sum_of_distances(timer) # @todo | |||||
| self.__improve_sum_of_distances(timer) | |||||
| # Record end time, set runtime and reset the number of initial medians. | # Record end time, set runtime and reset the number of initial medians. | ||||
| end = time.time() | end = time.time() | ||||
| @@ -526,8 +526,52 @@ class MedianGraphEstimator(object): | |||||
| print('Overall number of times the order decreased: ', self.__num_decrease_order) | print('Overall number of times the order decreased: ', self.__num_decrease_order) | ||||
| print('Overall number of times the order increased: ', self.__num_increase_order) | print('Overall number of times the order increased: ', self.__num_increase_order) | ||||
| print('===========================================================\n') | print('===========================================================\n') | ||||
def __improve_sum_of_distances(self, timer):
    """Refine the node maps and the sum of distances (SOD) for the converged median.

    Re-runs the GED method selected for the refinement phase on every
    (median, graph) pair, keeps the better node map, and recomputes the SOD.

    :param timer: a Timer-like object; ``expired()`` aborts the refinement loop.
    """
    # Use method selected for refinement phase.
    self.__ged_env.set_method(self.__refine_method, self.__refine_options)
    # Print information about current iteration.
    if self.__print_to_stdout == 2:
        progress = tqdm(desc='Improving node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
        print('\n===========================================================')
        print('Improving node maps and SOD for converged median.')
        print('-----------------------------------------------------------')
        progress.update(1)
    # Improving the node maps.
    for graph_id, node_map in self.__node_maps_from_median.items():
        # BUG FIX: was 'time.expired()' -- 'time' is the stdlib module; the
        # deadline lives on the 'timer' argument.
        if timer.expired():
            # NOTE(review): TERMINATED -> CONVERGED on expiry mirrors the
            # original code; confirm against the gedlib reference semantics.
            if self.__state == AlgorithmState.TERMINATED:
                self.__state = AlgorithmState.CONVERGED
            break
        self.__ged_env.run_method(self.__gen_median_id, graph_id)
        if self.__ged_env.get_upper_bound(self.__gen_median_id, graph_id) < node_map.induced_cost():
            self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__gen_median_id, graph_id)
        # Print information.
        if self.__print_to_stdout == 2:
            progress.update(1)
    # Recompute the SOD from scratch (the in-loop accumulation in the original
    # was dead work -- it was overwritten by this recomputation anyway).
    self.__sum_of_distances = 0.0
    for node_map in self.__node_maps_from_median.values():
        self.__sum_of_distances += node_map.induced_cost()
    # Print information.
    if self.__print_to_stdout == 2:
        print('===========================================================\n')
def __median_available(self):
    """Return True iff a generalized median graph has been computed."""
    # The id stays at np.inf until run() stores a real graph id.
    gen_id = self.__gen_median_id
    return gen_id != np.inf
def get_state(self):
    """Return the converged/terminated state of the last run.

    :raises Exception: if no median has been computed yet.
    """
    if self.__median_available():
        return self.__state
    raise Exception('No median has been computed. Call run() before calling get_state().')
| def get_sum_of_distances(self, state=''): | def get_sum_of_distances(self, state=''): | ||||
| """Returns the sum of distances. | """Returns the sum of distances. | ||||
| @@ -852,7 +896,7 @@ class MedianGraphEstimator(object): | |||||
| increased_order = False | increased_order = False | ||||
| # Increase the order as long as the best insertion delta is negative. | # Increase the order as long as the best insertion delta is negative. | ||||
| while self.__compute_best_insertion_delta(graphs, best_config, best_label) < - self.__epsilon: | |||||
| while self.__compute_best_insertion_delta(graphs, best_config, best_label) > - self.__epsilon: | |||||
| increased_order = True | increased_order = True | ||||
| self.__add_node_to_median(best_config, best_label, median) | self.__add_node_to_median(best_config, best_label, median) | ||||
| @@ -862,7 +906,291 @@ class MedianGraphEstimator(object): | |||||
| # Return true iff the order was increased. | # Return true iff the order was increased. | ||||
| return increased_order | return increased_order | ||||
def __compute_best_insertion_delta(self, graphs, best_config, best_label):
    """Compute the SOD delta of inserting one node into the median.

    Fills *best_config* (graph id -> assigned node id or np.inf) and
    *best_label* (the label of the new median node) and returns the delta;
    0.0 when no graph has an inserted node.
    """
    # Collect, per graph, the target nodes that the current node maps insert
    # (i.e. nodes whose pre-image in the median is undefined).
    inserted_nodes = {}
    found_insertion = False
    for graph_id, graph in graphs.items():
        inserted_nodes[graph_id] = []
        best_config[graph_id] = np.inf
        node_map = self.__node_maps_from_median[graph_id]
        for k in range(nx.number_of_nodes(graph)):
            if node_map.pre_image(k) == np.inf:
                found_insertion = True
                # Label kept as a hashable tuple of (name, value) items
                # (ordering caveat flagged '@todo' in the original).
                inserted_nodes[graph_id].append((k, tuple(item for item in graph.nodes[k].items())))
    # Nothing is inserted anywhere: an extra median node cannot pay off.
    if not found_insertion:
        return 0.0
    # Dispatch on the label structure / cost model.
    if len(self.__label_names['node_labels']) == 0 and len(self.__label_names['node_attrs']) == 0:
        return self.__compute_insertion_delta_unlabeled(inserted_nodes, best_config, best_label)
    if self.__constant_node_costs:
        return self.__compute_insertion_delta_constant(inserted_nodes, best_config, best_label)
    return self.__compute_insertion_delta_generic(inserted_nodes, best_config, best_label)
def __compute_insertion_delta_unlabeled(self, inserted_nodes, best_config, best_label):
    """Insertion delta in the unlabeled case (*best_label* is left untouched).

    Rebuilds *best_config* and returns the delta: each graph with an inserted
    node saves one insertion cost; each graph without one pays one deletion.
    """
    best_config.clear()
    delta = 0.0
    for graph_id, node_set in inserted_nodes.items():
        if node_set:
            # Map the new median node onto the first inserted node: without
            # labels the substitution is free, saving one insertion cost.
            best_config[graph_id] = node_set[0][0]
            delta -= self.__node_ins_cost
        else:
            # No inserted node here: the new median node must be deleted.
            best_config[graph_id] = np.inf
            delta += self.__node_del_cost
    return delta
def __compute_insertion_delta_constant(self, inserted_nodes, best_config, best_label):
    # Insertion delta when node edit costs are constant: pick the most frequent
    # inserted label, write the per-graph assignment into best_config and the
    # chosen label into best_label (a dict), and return the delta
    # (negative = the insertion pays off).
    # Construct histogram and inverse label maps.
    hist = {}
    inverse_label_maps = {}
    for graph_id, node_set in inserted_nodes.items():
        inverse_label_maps[graph_id] = {}
        for node in node_set:
            k = node[0]      # node id within graph graph_id
            label = node[1]  # label as a hashable tuple of (name, value) items
            if label not in inverse_label_maps[graph_id]:
                # Only the first node carrying a given label is remembered per graph.
                inverse_label_maps[graph_id][label] = k
            if label not in hist:
                hist[label] = 1
            else:
                hist[label] += 1
    # Determine the best label: the most frequent one across all graphs.
    # (hist is non-empty: this method is only called when nodes were inserted.)
    best_count = 0
    for key, val in hist.items():
        if val > best_count:
            best_count = val
            best_label_tuple = key
    # get best label.
    best_label.clear()
    for key, val in best_label_tuple:
        best_label[key] = val
    # Construct the best configuration and compute its insertion delta.
    best_config.clear()
    best_delta = 0.0
    # With constant costs any pair of distinct labels has the same relabel
    # cost; labels 1 and 2 serve as two arbitrary distinct representatives.
    node_rel_cost = self.__ged_env.get_node_rel_cost(self.__ged_env.get_node_label(1), self.__ged_env.get_node_label(2))
    triangle_ineq_holds = (node_rel_cost <= self.__node_del_cost + self.__node_ins_cost)
    for graph_id, _ in inserted_nodes.items():
        if best_label_tuple in inverse_label_maps[graph_id]:
            # Graph has an inserted node with exactly the best label:
            # the substitution is free and saves one insertion cost.
            best_config[graph_id] = inverse_label_maps[graph_id][best_label_tuple]
            best_delta -= self.__node_ins_cost
        elif triangle_ineq_holds and not len(inserted_nodes[graph_id]) == 0:
            # Substituting any inserted node is no worse than delete + insert.
            best_config[graph_id] = inserted_nodes[graph_id][0][0]
            best_delta += node_rel_cost - self.__node_ins_cost
        else:
            # No suitable node: the new median node is deleted in this graph.
            best_config[graph_id] = np.inf
            best_delta += self.__node_del_cost
    # Return the best insertion delta.
    return best_delta
def __compute_insertion_delta_generic(self, inserted_nodes, best_config, best_label):
    # Insertion delta for non-constant node edit costs: block gradient descent
    # over (configuration, label), restarted from several initial label medians.
    # Collect all node labels of inserted nodes.
    node_labels = []
    for _, node_set in inserted_nodes.items():
        for node in node_set:
            node_labels.append(node[1])
    # Compute node label medians that serve as initial solutions for block gradient descent.
    initial_node_labels = []
    self.__compute_initial_node_labels(node_labels, initial_node_labels)
    # Determine best insertion configuration, label, and delta via parallel block gradient descent from all initial node labels.
    best_delta = 0.0
    for node_label in initial_node_labels:
        # Construct local configuration: initially every graph deletes the new
        # node (np.inf paired with an arbitrary placeholder label).
        config = {}
        for graph_id, _ in inserted_nodes.items():
            config[graph_id] = tuple((np.inf, self.__ged_env.get_node_label(1)))
        # Run block gradient descent: alternate configuration and label updates
        # until neither changes or the iteration cap is hit.
        converged = False
        itr = 0
        while not self.__insertion_termination_criterion_met(converged, itr):
            converged = not self.__update_config_(node_label, inserted_nodes, config, node_labels)
            converged = converged and (not self.__update_node_label(node_labels, node_label))
            itr += 1
        # Compute insertion delta of converged solution.
        delta = 0.0
        for _, node in config.items():
            if node[0] == np.inf:
                delta += self.__node_del_cost
            else:
                delta += self.__ged_env.node_rel_cost(node_label, node[1]) - self.__node_ins_cost
        # Update best delta and global configuration if improvement has been found.
        if delta < best_delta - self.__epsilon:
            best_delta = delta
            # NOTE(review): rebinding the parameter does NOT propagate the label
            # to the caller (the original's '@todo: may be wrong' stands). If
            # best_label is a dict, clear()/update() would be needed -- confirm
            # against the callers before changing.
            best_label = node_label # @todo: may be wrong.
            best_config.clear()
            for graph_id, k in config.items():
                best_config[graph_id] = k
    # Return the best delta.
    return best_delta
def __compute_initial_node_labels(self, node_labels, median_labels):
    """Fill *median_labels* (cleared first) with initial node-label medians.

    Uses the k-means++ heuristic when ``__init_type_increase_order`` is
    'K-MEANS++'; otherwise medians of shuffled, roughly equal-size clusters,
    refined by Lloyd's algorithm.

    BUG FIXES vs. the original: missing bound in np.random.randint; float
    drawn by uniform() used as a list index; inverted k-means++ loop
    condition; unnormalized weights passed as probabilities to choice();
    float position used as index; placeholder np.inf list where a shuffled
    copy of node_labels was intended; off-by-one in the tail-cluster loop;
    'len(...)' iterated instead of range(); 'cluster' vs 'clusters' typos.
    """
    median_labels.clear()
    if self.__use_real_randomness: # @todo: may not work if parallelized.
        # Draw a fresh 32-bit seed (randint requires an explicit bound).
        rng = np.random.randint(0, high=2 ** 32 - 1, size=1)
        urng = np.random.RandomState(seed=rng[0])
    else:
        urng = np.random.RandomState(seed=self.__seed)
    # Generate the initial node label medians.
    if self.__init_type_increase_order == 'K-MEANS++':
        # Use k-means++ heuristic to generate the initial node label medians.
        already_selected = [False] * len(node_labels)
        # Integer index of the first, uniformly chosen seed label.
        selected_label_id = urng.randint(low=0, high=len(node_labels))
        median_labels.append(node_labels[selected_label_id])
        already_selected[selected_label_id] = True
        # Keep selecting UNTIL the requested number of inits is reached.
        while len(median_labels) < self.__num_inits_increase_order:
            weights = [np.inf] * len(node_labels)
            for label_id in range(0, len(node_labels)):
                if already_selected[label_id]:
                    weights[label_id] = 0
                    continue
                # Weight = distance to the closest already-selected median.
                for label in median_labels:
                    weights[label_id] = min(weights[label_id], self.__ged_env.node_rel_cost(label, node_labels[label_id]))
            # choice() requires probabilities that sum to 1: normalize.
            total_weight = float(sum(weights))
            probs = [w / total_weight for w in weights] if total_weight > 0 else None
            selected_label_id = urng.choice(range(0, len(weights)), size=1, p=probs)[0]
            median_labels.append(node_labels[selected_label_id])
            already_selected[selected_label_id] = True
    else:
        # Compute the initial node medians as the medians of randomly generated
        # clusters of (roughly) equal size (the C++ original used std::shuffle).
        shuffled_node_labels = list(node_labels)
        urng.shuffle(shuffled_node_labels)
        cluster_size = len(node_labels) / self.__num_inits_increase_order
        pos = 0  # integer cursor into shuffled_node_labels
        cluster = []
        while len(median_labels) < self.__num_inits_increase_order - 1:
            while pos < (len(median_labels) + 1) * cluster_size:
                cluster.append(shuffled_node_labels[pos])
                pos += 1
            median_labels.append(self.__get_median_node_label(cluster))
            cluster.clear()
        # The last cluster takes all remaining labels (append, THEN advance).
        while pos < len(shuffled_node_labels):
            cluster.append(shuffled_node_labels[pos])
            pos += 1
        median_labels.append(self.__get_median_node_label(cluster))
        cluster.clear()
    # Run Lloyd's Algorithm to refine the medians.
    converged = False
    closest_median_ids = [np.inf] * len(node_labels)
    clusters = [[] for _ in range(len(median_labels))]
    itr = 1
    while not self.__insertion_termination_criterion_met(converged, itr):
        converged = not self.__update_clusters(node_labels, median_labels, closest_median_ids)
        if not converged:
            for cluster_ in clusters:
                cluster_.clear()
            for label_id in range(0, len(node_labels)):
                clusters[closest_median_ids[label_id]].append(node_labels[label_id])
            for cluster_id in range(0, len(clusters)):
                self.__update_node_label(clusters[cluster_id], median_labels[cluster_id])
        itr += 1
def __insertion_termination_criterion_met(self, converged, itr):
    """Return True when block gradient descent should stop.

    Stops on convergence, or when the configured iteration cap
    (active only if > 0) has been reached.
    """
    if converged:
        return True
    max_itrs = self.__max_itrs_increase_order
    return max_itrs > 0 and itr >= max_itrs
def __update_config_(self, node_label, inserted_nodes, config, node_labels):
    """One block-gradient-descent step on the insertion configuration.

    For each graph, picks the cheapest of: keeping the current assignment,
    substituting any inserted node, or deleting the new median node. Rebuilds
    *node_labels* from the resulting configuration.

    Returns True iff *config* was modified.

    BUG FIXES: the 'bets_cost' typo meant the cost of a kept, non-deleted
    assignment was never computed (best_cost stayed 0.0); tuples are
    immutable, so the deletion case must build a new (np.inf, label) pair
    instead of assigning to item 0.
    """
    config_modified = False
    for graph_id, node_set in inserted_nodes.items():
        best_assignment = config[graph_id]
        # Cost of keeping the current assignment.
        if best_assignment[0] == np.inf:
            best_cost = self.__node_del_cost
        else:
            best_cost = self.__ged_env.node_rel_cost(node_label, best_assignment[1]) - self.__node_ins_cost
        # Try substituting each inserted node of this graph instead.
        for node in node_set:
            cost = self.__ged_env.node_rel_cost(node_label, node[1]) - self.__node_ins_cost
            if cost < best_cost - self.__epsilon:
                best_cost = cost
                best_assignment = node
                config_modified = True
        # Deleting the new median node may still be cheaper.
        if self.__node_del_cost < best_cost - self.__epsilon:
            best_cost = self.__node_del_cost
            best_assignment = tuple((np.inf, best_assignment[1]))
            config_modified = True
        config[graph_id] = best_assignment
    # Collect the node labels contained in the best configuration.
    node_labels.clear()
    for _, assignment in config.items():
        if assignment[0] != np.inf:
            node_labels.append(assignment[1])
    # Return true if the configuration was modified.
    return config_modified
def __update_node_label(self, node_labels, node_label):
    # Recompute the median label of node_labels; report whether it moved by
    # more than epsilon (measured in node relabel cost) from node_label.
    new_node_label = self.__get_median_node_label(node_labels)
    if self.__ged_env.node_rel_cost(new_node_label, node_label) > self.__epsilon:
        # NOTE(review): this rebinds the local name only -- the updated label
        # is NOT propagated to the caller (the original flags this with
        # '@todo: may be wrong'). Callers relying on an in-place update will
        # not observe the change; confirm intended label type before fixing.
        node_label = new_node_label # @todo: may be wrong
        return True
    return False
def __update_clusters(self, node_labels, median_labels, closest_median_ids):
    """Reassign each node label to its nearest median label.

    Writes the index of the closest median into *closest_median_ids* and
    returns True iff any assignment changed.
    """
    modified = False
    for label_id, label in enumerate(node_labels):
        # Find the median with minimal relabel cost to this label.
        closest_id = np.inf
        closest_dist = np.inf
        for median_id, median_label in enumerate(median_labels):
            dist = self.__ged_env.node_rel_cost(median_label, label)
            if dist < closest_dist - self.__epsilon:
                closest_dist = dist
                closest_id = median_id
        if closest_median_ids[label_id] != closest_id:
            closest_median_ids[label_id] = closest_id
            modified = True
    # Return true if the clusters were modified.
    return modified
def __add_node_to_median(self, best_config, best_label, median):
    """Append a node labeled *best_label* to *median* and extend all node maps.

    Each stored node map is rebuilt with one extra source node whose image in
    graph g is best_config[g] (np.inf = deleted).
    """
    # The new node gets the next free id.
    new_node_id = nx.number_of_nodes(median)
    median.add_node(new_node_id, **best_label)
    # Rebuild every node map, keeping existing assignments and adding the
    # configured image for the new median node.
    for graph_id, node_map in self.__node_maps_from_median.items():
        relation = []
        node_map.as_relation(relation)
        extended_map = NodeMap(nx.number_of_nodes(median), node_map.num_target_nodes())
        for i, k in relation:
            extended_map.add_assignment(i, k)
        extended_map.add_assignment(nx.number_of_nodes(median) - 1, best_config[graph_id])
        self.__node_maps_from_median[graph_id] = extended_map
    # Increase overall number of increases.
    self.__num_increase_order += 1
| def __improve_sum_of_distances(self, timer): | def __improve_sum_of_distances(self, timer): | ||||
| pass | pass | ||||
| @@ -53,7 +53,7 @@ def test_median_graph_estimator(): | |||||
| mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | ||||
| mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | ||||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE'# @todo: std::to_string(rng()) | |||||
| mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO '# @todo: std::to_string(rng()) | |||||
| # Select the GED algorithm. | # Select the GED algorithm. | ||||
| algo_options = '--threads ' + str(threads) + algo_options_suffix | algo_options = '--threads ' + str(threads) + algo_options_suffix | ||||
| @@ -155,5 +155,5 @@ def test_median_graph_estimator_symb(): | |||||
if __name__ == '__main__':
    # Run the plain estimator test; the symbolic variant stays available but
    # disabled (the duplicated old/new lines from the diff are resolved to
    # the newer state).
    set_median, gen_median = test_median_graph_estimator()
    # set_median, gen_median = test_median_graph_estimator_symb()
| @@ -18,6 +18,7 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| from functools import partial | from functools import partial | ||||
| from gklearn.utils import SpecialLabel | |||||
| from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
| from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
| from gklearn.utils import Trie | from gklearn.utils import Trie | ||||
| @@ -582,11 +583,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
def __add_dummy_labels(self, Gn):
    """Ensure every graph in *Gn* carries at least one node and edge label.

    Resolves the diff to its newer state: use the shared SpecialLabel.DUMMY
    marker instead of the ad-hoc 'dummy' string, and make the operation
    idempotent -- re-applying it when the only label is already the dummy
    does not append duplicates (the old version grew the label list on
    every call).
    """
    if self.__k_func is not None:
        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self.__node_labels = [SpecialLabel.DUMMY]
        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self.__edge_labels = [SpecialLabel.DUMMY]
| @@ -18,6 +18,7 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| from itertools import chain | from itertools import chain | ||||
| from gklearn.utils import SpecialLabel | |||||
| from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
| from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | ||||
| from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
| @@ -495,11 +496,11 @@ class Treelet(GraphKernel): | |||||
def __add_dummy_labels(self, Gn):
    """Ensure every graph in *Gn* carries at least one node and edge label.

    Resolves the diff to its newer state: use the shared SpecialLabel.DUMMY
    marker instead of the ad-hoc 'dummy' string, and make the operation
    idempotent (re-applying when the only label is the dummy does not grow
    the label list, unlike the old append-based version).
    """
    if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
        for i in range(len(Gn)):
            nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
        self.__node_labels = [SpecialLabel.DUMMY]
    if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
        for i in range(len(Gn)):
            nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
        self.__edge_labels = [SpecialLabel.DUMMY]
| @@ -16,6 +16,7 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| from functools import partial | from functools import partial | ||||
| from gklearn.utils import SpecialLabel | |||||
| from gklearn.utils.parallel import parallel_gm | from gklearn.utils.parallel import parallel_gm | ||||
| from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
| @@ -469,10 +470,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def __add_dummy_node_labels(self, Gn):
    """Ensure every graph in *Gn* carries at least one node label.

    Resolves the diff to its newer state: use the shared SpecialLabel.DUMMY
    marker instead of the ad-hoc 'dummy' string, and make the operation
    idempotent (re-applying when the only label is the dummy does not grow
    the label list).
    """
    if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
        for i in range(len(Gn)):
            nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
        self.__node_labels = [SpecialLabel.DUMMY]
| class WLSubtree(WeisfeilerLehman): | class WLSubtree(WeisfeilerLehman): | ||||
| @@ -8,11 +8,83 @@ Created on Tue Jan 14 15:39:29 2020 | |||||
| import multiprocessing | import multiprocessing | ||||
| import functools | import functools | ||||
| import sys | import sys | ||||
| import os | |||||
| from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | ||||
| from gklearn.preimage.utils import generate_median_preimages_by_class | from gklearn.preimage.utils import generate_median_preimages_by_class | ||||
| from gklearn.utils import compute_gram_matrices_by_class | from gklearn.utils import compute_gram_matrices_by_class | ||||
def xp_median_preimage_14_1():
    """xp 14_1: DD, PathUpToH, using CONSTANT.

    Experiment driver: configures the median-preimage generator, the
    PathUpToH kernel, and the GED/MGE backends for the DD dataset, redirects
    stdout to a per-experiment log file, and runs the preimage generation
    once with 'k-graphs' fitting and five times with 'random' fitting.
    """
    # set parameters.
    ds_name = 'DD' #
    mpg_options = {'fit_method': 'k-graphs',
                   'init_ecc': [4, 4, 2, 1, 1, 1], #
                   'ds_name': ds_name,
                   'parallel': True, # False
                   'time_limit_in_sec': 0,
                   'max_itrs': 100, #
                   'max_itrs_without_update': 3,
                   'epsilon_residual': 0.01,
                   'epsilon_ec': 0.1,
                   'verbose': 2}
    kernel_options = {'name': 'PathUpToH',
                      'depth': 2, #
                      'k_func': 'MinMax', #
                      'compute_method': 'trie',
                      'parallel': 'imap_unordered',
#                      'parallel': None,
                      'n_jobs': multiprocessing.cpu_count(),
                      'normalize': True,
                      'verbose': 2}
    ged_options = {'method': 'IPFP',
                   'initialization_method': 'RANDOM', # 'NODE'
                   'initial_solutions': 10, # 1
                   'edit_cost': 'CONSTANT', #
                   'attr_distance': 'euclidean',
                   'ratio_runs_from_initial_solutions': 1,
                   'threads': multiprocessing.cpu_count(),
                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
    mge_options = {'init_type': 'MEDOID',
                   'random_inits': 10,
                   'time_limit': 0,
                   'verbose': 2,
                   'update_order': False,
                   'refine': False}
    save_results = True
    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
    irrelevant_labels = None #
    edge_required = False #
    # Create the output directory and append (not overwrite) to the log,
    # so repeated runs accumulate in one file.
    if not os.path.exists(dir_save):
        os.makedirs(dir_save)
    file_output = open(dir_save + 'output.txt', 'a')
    # NOTE(review): stdout is redirected for the rest of the process and the
    # file handle is never closed/restored -- deliberate in these xp scripts.
    sys.stdout = file_output
    # # compute gram matrices for each class a priori.
    # print('Compute gram matrices for each class a priori.')
    # compute_gram_matrices_by_class(ds_name, kernel_options, save_results=save_results, dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
    # print settings.
    print('parameters:')
    print('dataset name:', ds_name)
    print('mpg_options:', mpg_options)
    print('kernel_options:', kernel_options)
    print('ged_options:', ged_options)
    print('mge_options:', mge_options)
    print('save_results:', save_results)
    print('irrelevant_labels:', irrelevant_labels)
    print()
    # generate preimages: one fitted run, then five randomized baselines.
    for fit_method in ['k-graphs'] + ['random'] * 5:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
| def xp_median_preimage_13_1(): | def xp_median_preimage_13_1(): | ||||
| """xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC. | """xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC. | ||||
| """ | """ | ||||
| @@ -60,7 +132,7 @@ def xp_median_preimage_13_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -125,7 +197,7 @@ def xp_median_preimage_13_2(): | |||||
| edge_required = True # | edge_required = True # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -192,7 +264,7 @@ def xp_median_preimage_12_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -256,7 +328,7 @@ def xp_median_preimage_12_2(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -320,7 +392,9 @@ def xp_median_preimage_12_3(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -383,7 +457,7 @@ def xp_median_preimage_12_4(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -452,7 +526,7 @@ def xp_median_preimage_12_5(): | |||||
| edge_required = True # | edge_required = True # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -519,7 +593,7 @@ def xp_median_preimage_9_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -583,7 +657,7 @@ def xp_median_preimage_9_2(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -647,7 +721,7 @@ def xp_median_preimage_9_3(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -710,7 +784,7 @@ def xp_median_preimage_9_4(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -781,7 +855,7 @@ def xp_median_preimage_8_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -845,7 +919,7 @@ def xp_median_preimage_8_2(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -909,7 +983,7 @@ def xp_median_preimage_8_3(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -972,7 +1046,7 @@ def xp_median_preimage_8_4(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1039,7 +1113,7 @@ def xp_median_preimage_7_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1103,7 +1177,7 @@ def xp_median_preimage_7_2(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1167,7 +1241,7 @@ def xp_median_preimage_7_3(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1230,7 +1304,7 @@ def xp_median_preimage_7_4(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1297,7 +1371,7 @@ def xp_median_preimage_6_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1362,7 +1436,9 @@ def xp_median_preimage_6_2(): | |||||
| edge_required = True # | edge_required = True # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1429,7 +1505,7 @@ def xp_median_preimage_5_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1496,7 +1572,7 @@ def xp_median_preimage_4_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1522,7 +1598,7 @@ def xp_median_preimage_3_2(): | |||||
| # set parameters. | # set parameters. | ||||
| ds_name = 'Fingerprint' # | ds_name = 'Fingerprint' # | ||||
| mpg_options = {'fit_method': 'k-graphs', | mpg_options = {'fit_method': 'k-graphs', | ||||
| 'init_ecc': [0.525, 0.525, 0.001, 0.125, 0.125], # | |||||
| 'init_ecc': [0.525, 0.525, 0.01, 0.125, 0.125], # | |||||
| 'ds_name': ds_name, | 'ds_name': ds_name, | ||||
| 'parallel': True, # False | 'parallel': True, # False | ||||
| 'time_limit_in_sec': 0, | 'time_limit_in_sec': 0, | ||||
| @@ -1561,7 +1637,9 @@ def xp_median_preimage_3_2(): | |||||
| edge_required = True # | edge_required = True # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1587,7 +1665,7 @@ def xp_median_preimage_3_1(): | |||||
| # set parameters. | # set parameters. | ||||
| ds_name = 'Fingerprint' # | ds_name = 'Fingerprint' # | ||||
| mpg_options = {'fit_method': 'k-graphs', | mpg_options = {'fit_method': 'k-graphs', | ||||
| 'init_ecc': [0.525, 0.525, 0.001, 0.125, 0.125], # | |||||
| 'init_ecc': [0.525, 0.525, 0.01, 0.125, 0.125], # | |||||
| 'ds_name': ds_name, | 'ds_name': ds_name, | ||||
| 'parallel': True, # False | 'parallel': True, # False | ||||
| 'time_limit_in_sec': 0, | 'time_limit_in_sec': 0, | ||||
| @@ -1628,7 +1706,9 @@ def xp_median_preimage_3_1(): | |||||
| edge_required = False # | edge_required = False # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1685,7 +1765,7 @@ def xp_median_preimage_2_1(): | |||||
| 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'} | 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'} | ||||
| mge_options = {'init_type': 'MEDOID', | mge_options = {'init_type': 'MEDOID', | ||||
| 'random_inits': 10, | 'random_inits': 10, | ||||
| 'time_limit': 600, | |||||
| 'time_limit': 0, | |||||
| 'verbose': 2, | 'verbose': 2, | ||||
| 'update_order': False, | 'update_order': False, | ||||
| 'refine': False} | 'refine': False} | ||||
| @@ -1694,7 +1774,9 @@ def xp_median_preimage_2_1(): | |||||
| irrelevant_labels = {'edge_labels': ['valence']} | irrelevant_labels = {'edge_labels': ['valence']} | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1763,7 +1845,7 @@ def xp_median_preimage_1_1(): | |||||
| dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' | dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1826,7 +1908,7 @@ def xp_median_preimage_1_2(): | |||||
| edge_required = True # | edge_required = True # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1891,7 +1973,7 @@ def xp_median_preimage_10_1(): | |||||
| dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' | dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -1954,7 +2036,7 @@ def xp_median_preimage_10_2(): | |||||
| edge_required = True # | edge_required = True # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -2019,7 +2101,7 @@ def xp_median_preimage_11_1(): | |||||
| dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' | dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/' | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -2082,7 +2164,7 @@ def xp_median_preimage_11_2(): | |||||
| edge_required = True # | edge_required = True # | ||||
| # print settings. | # print settings. | ||||
| file_output = open(dir_save + 'output.txt', 'w') | |||||
| file_output = open(dir_save + 'output.txt', 'a') | |||||
| sys.stdout = file_output | sys.stdout = file_output | ||||
| print('parameters:') | print('parameters:') | ||||
| print('dataset name:', ds_name) | print('dataset name:', ds_name) | ||||
| @@ -2147,7 +2229,7 @@ if __name__ == "__main__": | |||||
| # # xp_median_preimage_7_1() | # # xp_median_preimage_7_1() | ||||
| # #### xp 7_2: MUTAG, PathUpToH, using CONSTANT. | # #### xp 7_2: MUTAG, PathUpToH, using CONSTANT. | ||||
| # # xp_median_preimage_7_2() | |||||
| # xp_median_preimage_7_2() | |||||
| # #### xp 7_3: MUTAG, Treelet, using CONSTANT. | # #### xp 7_3: MUTAG, Treelet, using CONSTANT. | ||||
| # # xp_median_preimage_7_3() | # # xp_median_preimage_7_3() | ||||
| @@ -2200,6 +2282,10 @@ if __name__ == "__main__": | |||||
| #### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC. | #### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC. | ||||
| # xp_median_preimage_13_2() | # xp_median_preimage_13_2() | ||||
| #### xp 14_1: DD, PathUpToH, using CONSTANT. | |||||
| xp_median_preimage_14_1() | |||||
| # #### xp 1_1: Letter-high, StructuralSP. | # #### xp 1_1: Letter-high, StructuralSP. | ||||
| @@ -2221,10 +2307,10 @@ if __name__ == "__main__": | |||||
| # xp_median_preimage_11_2() | # xp_median_preimage_11_2() | ||||
| # | # | ||||
| # #### xp 2_1: COIL-DEL, StructuralSP, using LETTER2, only node attrs. | # #### xp 2_1: COIL-DEL, StructuralSP, using LETTER2, only node attrs. | ||||
| # # xp_median_preimage_2_1() | |||||
| # xp_median_preimage_2_1() | |||||
| # | # | ||||
| # #### xp 3_1: Fingerprint, StructuralSP, using LETTER2, only node attrs. | # #### xp 3_1: Fingerprint, StructuralSP, using LETTER2, only node attrs. | ||||
| # # xp_median_preimage_3_1() | |||||
| # xp_median_preimage_3_1() | |||||
| # #### xp 3_2: Fingerprint, ShortestPath, using LETTER2, only node attrs. | # #### xp 3_2: Fingerprint, ShortestPath, using LETTER2, only node attrs. | ||||
| # xp_median_preimage_3_2() | # xp_median_preimage_3_2() | ||||
| @@ -2266,35 +2352,35 @@ if __name__ == "__main__": | |||||
| # xp_median_preimage_8_4() | # xp_median_preimage_8_4() | ||||
| # #### xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only. | # #### xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only. | ||||
| xp_median_preimage_9_1() | |||||
| # xp_median_preimage_9_1() | |||||
| # #### xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only. | # #### xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only. | ||||
| xp_median_preimage_9_2() | |||||
| # xp_median_preimage_9_2() | |||||
| # #### xp 9_3: MAO, Treelet, using CONSTANT, symbolic only. | # #### xp 9_3: MAO, Treelet, using CONSTANT, symbolic only. | ||||
| xp_median_preimage_9_3() | |||||
| # xp_median_preimage_9_3() | |||||
| # #### xp 9_4: MAO, WeisfeilerLehman, using CONSTANT, symbolic only. | # #### xp 9_4: MAO, WeisfeilerLehman, using CONSTANT, symbolic only. | ||||
| xp_median_preimage_9_4() | |||||
| # xp_median_preimage_9_4() | |||||
| #### xp 12_1: PAH, StructuralSP, using NON_SYMBOLIC, unlabeled. | #### xp 12_1: PAH, StructuralSP, using NON_SYMBOLIC, unlabeled. | ||||
| xp_median_preimage_12_1() | |||||
| # xp_median_preimage_12_1() | |||||
| #### xp 12_2: PAH, PathUpToH, using CONSTANT, unlabeled. | #### xp 12_2: PAH, PathUpToH, using CONSTANT, unlabeled. | ||||
| xp_median_preimage_12_2() | |||||
| # xp_median_preimage_12_2() | |||||
| #### xp 12_3: PAH, Treelet, using CONSTANT, unlabeled. | #### xp 12_3: PAH, Treelet, using CONSTANT, unlabeled. | ||||
| xp_median_preimage_12_3() | |||||
| # xp_median_preimage_12_3() | |||||
| #### xp 12_4: PAH, WeisfeilerLehman, using CONSTANT, unlabeled. | #### xp 12_4: PAH, WeisfeilerLehman, using CONSTANT, unlabeled. | ||||
| xp_median_preimage_12_4() | |||||
| # xp_median_preimage_12_4() | |||||
| #### xp 12_5: PAH, ShortestPath, using NON_SYMBOLIC, unlabeled. | #### xp 12_5: PAH, ShortestPath, using NON_SYMBOLIC, unlabeled. | ||||
| xp_median_preimage_12_5() | |||||
| # xp_median_preimage_12_5() | |||||
| #### xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC. | #### xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC. | ||||
| xp_median_preimage_13_1() | |||||
| # xp_median_preimage_13_1() | |||||
| #### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC. | #### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC. | ||||
| xp_median_preimage_13_2() | |||||
| # xp_median_preimage_13_2() | |||||
| @@ -419,6 +419,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel=' | |||||
| Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | ||||
| height=4, base_kernel='subtree', parallel=None, | height=4, base_kernel='subtree', parallel=None, | ||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| else: | |||||
| raise Exception('The graph kernel "', graph_kernel, '" is not defined.') | |||||
| # normalization | # normalization | ||||
| Kmatrix_diag = Kmatrix.diagonal().copy() | Kmatrix_diag = Kmatrix.diagonal().copy() | ||||
| @@ -20,4 +20,5 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||||
| from gklearn.utils.timer import Timer | from gklearn.utils.timer import Timer | ||||
| from gklearn.utils.utils import get_graph_kernel_by_name | from gklearn.utils.utils import get_graph_kernel_by_name | ||||
| from gklearn.utils.utils import compute_gram_matrices_by_class | from gklearn.utils.utils import compute_gram_matrices_by_class | ||||
| from gklearn.utils.utils import SpecialLabel | |||||
| from gklearn.utils.trie import Trie | from gklearn.utils.trie import Trie | ||||
| @@ -90,6 +90,9 @@ class Dataset(object): | |||||
| elif ds_name == 'Cuneiform': | elif ds_name == 'Cuneiform': | ||||
| ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
| elif ds_name == 'DD': | |||||
| ds_file = current_path + '../../datasets/DD/DD_A.txt' | |||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
| elif ds_name == 'Fingerprint': | elif ds_name == 'Fingerprint': | ||||
| ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
| @@ -124,6 +127,8 @@ class Dataset(object): | |||||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
| elif ds_name == 'Synthie': | elif ds_name == 'Synthie': | ||||
| pass | pass | ||||
| else: | |||||
| raise Exception('The dataset name "', ds_name, '" is not pre-defined.') | |||||
| self.__node_labels = label_names['node_labels'] | self.__node_labels = label_names['node_labels'] | ||||
| self.__node_attrs = label_names['node_attrs'] | self.__node_attrs = label_names['node_attrs'] | ||||
| @@ -1,6 +1,7 @@ | |||||
| import networkx as nx | import networkx as nx | ||||
| import numpy as np | import numpy as np | ||||
| from copy import deepcopy | from copy import deepcopy | ||||
| from enum import Enum, auto | |||||
| #from itertools import product | #from itertools import product | ||||
| # from tqdm import tqdm | # from tqdm import tqdm | ||||
| @@ -343,13 +344,15 @@ def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attr | |||||
| return graph_kernel | return graph_kernel | ||||
| def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None): | |||||
| def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False): | |||||
| import os | |||||
| from gklearn.utils import Dataset, split_dataset_by_target | from gklearn.utils import Dataset, split_dataset_by_target | ||||
| # 1. get dataset. | # 1. get dataset. | ||||
| print('1. getting dataset...') | print('1. getting dataset...') | ||||
| dataset_all = Dataset() | dataset_all = Dataset() | ||||
| dataset_all.load_predefined_dataset(ds_name) | dataset_all.load_predefined_dataset(ds_name) | ||||
| dataset_all.trim_dataset(edge_required=edge_required) | |||||
| if not irrelevant_labels is None: | if not irrelevant_labels is None: | ||||
| dataset_all.remove_labels(**irrelevant_labels) | dataset_all.remove_labels(**irrelevant_labels) | ||||
| # dataset_all.cut_graphs(range(0, 10)) | # dataset_all.cut_graphs(range(0, 10)) | ||||
| @@ -385,6 +388,8 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d | |||||
| print() | print() | ||||
| print('4. saving results...') | print('4. saving results...') | ||||
| if save_results: | if save_results: | ||||
| if not os.path.exists(dir_save): | |||||
| os.makedirs(dir_save) | |||||
| np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | ||||
| print('\ncomplete.') | print('\ncomplete.') | ||||
| @@ -460,4 +465,10 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||||
| attributes = [] | attributes = [] | ||||
| for ed, attrs in G.edges(data=True): | for ed, attrs in G.edges(data=True): | ||||
| attributes.append(tuple(attrs[aname] for aname in attr_names)) | attributes.append(tuple(attrs[aname] for aname in attr_names)) | ||||
| return attributes | |||||
| return attributes | |||||
| class SpecialLabel(Enum): | |||||
| """can be used to define special labels. | |||||
| """ | |||||
| DUMMY = auto # The dummy label. | |||||