2. update load_tud function. 3. update MedianPreimageGenerator. (tag: v0.2.0)
| @@ -666,7 +666,8 @@ class MedianGraphEstimator(object): | |||
| # Compute the median label and update the median. | |||
| if len(node_labels) > 0: | |||
| median_label = self.__ged_env.get_median_node_label(node_labels) | |||
| # median_label = self.__ged_env.get_median_node_label(node_labels) | |||
| median_label = self.__get_median_node_label(node_labels) | |||
| if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon: | |||
| nx.set_node_attributes(median, {i: median_label}) | |||
| @@ -701,7 +702,7 @@ class MedianGraphEstimator(object): | |||
| if median.has_edge(i, j): | |||
| median_label = median.edges[(i, j)] | |||
| if self.__labeled_edges and len(edge_labels) > 0: | |||
| new_median_label = self.__ged_env.median_edge_label(edge_labels) | |||
| new_median_label = self.__get_median_edge_label(edge_labels) | |||
| if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon: | |||
| median_label = new_median_label | |||
| for edge_label in edge_labels: | |||
| @@ -821,4 +822,144 @@ class MedianGraphEstimator(object): | |||
def compute_my_cost(g, h, node_map):
    """Placeholder cost routine.

    Walks over ``g.nodes`` while adding zero to a local accumulator.
    ``h`` and ``node_map`` are accepted but not used, and nothing is
    returned.
    """
    total = 0.0
    for _node in g.nodes:
        # Still a stub: every node contributes nothing to the total.
        total += 0
def __get_median_node_label(self, node_labels):
    """Return the median label of ``node_labels``.

    The choice of implementation is hard-wired: the non-symbolic routine
    is always used and the symbolic variant is never reached.
    """
    use_symbolic = False  # fixed switch, mirrors the constant condition
    if use_symbolic:
        return self.__get_median_node_label_symbolic(node_labels)
    return self.__get_median_label_nonsymbolic(node_labels)
def __get_median_edge_label(self, edge_labels):
    """Return the median label of ``edge_labels``.

    The choice of implementation is hard-wired: the non-symbolic routine
    is always used and the symbolic variant is never reached.
    """
    use_symbolic = False  # fixed switch, mirrors the constant condition
    if use_symbolic:
        return self.__get_median_edge_label_symbolic(edge_labels)
    return self.__get_median_label_nonsymbolic(edge_labels)
def __get_median_label_nonsymbolic(self, labels):
    """Return the median of numeric (non-symbolic) labels.

    Every label is treated as a point whose coordinates are the label's
    values converted to ``float``.  The arithmetic mean of the points is
    taken as the initial solution and then refined with Weiszfeld's
    algorithm (at most 100 iterations, stopping once the summed absolute
    coordinate change drops to 0.0001 or below).

    Parameters
    ----------
    labels : list of dict
        Labels to aggregate.  The keys of the first label define the
        coordinates; every value must be convertible to ``float``.

    Returns
    -------
    dict
        Maps each key to the median coordinate rendered with ``str``.
        An empty dict is returned when ``labels`` is empty.
    """
    if len(labels) == 0:
        return {}  # @todo

    # Transform the labels into coordinates and compute mean label as initial solution.
    labels_as_coords = []
    sums = {key: 0 for key in labels[0]}
    for label in labels:
        coords = {}
        for key, val in label.items():
            coord = float(val)
            sums[key] += coord
            coords[key] = coord
        labels_as_coords.append(coords)
    median = {key: total / len(labels) for key, total in sums.items()}

    # Run main loop of Weiszfeld's Algorithm.
    epsilon = 0.0001
    delta = 1.0
    num_itrs = 0
    all_equal = False
    while delta > epsilon and num_itrs < 100 and not all_equal:
        numerator = {key: 0 for key in sums}
        denominator = 0
        for coords in labels_as_coords:
            # Euclidean distance between this point and the current median.
            norm = 0
            for key, val in coords.items():
                norm += (val - median[key]) ** 2
            norm = np.sqrt(norm)
            # Points coinciding with the current median are skipped to
            # avoid a division by zero.
            if norm > 0:
                for key, val in coords.items():
                    numerator[key] += val / norm
                denominator += 1.0 / norm
        if denominator == 0:
            # Every point sits exactly on the current median.
            all_equal = True
        else:
            new_median = {}
            delta = 0.0
            for key, val in numerator.items():
                this_median = val / denominator
                new_median[key] = this_median
                delta += np.abs(median[key] - this_median)
            median = new_median
        num_itrs += 1

    # Transform the solution to strings and return it.
    return {key: str(val) for key, val in median.items()}
def __get_median_node_label_symbolic(self, node_labels):
    """Placeholder for symbolic node labels; not implemented, returns ``None``."""
    return None
def __get_median_edge_label_symbolic(self, edge_labels):
    """Placeholder for symbolic edge labels; not implemented, returns ``None``."""
    return None
| # def __get_median_edge_label_nonsymbolic(self, edge_labels): | |||
| # if len(edge_labels) == 0: | |||
| # return {} | |||
| # else: | |||
| # # Transform the labels into coordinates and compute mean label as initial solution. | |||
| # edge_labels_as_coords = [] | |||
| # sums = {} | |||
| # for key, val in edge_labels[0].items(): | |||
| # sums[key] = 0 | |||
| # for edge_label in edge_labels: | |||
| # coords = {} | |||
| # for key, val in edge_label.items(): | |||
| # label = float(val) | |||
| # sums[key] += label | |||
| # coords[key] = label | |||
| # edge_labels_as_coords.append(coords) | |||
| # median = {} | |||
| # for key, val in sums.items(): | |||
| # median[key] = val / len(edge_labels) | |||
| # | |||
| # # Run main loop of Weiszfeld's Algorithm. | |||
| # epsilon = 0.0001 | |||
| # delta = 1.0 | |||
| # num_itrs = 0 | |||
| # all_equal = False | |||
| # while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)): | |||
| # numerator = {} | |||
| # for key, val in sums.items(): | |||
| # numerator[key] = 0 | |||
| # denominator = 0 | |||
| # for edge_label_as_coord in edge_labels_as_coords: | |||
| # norm = 0 | |||
| # for key, val in edge_label_as_coord.items(): | |||
| # norm += (val - median[key]) ** 2 | |||
| # norm += np.sqrt(norm) | |||
| # if norm > 0: | |||
| # for key, val in edge_label_as_coord.items(): | |||
| # numerator[key] += val / norm | |||
| # denominator += 1.0 / norm | |||
| # if denominator == 0: | |||
| # all_equal = True | |||
| # else: | |||
| # new_median = {} | |||
| # delta = 0.0 | |||
| # for key, val in numerator.items(): | |||
| # this_median = val / denominator | |||
| # new_median[key] = this_median | |||
| # delta += np.abs(median[key] - this_median) | |||
| # median = new_median | |||
| # | |||
| # num_itrs += 1 | |||
| # | |||
| # # Transform the solution to ged::GXLLabel and return it. | |||
| # median_label = {} | |||
| # for key, val in median.items(): | |||
| # median_label[key] = str(val) | |||
| # return median_label | |||
| @@ -96,7 +96,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| if self.__runtime_precompute_gm is None: | |||
| raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') | |||
| self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm | |||
| self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) | |||
| if self._kernel_options['normalize']: | |||
| self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) | |||
| else: | |||
| self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) | |||
| end_precompute_gm = time.time() | |||
| start -= self.__runtime_precompute_gm | |||
| @@ -447,31 +450,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], | |||
| np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
| prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
| try: | |||
| prob.solve(verbose=True) | |||
| except MemoryError as error0: | |||
| if self._verbose >= 2: | |||
| print('\nUsing solver "OSQP" caused a memory error.') | |||
| print('the original error message is\n', error0) | |||
| print('solver status: ', prob.status) | |||
| print('trying solver "CVXOPT" instead...\n') | |||
| try: | |||
| prob.solve(solver=cp.CVXOPT, verbose=True) | |||
| except Exception as error1: | |||
| if self._verbose >= 2: | |||
| print('\nAn error occured when using solver "CVXOPT".') | |||
| print('the original error message is\n', error1) | |||
| print('solver status: ', prob.status) | |||
| print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n') | |||
| prob.solve(solver=cp.MOSEK, verbose=True) | |||
| else: | |||
| if self._verbose >= 2: | |||
| print('solver status: ', prob.status) | |||
| else: | |||
| if self._verbose >= 2: | |||
| print('solver status: ', prob.status) | |||
| if self._verbose >= 2: | |||
| print() | |||
| self.__execute_cvx(prob) | |||
| edit_costs_new = x.value | |||
| residual = np.sqrt(prob.value) | |||
| elif rw_constraints == '2constraints': | |||
| @@ -551,9 +530,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], | |||
| np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
| prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
| prob.solve() | |||
| if self._verbose >= 2: | |||
| print(x.value) | |||
| self.execute_cvx(prob) | |||
| edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||
| residual = np.sqrt(prob.value) | |||
| elif not is_n_attr and is_e_attr: | |||
| @@ -616,6 +593,34 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| return edit_costs_new, residual | |||
def __execute_cvx(self, prob):
    """Solve ``prob`` in place, falling back to other solvers on failure.

    The problem is first solved with the default solver (named "OSQP" by
    the log message below).  If that raises ``MemoryError``, the solve is
    retried with ``cp.CVXOPT``; if that attempt raises any exception, a
    final attempt is made with ``cp.MOSEK``, whose errors propagate to
    the caller.  Diagnostics are printed only when ``self._verbose >= 2``,
    and the same condition is passed to the solver as its ``verbose`` flag.

    Parameters
    ----------
    prob
        Object exposing ``solve(...)`` and ``status`` (presumably a
        ``cvxpy.Problem`` -- confirm against the callers).
    """
    chatty = self._verbose >= 2
    try:
        prob.solve(verbose=chatty)
    except MemoryError as default_error:
        if chatty:
            print('\nUsing solver "OSQP" caused a memory error.')
            print('the original error message is\n', default_error)
            print('solver status: ', prob.status)
            print('trying solver "CVXOPT" instead...\n')
        try:
            prob.solve(solver=cp.CVXOPT, verbose=chatty)
        except Exception as cvxopt_error:
            if chatty:
                print('\nAn error occured when using solver "CVXOPT".')
                print('the original error message is\n', cvxopt_error)
                print('solver status: ', prob.status)
                print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
            # Last resort: no status report follows this attempt.
            prob.solve(solver=cp.MOSEK, verbose=chatty)
        else:
            if chatty:
                print('solver status: ', prob.status)
    else:
        if chatty:
            print('solver status: ', prob.status)
    if chatty:
        print()
| def __generate_preimage_iam(self): | |||
| # Set up the ged environment. | |||
| ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private varible. | |||
| @@ -67,8 +67,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||
| gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | |||
| if gmfile_exist: | |||
| gmfile = np.load(gm_fname) | |||
| gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list'] | |||
| gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. | |||
| gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']] | |||
| time_precompute_gm_list = gmfile['run_time_list'].tolist() | |||
| else: | |||
| gram_matrix_unnorm_list = [] | |||
| @@ -87,6 +87,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| print('start generating preimage for each class of target...') | |||
| idx_offset = 0 | |||
| for idx, dataset in enumerate(datasets): | |||
| target = dataset.targets[0] | |||
| print('\ntarget =', target, '\n') | |||
| @@ -96,14 +97,15 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
| num_graphs = len(dataset.graphs) | |||
| if num_graphs < 2: | |||
| print('\nnumber of graphs = ', num_graphs, ', skip.\n') | |||
| idx_offset += 1 | |||
| continue | |||
| # 2. set parameters. | |||
| print('2. initializing mpg and setting parameters...') | |||
| if load_gm: | |||
| if gmfile_exist: | |||
| mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx] | |||
| mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx] | |||
| mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset] | |||
| mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset] | |||
| mpg = MedianPreimageGenerator() | |||
| mpg.dataset = dataset | |||
| mpg.set_options(**mpg_options.copy()) | |||
| @@ -92,9 +92,11 @@ class Dataset(object): | |||
| elif ds_name == 'COIL-RAG': | |||
| pass | |||
| elif ds_name == 'COLORS-3': | |||
| pass | |||
| ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| elif ds_name == 'FRANKENSTEIN': | |||
| pass | |||
| ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| self.__node_labels = label_names['node_labels'] | |||
| self.__node_attrs = label_names['node_attrs'] | |||
| @@ -541,10 +541,21 @@ def load_tud(filename): | |||
| content_gi = open(fgi).read().splitlines() # graph indicator | |||
| content_am = open(fam).read().splitlines() # adjacency matrix | |||
| content_gl = open(fgl).read().splitlines() # graph labels | |||
| # load targets. | |||
| if 'fgl' in locals(): | |||
| content_targets = open(fgl).read().splitlines() # targets (classification) | |||
| targets = [float(i) for i in content_targets] | |||
| elif 'fga' in locals(): | |||
| content_targets = open(fga).read().splitlines() # targets (regression) | |||
| targets = [int(i) for i in content_targets] | |||
| if 'class_label_map' in locals(): | |||
| targets = [class_label_map[t] for t in targets] | |||
| else: | |||
| raise Exception('Can not find targets file. Please make sure there is a "', ds_name, '_graph_labels.txt" or "', ds_name, '_graph_attributes.txt"', 'file in your dataset folder.') | |||
| # create graphs and add nodes | |||
| data = [nx.Graph(name=str(i)) for i in range(0, len(content_gl))] | |||
| data = [nx.Graph(name=str(i)) for i in range(0, len(content_targets))] | |||
| if 'fnl' in locals(): | |||
| content_nl = open(fnl).read().splitlines() # node labels | |||
| for idx, line in enumerate(content_gi): | |||
| @@ -619,11 +630,6 @@ def load_tud(filename): | |||
| for i, a_name in enumerate(label_names['edge_attrs']): | |||
| data[g].edges[n[0], n[1]][a_name] = attrs[i] | |||
| # load targets. | |||
| targets = [int(i) for i in content_gl] | |||
| if 'class_label_map' in locals(): | |||
| targets = [class_label_map[t] for t in targets] | |||
| return data, targets, label_names | |||