2. update README.md. 3. update preimage module, class Dataset. 4. update requirements. 5. add helper function to compute Gram matrix for each class. tags/v0.2.0
| @@ -4,7 +4,7 @@ | |||
| [](https://graphkit-learn.readthedocs.io/en/master/?badge=master) | |||
| [](https://badge.fury.io/py/graphkit-learn) | |||
| A python package for graph kernels. | |||
| A Python package for graph kernels, graph edit distances and the graph pre-image problem. | |||
| ## Requirements | |||
| @@ -348,7 +348,7 @@ class MedianGraphEstimator(object): | |||
| # Print information about current iteration. | |||
| if self.__print_to_stdout == 2: | |||
| progress = tqdm(desc='\rComputing initial node maps', total=len(graph_ids), file=sys.stdout) | |||
| progress = tqdm(desc='Computing initial node maps', total=len(graph_ids), file=sys.stdout) | |||
| # Compute node maps and sum of distances for initial median. | |||
| self.__sum_of_distances = 0 | |||
| @@ -457,7 +457,7 @@ class MedianGraphEstimator(object): | |||
| self.__itrs[median_pos] += 1 | |||
| # Update the best median. | |||
| if self.__sum_of_distances < self.__best_init_sum_of_distances: | |||
| if self.__sum_of_distances < best_sum_of_distances: | |||
| best_sum_of_distances = self.__sum_of_distances | |||
| node_maps_from_best_median = self.__node_maps_from_median | |||
| best_median = median | |||
| @@ -588,7 +588,7 @@ class MedianGraphEstimator(object): | |||
| # Print information about current iteration. | |||
| if self.__print_to_stdout == 2: | |||
| progress = tqdm(desc='\rComputing medoid', total=len(graph_ids), file=sys.stdout) | |||
| progress = tqdm(desc='Computing medoid', total=len(graph_ids), file=sys.stdout) | |||
| # Compute the medoid. | |||
| medoid_id = graph_ids[0] | |||
| @@ -718,7 +718,7 @@ class MedianGraphEstimator(object): | |||
| def __update_node_maps(self): | |||
| # Print information about current iteration. | |||
| if self.__print_to_stdout == 2: | |||
| progress = tqdm(desc='\rUpdating node maps', total=len(self.__node_maps_from_median), file=sys.stdout) | |||
| progress = tqdm(desc='Updating node maps', total=len(self.__node_maps_from_median), file=sys.stdout) | |||
| # Update the node maps. | |||
| node_maps_were_modified = False | |||
| @@ -307,7 +307,7 @@ def ged_options_to_string(options): | |||
| opt_str = ' ' | |||
| for key, val in options.items(): | |||
| if key == 'initialization_method': | |||
| opt_str += '--initial_solutions ' + str(val) + ' ' | |||
| opt_str += '--initialization-method ' + str(val) + ' ' | |||
| elif key == 'initialization_options': | |||
| opt_str += '--initialization-options ' + str(val) + ' ' | |||
| elif key == 'lower_bound_method': | |||
| @@ -76,11 +76,11 @@ class GraphKernel(object): | |||
| def compute_distance_matrix(self): | |||
| dis_mat = np.empty((len(self._graphs), len(self._graphs))) | |||
| if self._gram_matrix is None: | |||
| raise Exception('Please compute the Gram matrix before computing distance matrix.') | |||
| for i in range(len(self._graphs)): | |||
| for j in range(i, len(self._graphs)): | |||
| dis_mat = np.empty((len(self._gram_matrix), len(self._gram_matrix))) | |||
| for i in range(len(self._gram_matrix)): | |||
| for j in range(i, len(self._gram_matrix)): | |||
| dis = self._gram_matrix[i, i] + self._gram_matrix[j, j] - 2 * self._gram_matrix[i, j] | |||
| if dis < 0: | |||
| if dis > -1e-10: | |||
| @@ -184,18 +184,22 @@ class GraphKernel(object): | |||
| def parallel(self): | |||
| return self._parallel | |||
| @property | |||
| def n_jobs(self): | |||
| return self._n_jobs | |||
| @property | |||
| def verbose(self): | |||
| return self._verbose | |||
| @property | |||
| def normalize(self): | |||
| return self._normalize | |||
| @property | |||
| def run_time(self): | |||
| return self._run_time | |||
| @@ -205,7 +209,15 @@ class GraphKernel(object): | |||
| def gram_matrix(self): | |||
| return self._gram_matrix | |||
| @gram_matrix.setter | |||
| def gram_matrix(self, value): | |||
| self._gram_matrix = value | |||
| @property | |||
| def gram_matrix_unnorm(self): | |||
| return self._gram_matrix_unnorm | |||
| @gram_matrix_unnorm.setter | |||
| def gram_matrix_unnorm(self, value): | |||
| self._gram_matrix_unnorm = value | |||
| @@ -36,10 +36,9 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| self.__time_limit_in_sec = 0 | |||
| self.__max_itrs = 100 | |||
| self.__max_itrs_without_update = 3 | |||
| self.__epsilon_ratio = 0.01 | |||
| self.__epsilon_residual = 0.01 | |||
| self.__epsilon_ec = 0.1 | |||
| # values to compute. | |||
| self.__edit_cost_constants = [] | |||
| self.__runtime_precompute_gm = None | |||
| self.__runtime_optimize_ec = None | |||
| self.__runtime_generate_preimage = None | |||
| self.__runtime_total = None | |||
| @@ -54,7 +53,11 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| self.__itrs = 0 | |||
| self.__converged = False | |||
| self.__num_updates_ecc = 0 | |||
| # values that can be set or to be computed. | |||
| self.__edit_cost_constants = [] | |||
| self.__gram_matrix_unnorm = None | |||
| self.__runtime_precompute_gm = None | |||
| def set_options(self, **kwargs): | |||
| self._kernel_options = kwargs.get('kernel_options', {}) | |||
| @@ -71,7 +74,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||
| self.__max_itrs = kwargs.get('max_itrs', 100) | |||
| self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) | |||
| self.__epsilon_ratio = kwargs.get('epsilon_ratio', 0.01) | |||
| self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01) | |||
| self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) | |||
| self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||
| self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||
| def run(self): | |||
| @@ -81,9 +87,18 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| start = time.time() | |||
| # 1. precompute gram matrix. | |||
| gram_matrix, run_time = self.__graph_kernel.compute(self._dataset.graphs, **self._kernel_options) | |||
| end_precompute_gm = time.time() | |||
| self.__runtime_precompute_gm = end_precompute_gm - start | |||
| if self.__gram_matrix_unnorm is None: | |||
| gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) | |||
| self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||
| end_precompute_gm = time.time() | |||
| self.__runtime_precompute_gm = end_precompute_gm - start | |||
| else: | |||
| if self.__runtime_precompute_gm is None: | |||
| raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') | |||
| self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm | |||
| self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) | |||
| end_precompute_gm = time.time() | |||
| start -= self.__runtime_precompute_gm | |||
| # 2. optimize edit cost constants. | |||
| self.__optimize_edit_cost_constants() | |||
| @@ -134,6 +149,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| print('Total number of updating edit costs:', self.__num_updates_ecc) | |||
| print('Is optimization of edit costs converged:', self.__converged) | |||
| print('================================================================================') | |||
| print() | |||
| # collect return values. | |||
| # return (sod_sm, sod_gm), \ | |||
| @@ -222,7 +238,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| def __optimize_ecc_by_kernel_distances(self): | |||
| # compute distances in feature space. | |||
| dis_k_mat, _, _, _ = self.__graph_kernel.compute_distance_matrix() | |||
| dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() | |||
| dis_k_vec = [] | |||
| for i in range(len(dis_k_mat)): | |||
| # for j in range(i, len(dis_k_mat)): | |||
| @@ -256,7 +272,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| timer = Timer(self.__time_limit_in_sec) | |||
| while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update): | |||
| if self._verbose >= 2: | |||
| print('\niteration', self.__itrs) | |||
| print('\niteration', self.__itrs + 1) | |||
| time0 = time.time() | |||
| # "fit" geds to distances in feature space by tuning edit costs using the Least Squares Method. | |||
| # np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm', | |||
| @@ -286,21 +302,21 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| # check convergence. | |||
| ec_changed = False | |||
| for i, cost in enumerate(self.__edit_cost_constants): | |||
| # if cost == 0: | |||
| # if edit_cost_list[-2][i] > self.__epsilon_ratio: | |||
| # ec_changed = True | |||
| # break | |||
| # elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ratio: | |||
| # ec_changed = True | |||
| # break | |||
| if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ratio: | |||
| if cost == 0: | |||
| if edit_cost_list[-2][i] > self.__epsilon_ec: | |||
| ec_changed = True | |||
| break | |||
| elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec: | |||
| ec_changed = True | |||
| break | |||
| # if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec: | |||
| # ec_changed = True | |||
| # break | |||
| residual_changed = False | |||
| if residual_list[-1] == 0: | |||
| if residual_list[-2] > self.__epsilon_ratio: | |||
| if residual_list[-2] > self.__epsilon_residual: | |||
| residual_changed = True | |||
| elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_ratio: | |||
| elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual: | |||
| residual_changed = True | |||
| self.__converged = not (ec_changed or residual_changed) | |||
| if self.__converged: | |||
| @@ -313,14 +329,14 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| if self._verbose >= 2: | |||
| print() | |||
| print('-------------------------------------------------------------------------') | |||
| print('States of iteration', str(self.__itrs)) | |||
| print('States of iteration', self.__itrs + 1) | |||
| print('-------------------------------------------------------------------------') | |||
| # print('Time spend:', self.__runtime_optimize_ec) | |||
| print('Total number of iterations for optimizing:', self.__itrs) | |||
| print('Total number of iterations for optimizing:', self.__itrs + 1) | |||
| print('Total number of updating edit costs:', self.__num_updates_ecc) | |||
| print('Is optimization of edit costs converged:', self.__converged) | |||
| print('Does edit cost changed:', ec_changed) | |||
| print('Does residual changed:', residual_changed) | |||
| print('Was optimization of edit costs converged:', self.__converged) | |||
| print('Did edit costs change:', ec_changed) | |||
| print('Did residual change:', residual_changed) | |||
| print('Iterations without update:', itrs_without_update) | |||
| print('Current edit cost constants:', self.__edit_cost_constants) | |||
| print('Residual list:', residual_list) | |||
| @@ -634,11 +650,11 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| def __compute_distances_to_true_median(self): | |||
| # compute distance in kernel space for set median. | |||
| kernels_to_sm, _ = self.__graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) | |||
| kernel_sm, _ = self.__graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) | |||
| kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__graph_kernel.gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||
| kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) | |||
| kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) | |||
| kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||
| # @todo: not correct kernel value | |||
| gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self.__graph_kernel.gram_matrix)), axis=0) | |||
| gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||
| gram_with_sm = np.concatenate((np.array([[1] + kernels_to_sm]).T, gram_with_sm), axis=1) | |||
| self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||
| [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | |||
| @@ -649,10 +665,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| # print(set_median.edges(data=True)) | |||
| # compute distance in kernel space for generalized median. | |||
| kernels_to_gm, _ = self.__graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) | |||
| kernel_gm, _ = self.__graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) | |||
| kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__graph_kernel.gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||
| gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self.__graph_kernel.gram_matrix)), axis=0) | |||
| kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) | |||
| kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) | |||
| kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||
| gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||
| gram_with_gm = np.concatenate((np.array([[1] + kernels_to_gm]).T, gram_with_gm), axis=1) | |||
| self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||
| [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | |||
| @@ -679,12 +695,12 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| def __set_graph_kernel_by_name(self): | |||
| if self.kernel_options['name'] == 'structuralspkernel': | |||
| from gklearn.kernels import StructuralSP | |||
| self.__graph_kernel = StructuralSP(node_labels=self.dataset.node_labels, | |||
| edge_labels=self.dataset.edge_labels, | |||
| node_attrs=self.dataset.node_attrs, | |||
| edge_attrs=self.dataset.edge_attrs, | |||
| ds_infos=self.dataset.get_dataset_infos(keys=['directed']), | |||
| **self.kernel_options) | |||
| self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels, | |||
| edge_labels=self._dataset.edge_labels, | |||
| node_attrs=self._dataset.node_attrs, | |||
| edge_attrs=self._dataset.edge_attrs, | |||
| ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
| **self._kernel_options) | |||
| # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
| @@ -692,7 +708,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| """ | |||
| Cleans node and edge labels and attributes of the given graph. | |||
| """ | |||
| G_new = nx.Graph() | |||
| G_new = nx.Graph(**G.graph) | |||
| for nd, attrs in G.nodes(data=True): | |||
| G_new.add_node(str(nd)) # @todo: should we keep this as str()? | |||
| for l_name in self._dataset.node_labels: | |||
| @@ -760,4 +776,13 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
| @property | |||
| def best_from_dataset(self): | |||
| return self.__best_from_dataset | |||
| return self.__best_from_dataset | |||
| @property | |||
| def gram_matrix_unnorm(self): | |||
| return self.__gram_matrix_unnorm | |||
| @gram_matrix_unnorm.setter | |||
| def gram_matrix_unnorm(self, value): | |||
| self.__gram_matrix_unnorm = value | |||
| @@ -5,7 +5,7 @@ Created on Thu Mar 26 18:26:36 2020 | |||
| @author: ljia | |||
| """ | |||
| from gklearn.utils import Dataset | |||
| # from gklearn.utils import Dataset | |||
| class PreimageGenerator(object): | |||
| @@ -32,6 +32,11 @@ class PreimageGenerator(object): | |||
| @kernel_options.setter | |||
| def kernel_options(self, value): | |||
| self._kernel_options = value | |||
| @property | |||
| def graph_kernel(self): | |||
| return self._graph_kernel | |||
| @property | |||
| @@ -41,3 +46,4 @@ class PreimageGenerator(object): | |||
| @verbose.setter | |||
| def verbose(self, value): | |||
| self._verbose = value | |||
| @@ -21,21 +21,23 @@ from gklearn.kernels.treeletKernel import treeletkernel | |||
| from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel | |||
| from gklearn.utils import Dataset | |||
| import csv | |||
| import matplotlib.pyplot as plt | |||
| import networkx as nx | |||
| def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, dir_save='', ): | |||
| def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None): | |||
| import os.path | |||
| from gklearn.preimage import MedianPreimageGenerator | |||
| from gklearn.utils import split_dataset_by_target | |||
| from gklearn.utils.graphfiles import saveGXL | |||
| # 1. get dataset. | |||
| print('getting dataset...') | |||
| print('1. getting dataset...') | |||
| dataset_all = Dataset() | |||
| dataset_all.load_predefined_dataset(ds_name) | |||
| if not irrelevant_labels is None: | |||
| dataset_all.remove_labels(**irrelevant_labels) | |||
| # dataset_all.cut_graphs(range(0, 100)) | |||
| datasets = split_dataset_by_target(dataset_all) | |||
| # dataset.cut_graphs(range(0, 10)) | |||
| if save_results: | |||
| # create result files. | |||
| @@ -47,7 +49,6 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| dis_k_sm_list = [] | |||
| dis_k_gm_list = [] | |||
| dis_k_gi_min_list = [] | |||
| time_precompute_gm_list = [] | |||
| time_optimize_ec_list = [] | |||
| time_generate_list = [] | |||
| time_total_list = [] | |||
| @@ -58,6 +59,26 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| nb_dis_k_sm2gm = [0, 0, 0] | |||
| nb_dis_k_gi2sm = [0, 0, 0] | |||
| nb_dis_k_gi2gm = [0, 0, 0] | |||
| dis_k_max_list = [] | |||
| dis_k_min_list = [] | |||
| dis_k_mean_list = [] | |||
| if load_gm == 'auto': | |||
| gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||
| gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | |||
| if gmfile_exist: | |||
| gmfile = np.load(gm_fname) | |||
| gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list'] | |||
| time_precompute_gm_list = gmfile['run_time_list'].tolist() | |||
| else: | |||
| gram_matrix_unnorm_list = [] | |||
| time_precompute_gm_list = [] | |||
| elif not load_gm: | |||
| gram_matrix_unnorm_list = [] | |||
| time_precompute_gm_list = [] | |||
| else: | |||
| gmfile = np.load() | |||
| gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list'] | |||
| time_precompute_gm_list = gmfile['run_time_list'] | |||
| # repeats_better_sod_sm2gm = [] | |||
| # repeats_better_dis_k_sm2gm = [] | |||
| # repeats_better_dis_k_gi2sm = [] | |||
| @@ -65,16 +86,23 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| print('start generating preimage for each class of target...') | |||
| for dataset in datasets: | |||
| print('\ntarget =', dataset.targets[0], '\n') | |||
| num_graphs = len(dataset.graphs) | |||
| for idx, dataset in enumerate(datasets): | |||
| target = dataset.targets[0] | |||
| print('\ntarget =', target, '\n') | |||
| # if target != 1: | |||
| # continue | |||
| num_graphs = len(dataset.graphs) | |||
| if num_graphs < 2: | |||
| print('\nnumber of graphs = ', num_graphs, ', skip.\n') | |||
| continue | |||
| # 2. set parameters. | |||
| print('1. initializing mpg and setting parameters...') | |||
| print('2. initializing mpg and setting parameters...') | |||
| if load_gm: | |||
| if gmfile_exist: | |||
| mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx] | |||
| mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx] | |||
| mpg = MedianPreimageGenerator() | |||
| mpg.dataset = dataset | |||
| mpg.set_options(**mpg_options.copy()) | |||
| @@ -83,10 +111,19 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| mpg.mge_options = mge_options.copy() | |||
| # 3. compute median preimage. | |||
| print('2. computing median preimage...') | |||
| print('3. computing median preimage...') | |||
| mpg.run() | |||
| results = mpg.get_results() | |||
| # 4. compute pairwise kernel distances. | |||
| print('4. computing pairwise kernel distances...') | |||
| _, dis_k_max, dis_k_min, dis_k_mean = mpg.graph_kernel.compute_distance_matrix() | |||
| dis_k_max_list.append(dis_k_max) | |||
| dis_k_min_list.append(dis_k_min) | |||
| dis_k_mean_list.append(dis_k_mean) | |||
| # 5. save results (and median graphs). | |||
| print('5. saving results (and median graphs)...') | |||
| # write result detail. | |||
| if save_results: | |||
| print('writing results to files...') | |||
| @@ -99,7 +136,7 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| csv.writer(f_detail).writerow([ds_name, kernel_options['name'], | |||
| ged_options['edit_cost'], ged_options['method'], | |||
| ged_options['attr_distance'], mpg_options['fit_method'], | |||
| num_graphs, dataset.targets[0], 1, | |||
| num_graphs, target, 1, | |||
| results['sod_set_median'], results['sod_gen_median'], | |||
| results['k_dis_set_median'], results['k_dis_gen_median'], | |||
| results['k_dis_dataset'], sod_sm2gm, dis_k_sm2gm, | |||
| @@ -161,7 +198,7 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| csv.writer(f_summary).writerow([ds_name, kernel_options['name'], | |||
| ged_options['edit_cost'], ged_options['method'], | |||
| ged_options['attr_distance'], mpg_options['fit_method'], | |||
| num_graphs, dataset.targets[0], | |||
| num_graphs, target, | |||
| results['sod_set_median'], results['sod_gen_median'], | |||
| results['k_dis_set_median'], results['k_dis_gen_median'], | |||
| results['k_dis_dataset'], sod_sm2gm, dis_k_sm2gm, | |||
| @@ -175,17 +212,18 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| # save median graphs. | |||
| if save_medians: | |||
| fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.k' + str(num_graphs) + '.y' + str(dataset.targets[0]) + '.repeat' + str(1) | |||
| print('Saving median graphs to files...') | |||
| fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1) | |||
| saveGXL(mpg.set_median, fn_pre_sm + '.gxl', method='default', | |||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||
| node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs) | |||
| fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options['fit_method'] + '.k' + str(num_graphs) + '.y' + str(dataset.targets[0]) + '.repeat' + str(1) | |||
| fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1) | |||
| saveGXL(mpg.gen_median, fn_pre_gm + '.gxl', method='default', | |||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||
| node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs) | |||
| fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options['fit_method'] + '.k' + str(num_graphs) + '.y' + str(dataset.targets[0]) + '.repeat' + str(1) | |||
| fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1) | |||
| saveGXL(mpg.best_from_dataset, fn_best_dataset + '.gxl', method='default', | |||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||
| node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, | |||
| node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs) | |||
| # plot median graphs. | |||
| @@ -194,7 +232,9 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| draw_Letter_graph(mpg.set_median, fn_pre_sm) | |||
| draw_Letter_graph(mpg.gen_median, fn_pre_gm) | |||
| draw_Letter_graph(mpg.best_from_dataset, fn_best_dataset) | |||
| if (load_gm == 'auto' and not gmfile_exist) or not load_gm: | |||
| gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm) | |||
| # write result summary for each letter. | |||
| if save_results: | |||
| @@ -227,6 +267,18 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_ | |||
| num_converged, num_updates_ecc_mean]) | |||
| f_summary.close() | |||
| # save total pairwise kernel distances. | |||
| dis_k_max = np.max(dis_k_max_list) | |||
| dis_k_min = np.min(dis_k_min_list) | |||
| dis_k_mean = np.mean(dis_k_mean_list) | |||
| print('The maximum pairwise distance in kernel space:', dis_k_max) | |||
| print('The minimum pairwise distance in kernel space:', dis_k_min) | |||
| print('The average pairwise distance in kernel space:', dis_k_mean) | |||
| # write Gram matrices to file. | |||
| if (load_gm == 'auto' and not gmfile_exist) or not load_gm: | |||
| np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | |||
| print('\ncomplete.') | |||
| @@ -235,7 +287,7 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
| fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | |||
| f_detail = open(dir_output + fn_output_detail, 'a') | |||
| csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost', | |||
| 'GED method', 'attr distance', 'fit method', 'k', | |||
| 'GED method', 'attr distance', 'fit method', 'num graphs', | |||
| 'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', | |||
| 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
| 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', | |||
| @@ -247,7 +299,7 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
| fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv' | |||
| f_summary = open(dir_output + fn_output_summary, 'a') | |||
| csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost', | |||
| 'GED method', 'attr distance', 'fit method', 'k', | |||
| 'GED method', 'attr distance', 'fit method', 'num graphs', | |||
| 'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', | |||
| 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
| 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', | |||
| @@ -263,24 +315,28 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
def get_relations(sign):
    """Translate a comparison sign into a relation label.

    Parameters
    ----------
    sign : number
        Value compared for equality against -1, 0 and 1.

    Returns
    -------
    str or None
        'better' when ``sign == -1``, 'same' when ``sign == 0``, 'worse' when
        ``sign == 1``; ``None`` for any other value.
    """
    if sign == -1:
        return 'better'
    elif sign == 0:
        return 'same'
    elif sign == 1:
        return 'worse'
    # Any other value has no associated label.
    return None
| # Draw the current median graph. | |||
def draw_Letter_graph(graph, file_prefix):
    """Draw a graph using its stored node coordinates and save it as EPS.

    Parameters
    ----------
    graph : networkx graph
        Every node must carry 'x' and 'y' attributes convertible to float;
        they are used as the node's drawing position.
    file_prefix : str
        Output path without extension; the figure is written to
        ``file_prefix + '.eps'``.

    Returns
    -------
    None
    """
    # Select the 'agg' backend before pyplot is imported.
    # NOTE(review): presumably chosen so that plotting works without a
    # display; confirm against the environments this is run in.
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt

    plt.figure()
    # Use `graph.nodes[n]`: the older `graph.node[n]` accessor is no longer
    # available in recent networkx releases.
    pos = {
        n: np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])
        for n in graph.nodes
    }
    nx.draw_networkx(graph, pos)
    plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
    # Clear and close the figure so repeated calls do not accumulate figures.
    plt.clf()
    plt.close()
| def remove_edges(Gn): | |||
| @@ -288,6 +344,7 @@ def remove_edges(Gn): | |||
| for _, _, attrs in G.edges(data=True): | |||
| attrs.clear() | |||
| def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||
| term1 = Kmatrix[idx_g, idx_g] | |||
| term2 = 0 | |||
| @@ -17,3 +17,5 @@ __date__ = "November 2017" | |||
| # from utils import utils | |||
| from gklearn.utils.dataset import Dataset, split_dataset_by_target | |||
| from gklearn.utils.timer import Timer | |||
| from gklearn.utils.utils import get_graph_kernel_by_name | |||
| from gklearn.utils.utils import compute_gram_matrices_by_class | |||
| @@ -56,9 +56,10 @@ class Dataset(object): | |||
| def load_graphs(self, graphs, targets=None): | |||
| # this has to be followed by set_labels(). | |||
| self.__graphs = graphs | |||
| self.__targets = targets | |||
| self.set_labels_attrs() | |||
| # self.set_labels_attrs() | |||
| def load_predefined_dataset(self, ds_name): | |||
| @@ -94,6 +95,13 @@ class Dataset(object): | |||
| self.set_labels_attrs() | |||
| def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | |||
| self.__node_labels = node_labels | |||
| self.__node_attrs = node_attrs | |||
| self.__edge_labels = edge_labels | |||
| self.__edge_attrs = edge_attrs | |||
| def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None): | |||
| # @todo: remove labels which have only one possible value. | |||
| @@ -371,9 +379,34 @@ class Dataset(object): | |||
| print(OrderedDict(sorted(infos.items(), key=lambda i: keys.index(i[0])))) | |||
| def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
| for g in self.__graphs: | |||
| for nd in g.nodes(): | |||
| for nl in node_labels: | |||
| del g.nodes[nd][nl] | |||
| for na in node_attrs: | |||
| del g.nodes[nd][na] | |||
| for ed in g.edges(): | |||
| for el in edge_labels: | |||
| del g.edges[ed][el] | |||
| for ea in edge_attrs: | |||
| del g.edges[ed][ea] | |||
| if len(node_labels) > 0: | |||
| self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
| if len(edge_labels) > 0: | |||
| self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
| if len(node_attrs) > 0: | |||
| self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
| if len(edge_attrs) > 0: | |||
| self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
| def cut_graphs(self, range_): | |||
| self.__graphs = [self.__graphs[i] for i in range_] | |||
| self.set_labels_attrs() | |||
| if self.__targets is not None: | |||
| self.__targets = [self.__targets[i] for i in range_] | |||
| # @todo | |||
| # self.set_labels_attrs() | |||
| def __get_dataset_size(self): | |||
| @@ -574,5 +607,6 @@ def split_dataset_by_target(dataset): | |||
| sub_graphs = [graphs[i] for i in val] | |||
| sub_dataset = Dataset() | |||
| sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | |||
| sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | |||
| datasets.append(sub_dataset) | |||
| return datasets | |||
| @@ -296,3 +296,59 @@ def get_edge_labels(Gn, edge_label): | |||
| for G in Gn: | |||
| el = el | set(nx.get_edge_attributes(G, edge_label).values()) | |||
| return el | |||
def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options=None):
    """Instantiate a graph kernel object from its name.

    Parameters
    ----------
    name : str
        Name of the kernel. Only 'structuralspkernel' is handled here.
    node_labels, edge_labels, node_attrs, edge_attrs
        Forwarded unchanged to the kernel constructor.
    ds_infos
        Forwarded unchanged to the kernel constructor.
    kernel_options : dict, optional
        Extra keyword arguments forwarded to the kernel constructor.
        Defaults to no extra arguments.

    Returns
    -------
    The constructed kernel object (a ``StructuralSP`` instance for
    'structuralspkernel').

    Raises
    ------
    ValueError
        If ``name`` is not a supported kernel name.
    """
    # Avoid a shared mutable default argument.
    if kernel_options is None:
        kernel_options = {}

    if name == 'structuralspkernel':
        # Imported lazily, as in the original, so the kernel module is only
        # loaded when this kernel is requested.
        from gklearn.kernels import StructuralSP
        return StructuralSP(node_labels=node_labels, edge_labels=edge_labels,
                            node_attrs=node_attrs, edge_attrs=edge_attrs,
                            ds_infos=ds_infos, **kernel_options)

    # Fail with a clear message instead of hitting an unbound local variable.
    raise ValueError('Unknown graph kernel name: "' + str(name) + '".')
def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None):
    """Compute the unnormalized Gram matrix of each target class of a dataset.

    The predefined dataset ``ds_name`` is loaded, optionally stripped of
    irrelevant labels, split by target, and a graph kernel is computed on
    each resulting sub-dataset.

    Parameters
    ----------
    ds_name : str
        Name passed to ``Dataset.load_predefined_dataset``.
    kernel_options : dict
        Must contain the key 'name' (used to select the kernel and to build
        the output file name). The whole dict is also forwarded to the kernel
        constructor and to its ``compute`` method.
    save_results : bool, optional
        When true, the list of Gram matrices and the list of run times are
        written with ``np.savez``.
    dir_save : str, optional
        Prefix prepended to the output file name.
    irrelevant_labels : dict, optional
        Keyword arguments for ``Dataset.remove_labels``; skipped when None.

    Returns
    -------
    None
    """
    from gklearn.utils import Dataset, split_dataset_by_target

    # 1. get dataset.
    print('1. getting dataset...')
    dataset_all = Dataset()
    dataset_all.load_predefined_dataset(ds_name)
    if irrelevant_labels is not None:
        dataset_all.remove_labels(**irrelevant_labels)
    datasets = split_dataset_by_target(dataset_all)

    gram_matrix_unnorm_list = []
    run_time_list = []

    print('start computing Gram matrix for each class of target...')
    for dataset in datasets:
        # All graphs of a sub-dataset share one target, so the first is
        # representative.
        target = dataset.targets[0]
        print('\ntarget =', target, '\n')

        # 2. initialize graph kernel.
        print('2. initializing graph kernel and setting parameters...')
        graph_kernel = get_graph_kernel_by_name(
            kernel_options['name'],
            node_labels=dataset.node_labels,
            edge_labels=dataset.edge_labels,
            node_attrs=dataset.node_attrs,
            edge_attrs=dataset.edge_attrs,
            ds_infos=dataset.get_dataset_infos(keys=['directed']),
            kernel_options=kernel_options)

        # 3. compute gram matrix.
        print('3. computing gram matrix...')
        # The matrix returned by compute() is not used; the unnormalized one
        # is read from the kernel object afterwards.
        _, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
        gram_matrix_unnorm_list.append(graph_kernel.gram_matrix_unnorm)
        run_time_list.append(run_time)

    # 4. save results.
    print()
    print('4. saving results...')
    if save_results:
        # np.savez appends '.npz' to the given file name.
        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm',
                 gram_matrix_unnorm_list=gram_matrix_unnorm_list,
                 run_time_list=run_time_list)

    print('\ncomplete.')
| @@ -0,0 +1,33 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Fri Apr 3 10:38:59 2020 | |||
| @author: ljia | |||
| """ | |||
| from tqdm import tqdm | |||
| import sys | |||
| print('start') | |||
| for i in tqdm(range(10000000), file=sys.stdout): | |||
| x = i | |||
| # print(x) | |||
| # ============================================================================= | |||
| # summary | |||
| # terminal, IPython 7.0.1 (Spyder 4): Works. | |||
| # write to file: does not work. Progress bar splits as the progress goes. | |||
| # Jupyter: | |||
| # ============================================================================= | |||
| # for i in tqdm(range(10000000)): | |||
| # x = i | |||
| # print(x) | |||
| # ============================================================================= | |||
| # summary | |||
| # terminal, IPython 7.0.1 (Spyder 4): does not work. When combined with other | |||
| # print calls, the progress bar splits. | |||
| # write to file: does not work. Cannot write progress bar to file. | |||
| # Jupyter: | |||
| # ============================================================================= | |||
| @@ -1,7 +1,10 @@ | |||
| numpy==1.15.2 | |||
| scipy==1.1.0 | |||
| matplotlib==3.0.0 | |||
| networkx==2.2 | |||
| scikit-learn==0.20.0 | |||
| tabulate==0.8.2 | |||
| tqdm==4.26.0 | |||
| numpy>=1.15.2 | |||
| scipy>=1.1.0 | |||
| matplotlib>=3.0.0 | |||
| networkx>=2.2 | |||
| scikit-learn>=0.20.0 | |||
| tabulate>=0.8.2 | |||
| tqdm>=4.26.0 | |||
| # cvxpy # for preimage. | |||
| # cvxopt # for preimage. | |||
| # mosek # for preimage. | |||