@@ -101,7 +101,7 @@ def get_shortest_paths(G, weight, directed):
         # each edge walk is counted twice, starting from both its extreme nodes.
         if not directed:
             sp += [sptemp[::-1] for sptemp in spltemp]
     # add single nodes as length 0 paths.
     sp += [[n] for n in G.nodes()]
     return sp
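
The two appends above sit inside a loop over node pairs that this hunk does not show. A minimal sketch of the whole routine, with the enclosing loop reconstructed as an assumption from the visible lines (the combinations loop and the nx.all_shortest_paths call are inferred, not confirmed by this diff):

import networkx as nx
from itertools import combinations

def get_shortest_paths_sketch(G, weight=None, directed=False):
    sp = []
    for n1, n2 in combinations(G.nodes(), 2):
        try:
            spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
        except nx.NetworkXNoPath:  # unreachable pair: contributes nothing
            spltemp = []
        sp += spltemp
        # each edge walk is counted twice, starting from both its extreme nodes.
        if not directed:
            sp += [sptemp[::-1] for sptemp in spltemp]
    # add single nodes as length 0 paths.
    sp += [[n] for n in G.nodes()]
    return sp

G = nx.path_graph(3)  # 0 - 1 - 2
print(get_shortest_paths_sketch(G))
# [[0, 1], [1, 0], [0, 1, 2], [2, 1, 0], [1, 2], [2, 1], [0], [1], [2]]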
@@ -233,7 +233,7 @@ def direct_product_graph(G1, G2, node_labels, edge_labels):
         A list of node attributes used as labels.
     edge_labels : list
         A list of edge attributes used as labels.

     Returns
     -------
     gt : NetworkX graph
@@ -287,9 +287,9 @@ def direct_product_graph(G1, G2, node_labels, edge_labels):
 def graph_deepcopy(G):
     """Deep copy a graph, including deep copy of all nodes, edges and
     attributes of the graph, nodes and edges.

     Notes
     -----
     It is the same as the NetworkX function graph.copy(), as far as I know.
@@ -302,28 +302,28 @@ def graph_deepcopy(G):
         G_copy = nx.DiGraph(**labels)
     else:
         G_copy = nx.Graph(**labels)

     # add nodes.
     for nd, attrs in G.nodes(data=True):
         labels = {}
         for k, v in attrs.items():
             labels[k] = deepcopy(v)
         G_copy.add_node(nd, **labels)

     # add edges.
     for nd1, nd2, attrs in G.edges(data=True):
         labels = {}
         for k, v in attrs.items():
             labels[k] = deepcopy(v)
         G_copy.add_edge(nd1, nd2, **labels)

     return G_copy


 def graph_isIdentical(G1, G2):
     """Check if two graphs are identical, including: same nodes, edges, node
     labels/attributes, and edge labels/attributes.

     Notes
     -----
     1. The two graphs must be of the same type.
@@ -341,7 +341,7 @@ def graph_isIdentical(G1, G2):
     if elist1 != elist2:
         return False

     # check graph attributes.

     return True
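
A quick usage check of graph_deepcopy against its docstring; a sketch only, where the import path is an assumption and the attribute names are illustrative:

import networkx as nx
from gklearn.utils.utils import graph_deepcopy  # import path assumed

G = nx.Graph()
G.add_node(0, symbol='C')
G.add_node(1, symbol='O')
G.add_edge(0, 1, bond='single')

G_copy = graph_deepcopy(G)
G_copy.nodes[0]['symbol'] = 'N'  # mutate the copy only
print(G.nodes[0]['symbol'])      # the original still prints 'C'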
@@ -363,7 +363,9 @@ def get_edge_labels(Gn, edge_label):
     return el


-def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
+def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}, **kwargs):
+    if len(kwargs) != 0:
+        kernel_options = kwargs
     if name == 'Marginalized':
         from gklearn.kernels import Marginalized
         graph_kernel = Marginalized(node_labels=node_labels,
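
Note the semantics of the new **kwargs branch: any extra keyword replaces kernel_options wholesale rather than merging into it. A self-contained sketch of just that behaviour (make_kernel is a hypothetical stand-in, not gklearn code):

def make_kernel(name, kernel_options={}, **kwargs):
    # mirror the new logic: extra keywords replace kernel_options entirely
    if len(kwargs) != 0:
        kernel_options = kwargs
    return name, kernel_options

print(make_kernel('Marginalized', kernel_options={'p_quit': 0.5}))
# ('Marginalized', {'p_quit': 0.5}) -- explicit dict kept
print(make_kernel('Marginalized', kernel_options={'p_quit': 0.5}, n_iteration=10))
# ('Marginalized', {'n_iteration': 10}) -- 'p_quit' is silently dropped

If merging were intended, kernel_options = {**kernel_options, **kwargs} would keep both; the mutable {} default is retained above only to mirror the original signature.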
@@ -379,7 +381,7 @@ def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attr
     elif name == 'StructuralSP':
         from gklearn.kernels import StructuralSP
         graph_kernel = StructuralSP(node_labels=node_labels,
                                     edge_labels=edge_labels,
                                     node_attrs=node_attrs,
                                     edge_attrs=edge_attrs,
                                     ds_infos=ds_infos,
@@ -417,7 +419,7 @@ def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attr
 def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
     import os
     from gklearn.utils import Dataset, split_dataset_by_target

     # 1. get dataset.
     print('1. getting dataset...')
     dataset_all = Dataset()
@@ -427,20 +429,20 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
         dataset_all.remove_labels(**irrelevant_labels)
     # dataset_all.cut_graphs(range(0, 10))
     datasets = split_dataset_by_target(dataset_all)

     gram_matrix_unnorm_list = []
     run_time_list = []

     print('start generating preimage for each class of target...')
     for idx, dataset in enumerate(datasets):
         target = dataset.targets[0]
         print('\ntarget =', target, '\n')

         # 2. initialize graph kernel.
         print('2. initializing graph kernel and setting parameters...')
         graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
                                                 node_labels=dataset.node_labels,
                                                 edge_labels=dataset.edge_labels,
                                                 node_attrs=dataset.node_attrs,
                                                 edge_attrs=dataset.edge_attrs,
                                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
@@ -450,24 +452,24 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
         print('3. computing gram matrix...')
         gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
         gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm

         gram_matrix_unnorm_list.append(gram_matrix_unnorm)
         run_time_list.append(run_time)

     # 4. save results.
     print()
     print('4. saving results...')
     if save_results:
         os.makedirs(dir_save, exist_ok=True)
         np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)

     print('\ncomplete.')


 def find_paths(G, source_node, length):
     """Find all paths with a certain length that start from a source node.
     A recursive depth first search is applied.

     Parameters
     ----------
     G : NetworkX graph
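
For reading the results back: np.savez appends '.npz' to a filename that lacks it, so the file on disk ends in '.gm.npz'. A sketch of the corresponding load, with hypothetical placeholder values for the path pieces; allow_pickle is needed because matrices of different sizes are stored as an object array:

import numpy as np

dir_save, ds_name, kernel_name = 'outputs/', 'MUTAG', 'StructuralSP'  # placeholders
data = np.load(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_name
               + '.gm.npz', allow_pickle=True)
gram_matrix_unnorm_list = data['gram_matrix_unnorm_list']
run_time_list = data['run_time_list']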
| @@ -476,7 +478,7 @@ def find_paths(G, source_node, length): | |||||
| The number of the node from where all paths start. | The number of the node from where all paths start. | ||||
| length : integer | length : integer | ||||
| The length of paths. | The length of paths. | ||||
| Return | Return | ||||
| ------ | ------ | ||||
| path : list of list | path : list of list | ||||
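
A minimal sketch of the recursive depth-first search the docstring describes, assuming a path of length n has n edges and no repeated nodes:

import networkx as nx

def find_paths_sketch(G, source_node, length):
    # base case: the only path of length 0 is the source node itself
    if length == 0:
        return [[source_node]]
    # recursive case: prepend the source to every shorter path that starts
    # at a neighbor and does not revisit the source
    return [[source_node] + path
            for neighbor in G[source_node]
            for path in find_paths_sketch(G, neighbor, length - 1)
            if source_node not in path]

G = nx.path_graph(4)  # 0 - 1 - 2 - 3
print(find_paths_sketch(G, 0, 2))  # [[0, 1, 2]]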
@@ -492,14 +494,14 @@ def find_all_paths(G, length, is_directed):
 def find_all_paths(G, length, is_directed):
     """Find all paths with a certain length in a graph. A recursive depth first
     search is applied.

     Parameters
     ----------
     G : NetworkX graph
         The graph in which paths are searched.
     length : integer
         The length of paths.

     Returns
     -------
     path : list of list
@@ -508,18 +510,18 @@ def find_all_paths(G, length, is_directed):
     all_paths = []
     for node in G:
         all_paths.extend(find_paths(G, node, length))

     if not is_directed:
         # For each path, two representations are retrieved from its two extremities.
         # Remove one of them.
         all_paths_r = [path[::-1] for path in all_paths]
         for idx, path in enumerate(all_paths[:-1]):
             for path2 in all_paths_r[idx+1::]:
                 if path == path2:
                     all_paths[idx] = []
                     break

     all_paths = list(filter(lambda a: a != [], all_paths))
     return all_paths
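
The pairwise comparison above is quadratic in the number of paths. An equivalent linear-time dedup as a sketch, assuming node identifiers are hashable and mutually comparable (not guaranteed by the original code):

def dedup_undirected_paths(all_paths):
    # canonical form of a path: the lexicographically smaller of its two
    # orientations; keep one representative per canonical form
    seen = set()
    unique = []
    for path in all_paths:
        key = min(tuple(path), tuple(reversed(path)))
        if key not in seen:
            seen.add(key)
            unique.append(path)
    return unique

print(dedup_undirected_paths([[0, 1, 2], [2, 1, 0], [1, 2, 3]]))
# [[0, 1, 2], [1, 2, 3]]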
@@ -535,8 +537,8 @@ def get_mlti_dim_edge_attrs(G, attr_names):
     for ed, attrs in G.edges(data=True):
         attributes.append(tuple(attrs[aname] for aname in attr_names))
     return attributes


 def normalize_gram_matrix(gram_matrix):
     diag = gram_matrix.diagonal().copy()
     for i in range(len(gram_matrix)):
@@ -544,8 +546,8 @@ def normalize_gram_matrix(gram_matrix):
             gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
             gram_matrix[j][i] = gram_matrix[i][j]
     return gram_matrix


 def compute_distance_matrix(gram_matrix):
     dis_mat = np.empty((len(gram_matrix), len(gram_matrix)))
     for i in range(len(gram_matrix)):
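
normalize_gram_matrix applies cosine normalization, K'[i, j] = K[i, j] / sqrt(K[i, i] * K[j, j]). A vectorized NumPy equivalent as a sketch; unlike the loop above, it returns a new array instead of modifying in place:

import numpy as np

def normalize_gram_matrix_vec(gram_matrix):
    # divide each entry by the geometric mean of the two diagonal entries
    d = np.sqrt(gram_matrix.diagonal())
    return gram_matrix / np.outer(d, d)

K = np.array([[4.0, 2.0], [2.0, 9.0]])
print(normalize_gram_matrix_vec(K))
# [[1.         0.33333333]
#  [0.33333333 1.        ]]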
@@ -573,9 +575,9 @@ def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
     g1, g2 : NetworkX graph
         The kernels between pairs of vertices in these two graphs are computed.
     node_kernels : dict
         A dictionary of kernel functions for nodes, including 3 items: 'symb'
         for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
         for both labels. The first 2 functions take two node labels as
         parameters, and the 'mix' function takes 4 parameters, a symbolic and a
         non-symbolic label for each of the two nodes. Each label is in the form
         of a 2-D array (n_samples, n_features). Each function returns a number
@@ -590,18 +592,18 @@ def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
     -------
     vk_dict : dict
         Vertex kernels keyed by vertices.

     Notes
     -----
     This function is used by ``gklearn.kernels.FixedPoint'' and
     ``gklearn.kernels.StructuralSP''. The method is borrowed from FCSP [1].

     References
     ----------
     .. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang.
        Parallelization of shortest path graph kernels on multi-core cpus and
        gpus. Proceedings of the Programmability Issues for Heterogeneous
        Multicores (MultiProg), Vienna, Austria, 2014.
     """
     vk_dict = {}  # shortest path matrices dict
     if len(node_labels) > 0:
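
The node_kernels contract described in the docstring can be satisfied as follows; a minimal sketch with hypothetical delta and Gaussian kernels (the names and the gamma value are illustrative, not gklearn defaults):

import numpy as np

def delta_kernel(x, y):
    # symbolic labels: 1 if identical, 0 otherwise
    return 1.0 if np.array_equal(x, y) else 0.0

def gaussian_kernel(x, y, gamma=0.1):
    # non-symbolic (continuous) labels
    d = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return float(np.exp(-gamma * np.sum(d ** 2)))

def mix_kernel(x_symb, y_symb, x_nsymb, y_nsymb):
    # both label types: product of the symbolic and non-symbolic kernels
    return delta_kernel(x_symb, y_symb) * gaussian_kernel(x_nsymb, y_nsymb)

node_kernels = {'symb': delta_kernel, 'nsymb': gaussian_kernel, 'mix': mix_kernel}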