| @@ -0,0 +1,605 @@ | |||
| import networkx as nx | |||
| import numpy as np | |||
| from copy import deepcopy | |||
| from enum import Enum, unique | |||
| #from itertools import product | |||
| # from tqdm import tqdm | |||
| def getSPLengths(G1): | |||
| sp = nx.shortest_path(G1) | |||
| distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) | |||
| for i in sp.keys(): | |||
| for j in sp[i].keys(): | |||
| distances[i, j] = len(sp[i][j]) - 1 | |||
| return distances | |||
| def getSPGraph(G, edge_weight=None): | |||
| """Transform graph G to its corresponding shortest-paths graph. | |||
| Parameters | |||
| ---------- | |||
| G : NetworkX graph | |||
| The graph to be tramsformed. | |||
| edge_weight : string | |||
| edge attribute corresponding to the edge weight. | |||
| Return | |||
| ------ | |||
| S : NetworkX graph | |||
| The shortest-paths graph corresponding to G. | |||
| Notes | |||
| ------ | |||
| For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes. | |||
| References | |||
| ---------- | |||
| .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. | |||
| """ | |||
| return floydTransformation(G, edge_weight=edge_weight) | |||
| def floydTransformation(G, edge_weight=None): | |||
| """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation. | |||
| Parameters | |||
| ---------- | |||
| G : NetworkX graph | |||
| The graph to be tramsformed. | |||
| edge_weight : string | |||
| edge attribute corresponding to the edge weight. The default edge weight is bond_type. | |||
| Return | |||
| ------ | |||
| S : NetworkX graph | |||
| The shortest-paths graph corresponding to G. | |||
| References | |||
| ---------- | |||
| .. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. | |||
| """ | |||
| spMatrix = nx.floyd_warshall_numpy(G, weight=edge_weight) | |||
| S = nx.Graph() | |||
| S.add_nodes_from(G.nodes(data=True)) | |||
| ns = list(G.nodes()) | |||
| for i in range(0, G.number_of_nodes()): | |||
| for j in range(i + 1, G.number_of_nodes()): | |||
| if spMatrix[i, j] != np.inf: | |||
| S.add_edge(ns[i], ns[j], cost=spMatrix[i, j]) | |||
| return S | |||
| def get_shortest_paths(G, weight, directed): | |||
| """Get all shortest paths of a graph. | |||
| Parameters | |||
| ---------- | |||
| G : NetworkX graphs | |||
| The graphs whose paths are calculated. | |||
| weight : string/None | |||
| edge attribute used as weight to calculate the shortest path. | |||
| directed: boolean | |||
| Whether graph is directed. | |||
| Return | |||
| ------ | |||
| sp : list of list | |||
| List of shortest paths of the graph, where each path is represented by a list of nodes. | |||
| """ | |||
| from itertools import combinations | |||
| sp = [] | |||
| for n1, n2 in combinations(G.nodes(), 2): | |||
| try: | |||
| spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight)) | |||
| except nx.NetworkXNoPath: # nodes not connected | |||
| pass | |||
| else: | |||
| sp += spltemp | |||
| # each edge walk is counted twice, starting from both its extreme nodes. | |||
| if not directed: | |||
| sp += [sptemp[::-1] for sptemp in spltemp] | |||
| # add single nodes as length 0 paths. | |||
| sp += [[n] for n in G.nodes()] | |||
| return sp | |||
| def untotterTransformation(G, node_label, edge_label): | |||
| """Transform graph G according to Mahé et al.'s work to filter out tottering patterns of marginalized kernel and tree pattern kernel. | |||
| Parameters | |||
| ---------- | |||
| G : NetworkX graph | |||
| The graph to be tramsformed. | |||
| node_label : string | |||
| node attribute used as label. The default node label is 'atom'. | |||
| edge_label : string | |||
| edge attribute used as label. The default edge label is 'bond_type'. | |||
| Return | |||
| ------ | |||
| gt : NetworkX graph | |||
| The transformed graph corresponding to G. | |||
| References | |||
| ---------- | |||
| .. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004. | |||
| """ | |||
| # arrange all graphs in a list | |||
| G = G.to_directed() | |||
| gt = nx.Graph() | |||
| gt.graph = G.graph | |||
| gt.add_nodes_from(G.nodes(data=True)) | |||
| for edge in G.edges(): | |||
| gt.add_node(edge) | |||
| gt.nodes[edge].update({node_label: G.nodes[edge[1]][node_label]}) | |||
| gt.add_edge(edge[0], edge) | |||
| gt.edges[edge[0], edge].update({ | |||
| edge_label: | |||
| G[edge[0]][edge[1]][edge_label] | |||
| }) | |||
| for neighbor in G[edge[1]]: | |||
| if neighbor != edge[0]: | |||
| gt.add_edge(edge, (edge[1], neighbor)) | |||
| gt.edges[edge, (edge[1], neighbor)].update({ | |||
| edge_label: | |||
| G[edge[1]][neighbor][edge_label] | |||
| }) | |||
| # nx.draw_networkx(gt) | |||
| # plt.show() | |||
| # relabel nodes using consecutive integers for convenience of kernel calculation. | |||
| gt = nx.convert_node_labels_to_integers( | |||
| gt, first_label=0, label_attribute='label_orignal') | |||
| return gt | |||
| def direct_product(G1, G2, node_label, edge_label): | |||
| """Return the direct/tensor product of directed graphs G1 and G2. | |||
| Parameters | |||
| ---------- | |||
| G1, G2 : NetworkX graph | |||
| The original graphs. | |||
| node_label : string | |||
| node attribute used as label. The default node label is 'atom'. | |||
| edge_label : string | |||
| edge attribute used as label. The default edge label is 'bond_type'. | |||
| Return | |||
| ------ | |||
| gt : NetworkX graph | |||
| The direct product graph of G1 and G2. | |||
| Notes | |||
| ----- | |||
| This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph. | |||
| References | |||
| ---------- | |||
| .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003. | |||
| """ | |||
| # arrange all graphs in a list | |||
| from itertools import product | |||
| # G = G.to_directed() | |||
| gt = nx.DiGraph() | |||
| # add nodes | |||
| for u, v in product(G1, G2): | |||
| if G1.nodes[u][node_label] == G2.nodes[v][node_label]: | |||
| gt.add_node((u, v)) | |||
| gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]}) | |||
| # add edges, faster for sparse graphs (no so many edges), which is the most case for now. | |||
| for (u1, v1), (u2, v2) in product(G1.edges, G2.edges): | |||
| if (u1, u2) in gt and ( | |||
| v1, v2 | |||
| ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2, | |||
| v2][edge_label]: | |||
| gt.add_edge((u1, u2), (v1, v2)) | |||
| gt.edges[(u1, u2), (v1, v2)].update({ | |||
| edge_label: | |||
| G1.edges[u1, v1][edge_label] | |||
| }) | |||
| # # add edges, faster for dense graphs (a lot of edges, complete graph would be super). | |||
| # for u, v in product(gt, gt): | |||
| # if (u[0], v[0]) in G1.edges and ( | |||
| # u[1], v[1] | |||
| # ) in G2.edges and G1.edges[u[0], | |||
| # v[0]][edge_label] == G2.edges[u[1], | |||
| # v[1]][edge_label]: | |||
| # gt.add_edge((u[0], u[1]), (v[0], v[1])) | |||
| # gt.edges[(u[0], u[1]), (v[0], v[1])].update({ | |||
| # edge_label: | |||
| # G1.edges[u[0], v[0]][edge_label] | |||
| # }) | |||
| # relabel nodes using consecutive integers for convenience of kernel calculation. | |||
| # gt = nx.convert_node_labels_to_integers( | |||
| # gt, first_label=0, label_attribute='label_orignal') | |||
| return gt | |||
| def direct_product_graph(G1, G2, node_labels, edge_labels): | |||
| """Return the direct/tensor product of directed graphs G1 and G2. | |||
| Parameters | |||
| ---------- | |||
| G1, G2 : NetworkX graph | |||
| The original graphs. | |||
| node_labels : list | |||
| A list of node attributes used as labels. | |||
| edge_labels : list | |||
| A list of edge attributes used as labels. | |||
| Return | |||
| ------ | |||
| gt : NetworkX graph | |||
| The direct product graph of G1 and G2. | |||
| Notes | |||
| ----- | |||
| This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph. | |||
| References | |||
| ---------- | |||
| .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003. | |||
| """ | |||
| # arrange all graphs in a list | |||
| from itertools import product | |||
| # G = G.to_directed() | |||
| gt = nx.DiGraph() | |||
| # add nodes | |||
| for u, v in product(G1, G2): | |||
| label1 = tuple(G1.nodes[u][nl] for nl in node_labels) | |||
| label2 = tuple(G2.nodes[v][nl] for nl in node_labels) | |||
| if label1 == label2: | |||
| gt.add_node((u, v), node_label=label1) | |||
| # add edges, faster for sparse graphs (no so many edges), which is the most case for now. | |||
| for (u1, v1), (u2, v2) in product(G1.edges, G2.edges): | |||
| if (u1, u2) in gt and (v1, v2) in gt: | |||
| label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels) | |||
| label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels) | |||
| if label1 == label2: | |||
| gt.add_edge((u1, u2), (v1, v2), edge_label=label1) | |||
| # # add edges, faster for dense graphs (a lot of edges, complete graph would be super). | |||
| # for u, v in product(gt, gt): | |||
| # if (u[0], v[0]) in G1.edges and ( | |||
| # u[1], v[1] | |||
| # ) in G2.edges and G1.edges[u[0], | |||
| # v[0]][edge_label] == G2.edges[u[1], | |||
| # v[1]][edge_label]: | |||
| # gt.add_edge((u[0], u[1]), (v[0], v[1])) | |||
| # gt.edges[(u[0], u[1]), (v[0], v[1])].update({ | |||
| # edge_label: | |||
| # G1.edges[u[0], v[0]][edge_label] | |||
| # }) | |||
| # relabel nodes using consecutive integers for convenience of kernel calculation. | |||
| # gt = nx.convert_node_labels_to_integers( | |||
| # gt, first_label=0, label_attribute='label_orignal') | |||
| return gt | |||
| def graph_deepcopy(G): | |||
| """Deep copy a graph, including deep copy of all nodes, edges and | |||
| attributes of the graph, nodes and edges. | |||
| Note | |||
| ---- | |||
| It is the same as the NetworkX function graph.copy(), as far as I know. | |||
| """ | |||
| # add graph attributes. | |||
| labels = {} | |||
| for k, v in G.graph.items(): | |||
| labels[k] = deepcopy(v) | |||
| if G.is_directed(): | |||
| G_copy = nx.DiGraph(**labels) | |||
| else: | |||
| G_copy = nx.Graph(**labels) | |||
| # add nodes | |||
| for nd, attrs in G.nodes(data=True): | |||
| labels = {} | |||
| for k, v in attrs.items(): | |||
| labels[k] = deepcopy(v) | |||
| G_copy.add_node(nd, **labels) | |||
| # add edges. | |||
| for nd1, nd2, attrs in G.edges(data=True): | |||
| labels = {} | |||
| for k, v in attrs.items(): | |||
| labels[k] = deepcopy(v) | |||
| G_copy.add_edge(nd1, nd2, **labels) | |||
| return G_copy | |||
| def graph_isIdentical(G1, G2): | |||
| """Check if two graphs are identical, including: same nodes, edges, node | |||
| labels/attributes, edge labels/attributes. | |||
| Notes | |||
| ----- | |||
| 1. The type of graphs has to be the same. | |||
| 2. Global/Graph attributes are neglected as they may contain names for graphs. | |||
| """ | |||
| # check nodes. | |||
| nlist1 = [n for n in G1.nodes(data=True)] | |||
| nlist2 = [n for n in G2.nodes(data=True)] | |||
| if not nlist1 == nlist2: | |||
| return False | |||
| # check edges. | |||
| elist1 = [n for n in G1.edges(data=True)] | |||
| elist2 = [n for n in G2.edges(data=True)] | |||
| if not elist1 == elist2: | |||
| return False | |||
| # check graph attributes. | |||
| return True | |||
| def get_node_labels(Gn, node_label): | |||
| """Get node labels of dataset Gn. | |||
| """ | |||
| nl = set() | |||
| for G in Gn: | |||
| nl = nl | set(nx.get_node_attributes(G, node_label).values()) | |||
| return nl | |||
| def get_edge_labels(Gn, edge_label): | |||
| """Get edge labels of dataset Gn. | |||
| """ | |||
| el = set() | |||
| for G in Gn: | |||
| el = el | set(nx.get_edge_attributes(G, edge_label).values()) | |||
| return el | |||
| def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): | |||
| if name == 'Marginalized': | |||
| from gklearn.kernels import Marginalized | |||
| graph_kernel = Marginalized(node_labels=node_labels, | |||
| edge_labels=edge_labels, | |||
| ds_infos=ds_infos, | |||
| **kernel_options) | |||
| elif name == 'ShortestPath': | |||
| from gklearn.kernels import ShortestPath | |||
| graph_kernel = ShortestPath(node_labels=node_labels, | |||
| node_attrs=node_attrs, | |||
| ds_infos=ds_infos, | |||
| **kernel_options) | |||
| elif name == 'StructuralSP': | |||
| from gklearn.kernels import StructuralSP | |||
| graph_kernel = StructuralSP(node_labels=node_labels, | |||
| edge_labels=edge_labels, | |||
| node_attrs=node_attrs, | |||
| edge_attrs=edge_attrs, | |||
| ds_infos=ds_infos, | |||
| **kernel_options) | |||
| elif name == 'PathUpToH': | |||
| from gklearn.kernels import PathUpToH | |||
| graph_kernel = PathUpToH(node_labels=node_labels, | |||
| edge_labels=edge_labels, | |||
| ds_infos=ds_infos, | |||
| **kernel_options) | |||
| elif name == 'Treelet': | |||
| from gklearn.kernels import Treelet | |||
| graph_kernel = Treelet(node_labels=node_labels, | |||
| edge_labels=edge_labels, | |||
| ds_infos=ds_infos, | |||
| **kernel_options) | |||
| elif name == 'WLSubtree': | |||
| from gklearn.kernels import WLSubtree | |||
| graph_kernel = WLSubtree(node_labels=node_labels, | |||
| edge_labels=edge_labels, | |||
| ds_infos=ds_infos, | |||
| **kernel_options) | |||
| elif name == 'WeisfeilerLehman': | |||
| from gklearn.kernels import WeisfeilerLehman | |||
| graph_kernel = WeisfeilerLehman(node_labels=node_labels, | |||
| edge_labels=edge_labels, | |||
| ds_infos=ds_infos, | |||
| **kernel_options) | |||
| else: | |||
| raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".') | |||
| return graph_kernel | |||
| def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False): | |||
| import os | |||
| from gklearn.utils import Dataset, split_dataset_by_target | |||
| # 1. get dataset. | |||
| print('1. getting dataset...') | |||
| dataset_all = Dataset() | |||
| dataset_all.load_predefined_dataset(ds_name) | |||
| dataset_all.trim_dataset(edge_required=edge_required) | |||
| if not irrelevant_labels is None: | |||
| dataset_all.remove_labels(**irrelevant_labels) | |||
| # dataset_all.cut_graphs(range(0, 10)) | |||
| datasets = split_dataset_by_target(dataset_all) | |||
| gram_matrix_unnorm_list = [] | |||
| run_time_list = [] | |||
| print('start generating preimage for each class of target...') | |||
| for idx, dataset in enumerate(datasets): | |||
| target = dataset.targets[0] | |||
| print('\ntarget =', target, '\n') | |||
| # 2. initialize graph kernel. | |||
| print('2. initializing graph kernel and setting parameters...') | |||
| graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||
| node_labels=dataset.node_labels, | |||
| edge_labels=dataset.edge_labels, | |||
| node_attrs=dataset.node_attrs, | |||
| edge_attrs=dataset.edge_attrs, | |||
| ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
| kernel_options=kernel_options) | |||
| # 3. compute gram matrix. | |||
| print('3. computing gram matrix...') | |||
| gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options) | |||
| gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm | |||
| gram_matrix_unnorm_list.append(gram_matrix_unnorm) | |||
| run_time_list.append(run_time) | |||
| # 4. save results. | |||
| print() | |||
| print('4. saving results...') | |||
| if save_results: | |||
| if not os.path.exists(dir_save): | |||
| os.makedirs(dir_save) | |||
| np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | |||
| print('\ncomplete.') | |||
| def find_paths(G, source_node, length): | |||
| """Find all paths with a certain length those start from a source node. | |||
| A recursive depth first search is applied. | |||
| Parameters | |||
| ---------- | |||
| G : NetworkX graphs | |||
| The graph in which paths are searched. | |||
| source_node : integer | |||
| The number of the node from where all paths start. | |||
| length : integer | |||
| The length of paths. | |||
| Return | |||
| ------ | |||
| path : list of list | |||
| List of paths retrieved, where each path is represented by a list of nodes. | |||
| """ | |||
| if length == 0: | |||
| return [[source_node]] | |||
| path = [[source_node] + path for neighbor in G[source_node] \ | |||
| for path in find_paths(G, neighbor, length - 1) if source_node not in path] | |||
| return path | |||
| def find_all_paths(G, length, is_directed): | |||
| """Find all paths with a certain length in a graph. A recursive depth first | |||
| search is applied. | |||
| Parameters | |||
| ---------- | |||
| G : NetworkX graphs | |||
| The graph in which paths are searched. | |||
| length : integer | |||
| The length of paths. | |||
| Return | |||
| ------ | |||
| path : list of list | |||
| List of paths retrieved, where each path is represented by a list of nodes. | |||
| """ | |||
| all_paths = [] | |||
| for node in G: | |||
| all_paths.extend(find_paths(G, node, length)) | |||
| if not is_directed: | |||
| # For each path, two presentations are retrieved from its two extremities. | |||
| # Remove one of them. | |||
| all_paths_r = [path[::-1] for path in all_paths] | |||
| for idx, path in enumerate(all_paths[:-1]): | |||
| for path2 in all_paths_r[idx+1::]: | |||
| if path == path2: | |||
| all_paths[idx] = [] | |||
| break | |||
| all_paths = list(filter(lambda a: a != [], all_paths)) | |||
| return all_paths | |||
| def get_mlti_dim_node_attrs(G, attr_names): | |||
| attributes = [] | |||
| for nd, attrs in G.nodes(data=True): | |||
| attributes.append(tuple(attrs[aname] for aname in attr_names)) | |||
| return attributes | |||
| def get_mlti_dim_edge_attrs(G, attr_names): | |||
| attributes = [] | |||
| for ed, attrs in G.edges(data=True): | |||
| attributes.append(tuple(attrs[aname] for aname in attr_names)) | |||
| return attributes | |||
| def normalize_gram_matrix(gram_matrix): | |||
| diag = gram_matrix.diagonal().copy() | |||
| for i in range(len(gram_matrix)): | |||
| for j in range(i, len(gram_matrix)): | |||
| gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j]) | |||
| gram_matrix[j][i] = gram_matrix[i][j] | |||
| return gram_matrix | |||
| def compute_distance_matrix(gram_matrix): | |||
| dis_mat = np.empty((len(gram_matrix), len(gram_matrix))) | |||
| for i in range(len(gram_matrix)): | |||
| for j in range(i, len(gram_matrix)): | |||
| dis = gram_matrix[i, i] + gram_matrix[j, j] - 2 * gram_matrix[i, j] | |||
| if dis < 0: | |||
| if dis > -1e-10: | |||
| dis = 0 | |||
| else: | |||
| raise ValueError('The distance is negative.') | |||
| dis_mat[i, j] = np.sqrt(dis) | |||
| dis_mat[j, i] = dis_mat[i, j] | |||
| dis_max = np.max(np.max(dis_mat)) | |||
| dis_min = np.min(np.min(dis_mat[dis_mat != 0])) | |||
| dis_mean = np.mean(np.mean(dis_mat)) | |||
| return dis_mat, dis_max, dis_min, dis_mean | |||
| def dummy_node(): | |||
| """ | |||
| /*! | |||
| * @brief Returns a dummy node. | |||
| * @return ID of dummy node. | |||
| */ | |||
| """ | |||
| return np.inf # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why. | |||
| def undefined_node(): | |||
| """ | |||
| /*! | |||
| * @brief Returns an undefined node. | |||
| * @return ID of undefined node. | |||
| */ | |||
| """ | |||
| return np.inf | |||
| def dummy_edge(): | |||
| """ | |||
| /*! | |||
| * @brief Returns a dummy edge. | |||
| * @return ID of dummy edge. | |||
| */ | |||
| """ | |||
| return np.inf | |||
| @unique | |||
| class SpecialLabel(Enum): | |||
| """can be used to define special labels. | |||
| """ | |||
| DUMMY = 1 # The dummy label. | |||
| # DUMMY = auto # enum.auto does not exist in Python 3.5. | |||