| @@ -1,21 +0,0 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """ | |||
| Pygraph | |||
| This package contains 5 sub packages : | |||
| * c_ext : binders to C++ code | |||
| * ged : allows to compute graph edit distance between networkX graphs | |||
| * kernels : computation of graph kernels, ie graph similarity measure compatible with SVM | |||
| * notebooks : examples of code using this library | |||
| * utils : Diverse computation on graphs | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| # import sub modules | |||
| from pygraph import c_ext | |||
| from pygraph import ged | |||
| from pygraph import utils | |||
| @@ -1,116 +0,0 @@ | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../") | |||
| import networkx as nx | |||
| import numpy as np | |||
| import time | |||
def marginalizedkernel(*args):
    """Calculate marginalized graph kernels between graphs.

    The function accepts two call forms:

    * ``marginalizedkernel(Gn, p_quit, itr)`` : kernel matrix of a list of graphs.
    * ``marginalizedkernel(G1, G2, p_quit, itr)`` : kernel between two graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    p_quit : float
        The termination probability in the random walks generating step.
    itr : integer
        Iteration bound used to calculate R_inf; the update of equation (8)
        is applied ``itr - 1`` times.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/float
        Kernel matrix, each element of which is the marginalized kernel between 2 graphs. / Marginalized kernel between 2 graphs.

    Notes
    -----
    Node identifiers are used directly as row/column indices of the R_inf
    matrix, so the nodes of each graph must be the integers 0..n-1. Every
    node and every edge must carry a 'label' attribute.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    if len(args) == 3:  # for a list of graphs
        Gn, p_quit, itr = args
        Kmatrix = np.zeros((len(Gn), len(Gn)))

        start_time = time.time()
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                # the kernel is symmetric: compute the upper triangle and mirror it
                Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], p_quit, itr)
                Kmatrix[j][i] = Kmatrix[i][j]

        print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    # for only 2 graphs
    G1, G2, p_quit, itr = args[0], args[1], args[2], args[3]

    num_nodes_G1 = G1.number_of_nodes()
    num_nodes_G2 = G2.number_of_nodes()
    if num_nodes_G1 == 0 or num_nodes_G2 == 0:
        # no pair of nodes to sum over (and no uniform initial distribution)
        return 0.0

    # the initial probability distribution in the random walks generating
    # step (uniform distribution over |G|)
    p_init_G1 = 1 / num_nodes_G1
    p_init_G2 = 1 / num_nodes_G2

    r1 = p_quit * p_quit

    # matrix holding R_inf for all pairs of nodes
    R_inf = np.zeros([num_nodes_G1, num_nodes_G2])

    # calculate R_inf with a simple iterative method
    for _ in range(1, itr):
        R_inf_new = np.full((num_nodes_G1, num_nodes_G2), r1, dtype=float)

        for node1 in G1.nodes():
            neighbor_n1 = G1[node1]
            if len(neighbor_n1) == 0:
                # isolated node: a walk cannot continue from here, so only the
                # termination term r1 remains (also avoids dividing by zero)
                continue
            # the transition probability distribution in the random walks
            # generating step (uniform over the vertices adjacent to the
            # current vertex)
            p_trans_n1 = (1 - p_quit) / len(neighbor_n1)

            for node2 in G2.nodes():
                neighbor_n2 = G2[node2]
                if len(neighbor_n2) == 0:
                    continue
                p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
                p_trans = p_trans_n1 * p_trans_n2

                for neighbor1, edge1 in neighbor_n1.items():
                    label_n1 = G1.nodes[neighbor1]['label']
                    for neighbor2, edge2 in neighbor_n2.items():
                        # delta kernels on node labels and on edge labels
                        if (label_n1 == G2.nodes[neighbor2]['label']
                                and edge1['label'] == edge2['label']):
                            R_inf_new[node1][node2] += p_trans * R_inf[neighbor1][neighbor2]  # ref [1] equation (8)

        R_inf = R_inf_new

    # add elements of R_inf up and calculate kernel
    kernel = 0.0
    for node1, attrs1 in G1.nodes(data=True):
        for node2, attrs2 in G2.nodes(data=True):
            if attrs1['label'] == attrs2['label']:
                kernel += p_init_G1 * p_init_G2 * R_inf[node1][node2]  # ref [1] equation (6)

    return kernel
def deltaKernel(condition):
    """Delta kernel: map a truth value to 1 or 0.

    Parameters
    ----------
    condition : Boolean
        The condition to evaluate.

    Return
    ------
    Kernel : integer
        1 when the condition is truthy, 0 otherwise.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    if condition:
        return 1
    return 0
| @@ -1,68 +0,0 @@ | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../") | |||
| import networkx as nx | |||
| import numpy as np | |||
| import time | |||
| from utils.utils import getSPGraph | |||
def _count_matching_sp_edges(S1, S2):
    """Count the edges of S1 that exist in S2 with the same non-zero 'cost'."""
    # Index S2's edges by their unordered endpoint pair so each edge of S1 is
    # matched with one lookup instead of a scan over all edges of S2.
    costs_S2 = {frozenset((u, v)): data['cost'] for u, v, data in S2.edges(data=True)}

    count = 0
    for u, v, data in S1.edges(data=True):
        cost = data['cost']
        key = frozenset((u, v))
        if cost != 0 and key in costs_S2 and costs_S2[key] == cost:
            count += 1
    return count


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    The function accepts two call forms:

    * ``spkernel(Gn)`` : kernel matrix of a list of graphs.
    * ``spkernel(G1, G2)`` : kernel between two graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2 graphs. / SP Kernel between 2 graphs.

    Notes
    -----
    Every input graph is first transformed to its shortest-paths graph with
    ``getSPGraph``. The kernel value of two graphs is the number of edges that
    join the same pair of node identifiers in both shortest-paths graphs with
    the same non-zero cost.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]

        Kmatrix = np.zeros((len(Gn), len(Gn)))

        Sn = [getSPGraph(G) for G in Gn]  # shortest path graphs of Gn

        start_time = time.time()
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                # the kernel is symmetric: compute the upper triangle and mirror it
                Kmatrix[i][j] = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[j][i] = Kmatrix[i][j]

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    # for only 2 graphs
    S1 = getSPGraph(args[0])
    S2 = getSPGraph(args[1])

    start_time = time.time()
    kernel = _count_matching_sp_edges(S1, S2)

    print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

    return kernel
| @@ -1,68 +0,0 @@ | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../") | |||
| import networkx as nx | |||
| import numpy as np | |||
| import time | |||
| from utils.utils import getSPGraph | |||
def _count_matching_sp_edges(S1, S2):
    """Count the edges of S1 that exist in S2 with the same non-zero 'cost'."""
    # Index S2's edges by their unordered endpoint pair so each edge of S1 is
    # matched with one lookup instead of a scan over all edges of S2.
    costs_S2 = {frozenset((u, v)): data['cost'] for u, v, data in S2.edges(data=True)}

    count = 0
    for u, v, data in S1.edges(data=True):
        cost = data['cost']
        key = frozenset((u, v))
        if cost != 0 and key in costs_S2 and costs_S2[key] == cost:
            count += 1
    return count


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    The function accepts two call forms:

    * ``spkernel(Gn)`` : kernel matrix of a list of graphs.
    * ``spkernel(G1, G2)`` : kernel between two graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2 graphs. / SP Kernel between 2 graphs.

    Notes
    -----
    Every input graph is first transformed to its shortest-paths graph with
    ``getSPGraph``. The kernel value of two graphs is the number of edges that
    join the same pair of node identifiers in both shortest-paths graphs with
    the same non-zero cost.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]

        Kmatrix = np.zeros((len(Gn), len(Gn)))

        Sn = [getSPGraph(G) for G in Gn]  # shortest path graphs of Gn

        start_time = time.time()
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                # the kernel is symmetric: compute the upper triangle and mirror it
                Kmatrix[i][j] = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[j][i] = Kmatrix[i][j]

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    # for only 2 graphs
    S1 = getSPGraph(args[0])
    S2 = getSPGraph(args[1])

    start_time = time.time()
    kernel = _count_matching_sp_edges(S1, S2)

    print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

    return kernel
| @@ -1,17 +0,0 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """Pygraph - utils module | |||
| Implement some methods to manage graphs | |||
| graphfiles.py : load .gxl and .ct files | |||
| utils.py : compute some properties on networkX graphs | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| from utils import graphfiles | |||
| from utils import utils | |||
| @@ -1,87 +0,0 @@ | |||
| import networkx as nx | |||
def loadCT(filename):
    """Load a graph from a .ct file.

    Parameters
    ----------
    filename : string
        Path of the .ct file.

    Return
    ------
    G : NetworkX graph
        Graph whose nodes are numbered from 0 in file order. Each node has a
        'label' attribute (4th field of its line, kept as a string) and each
        edge has an integer 'label' attribute (4th field of its line).

    Notes
    ------
    a typical example of data in .ct is like this:

    molecule name                <- first line, stored as the name of the graph
    3 2                          <- number of nodes and edges
    0.0000 0.0000 0.0000 C       <- each line describes a node, the last field is the label of the node; the first 3 fields are ignored by this loader (presumably coordinates -- TODO confirm)
    0.0000 0.0000 0.0000 C
    0.0000 0.0000 0.0000 O
    1 3 1 1                      <- each line describes an edge: the first two fields are the 1-based indices of its nodes, the 4th field is the label; the 3rd field is ignored by this loader
    2 3 1 1
    """
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))  # set name of the graph

    # str.split() without argument discards leading/repeated whitespace, so
    # the counts are found whatever the padding of the line is
    counts = content[1].split()
    nb_nodes = int(counts[0])  # number of the nodes
    nb_edges = int(counts[1])  # number of the edges

    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    for i in range(nb_edges):
        fields = content[i + nb_nodes + 2].split()
        # node indices are 1-based in the file, 0-based in the graph
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1, label=int(fields[3]))

    return G
def loadGXL(filename):
    """Load a graph from a .gxl file.

    Parameters
    ----------
    filename : string
        Path of the .gxl file.

    Return
    ------
    G : NetworkX graph
        Graph whose nodes are numbered from 0 in document order. Each node
        keeps its original identifier in the 'id' attribute; the 'label'
        attribute of nodes and edges is the text of the first child of their
        first 'attr' element.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()

    # maps the identifier used in the file to the integer index used in G
    index_of = {}
    for position, node in enumerate(root.iter('node')):
        node_label = node.find('attr')[0].text
        node_id = node.attrib['id']
        index_of[node_id] = position
        G.add_node(position, id=node_id, label=node_label)

    for edge in root.iter('edge'):
        edge_label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        G.add_edge(source, target, label=edge_label)

    return G
def loadDataset(filename):
    """Load the graphs and targets listed in a dataset file.

    Parameters
    ----------
    filename : string
        Path of the dataset file. A '.ds' file lists one '<ct file> <target>'
        pair per line; a '.cxl' file is an XML document whose 'print'
        elements carry 'file' and 'class' attributes. Graph files are looked
        up relative to the directory of the dataset file.

    Return
    ------
    data : list of NetworkX graph
        The loaded graphs, in file order.
    y : list
        The targets, in the same order: floats for '.ds', strings for '.cxl'.
        Both lists are empty when the extension is neither 'ds' nor 'cxl'.
    """
    from os.path import dirname, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]

    # An empty dirname means "current directory": blindly prepending
    # dirname + '/' would turn every graph file name into an absolute path.
    prefix = dirname_dataset + '/' if dirname_dataset else ''

    data = []
    y = []
    if extension == "ds":
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            tmp = line.split(' ')
            # remove the first '#' of the file name
            data.append(loadCT(prefix + tmp[0].replace('#', '', 1)))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(prefix + mol_filename))
            y.append(mol_class)

    return data, y
| @@ -1,59 +0,0 @@ | |||
| import networkx as nx | |||
| import numpy as np | |||
def getSPLengths(G1):
    """Compute the matrix of shortest-path lengths of a graph.

    Parameters
    ----------
    G1 : NetworkX graph
        The graph. Node identifiers are used directly as matrix indices, so
        the nodes must be the integers 0..n-1.

    Return
    ------
    distances : Numpy matrix
        distances[i, j] is the number of edges of a shortest path from node i
        to node j; entries of pairs without any path are left at 0.
    """
    # dict() accepts both a dictionary and an iterable of (node, paths) pairs
    sp = dict(nx.shortest_path(G1))
    nb_nodes = G1.number_of_nodes()
    distances = np.zeros((nb_nodes, nb_nodes))
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # a path is a list of nodes: its length in edges is one less
            distances[i, j] = len(path) - 1
    return distances
def getSPGraph(G):
    """Build the shortest-paths graph corresponding to graph G.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.

    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.

    The transformation itself is delegated to floydTransformation.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    S = floydTransformation(G)
    return S
def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G. It has the nodes of G
        (with their attributes) and one edge per pair of nodes, self pairs
        included, whose 'cost' attribute is the value of the Floyd-Warshall
        distance matrix for that pair.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    # Fix the node order explicitly so that row/column k of the matrix is
    # known to describe nodes[k], whatever the node identifiers are.
    nodes = list(G.nodes())
    spMatrix = nx.floyd_warshall_numpy(G, nodelist=nodes)  # @todo weight label not considered

    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))
    for i, node_i in enumerate(nodes):
        for j, node_j in enumerate(nodes):
            S.add_edge(node_i, node_j, cost=spMatrix[i, j])
    return S
| @@ -1,5 +0,0 @@ | |||
# You must specify your env variable LSAPE_DIR: it is the only include path
# given to the compiler, e.g.
#   export LSAPE_DIR=/path/to/lsape/include/
liblsap.so: lsap.cpp
	g++ -fPIC -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR)
| @@ -1,6 +0,0 @@ | |||
| Python wrapper for lsape method | |||
| Set your LSAPE_DIR environment variable to the location of the LSAPE | |||
| source code before compiling | |||
| source code : https://bougleux.users.greyc.fr/lsape/ | |||
| @@ -1,17 +0,0 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """Pygraph - c_ext module | |||
| This package binds some C++ code to python | |||
| lsape_binders.py : binders to C++ code of LSAPE methods implemented in | |||
| https://bougleux.users.greyc.fr/lsape/ | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| # import sub modules | |||
| from pygraph.c_ext import lsape_binders | |||
| @@ -1,43 +0,0 @@ | |||
| /* | |||
| Python wrapper | |||
| */ | |||
| #include "hungarian-lsape.hh" | |||
| #include "hungarian-lsap.hh" | |||
| #include <cstdio> | |||
| extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){ | |||
| double * u = new double[nm]; | |||
| double * v = new double[nm]; | |||
| int * rho_int = new int[nm]; | |||
| int * varrho_int = new int[nm]; | |||
| hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int); | |||
| //Find a better way to do | |||
| for (int i =0;i<nm;i++){ | |||
| rho[i] = (long)(rho_int[i]); | |||
| varrho[i] = (long)(varrho_int[i]); | |||
| } | |||
| return 0; | |||
| } | |||
| extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){ | |||
| double * u = new double[n]; | |||
| double * v = new double[m]; | |||
| int * rho_int = new int[n]; | |||
| int * varrho_int = new int[m]; | |||
| hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v); | |||
| for (int i =0;i<n;i++) | |||
| rho[i] = (long)(rho_int[i]); | |||
| for (int i =0;i<m;i++) | |||
| varrho[i] = (long)(varrho_int[i]); | |||
| return 0; | |||
| } | |||
| @@ -1,23 +0,0 @@ | |||
| import numpy as np | |||
| import ctypes as c | |||
| from ctypes import cdll | |||
| import os.path | |||
def lsap_solverHG(C):
    ''' Binding for lsape hungarian solver

    Loads 'liblsap.so' from the directory of this module and calls its
    ``lsap`` function on the cost matrix C.

    Parameters
    ----------
    C : Numpy matrix
        Cost matrix; its first dimension is taken as the problem size.
        NOTE(review): C is modified in place (np.inf entries become 10000).
        Presumably it must be a square float64 array, since its raw buffer is
        handed to C++ code reading doubles -- TODO confirm.

    Return
    ------
    A pair of Numpy arrays: the indices 0..nm-1, and the content of the
    ``varrho`` buffer filled by the library.
    '''
    nm = C.shape[0]
    dll_name = 'liblsap.so'
    # the shared library is expected next to this file
    lib = cdll.LoadLibrary(os.path.abspath(
        os.path.join(os.path.dirname(__file__), dll_name)))
    lib.lsap.restype = c.c_int
    # output buffers written by the library
    # NOTE(review): the C++ side writes `long` values; this assumes numpy's
    # default int has the same width as a C long -- TODO confirm per platform
    rho = np.zeros((nm, 1), int)
    varrho = np.zeros((nm, 1), int)
    # replace infinite costs by a large finite constant before the call
    C[C == np.inf] = 10000

    # NOTE(review): .transpose() presumably returns a view sharing C's buffer,
    # so the pointer passed may still address the original memory layout --
    # confirm which layout the library expects
    lib.lsap(c.c_void_p(C.transpose().ctypes.data),
             c.c_int(nm),
             c.c_void_p(rho.ctypes.data),
             c.c_void_p(varrho.ctypes.data))

    return np.array(range(0, nm)), np.array([c.c_int(i).value for i in varrho])
| @@ -1,72 +0,0 @@ | |||
| from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction | |||
| from pygraph.ged.costfunctions import NeighboorhoodCostFunction | |||
| from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||
| from scipy.optimize import linear_sum_assignment | |||
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=ConstantCostFunction(1, 3, 1, 3),
        solver=linear_sum_assignment):
    """Compute Graph Edit Distance between G1 and G2 according to mapping
    encoded within rho and varrho. Graph's node must be indexed by a
    index which is used is rho and varrho

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        The two graphs; node identifiers are used as indices into rho and
        varrho.
    method : string
        'Riesen', 'Neighboorhood' or 'Basic': cost function used to build the
        bipartite cost matrix when the mapping has to be computed.
    rho, varrho : sequences of integers, optional
        rho[i] is the node of G2 assigned to node i of G1 (a value >= |G2|
        means node i is deleted); varrho[j] is the node of G1 assigned to
        node j of G2 (a value >= |G1| means node j is inserted). When either
        is None, both are computed with getOptimalMapping.
    cf : cost function
        Provides the costs cns, cnd, cni, ces, ced and cei of the edit
        operations.
    solver : callable
        Linear sum assignment solver, used only to compute the mapping.

    Return
    ------
    A tuple (distance, rho, varrho): the cost of the edit path induced by the
    mapping, and the mapping itself.

    Raises
    ------
    NameError
        If the mapping must be computed and method is not a known name.

    NB: use dictionaries to be more versatile?
    """
    if (rho is None) or (varrho is None):
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf, lsap_solver=solver)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf, lsap_solver=solver)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    distance = 0

    # nodes of G1: deleted or substituted
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            distance += cf.cnd(i, G1)
        else:
            distance += cf.cns(i, phi_i, G1, G2)

    # nodes of G2 that are not the image of a node of G1: inserted
    for j in G2.nodes():
        if varrho[j] >= n:
            distance += cf.cni(j, G2)

    # edges of G1: substituted when both ends are mapped onto an edge of G2,
    # deleted otherwise
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if (phi_i < m) and (phi_j < m) and (phi_j in G2[phi_i]):
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # substituting may cost more than deleting then inserting
            distance += min(cf.ces(e, e2, G1, G2),
                            cf.ced(e, G1) + cf.cei(e2, G2))
        else:
            distance += cf.ced(e, G1)

    # edges of G2 without a counterpart in G1: inserted (the edges with a
    # counterpart were already accounted for in the loop above)
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if (phi_i < n) and (phi_j < n):
            if phi_j not in G1[phi_i]:
                distance += cf.cei(e, G2)
        else:
            # an end of the edge is an inserted node: the edge is an
            # insertion too, so it is charged cei (not ced)
            distance += cf.cei(e, G2)

    return distance, rho, varrho
| @@ -1,17 +0,0 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """Pygraph - ged module | |||
| Implement some methods to compute ged between graphs | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| from pygraph.ged import costfunctions | |||
| from pygraph.ged import bipartiteGED | |||
| from pygraph.ged import GED | |||
| @@ -1,33 +0,0 @@ | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
| from pygraph.ged.costfunctions import ConstantCostFunction | |||
def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
    """Compute a Cost Matrix according to cost function cf

    With n and m the numbers of nodes of G1 and G2, the result is a square
    matrix of size n + m: entry [u, v] is the substitution cost of node u by
    node v, [u, m + u] the deletion cost of u, [n + v, v] the insertion cost
    of v, the bottom-right block is 0 and the remaining entries are infinite.
    Node identifiers are used directly as matrix indices.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    size = n + m

    C = np.full((size, size), np.inf)
    C[n:, m:] = 0

    # substitutions
    for u in G1.nodes():
        for v in G2.nodes():
            C[u, v] = cf.cns(u, v, G1, G2)

    # deletions
    for u in G1.nodes():
        C[u, m + u] = cf.cnd(u, G1)

    # insertions
    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)

    return C
def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Compute an optimal linear mapping according to cost Matrix C

    Returns the pair (rho, varrho): rho is the column index array given by
    the solver, varrho is the row index array reordered by increasing column
    index.

    TODO: include Seb's C programs
    """
    rows, cols = lsap_solver(C)
    by_column = np.argsort(cols)
    return cols, rows[by_column]
| @@ -1,138 +0,0 @@ | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
class ConstantCostFunction:
    """ Define a symmetric constant cost function for edit operations

    Insertions and deletions share the same cost; a substitution is free when
    the two 'label' attributes are equal and has a constant cost otherwise.
    """

    def __init__(self, cns, cni, ces, cei):
        """Store the costs of node substitution (cns), node
        insertion/deletion (cni), edge substitution (ces) and edge
        insertion/deletion (cei)."""
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    def cns(self, node_u, node_v, g1, g2):
        """ return substitution edit operation cost between node_u of G1 and node_v of G2"""
        # G.nodes[...] replaces the G.node[...] accessor removed from networkx
        return (g1.nodes[node_u]['label'] != g2.nodes[node_v]['label'])*self.cns_

    def cnd(self, u, G1):
        """ return the deletion cost of node u of G1 (constant)"""
        return self.cnd_

    def cni(self, v, G2):
        """ return the insertion cost of node v of G2 (constant)"""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """ return substitution edit operation cost between edges e1 and e2,
        given as (u, v, attributes) triples

        TODO: test with attributes other than symbolic ones by relying on
        the __eq__ operator"""
        return (e1[2]['label'] != e2[2]['label'])*self.ces_

    def ced(self, e1, G1):
        """ return the deletion cost of edge e1 of G1 (constant)"""
        return self.ced_

    def cei(self, e2, G2):
        """ return the insertion cost of edge e2 of G2 (constant)"""
        return self.cei_
class RiesenCostFunction():
    """ Cost function associated to the computation of a cost matrix between nodes for LSAP

    Each node cost of the wrapped cost function is augmented with a cost
    computed on the edges incident to the node.
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        """Wrap the cost function cf; lsap_solver solves the assignment of
        incident edges in cns."""
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """ u and v are node identifiers

        Return the substitution cost of u by v plus the cost of an optimal
        assignment between the edges incident to u and those incident to v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # edge substitutions
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)

        # edge deletions
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)

        # edge insertions
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = self.lsap_solver_(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Return the deletion cost of u plus the deletion cost of each of
        its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost = edges_cost + self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Return the insertion cost of v plus the insertion cost of each of
        its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost = edges_cost + self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost
class NeighboorhoodCostFunction():
    """ Cost function associated to the computation of a cost matrix between nodes for LSAP

    Each node cost of the wrapped cost function is augmented with a cost
    computed on the incident edges and, in cns, on the neighbor nodes they
    lead to.
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        """Wrap the cost function cf; lsap_solver solves the assignment of
        neighborhoods in cns."""
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """ u and v are node identifiers

        Return the substitution cost of u by v plus the cost of an optimal
        assignment between the neighborhoods of u and v, where each entry
        adds the cost on the incident edge and the cost on the neighbor node.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # substitutions: incident edge, then neighbor node
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
                sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2)

        # deletions: incident edge, then neighbor node
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[i, m + i] += self.cf_.cnd(nbr_u, G1)

        # insertions: incident edge, then neighbor node
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n + j, j] += self.cf_.cni(nbr_v, G2)

        rows, cols = self.lsap_solver_(sub_C)
        neighborhood_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + neighborhood_cost

    def cnd(self, u, G1):
        """Return the deletion cost of u plus the deletion cost of each of
        its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost = edges_cost + self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Return the insertion cost of v plus the insertion cost of each of
        its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost = edges_cost + self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost
| @@ -13,5 +13,5 @@ __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| from pygraph.utils import graphfiles | |||
| from pygraph.utils import utils | |||
| from utils import graphfiles | |||
| from utils import utils | |||
| @@ -5,8 +5,8 @@ import numpy as np | |||
| def getSPLengths(G1): | |||
| sp = nx.shortest_path(G1) | |||
| distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) | |||
| for i in sp.keys(): | |||
| for j in sp[i].keys(): | |||
| for i in np.keys(): | |||
| for j in np[i].keys(): | |||
| distances[i, j] = len(sp[i][j])-1 | |||
| return distances | |||
| @@ -1,5 +0,0 @@ | |||
| To use the library : | |||
| $> virtualenv --python=/usr/bin/python3.5 venv | |||
| $> source venv/bin/activate | |||
| $> pip install -r requirements.txt | |||
| ... Go use pygraph | |||
| @@ -1,66 +0,0 @@ | |||
| import ot | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from pygraph.ged.costfunctions import ConstantCostFunction | |||
| from pygraph.utils.utils import getSPLengths | |||
| from tqdm import tqdm | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
| from pygraph.ged.GED import ged | |||
| import scipy | |||
def pad(C, n):
    """Return an n x n zero matrix whose top-left corner holds a copy of C."""
    nb_rows, nb_cols = C.shape[0], C.shape[1]
    padded = np.zeros((n, n))
    padded[:nb_rows, :nb_cols] = C
    return padded
# Script entry point: for the pairs of graphs of a dataset, compare the edit
# distance obtained from a Gromov-Wasserstein based node mapping with the one
# obtained from the default mapping of ged(), then print the means and save
# both lists of distances.
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation; everything down to the second ged() call is assumed
# to be guarded by `if(n == m)` -- confirm against the original script.
if (__name__ == "__main__"):
    # NOTE(review): machine-specific absolute path
    ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    # (i, j) indices of the pairs for which distances were computed
    pairs = list()

    ged_distances = list() #np.zeros((N, N))
    gw_distances = list() #np.zeros((N, N))
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1 = dataset[i]
            G2 = dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            if(n == m):
                # shortest-path length matrices, scaled by their maximum
                # NOTE(review): the maximum is 0 for a single-node graph
                C1 = getSPLengths(G1)
                C2 = getSPLengths(G2)

                C1 /= C1.max()
                C2 /= C2.max()

                dim = max(n, m)
                # with the nesting assumed above n == m holds here, so neither
                # padding branch can run
                if(n < m):
                    C1 = pad(C1, dim)
                elif (m < n):
                    C2 = pad(C2, dim)

                # uniform weights over the nodes of each graph
                p = ot.unif(dim)
                q = ot.unif(dim)

                gw = ot.gromov_wasserstein(C1, C2, p, q,
                                           'square_loss', epsilon=5e-3)
                # turn the coupling into a node mapping: the assignment
                # minimising -gw maximises the total coupling
                row_ind, col_ind = linear_sum_assignment(-gw)
                rho = col_ind
                varrho = row_ind[np.argsort(col_ind)]
                pairs.append((i,j))
                # edit distance induced by the Gromov-Wasserstein mapping
                gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

                # edit distance with the mapping computed by ged() itself
                ged_distances.append(ged(G1, G2, cf=cf)[0])

    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)
| @@ -1,16 +0,0 @@ | |||
| cycler==0.10.0 | |||
| Cython==0.27.3 | |||
| decorator==4.1.2 | |||
| matplotlib==2.1.0 | |||
| networkx==2.0 | |||
| numpy==1.13.3 | |||
| pkg-resources==0.0.0 | |||
| POT==0.4.0 | |||
| pyparsing==2.2.0 | |||
| python-dateutil==2.6.1 | |||
| pytz==2017.3 | |||
| scikit-learn==0.19.1 | |||
| scipy==1.0.0 | |||
| six==1.11.0 | |||
| sklearn==0.0 | |||
| tqdm==4.19.4 | |||