| @@ -0,0 +1,170 @@ | |||||
| { | |||||
| "cells": [ | |||||
| { | |||||
| "cell_type": "code", | |||||
| "execution_count": 1, | |||||
| "metadata": { | |||||
| "autoscroll": false, | |||||
| "ein.tags": "worksheet-0", | |||||
| "slideshow": { | |||||
| "slide_type": "-" | |||||
| } | |||||
| }, | |||||
| "outputs": [], | |||||
| "source": [ | |||||
| "import numpy as np\n", | |||||
| "import paths\n", | |||||
| "\n", | |||||
| "import pygraph\n", | |||||
| "\n", | |||||
| "from pygraph.utils.graphfiles import loadDataset\n" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "cell_type": "code", | |||||
| "execution_count": 2, | |||||
| "metadata": { | |||||
| "autoscroll": false, | |||||
| "ein.tags": "worksheet-0", | |||||
| "slideshow": { | |||||
| "slide_type": "-" | |||||
| } | |||||
| }, | |||||
| "outputs": [], | |||||
| "source": [ | |||||
| "import networkx as nx\n", | |||||
| "import numpy as np\n", | |||||
| "import matplotlib.pyplot as plt\n", | |||||
| "\n", | |||||
| "# We load a ds dataset\n", | |||||
| "# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n", | |||||
| "dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "cell_type": "code", | |||||
| "execution_count": 3, | |||||
| "metadata": { | |||||
| "autoscroll": false, | |||||
| "ein.tags": "worksheet-0", | |||||
| "slideshow": { | |||||
| "slide_type": "-" | |||||
| } | |||||
| }, | |||||
| "outputs": [ | |||||
| { | |||||
| "name": "stderr", | |||||
| "output_type": "stream", | |||||
| "text": [ | |||||
| "100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n", | |||||
| "100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n", | |||||
| "100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n", | |||||
| "100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "source": [ | |||||
| "#Compute graph edit distances\n", | |||||
| "\n", | |||||
| "from tqdm import tqdm\n", | |||||
| "from pygraph.c_ext.lsape_binders import lsap_solverHG\n", | |||||
| "from pygraph.ged.costfunctions import ConstantCostFunction\n", | |||||
| "from pygraph.ged.GED import ged\n", | |||||
| "import time\n", | |||||
| "\n", | |||||
| "cf = ConstantCostFunction(1,3,1,3)\n", | |||||
| "N=len(dataset)\n", | |||||
| "\n", | |||||
| "methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n", | |||||
| "ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n", | |||||
| "\n", | |||||
| "times = list()\n", | |||||
| "start = time.clock()\n", | |||||
| "for i in tqdm(range(0,N)):\n", | |||||
| " for j in range(0,N):\n", | |||||
| " ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", | |||||
| "times.append(time.clock() - start)\n", | |||||
| "\n", | |||||
| "\n", | |||||
| "start = time.clock()\n", | |||||
| "for i in tqdm(range(0,N)):\n", | |||||
| " for j in range(0,N):\n", | |||||
| " ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n", | |||||
| "\n", | |||||
| "times.append(time.clock() - start)\n", | |||||
| "\n", | |||||
| "start = time.clock()\n", | |||||
| "for i in tqdm(range(0,N)):\n", | |||||
| " for j in range(0,N):\n", | |||||
| " ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n", | |||||
| "times.append(time.clock() - start)\n", | |||||
| "\n", | |||||
| "start = time.clock()\n", | |||||
| "for i in tqdm(range(0,N)):\n", | |||||
| " for j in range(0,N):\n", | |||||
| " ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n", | |||||
| "times.append(time.clock() - start)" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "cell_type": "code", | |||||
| "execution_count": 5, | |||||
| "metadata": { | |||||
| "autoscroll": false, | |||||
| "ein.tags": "worksheet-0", | |||||
| "slideshow": { | |||||
| "slide_type": "-" | |||||
| } | |||||
| }, | |||||
| "outputs": [ | |||||
| { | |||||
| "name": "stdout", | |||||
| "output_type": "stream", | |||||
| "text": [ | |||||
| " method \t mean \t mean \t time\n", | |||||
| " Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n", | |||||
| " Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n", | |||||
| " Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n", | |||||
| " Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "source": [ | |||||
| "print(\" method \\t mean \\t mean \\t time\")\n", | |||||
| "data = list()\n", | |||||
| "for i in range(0,len(ged_distances)):\n", | |||||
| " ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n", | |||||
| " print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "cell_type": "code", | |||||
| "execution_count": null, | |||||
| "metadata": {}, | |||||
| "outputs": [], | |||||
| "source": [] | |||||
| } | |||||
| ], | |||||
| "metadata": { | |||||
| "kernelspec": { | |||||
| "display_name": "Python 3", | |||||
| "language": "python", | |||||
| "name": "python3" | |||||
| }, | |||||
| "language_info": { | |||||
| "codemirror_mode": { | |||||
| "name": "ipython", | |||||
| "version": 3 | |||||
| }, | |||||
| "file_extension": ".py", | |||||
| "mimetype": "text/x-python", | |||||
| "name": "python", | |||||
| "nbconvert_exporter": "python", | |||||
| "pygments_lexer": "ipython3", | |||||
| "version": "3.6.2" | |||||
| }, | |||||
| "name": "py-graph_test.ipynb" | |||||
| }, | |||||
| "nbformat": 4, | |||||
| "nbformat_minor": 2 | |||||
| } | |||||
| @@ -0,0 +1,21 @@ | |||||
| # -*-coding:utf-8 -*- | |||||
| """ | |||||
| Pygraph | |||||
| This package contains 5 sub packages : | |||||
| * c_ext : binders to C++ code | |||||
| * ged : allows to compute graph edit distance between networkX graphs | |||||
| * kernels : computation of graph kernels, ie graph similarity measure compatible with SVM | |||||
| * notebooks : examples of code using this library | |||||
| * utils : Diverse computation on graphs | |||||
| """ | |||||
| # info | |||||
| __version__ = "0.1" | |||||
| __author__ = "Benoit Gaüzère" | |||||
| __date__ = "November 2017" | |||||
| # import sub modules | |||||
| from pygraph import c_ext | |||||
| from pygraph import ged | |||||
| from pygraph import utils | |||||
| @@ -0,0 +1,5 @@ | |||||
| # You must specify your env variable LSAPE_DIR | |||||
| #LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/ | |||||
# Build the shared library loaded by lsape_binders.py.
# The LSAPE headers are located through LSAPE_DIR only (no machine-specific path).
liblsap.so: lsap.cpp
	g++ -fPIC -shared -O3 -I$(LSAPE_DIR) lsap.cpp -o liblsap.so
| @@ -0,0 +1,6 @@ | |||||
| Python wrapper for lsape method | |||||
| Specify your LSAPE_DIR env variable with the location of the source | |||||
| code to compile | |||||
| source code : https://bougleux.users.greyc.fr/lsape/ | |||||
| @@ -0,0 +1,17 @@ | |||||
| # -*-coding:utf-8 -*- | |||||
| """Pygraph - c_ext module | |||||
| This package binds some C++ code to python | |||||
| lsape_binders.py : binders to C++ code of LSAPE methods implemented in | |||||
| https://bougleux.users.greyc.fr/lsape/ | |||||
| """ | |||||
| # info | |||||
| __version__ = "0.1" | |||||
| __author__ = "Benoit Gaüzère" | |||||
| __date__ = "November 2017" | |||||
| # import sub modules | |||||
| from pygraph.c_ext import lsape_binders | |||||
| @@ -0,0 +1,43 @@ | |||||
| /* | |||||
| Python wrapper | |||||
| */ | |||||
| #include "hungarian-lsape.hh" | |||||
| #include "hungarian-lsap.hh" | |||||
| #include <cstdio> | |||||
| extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){ | |||||
| double * u = new double[nm]; | |||||
| double * v = new double[nm]; | |||||
| int * rho_int = new int[nm]; | |||||
| int * varrho_int = new int[nm]; | |||||
| hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int); | |||||
| //Find a better way to do | |||||
| for (int i =0;i<nm;i++){ | |||||
| rho[i] = (long)(rho_int[i]); | |||||
| varrho[i] = (long)(varrho_int[i]); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){ | |||||
| double * u = new double[n]; | |||||
| double * v = new double[m]; | |||||
| int * rho_int = new int[n]; | |||||
| int * varrho_int = new int[m]; | |||||
| hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v); | |||||
| for (int i =0;i<n;i++) | |||||
| rho[i] = (long)(rho_int[i]); | |||||
| for (int i =0;i<m;i++) | |||||
| varrho[i] = (long)(varrho_int[i]); | |||||
| return 0; | |||||
| } | |||||
| @@ -0,0 +1,23 @@ | |||||
| import numpy as np | |||||
| import ctypes as c | |||||
| from ctypes import cdll | |||||
| import os.path | |||||
def lsap_solverHG(C):
    """Solve a square assignment problem through the ``lsap`` C entry point.

    Thin ctypes binding around ``liblsap.so``, which must be located in the
    same directory as this module. The return convention mirrors
    ``scipy.optimize.linear_sum_assignment``: ``(row_ind, col_ind)``.

    Parameters
    ----------
    C : numpy.ndarray
        Square cost matrix. Infinite entries are replaced by a large finite
        penalty (10000) in a private copy before being handed to the solver;
        the caller's array is left untouched.

    Returns
    -------
    row_ind : numpy.ndarray
        ``0, 1, ..., nm - 1``.
    col_ind : numpy.ndarray
        The ``varrho`` array filled by the C solver.
    """
    nm = C.shape[0]

    # Load and configure the shared library once, then reuse it: this function
    # is called for every pair of nodes when used inside a cost function.
    lib = getattr(lsap_solverHG, "_lib", None)
    if lib is None:
        dll_name = 'liblsap.so'
        lib = cdll.LoadLibrary(os.path.abspath(
            os.path.join(os.path.dirname(__file__), dll_name)))
        lib.lsap.restype = c.c_int
        lib.lsap.argtypes = [c.c_void_p, c.c_int, c.c_void_p, c.c_void_p]
        lsap_solverHG._lib = lib

    # Private, contiguous float64 copy: the C side reads raw doubles, and the
    # caller's matrix must not be modified behind its back.
    # NOTE(review): the buffer is handed over in row-major order, exactly as
    # before (ndarray.transpose() returns a view on the same memory, so the
    # pointer previously passed was that of C itself) - confirm this is the
    # layout liblsap expects.
    costs = np.array(C, dtype=np.float64, order='C')
    costs[costs == np.inf] = 10000

    # The C signature takes `long *` output buffers.
    rho = np.zeros(nm, dtype=c.c_long)
    varrho = np.zeros(nm, dtype=c.c_long)

    lib.lsap(costs.ctypes.data, nm, rho.ctypes.data, varrho.ctypes.data)
    return np.arange(nm), varrho.astype(int)
| @@ -0,0 +1,72 @@ | |||||
| from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction | |||||
| from pygraph.ged.costfunctions import NeighboorhoodCostFunction | |||||
| from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||||
| from scipy.optimize import linear_sum_assignment | |||||
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=ConstantCostFunction(1, 3, 1, 3),
        solver=linear_sum_assignment):
    """Compute the graph edit distance between G1 and G2 induced by a mapping.

    The mapping is encoded by ``rho`` (G1 node -> G2 node) and ``varrho``
    (G2 node -> G1 node). Graph nodes must be identified by integer indices,
    since they are used directly as indices into ``rho`` and ``varrho``.
    An image ``>= m`` in ``rho`` (resp. ``>= n`` in ``varrho``) denotes a
    deleted (resp. inserted) node, with ``n``/``m`` the node counts of G1/G2.

    NB: use dictionaries instead to be more versatile?

    Parameters
    ----------
    G1, G2 : NetworkX graphs
    method : str
        Bipartite heuristic used when no mapping is supplied:
        'Riesen', 'Neighboorhood' or 'Basic'.
    rho, varrho : sequences of int, optional
        Precomputed mapping. If either is None, both are recomputed.
    cf : cost function object
        Provides cns/cnd/cni/ces/ced/cei edit costs.
    solver : callable
        Assignment solver with the ``linear_sum_assignment`` interface.

    Returns
    -------
    (cost, rho, varrho)

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    if (rho is None) or (varrho is None):
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf, lsap_solver=solver)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf, lsap_solver=solver)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    total = 0

    # Node substitutions and deletions (from G1's point of view).
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            total += cf.cnd(i, G1)
        else:
            total += cf.cns(i, phi_i, G1, G2)

    # Node insertions (G2 nodes that have no pre-image in G1).
    for j in G2.nodes():
        if varrho[j] >= n:
            total += cf.cni(j, G2)

    # Edges of G1: substituted when both endpoints are mapped onto an existing
    # edge of G2, deleted otherwise.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if (phi_i < m) and (phi_j < m) and (phi_j in G2[phi_i]):
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # Substituting may cost more than deleting then re-inserting.
            total += min(cf.ces(e, e2, G1, G2),
                         cf.ced(e, G1) + cf.cei(e2, G2))
        else:
            total += cf.ced(e, G1)

    # Edges of G2 with no counterpart in G1 are insertions. Matched edges were
    # already accounted for in the loop above.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if (phi_i < n) and (phi_j < n):
            if phi_j not in G1[phi_i]:
                total += cf.cei(e, G2)
        else:
            # An endpoint is an inserted node, so the edge is inserted too:
            # charge the insertion cost (this used to call cf.ced by mistake).
            total += cf.cei(e, G2)
    return total, rho, varrho
| @@ -0,0 +1,17 @@ | |||||
| # -*-coding:utf-8 -*- | |||||
| """Pygraph - ged module | |||||
| Implement some methods to compute ged between graphs | |||||
| """ | |||||
| # info | |||||
| __version__ = "0.1" | |||||
| __author__ = "Benoit Gaüzère" | |||||
| __date__ = "November 2017" | |||||
| from pygraph.ged import costfunctions | |||||
| from pygraph.ged import bipartiteGED | |||||
| from pygraph.ged import GED | |||||
| @@ -0,0 +1,33 @@ | |||||
| import numpy as np | |||||
| from scipy.optimize import linear_sum_assignment | |||||
| from pygraph.ged.costfunctions import ConstantCostFunction | |||||
def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
    """Build the (n+m) x (n+m) bipartite cost matrix between G1 and G2.

    With n and m the node counts of G1 and G2, the matrix is laid out as:
    top-left n x m block = substitution costs, diagonal of the top-right
    block = deletion costs, diagonal of the bottom-left block = insertion
    costs, bottom-right block = 0. Every other entry is +inf.
    Nodes are used directly as matrix indices.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    size = n + m

    C = np.full((size, size), np.inf)
    C[n:, m:] = 0

    # Substitution block.
    for u in G1.nodes():
        for v in G2.nodes():
            C[u, v] = cf.cns(u, v, G1, G2)

    # Deletion diagonal.
    for u in G1.nodes():
        C[u, m + u] = cf.cnd(u, G1)

    # Insertion diagonal.
    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)

    return C
def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Compute an optimal linear mapping for the cost matrix C.

    Returns ``(rho, varrho)``: ``rho`` is the column index array returned by
    the solver, and ``varrho`` is the row index array reordered by increasing
    column index.

    TODO: include Seb's C programs.
    """
    rows, cols = lsap_solver(C)
    by_column = np.argsort(cols)
    rho = cols
    varrho = rows[by_column]
    return rho, varrho
| @@ -0,0 +1,138 @@ | |||||
| import numpy as np | |||||
| from scipy.optimize import linear_sum_assignment | |||||
class ConstantCostFunction:
    """Symmetric constant cost function for edit operations.

    Insertion and deletion share the same cost, both for nodes (``cni``) and
    for edges (``cei``). Substitutions cost ``cns`` / ``ces`` when the
    'label' attributes differ and 0 when they are equal.
    """

    def __init__(self, cns, cni, ces, cei):
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    def cns(self, node_u, node_v, g1, g2):
        """Return the substitution cost between node_u of g1 and node_v of g2."""
        # `Graph.nodes[...]` replaces `Graph.node[...]`, which was removed in
        # networkx 2.4; the `nodes` view also exists in networkx 2.0.
        return (g1.nodes[node_u]['label'] != g2.nodes[node_v]['label'])*self.cns_

    def cnd(self, u, G1):
        """Return the (constant) cost of deleting node u of G1."""
        return self.cnd_

    def cni(self, v, G2):
        """Return the (constant) cost of inserting node v of G2."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Return the substitution cost between edges e1 and e2.

        Edges are (u, v, attributes) triples; only the 'label' attribute is
        compared.
        TODO: test with non-symbolic attributes by relying on __eq__.
        """
        return (e1[2]['label'] != e2[2]['label'])*self.ces_

    def ced(self, e1, G1):
        """Return the (constant) cost of deleting edge e1 of G1."""
        return self.ced_

    def cei(self, e2, G2):
        """Return the (constant) cost of inserting edge e2 of G2."""
        return self.cei_
class RiesenCostFunction():
    """Node cost function for the bipartite (LSAP) cost matrix.

    Each node cost is the base cost given by ``cf`` plus a cost computed
    from the edges incident to the node(s).
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Substitution cost of u (node id in G1) by v (node id in G2).

        The incident edges of u and v are matched by solving a small
        assignment problem; its optimal cost is added to the base node
        substitution cost.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # Edge substitution block.
        for row, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.cf_.ces(e1, e2, G1, G2)

        # Edge deletion diagonal.
        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m + row] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)

        # Edge insertion diagonal.
        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n + col, col] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = self.lsap_solver_(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Deletion cost of u: base cost plus deletion of its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of v: base cost plus insertion of its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost
class NeighboorhoodCostFunction():
    """Node cost function for the bipartite (LSAP) cost matrix.

    Same construction as an edge-based local cost, except that the local
    assignment problem also includes the base costs of the neighbouring
    nodes reached through each incident edge.
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Substitution cost of u (node id in G1) by v (node id in G2).

        Incident edges together with their far-end nodes are matched by
        solving a small assignment problem; its optimal cost is added to the
        base node substitution cost.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # Substitution block: edge cost plus neighbour node cost.
        for row, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = (self.cf_.ces(e1, e2, G1, G2)
                                   + self.cf_.cns(nbr_u, nbr_v, G1, G2))

        # Deletion diagonal: edge deletion plus neighbour node deletion.
        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m + row] = (self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
                                   + self.cf_.cnd(nbr_u, G1))

        # Insertion diagonal: edge insertion plus neighbour node insertion.
        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n + col, col] = (self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
                                   + self.cf_.cni(nbr_v, G2))

        rows, cols = self.lsap_solver_(sub_C)
        local_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + local_cost

    def cnd(self, u, G1):
        """Deletion cost of u: base cost plus deletion of its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of v: base cost plus insertion of its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost
| @@ -0,0 +1,68 @@ | |||||
| import sys | |||||
| import pathlib | |||||
| sys.path.insert(0, "../") | |||||
| import networkx as nx | |||||
| import numpy as np | |||||
| import time | |||||
| from utils.utils import getSPGraph | |||||
def _count_matching_sp_edges(S1, S2):
    """Count edge pairs (one from S1, one from S2) with equal non-zero 'cost'
    and identical endpoints, in either orientation."""
    count = 0
    for e1 in S1.edges(data=True):
        for e2 in S2.edges(data=True):
            same_cost = e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost']
            same_ends = ((e1[0] == e2[0] and e1[1] == e2[1])
                         or (e1[0] == e2[1] and e1[1] == e2[0]))
            if same_cost and same_ends:
                count += 1
    return count


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
        NOTE(review): in this form the graphs are used as given, so their
        edges must already carry a 'cost' attribute (i.e. be shortest-path
        graphs) - confirm whether raw graphs should be transformed first.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2 graphs. / SP Kernel between 2 graphs.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]

        Kmatrix = np.zeros((len(Gn), len(Gn)))

        # get shortest path graphs of Gn
        Sn = [getSPGraph(G) for G in Gn]

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # The kernel is symmetric: compute the upper triangle and mirror it.
                k = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = k
                Kmatrix[j][i] = k

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    else:  # for only 2 graphs
        G1 = args[0]
        G2 = args[1]

        # start_time was previously undefined in this branch (NameError at print).
        start_time = time.time()
        kernel = _count_matching_sp_edges(G1, G2)

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

        return kernel
| @@ -0,0 +1,17 @@ | |||||
| # -*-coding:utf-8 -*- | |||||
| """Pygraph - utils module | |||||
| Implement some methods to manage graphs | |||||
| graphfiles.py : load .gxl and .ct files | |||||
| utils.py : compute some properties on networkX graphs | |||||
| """ | |||||
| # info | |||||
| __version__ = "0.1" | |||||
| __author__ = "Benoit Gaüzère" | |||||
| __date__ = "November 2017" | |||||
| from pygraph.utils import graphfiles | |||||
| from pygraph.utils import utils | |||||
| @@ -0,0 +1,87 @@ | |||||
| import networkx as nx | |||||
def loadCT(filename):
    """Load a graph from a .ct file.

    Notes
    ------
    A typical .ct file looks like this (after a first line that is used as
    the graph name):

        3 2                          <- number of nodes and edges
        0.0000 0.0000 0.0000 C       <- one line per node; the last field is
        0.0000 0.0000 0.0000 C          the node label (a chemical element).
        0.0000 0.0000 0.0000 O          The first 3 numbers are ignored
                                        (presumably coordinates - TODO confirm).
        1 3 1 1                      <- one line per edge; the first two
        2 3 1 1                         numbers are the 1-based end nodes and
                                        the last number is the edge label.
                                        The 3rd number is ignored.

    Parameters
    ----------
    filename : str
        Path of the .ct file.

    Returns
    -------
    G : NetworkX graph
        Nodes are numbered from 0 and carry a 'label' attribute (str); edges
        carry a 'label' attribute (int).
    """
    # Close the file deterministically instead of relying on garbage collection.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))  # set name of the graph

    # split() without argument copes with any amount of leading or separating
    # blanks, as produced by fixed-width writers.
    header = content[1].split()
    nb_nodes = int(header[0])  # number of nodes
    nb_edges = int(header[1])  # number of edges

    for i in range(0, nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    for i in range(0, nb_edges):
        fields = content[i + nb_nodes + 2].split()
        # Node ids are 1-based in the file and 0-based in the graph.
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1, label=int(fields[3]))
    return G
def loadGXL(filename):
    """Load a graph from a .gxl (XML) file.

    Nodes are renumbered 0, 1, ... in document order; the original GXL id is
    kept in the 'id' node attribute. Node and edge 'label' attributes hold
    the text of the first child of the element's first <attr> child.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()

    # Maps GXL node ids to the integer indices used in the graph.
    id_to_index = {}
    for index, node in enumerate(root.iter('node')):
        label = node.find('attr')[0].text
        gxl_id = node.attrib['id']
        id_to_index[gxl_id] = index
        G.add_node(index, id=gxl_id, label=label)

    for edge in root.iter('edge'):
        label = edge.find('attr')[0].text
        source = id_to_index[edge.attrib['from']]
        target = id_to_index[edge.attrib['to']]
        G.add_edge(source, target, label=label)
    return G
def loadDataset(filename):
    """Load the graphs and targets listed in a dataset file.

    Two formats are recognised from the file extension:

    * ``.ds``: one graph per line, ``<ct file> <target>`` separated by a
      space; targets are returned as floats.
    * ``.cxl``: XML file whose ``print`` elements carry ``file`` (a .gxl
      file) and ``class`` attributes; classes are returned as strings.

    Graph files are resolved relative to the directory of ``filename``.
    Any other extension yields two empty lists.

    Returns
    -------
    data : list of NetworkX graphs
    y : list of targets
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            tmp = line.split(' ')
            # Remove the '#' in file names. join() keeps the path relative
            # when the dataset file has no directory part; plain
            # concatenation produced '/<name>' in that case.
            data.append(loadCT(join(dirname_dataset, tmp[0].replace('#', '', 1))))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y
| @@ -0,0 +1,59 @@ | |||||
| import networkx as nx | |||||
| import numpy as np | |||||
def getSPLengths(G1):
    """Return the matrix of shortest-path lengths (edge counts) of G1.

    Entry [i, j] is the number of edges on the path found from i to j;
    pairs for which no path is reported keep the value 0. Nodes are used
    directly as matrix indices.
    """
    all_paths = nx.shortest_path(G1)
    size = G1.number_of_nodes()
    distances = np.zeros((size, size))
    for source, paths_from_source in all_paths.items():
        for target, path in paths_from_source.items():
            # A path is a list of nodes, hence one more item than edges.
            distances[source, target] = len(path) - 1
    return distances
def getSPGraph(G):
    """Transform graph G to its corresponding shortest-paths graph.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.

    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    # The Floyd transformation is the only implementation currently available.
    S = floydTransformation(G)
    return S
def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G. Every pair (i, j) of
        indices in ``range(number_of_nodes)`` - including i == j - is joined
        by an edge whose 'cost' attribute is the corresponding entry of the
        Floyd-Warshall distance matrix.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    # @todo weight label not considered
    spMatrix = nx.floyd_warshall_numpy(G)

    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))

    node_count = G.number_of_nodes()
    for source in range(node_count):
        for target in range(node_count):
            S.add_edge(source, target, cost = spMatrix[source, target])
    return S
| @@ -0,0 +1,5 @@ | |||||
| To use the library : | |||||
| $> virtualenv --python=/usr/bin/python3.5 venv | |||||
| $> source venv/bin/activate | |||||
| $> pip install -r requirements.txt | |||||
| ... Go use pygraph | |||||
| @@ -0,0 +1,66 @@ | |||||
| import ot | |||||
| import sys | |||||
| import pathlib | |||||
| sys.path.insert(0, "../") | |||||
| from pygraph.utils.graphfiles import loadDataset | |||||
| from pygraph.ged.costfunctions import ConstantCostFunction | |||||
| from pygraph.utils.utils import getSPLengths | |||||
| from tqdm import tqdm | |||||
| import numpy as np | |||||
| from scipy.optimize import linear_sum_assignment | |||||
| from pygraph.ged.GED import ged | |||||
| import scipy | |||||
def pad(C, n):
    """Return an n x n zero matrix with C copied into its top-left corner."""
    n_rows = C.shape[0]
    n_cols = C.shape[1]
    padded = np.zeros((n, n))
    padded[:n_rows, :n_cols] = C
    return padded
if (__name__ == "__main__"):
    # Compare, on pairs of graphs with the same number of nodes, the edit
    # distance induced by a Gromov-Wasserstein based mapping with the one
    # obtained from the default bipartite heuristic of ged().

    # Dataset path: first command-line argument if given, otherwise the
    # historical default location.
    ds_filename = (sys.argv[1] if len(sys.argv) > 1
                   else "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds")
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    pairs = list()
    ged_distances = list()  # np.zeros((N, N))
    gw_distances = list()  # np.zeros((N, N))
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1 = dataset[i]
            G2 = dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            # Only graphs of equal size are compared, so no padding of the
            # distance matrices is ever needed.
            if n != m:
                continue

            C1 = getSPLengths(G1)
            C2 = getSPLengths(G2)
            # Normalise to [0, 1]; a single-node graph has an all-zero matrix
            # and dividing by its max would fill it with NaN.
            if C1.max() > 0:
                C1 /= C1.max()
            if C2.max() > 0:
                C2 /= C2.max()

            p = ot.unif(n)
            q = ot.unif(n)
            gw = ot.gromov_wasserstein(C1, C2, p, q,
                                       'square_loss', epsilon=5e-3)
            # Turn the transport plan into a one-to-one mapping by maximising
            # the transported mass.
            row_ind, col_ind = linear_sum_assignment(-gw)
            rho = col_ind
            varrho = row_ind[np.argsort(col_ind)]
            pairs.append((i, j))
            gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

            ged_distances.append(ged(G1, G2, cf=cf)[0])

    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)
| @@ -0,0 +1,16 @@ | |||||
| cycler==0.10.0 | |||||
| Cython==0.27.3 | |||||
| decorator==4.1.2 | |||||
| matplotlib==2.1.0 | |||||
| networkx==2.0 | |||||
| numpy==1.13.3 | |||||
| pkg-resources==0.0.0 | |||||
| POT==0.4.0 | |||||
| pyparsing==2.2.0 | |||||
| python-dateutil==2.6.1 | |||||
| pytz==2017.3 | |||||
| scikit-learn==0.19.1 | |||||
| scipy==1.0.0 | |||||
| six==1.11.0 | |||||
| sklearn==0.0 | |||||
| tqdm==4.19.4 | |||||