add pygraph/kernels/spkernel.py, modify pygraph/utils/utils.py and pygraph/utils/graphfiles.py (tags/v0.1)
| @@ -0,0 +1,170 @@ | |||
| { | |||
| "cells": [ | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 1, | |||
| "metadata": { | |||
| "autoscroll": false, | |||
| "ein.tags": "worksheet-0", | |||
| "slideshow": { | |||
| "slide_type": "-" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import numpy as np\n", | |||
| "import paths\n", | |||
| "\n", | |||
| "import pygraph\n", | |||
| "\n", | |||
| "from pygraph.utils.graphfiles import loadDataset\n" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 2, | |||
| "metadata": { | |||
| "autoscroll": false, | |||
| "ein.tags": "worksheet-0", | |||
| "slideshow": { | |||
| "slide_type": "-" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import networkx as nx\n", | |||
| "import numpy as np\n", | |||
| "import matplotlib.pyplot as plt\n", | |||
| "\n", | |||
| "# We load a ds dataset\n", | |||
| "# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n", | |||
| "dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 3, | |||
| "metadata": { | |||
| "autoscroll": false, | |||
| "ein.tags": "worksheet-0", | |||
| "slideshow": { | |||
| "slide_type": "-" | |||
| } | |||
| }, | |||
| "outputs": [ | |||
| { | |||
| "name": "stderr", | |||
| "output_type": "stream", | |||
| "text": [ | |||
| "100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n", | |||
| "100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n", | |||
| "100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n", | |||
| "100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n" | |||
| ] | |||
| } | |||
| ], | |||
| "source": [ | |||
| "#Compute graph edit distances\n", | |||
| "\n", | |||
| "from tqdm import tqdm\n", | |||
| "from pygraph.c_ext.lsape_binders import lsap_solverHG\n", | |||
| "from pygraph.ged.costfunctions import ConstantCostFunction\n", | |||
| "from pygraph.ged.GED import ged\n", | |||
| "import time\n", | |||
| "\n", | |||
| "cf = ConstantCostFunction(1,3,1,3)\n", | |||
| "N=len(dataset)\n", | |||
| "\n", | |||
| "methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n", | |||
| "ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n", | |||
| "\n", | |||
| "times = list()\n", | |||
| "start = time.clock()\n", | |||
| "for i in tqdm(range(0,N)):\n", | |||
| " for j in range(0,N):\n", | |||
| " ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", | |||
| "times.append(time.clock() - start)\n", | |||
| "\n", | |||
| "\n", | |||
| "start = time.clock()\n", | |||
| "for i in tqdm(range(0,N)):\n", | |||
| " for j in range(0,N):\n", | |||
| " ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n", | |||
| "\n", | |||
| "times.append(time.clock() - start)\n", | |||
| "\n", | |||
| "start = time.clock()\n", | |||
| "for i in tqdm(range(0,N)):\n", | |||
| " for j in range(0,N):\n", | |||
| " ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n", | |||
| "times.append(time.clock() - start)\n", | |||
| "\n", | |||
| "start = time.clock()\n", | |||
| "for i in tqdm(range(0,N)):\n", | |||
| " for j in range(0,N):\n", | |||
| " ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n", | |||
| "times.append(time.clock() - start)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 5, | |||
| "metadata": { | |||
| "autoscroll": false, | |||
| "ein.tags": "worksheet-0", | |||
| "slideshow": { | |||
| "slide_type": "-" | |||
| } | |||
| }, | |||
| "outputs": [ | |||
| { | |||
| "name": "stdout", | |||
| "output_type": "stream", | |||
| "text": [ | |||
| " method \t mean \t mean \t time\n", | |||
| " Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n", | |||
| " Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n", | |||
| " Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n", | |||
| " Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n" | |||
| ] | |||
| } | |||
| ], | |||
| "source": [ | |||
| "print(\" method \\t mean \\t mean \\t time\")\n", | |||
| "data = list()\n", | |||
| "for i in range(0,len(ged_distances)):\n", | |||
| " ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n", | |||
| " print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [] | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "kernelspec": { | |||
| "display_name": "Python 3", | |||
| "language": "python", | |||
| "name": "python3" | |||
| }, | |||
| "language_info": { | |||
| "codemirror_mode": { | |||
| "name": "ipython", | |||
| "version": 3 | |||
| }, | |||
| "file_extension": ".py", | |||
| "mimetype": "text/x-python", | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.6.2" | |||
| }, | |||
| "name": "py-graph_test.ipynb" | |||
| }, | |||
| "nbformat": 4, | |||
| "nbformat_minor": 2 | |||
| } | |||
| @@ -0,0 +1,21 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """ | |||
| Pygraph | |||
| This package contains 5 sub packages : | |||
| * c_ext : binders to C++ code | |||
| * ged : allows to compute graph edit distance between networkX graphs | |||
| * kernels : computation of graph kernels, ie graph similarity measure compatible with SVM | |||
| * notebooks : examples of code using this library | |||
| * utils : Diverse computation on graphs | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| # import sub modules | |||
| from pygraph import c_ext | |||
| from pygraph import ged | |||
| from pygraph import utils | |||
| @@ -0,0 +1,5 @@ | |||
# Build the shared library that exposes the LSAPE solvers to Python.
# You must specify LSAPE_DIR (the LSAPE "include" directory), either as an
# environment variable or on the command line, e.g.
#   make LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/
liblsap.so: lsap.cpp
	g++ -fPIC -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR)
| @@ -0,0 +1,6 @@ | |||
| Python wrapper for lsape method | |||
| Specify your LSAPE_DIR env variable with the location of the source | |||
| code to compile | |||
| source code : https://bougleux.users.greyc.fr/lsape/ | |||
| @@ -0,0 +1,17 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """Pygraph - c_ext module | |||
| This package binds some C++ code to python | |||
| lsape_binders.py : binders to C++ code of LSAPE methods implemented in | |||
| https://bougleux.users.greyc.fr/lsape/ | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| # import sub modules | |||
| from pygraph.c_ext import lsape_binders | |||
| @@ -0,0 +1,43 @@ | |||
| /* | |||
| Python wrapper | |||
| */ | |||
| #include "hungarian-lsape.hh" | |||
| #include "hungarian-lsap.hh" | |||
| #include <cstdio> | |||
| extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){ | |||
| double * u = new double[nm]; | |||
| double * v = new double[nm]; | |||
| int * rho_int = new int[nm]; | |||
| int * varrho_int = new int[nm]; | |||
| hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int); | |||
| //Find a better way to do | |||
| for (int i =0;i<nm;i++){ | |||
| rho[i] = (long)(rho_int[i]); | |||
| varrho[i] = (long)(varrho_int[i]); | |||
| } | |||
| return 0; | |||
| } | |||
| extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){ | |||
| double * u = new double[n]; | |||
| double * v = new double[m]; | |||
| int * rho_int = new int[n]; | |||
| int * varrho_int = new int[m]; | |||
| hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v); | |||
| for (int i =0;i<n;i++) | |||
| rho[i] = (long)(rho_int[i]); | |||
| for (int i =0;i<m;i++) | |||
| varrho[i] = (long)(varrho_int[i]); | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,23 @@ | |||
| import numpy as np | |||
| import ctypes as c | |||
| from ctypes import cdll | |||
| import os.path | |||
def lsap_solverHG(C):
    """Solve a square assignment problem with the compiled LSAPE Hungarian solver.

    Parameters
    ----------
    C : numpy.ndarray
        Square cost matrix. It is NOT modified: entries equal to ``+inf`` are
        replaced by 10000 in a private copy before calling the solver.

    Returns
    -------
    row_ind, col_ind : numpy.ndarray
        ``row_ind`` is ``0..nm-1`` and ``col_ind`` holds the ``varrho`` array
        filled in by the native ``lsap`` routine, giving the same
        ``(row_ind, col_ind)`` return shape as
        ``scipy.optimize.linear_sum_assignment``.

    Notes
    -----
    ``liblsap.so`` is loaded from the directory containing this module, so it
    must have been built there beforehand.
    """
    nm = C.shape[0]
    dll_name = 'liblsap.so'
    lib = cdll.LoadLibrary(os.path.abspath(
        os.path.join(os.path.dirname(__file__), dll_name)))
    lib.lsap.restype = c.c_int

    # Private float64, C-contiguous copy: the native code receives a raw
    # pointer, so dtype and layout must be fixed here, and the caller's
    # matrix must not be altered by the inf -> 10000 substitution.
    costs = np.ascontiguousarray(np.where(C == np.inf, 10000, C),
                                 dtype=np.float64)

    # Output buffers typed as C ``long`` to match the native signature.
    rho = np.zeros(nm, dtype=c.c_long)
    varrho = np.zeros(nm, dtype=c.c_long)

    # NOTE(review): the buffer is handed over in row-major order; the solver
    # presumably reads it column-major, which is why ``varrho`` (not ``rho``)
    # is returned as the column assignment -- confirm against LSAPE docs.
    lib.lsap(c.c_void_p(costs.ctypes.data),
             c.c_int(nm),
             c.c_void_p(rho.ctypes.data),
             c.c_void_p(varrho.ctypes.data))

    return np.arange(nm), varrho.astype(int)
| @@ -1,10 +1,11 @@ | |||
| from ged.costfunctions import BasicCostFunction, RiesenCostFunction | |||
| from ged.costfunctions import NeighboorhoodCostFunction | |||
| from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||
| from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction | |||
| from pygraph.ged.costfunctions import NeighboorhoodCostFunction | |||
| from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||
| from scipy.optimize import linear_sum_assignment | |||
| def ged(G1, G2, method='Riesen', rho=None, varrho=None, | |||
| cf=BasicCostFunction(1, 3, 1, 3)): | |||
| cf=ConstantCostFunction(1, 3, 1, 3), | |||
| solver=linear_sum_assignment): | |||
| """Compute Graph Edit Distance between G1 and G2 according to mapping | |||
| encoded within rho and varrho. Graph's node must be indexed by a | |||
| index which is used is rho and varrho | |||
| @@ -14,31 +15,32 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None, | |||
| """ | |||
| if ((rho is None) or (varrho is None)): | |||
| if(method == 'Riesen'): | |||
| cf_bp = RiesenCostFunction(cf) | |||
| cf_bp = RiesenCostFunction(cf,lsap_solver=solver) | |||
| elif(method == 'Neighboorhood'): | |||
| cf_bp = NeighboorhoodCostFunction(cf) | |||
| cf_bp = NeighboorhoodCostFunction(cf,lsap_solver=solver) | |||
| elif(method == 'Basic'): | |||
| cf_bp = cf | |||
| else: | |||
| raise NameError('Non existent method ') | |||
| rho, varrho = getOptimalMapping(computeBipartiteCostMatrix(G1, G2, cf_bp)) | |||
| rho, varrho = getOptimalMapping( | |||
| computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver) | |||
| n = G1.number_of_nodes() | |||
| m = G2.number_of_nodes() | |||
| ged = 0 | |||
| for i in G1.nodes_iter(): | |||
| for i in G1.nodes(): | |||
| phi_i = rho[i] | |||
| if(phi_i >= m): | |||
| ged += cf.cnd(i, G1) | |||
| else: | |||
| ged += cf.cns(i, phi_i, G1, G2) | |||
| for j in G2.nodes_iter(): | |||
| for j in G2.nodes(): | |||
| phi_j = varrho[j] | |||
| if(phi_j >= n): | |||
| ged += cf.cni(j, G2) | |||
| for e in G1.edges_iter(data=True): | |||
| for e in G1.edges(data=True): | |||
| i = e[0] | |||
| j = e[1] | |||
| phi_i = rho[i] | |||
| @@ -49,13 +51,13 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None, | |||
| if(mappedEdge): | |||
| e2 = [phi_i, phi_j, G2[phi_i][phi_j]] | |||
| min_cost = min(cf.ces(e, e2, G1, G2), | |||
| cf.ced(e, G1), cf.cei(e2, G2)) | |||
| cf.ced(e, G1) + cf.cei(e2, G2)) | |||
| ged += min_cost | |||
| else: | |||
| ged += cf.ced(e, G1) | |||
| else: | |||
| ged += cf.ced(e, G1) | |||
| for e in G2.edges_iter(data=True): | |||
| for e in G2.edges(data=True): | |||
| i = e[0] | |||
| j = e[1] | |||
| phi_i = varrho[i] | |||
| @@ -68,7 +70,3 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None, | |||
| else: | |||
| ged += cf.ced(e, G2) | |||
| return ged, rho, varrho | |||
| def computeDistanceMatrix(dataset): | |||
| pass | |||
| @@ -0,0 +1,17 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """Pygraph - ged module | |||
| Implement some methods to compute ged between graphs | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| from pygraph.ged import costfunctions | |||
| from pygraph.ged import bipartiteGED | |||
| from pygraph.ged import GED | |||
| @@ -1,9 +1,9 @@ | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
| from ged.costfunctions import BasicCostFunction | |||
| from pygraph.ged.costfunctions import ConstantCostFunction | |||
| def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)): | |||
| def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)): | |||
| """Compute a Cost Matrix according to cost function cf""" | |||
| n = G1.number_of_nodes() | |||
| m = G2.number_of_nodes() | |||
| @@ -11,23 +11,23 @@ def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)): | |||
| C = np.ones([nm, nm])*np.inf | |||
| C[n:, m:] = 0 | |||
| for u in G1.nodes_iter(): | |||
| for v in G2.nodes_iter(): | |||
| for u in G1.nodes(): | |||
| for v in G2.nodes(): | |||
| cost = cf.cns(u, v, G1, G2) | |||
| C[u, v] = cost | |||
| for v in G1.nodes_iter(): | |||
| for v in G1.nodes(): | |||
| C[v, m + v] = cf.cnd(v, G1) | |||
| for v in G2.nodes_iter(): | |||
| for v in G2.nodes(): | |||
| C[n + v, v] = cf.cni(v, G2) | |||
| return C | |||
| def getOptimalMapping(C): | |||
| def getOptimalMapping(C, lsap_solver=linear_sum_assignment): | |||
| """Compute an optimal linear mapping according to cost Matrix C | |||
| inclure les progs C de Seb | |||
| """ | |||
| row_ind, col_ind = linear_sum_assignment(C) | |||
| row_ind, col_ind = lsap_solver(C) | |||
| return col_ind, row_ind[np.argsort(col_ind)] | |||
| @@ -2,15 +2,17 @@ import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
| class BasicCostFunction: | |||
| class ConstantCostFunction: | |||
| """ Define a symmetric constant cost fonction for edit operations """ | |||
| def __init__(self, cns, cni, ces, cei): | |||
| self.cns_ = cns | |||
| self.cni_ = self.cnd_ = cni | |||
| self.ces_ = ces | |||
| self.cei_ = self.ced_ = cei | |||
| def cns(self, u, v, G1, G2): | |||
| return (G1.node[u]['label'] != G2.node[v]['label'])*self.cns_ | |||
| def cns(self, node_u, node_v, g1, g2): | |||
| """ return substitution edit operation cost between node_u of G1 and node_v of G2""" | |||
| return (g1.node[node_u]['label'] != g2.node[node_v]['label'])*self.cns_ | |||
| def cnd(self, u, G1): | |||
| return self.cnd_ | |||
| @@ -30,9 +32,11 @@ class BasicCostFunction: | |||
| return self.cei_ | |||
| class RiesenCostFunction(BasicCostFunction): | |||
| def __init__(self, cf): | |||
| BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_) | |||
| class RiesenCostFunction(): | |||
| """ Cost function associated to the computation of a cost matrix between nodes for LSAP""" | |||
| def __init__(self, cf, lsap_solver=linear_sum_assignment): | |||
| self.cf_ = cf | |||
| self.lsap_solver_ = lsap_solver | |||
| def cns(self, u, v, G1, G2): | |||
| """ u et v sont des id de noeuds """ | |||
| @@ -48,41 +52,43 @@ class RiesenCostFunction(BasicCostFunction): | |||
| e1 = [u, nbr_u, G1[u][nbr_u]] | |||
| for nbr_v in G2[v]: | |||
| e2 = [v, nbr_v, G2[v][nbr_v]] | |||
| sub_C[i, j] = self.ces(e1, e2, G1, G2) | |||
| sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2) | |||
| j += 1 | |||
| i += 1 | |||
| i = 0 | |||
| for nbr_u in l_nbr_u: | |||
| sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1) | |||
| sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1) | |||
| i += 1 | |||
| j = 0 | |||
| for nbr_v in l_nbr_v: | |||
| sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2) | |||
| sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2) | |||
| j += 1 | |||
| row_ind, col_ind = linear_sum_assignment(sub_C) | |||
| row_ind, col_ind = self.lsap_solver_(sub_C) | |||
| cost = np.sum(sub_C[row_ind, col_ind]) | |||
| return BasicCostFunction.cns(self, u, v, G1, G2) + cost | |||
| return self.cf_.cns(u, v, G1, G2) + cost | |||
| def cnd(self, u, G1): | |||
| cost = 0 | |||
| for nbr in G1[u]: | |||
| cost += BasicCostFunction.ced(self,[u,nbr,G1[u][nbr]],G1) | |||
| cost += self.cf_.ced([u,nbr,G1[u][nbr]],G1) | |||
| return BasicCostFunction.cnd(self,u,G1) + cost | |||
| return self.cf_.cnd(u,G1) + cost | |||
| def cni(self, v, G2): | |||
| cost = 0 | |||
| for nbr in G2[v]: | |||
| cost += BasicCostFunction.cei(self, [v,nbr,G2[v][nbr]], G2) | |||
| cost += self.cf_.cei([v,nbr,G2[v][nbr]], G2) | |||
| return BasicCostFunction.cni(self, v, G2) + cost | |||
| return self.cf_.cni(v, G2) + cost | |||
| class NeighboorhoodCostFunction(BasicCostFunction): | |||
| def __init__(self, cf): | |||
| BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_) | |||
| class NeighboorhoodCostFunction(): | |||
| """ Cost function associated to the computation of a cost matrix between nodes for LSAP""" | |||
| def __init__(self, cf, lsap_solver=linear_sum_assignment): | |||
| self.cf_ = cf | |||
| self.lsap_solver_ = lsap_solver | |||
| def cns(self, u, v, G1, G2): | |||
| """ u et v sont des id de noeuds """ | |||
| @@ -98,36 +104,35 @@ class NeighboorhoodCostFunction(BasicCostFunction): | |||
| e1 = [u, nbr_u, G1[u][nbr_u]] | |||
| for nbr_v in G2[v]: | |||
| e2 = [v, nbr_v, G2[v][nbr_v]] | |||
| sub_C[i, j] = self.ces(e1, e2, G1, G2) | |||
| sub_C[i, j] += BasicCostFunction.cns(self, | |||
| nbr_u, nbr_v, G1, G2) | |||
| sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2) | |||
| sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2) | |||
| j += 1 | |||
| i += 1 | |||
| i = 0 | |||
| for nbr_u in l_nbr_u: | |||
| sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1) | |||
| sub_C[i, m+i] += BasicCostFunction.cnd(self, nbr_u, G1) | |||
| sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1) | |||
| sub_C[i, m+i] += self.cf_.cnd(nbr_u, G1) | |||
| i += 1 | |||
| j = 0 | |||
| for nbr_v in l_nbr_v: | |||
| sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2) | |||
| sub_C[n+j, j] += BasicCostFunction.cni(self, nbr_v, G2) | |||
| sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2) | |||
| sub_C[n+j, j] += self.cf_.cni(nbr_v, G2) | |||
| j += 1 | |||
| row_ind, col_ind = linear_sum_assignment(sub_C) | |||
| row_ind, col_ind = self.lsap_solver_(sub_C) | |||
| cost = np.sum(sub_C[row_ind, col_ind]) | |||
| return BasicCostFunction.cns(self, u, v, G1, G2) + cost | |||
| return self.cf_.cns(u, v, G1, G2) + cost | |||
| def cnd(self, u, G1): | |||
| cost = 0 | |||
| for nbr in G1[u]: | |||
| cost += BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1) | |||
| return BasicCostFunction.cnd(self, u, G1) + cost | |||
| cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1) | |||
| return self.cf_.cnd(u, G1) + cost | |||
| def cni(self, v, G2): | |||
| cost = 0 | |||
| for nbr in G2[v]: | |||
| cost += BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2) | |||
| return BasicCostFunction.cni(self, v, G2) + cost | |||
| cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2) | |||
| return self.cf_.cni(v, G2) + cost | |||
| @@ -0,0 +1,68 @@ | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../") | |||
| import networkx as nx | |||
| import numpy as np | |||
| import time | |||
| from utils.utils import getSPGraph | |||
def _count_matching_sp_edges(S1, S2):
    """Count edge pairs (one edge from each graph) that match.

    Two edges match when they carry the same non-zero 'cost' attribute and
    join the same two node ids, in either orientation.
    """
    matches = 0
    for u1, v1, attrs1 in S1.edges(data=True):
        cost = attrs1['cost']
        if cost == 0:  # zero-cost edges never contribute
            continue
        for u2, v2, attrs2 in S2.edges(data=True):
            if cost == attrs2['cost'] and (
                    (u1 == u2 and v1 == v2) or (u1 == v2 and v1 == u2)):
                matches += 1
    return matches


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated. Each graph
        is first converted with ``getSPGraph``.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated. Their edges are read
        as they are, so every edge must already carry a 'cost' attribute
        (no conversion with ``getSPGraph`` is applied in this form).

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. / SP Kernel between 2 graphs.

    Notes
    -----
    The elapsed time of the kernel computation is printed on stdout.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))

        # get shortest path graphs of Gn
        Sn = [getSPGraph(G) for G in Gn]

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # each unordered pair is visited once; mirror the value so
                # the matrix is symmetric
                k = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = k
                Kmatrix[j][i] = k

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    else:  # for only 2 graphs
        G1 = args[0]
        G2 = args[1]

        # the timer must be started in this branch too, otherwise the print
        # below fails on an undefined name
        start_time = time.time()
        kernel = _count_matching_sp_edges(G1, G2)

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

        return kernel
| @@ -0,0 +1,17 @@ | |||
| # -*-coding:utf-8 -*- | |||
| """Pygraph - utils module | |||
| Implement some methods to manage graphs | |||
| graphfiles.py : load .gxl and .ct files | |||
| utils.py : compute some properties on networkX graphs | |||
| """ | |||
| # info | |||
| __version__ = "0.1" | |||
| __author__ = "Benoit Gaüzère" | |||
| __date__ = "November 2017" | |||
| from pygraph.utils import graphfiles | |||
| from pygraph.utils import utils | |||
| @@ -1,13 +1,25 @@ | |||
| import networkx as nx | |||
| def loadCT(filename): | |||
| """load data from .ct file. | |||
| Notes | |||
| ------ | |||
| a typical example of data in .ct is like this: | |||
| 3 2 <- number of nodes and edges | |||
| 0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers? | |||
| 0.0000 0.0000 0.0000 C | |||
| 0.0000 0.0000 0.0000 O | |||
| 1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what is the 3rd number? | |||
| 2 3 1 1 | |||
| """ | |||
| content = open(filename).read().splitlines() | |||
| G = nx.Graph(name=str(content[0])) | |||
| G = nx.Graph(name=str(content[0])) # set name of the graph | |||
| tmp = content[1].split(" ") | |||
| if tmp[0] == '': | |||
| nb_nodes = int(tmp[1]) | |||
| nb_edges = int(tmp[2]) | |||
| nb_nodes = int(tmp[1]) # number of the nodes | |||
| nb_edges = int(tmp[2]) # number of the edges | |||
| else: | |||
| nb_nodes = int(tmp[0]) | |||
| nb_edges = int(tmp[1]) | |||
| @@ -18,7 +30,7 @@ def loadCT(filename): | |||
| G.add_node(i, label=tmp[3]) | |||
| for i in range(0, nb_edges): | |||
| tmp = content[i+G.number_of_nodes()+2].split(" ") | |||
| tmp = content[i + G.number_of_nodes() + 2].split(" ") | |||
| tmp = [x for x in tmp if x != ''] | |||
| G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3])) | |||
| return G | |||
| @@ -43,9 +55,10 @@ def loadGXL(filename): | |||
| label = edge.find('attr')[0].text | |||
| G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label) | |||
| return G | |||
| def loadDataset(filename): | |||
| """load file list of the dataset. | |||
| """ | |||
| from os.path import dirname, splitext | |||
| dirname_dataset = dirname(filename) | |||
| @@ -56,7 +69,7 @@ def loadDataset(filename): | |||
| content = open(filename).read().splitlines() | |||
| for i in range(0, len(content)): | |||
| tmp = content[i].split(' ') | |||
| data.append(loadCT(dirname_dataset + '/' + tmp[0])) | |||
| data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names | |||
| y.append(float(tmp[1])) | |||
| elif(extension == "cxl"): | |||
| import xml.etree.ElementTree as ET | |||
| @@ -0,0 +1,59 @@ | |||
| import networkx as nx | |||
| import numpy as np | |||
def getSPLengths(G1):
    """Return the matrix of shortest-path lengths (in edges) of G1.

    Parameters
    ----------
    G1 : NetworkX graph
        Node ids are used directly as row/column indices, so they are
        assumed to be the integers 0..n-1 -- TODO confirm for all loaders.

    Return
    ------
    distances : Numpy array
        n x n array where entry (i, j) is the number of edges on the path
        returned by ``nx.shortest_path`` from i to j. Pairs for which no
        path is reported keep the initial value 0.
    """
    # dict() accepts both the mapping and the (node, paths) iterator forms
    sp = dict(nx.shortest_path(G1))
    n = G1.number_of_nodes()
    distances = np.zeros((n, n))
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # a path lists its nodes, so its length in edges is one less
            distances[i, j] = len(path) - 1
    return distances
def getSPGraph(G):
    """Build the shortest-paths graph of G.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G, exactly as produced by
        ``floydTransformation``.

    Notes
    ------
    Following [1], the shortest-paths graph S of G has the same nodes as G,
    and its edges are labeled with the shortest distance in G between their
    two end nodes. The construction itself is delegated entirely to
    ``floydTransformation``.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    sp_graph = floydTransformation(G)
    return sp_graph
def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        Undirected graph with the nodes (and node attributes) of G. An edge
        is added for every ordered pair of nodes, self pairs included, and
        its 'cost' attribute holds the corresponding entry of the matrix
        returned by ``nx.floyd_warshall_numpy``.

    Notes
    -----
    Rows and columns of the distance matrix are bound explicitly to the node
    list of G, so the result is correct whatever the node ids are, not only
    when they are 0..n-1 in insertion order.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    nodes = list(G.nodes())
    # fix the row/column order of the matrix to `nodes`
    spMatrix = nx.floyd_warshall_numpy(G, nodelist=nodes)  # @todo weight label not considered
    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))
    for i, u in enumerate(nodes):
        for j, v in enumerate(nodes):
            # (u, v) and (v, u) address the same undirected edge; the later
            # assignment overwrites the earlier one
            S.add_edge(u, v, cost=spMatrix[i, j])
    return S
| @@ -0,0 +1,5 @@ | |||
| To use the library : | |||
| $> virtualenv --python=/usr/bin/python3.5 venv | |||
| $> pip install -r requirements.txt | |||
| $> source venv/bin/activate | |||
| ... Go use pygraph | |||
| @@ -0,0 +1,66 @@ | |||
| import ot | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../") | |||
| from pygraph.utils.graphfiles import loadDataset | |||
| from pygraph.ged.costfunctions import ConstantCostFunction | |||
| from pygraph.utils.utils import getSPLengths | |||
| from tqdm import tqdm | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
| from pygraph.ged.GED import ged | |||
| import scipy | |||
def pad(C, n):
    """Embed the 2-D array C in the top-left corner of an n x n zero matrix.

    Returns a new float array; C itself is left untouched.
    """
    n_rows, n_cols = C.shape[0], C.shape[1]
    padded = np.zeros((n, n))
    padded[:n_rows, :n_cols] = C
    return padded
# Script entry point: for every pair of equally-sized graphs of the dataset,
# compare the edit distance obtained from a Gromov-Wasserstein based node
# mapping with the one obtained from the default ged() mapping.
# NOTE(review): the nesting below was reconstructed from the statement order
# (the variables used by the last statements of the loop only exist inside
# the `n == m` branch) -- confirm against the original file.
if (__name__ == "__main__"):
    ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    # (i, j) index pairs actually evaluated, parallel to the two lists below
    pairs = list()

    ged_distances = list() #np.zeros((N, N))
    gw_distances = list() #np.zeros((N, N))
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1 = dataset[i]
            G2 = dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            # only graphs with the same number of nodes are compared
            if(n == m):
                C1 = getSPLengths(G1)
                C2 = getSPLengths(G2)

                # scale each distance matrix by its largest entry
                C1 /= C1.max()
                C2 /= C2.max()

                dim = max(n, m)
                # padding branches; with n == m neither of them is taken
                if(n < m):
                    C1 = pad(C1, dim)
                elif (m < n):
                    C2 = pad(C2, dim)

                # uniform weights over the dim nodes of each graph
                p = ot.unif(dim)
                q = ot.unif(dim)

                gw = ot.gromov_wasserstein(C1, C2, p, q,
                                           'square_loss', epsilon=5e-3)
                # turn the coupling into a one-to-one mapping by maximising
                # the selected coupling mass (hence the minus sign)
                row_ind, col_ind = linear_sum_assignment(-gw)
                rho = col_ind
                varrho = row_ind[np.argsort(col_ind)]
                pairs.append((i,j))
                # edit distance induced by the GW mapping
                gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

                # edit distance with the mapping computed by ged() itself
                ged_distances.append(ged(G1, G2, cf=cf)[0])

    # report the mean of each series and save both to .npy files
    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)
| @@ -0,0 +1,16 @@ | |||
| cycler==0.10.0 | |||
| Cython==0.27.3 | |||
| decorator==4.1.2 | |||
| matplotlib==2.1.0 | |||
| networkx==2.0 | |||
| numpy==1.13.3 | |||
| pkg-resources==0.0.0 | |||
| POT==0.4.0 | |||
| pyparsing==2.2.0 | |||
| python-dateutil==2.6.1 | |||
| pytz==2017.3 | |||
| scikit-learn==0.19.1 | |||
| scipy==1.0.0 | |||
| six==1.11.0 | |||
| sklearn==0.0 | |||
| tqdm==4.19.4 | |||
| @@ -1,10 +0,0 @@ | |||
| import networkx as nx | |||
| import numpy as np | |||
| def getSPLengths(G1): | |||
| sp = nx.shortest_path(G1) | |||
| distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) | |||
| for i in np.keys(): | |||
| for j in np[i].keys(): | |||
| distances[i, j] = len(sp[i][j])-1 | |||