| @@ -0,0 +1,74 @@ | |||
| from ged.costfunctions import BasicCostFunction, RiesenCostFunction | |||
| from ged.costfunctions import NeighboorhoodCostFunction | |||
| from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=BasicCostFunction(1, 3, 1, 3)):
    """Compute the edit cost between G1 and G2 induced by a node mapping.

    The mapping is given by ``rho`` (G1 node -> G2 index) and ``varrho``
    (G2 node -> G1 index). Graph nodes must be identified by the indices
    that are used to look up ``rho`` and ``varrho``. With ``n`` and ``m``
    the node counts of G1 and G2, ``rho[i] >= m`` marks node ``i`` of G1
    as deleted and ``varrho[j] >= n`` marks node ``j`` of G2 as inserted.

    If either mapping is missing, both are computed from the bipartite
    cost matrix built with the cost function selected by ``method``.

    Parameters:
        G1, G2: graphs exposing ``number_of_nodes()``, ``nodes()``,
            ``edges(data=True)`` and adjacency access ``G[u][v]``.
        method: 'Riesen', 'Neighboorhood' or 'Basic'; only used when a
            mapping has to be computed.
        rho, varrho: optional precomputed mappings.
        cf: cost function used to price the edit operations.

    Returns:
        A tuple ``(cost, rho, varrho)``.

    Raises:
        NameError: if a mapping must be computed and ``method`` is unknown.

    NOTE: would dictionaries for the mappings be more versatile?
    """
    if (rho is None) or (varrho is None):
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')
        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp))

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    total = 0

    # Node operations: every G1 node is either deleted or substituted,
    # every unmapped G2 node is inserted.
    # nodes()/edges() are used instead of nodes_iter()/edges_iter() so the
    # code runs on NetworkX 1.x as well as 2.x and later.
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            total += cf.cnd(i, G1)
        else:
            total += cf.cns(i, phi_i, G1, G2)
    for j in G2.nodes():
        if varrho[j] >= n:
            total += cf.cni(j, G2)

    # Edges of G1: substituted when both endpoints are mapped onto an
    # existing edge of G2, deleted otherwise.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if phi_i < m and phi_j < m and phi_j in G2[phi_i]:
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # NOTE(review): the cheapest of substitution, deletion alone and
            # insertion alone is charged here; confirm this is intended
            # rather than min(substitution, deletion + insertion).
            total += min(cf.ces(e, e2, G1, G2),
                         cf.ced(e, G1), cf.cei(e2, G2))
        else:
            total += cf.ced(e, G1)

    # Edges of G2 without a counterpart in G1 are inserted. Edges that do
    # have a counterpart were already priced in the loop above.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if phi_i < n and phi_j < n:
            if phi_j not in G1[phi_i]:
                total += cf.cei(e, G2)
        else:
            # An endpoint is an inserted node, so the edge is inserted too
            # (the insertion cost applies, not the deletion cost).
            total += cf.cei(e, G2)
    return total, rho, varrho
def computeDistanceMatrix(dataset):
    """Placeholder: not implemented yet, ignores ``dataset``, returns None."""
    return None
| @@ -0,0 +1,33 @@ | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
| from ged.costfunctions import BasicCostFunction | |||
def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)):
    """Build the (n+m) x (n+m) bipartite cost matrix for G1 and G2.

    With ``n`` and ``m`` the node counts of G1 and G2, the matrix is laid
    out as follows:

    * ``C[u, v]``     substitution cost of node ``u`` of G1 by ``v`` of G2;
    * ``C[u, m + u]`` deletion cost of node ``u`` of G1;
    * ``C[n + v, v]`` insertion cost of node ``v`` of G2;
    * ``C[n:, m:]``   zero;
    * every other entry is ``inf``.

    Node identifiers are used directly as matrix indices.
    NOTE(review): this presumes nodes are the integers 0..n-1 (resp.
    0..m-1); confirm against the graph loaders.

    Parameters:
        G1, G2: graphs exposing ``number_of_nodes()`` and ``nodes()``.
        cf: cost function providing ``cns``, ``cnd`` and ``cni``.

    Returns:
        The cost matrix as a float numpy array.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    nm = n + m
    C = np.full((nm, nm), np.inf)
    C[n:, m:] = 0

    # nodes() replaces nodes_iter(), which was removed in NetworkX 2.0;
    # nodes() is iterable on both old and new releases.
    for u in G1.nodes():
        for v in G2.nodes():
            C[u, v] = cf.cns(u, v, G1, G2)
    for u in G1.nodes():
        C[u, m + u] = cf.cnd(u, G1)
    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)
    return C
def getOptimalMapping(C):
    """Solve the linear assignment problem described by cost matrix C.

    Returns a pair ``(rho, varrho)``: ``rho`` is the column chosen for
    each assigned row, and ``varrho`` lists the assigned rows reordered
    by their column, i.e. the row chosen for each column.

    TODO: include Seb's C programs.
    """
    assigned_rows, assigned_cols = linear_sum_assignment(C)
    # Sorting by column index turns the row -> column assignment into the
    # reverse column -> row lookup.
    by_column = np.argsort(assigned_cols)
    return assigned_cols, assigned_rows[by_column]
| @@ -0,0 +1,133 @@ | |||
| import numpy as np | |||
| from scipy.optimize import linear_sum_assignment | |||
class BasicCostFunction:
    """Constant edit costs driven by 'label' attribute equality.

    Substitutions cost nothing when the two labels are equal and a fixed
    amount otherwise. Insertions and deletions share a single fixed cost,
    one for nodes and one for edges.
    """

    def __init__(self, cns, cni, ces, cei):
        """Store the costs; each deletion cost equals the insertion cost."""
        self.cns_ = cns
        self.cni_ = cni
        self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = cei
        self.ced_ = cei

    def cns(self, u, v, G1, G2):
        """Cost of substituting node u of G1 by node v of G2."""
        labels_differ = G1.node[u]['label'] != G2.node[v]['label']
        return labels_differ * self.cns_

    def cnd(self, u, G1):
        """Cost of deleting node u of G1 (constant)."""
        return self.cnd_

    def cni(self, v, G2):
        """Cost of inserting node v of G2 (constant)."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Cost of substituting edge e1 by edge e2.

        Both edges are (source, target, attributes) triples.
        TODO: try attributes other than symbolic ones, relying on the
        attributes' __eq__ operator.
        """
        labels_differ = e1[2]['label'] != e2[2]['label']
        return labels_differ * self.ces_

    def ced(self, e1, G1):
        """Cost of deleting edge e1 of G1 (constant)."""
        return self.ced_

    def cei(self, e2, G2):
        """Cost of inserting edge e2 of G2 (constant)."""
        return self.cei_
class RiesenCostFunction(BasicCostFunction):
    """Node costs that also account for the edges incident to each node.

    The constant costs are copied from an existing cost function. Edge
    costs are inherited unchanged from BasicCostFunction.
    """

    def __init__(self, cf):
        """Reuse the constant costs stored on ``cf``."""
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of node u (G1) by node v (G2).

        u and v are node identifiers. The result is the basic label
        substitution cost plus the cost of an optimal assignment between
        the edges incident to u and the edges incident to v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Small bipartite matrix over incident edges: substitutions in the
        # top-left block, deletions and insertions on the diagonals of the
        # off-diagonal blocks, zeros in the bottom-right block.
        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        for row, nbr_u in enumerate(nbrs_u):
            edge_u = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(nbrs_v):
                edge_v = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.ces(edge_u, edge_v, G1, G2)

        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m + row] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)

        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n + col, col] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = linear_sum_assignment(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Deletion cost of node u plus deletion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
            for nbr in G1[u])
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v plus insertion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
            for nbr in G2[v])
        return BasicCostFunction.cni(self, v, G2) + edges_cost
class NeighboorhoodCostFunction(BasicCostFunction):
    """Node costs that account for incident edges and neighbouring nodes.

    The constant costs are copied from an existing cost function. Edge
    costs are inherited unchanged from BasicCostFunction.
    """

    def __init__(self, cf):
        """Reuse the constant costs stored on ``cf``."""
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of node u (G1) by node v (G2).

        u and v are node identifiers. The result is the basic label
        substitution cost plus the cost of an optimal assignment between
        the neighbourhoods of u and v, where each entry combines the edge
        cost with the basic cost of the neighbour node at its far end.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Same block layout as the node-level bipartite matrix:
        # substitutions top-left, deletions/insertions on the diagonals of
        # the off-diagonal blocks, zeros bottom-right.
        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        for row, nbr_u in enumerate(nbrs_u):
            edge_u = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(nbrs_v):
                edge_v = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.ces(edge_u, edge_v, G1, G2)
                sub_C[row, col] += BasicCostFunction.cns(
                    self, nbr_u, nbr_v, G1, G2)

        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m + row] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[row, m + row] += BasicCostFunction.cnd(self, nbr_u, G1)

        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n + col, col] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n + col, col] += BasicCostFunction.cni(self, nbr_v, G2)

        rows, cols = linear_sum_assignment(sub_C)
        neighbourhood_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + neighbourhood_cost

    def cnd(self, u, G1):
        """Deletion cost of node u plus deletion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
            for nbr in G1[u])
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v plus insertion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
            for nbr in G2[v])
        return BasicCostFunction.cni(self, v, G2) + edges_cost
| @@ -0,0 +1,3 @@ | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../") | |||
| @@ -0,0 +1,74 @@ | |||
| import networkx as nx | |||
def loadCT(filename):
    """Load a graph from a ``.ct`` text file.

    Layout read by this function:

    * line 1: graph name;
    * line 2: node count then edge count, whitespace separated;
    * next ``nb_nodes`` lines: one node each, label in the 4th field;
    * next ``nb_edges`` lines: one edge each, with 1-based endpoints in
      the 1st and 2nd fields and an integer label in the 4th field.

    Nodes are numbered 0..nb_nodes-1 in file order; edge endpoints are
    shifted to that 0-based numbering.

    Returns:
        A ``networkx.Graph`` whose nodes and edges carry a ``label``.

    Raises:
        IndexError / ValueError: if the file is shorter than announced or
            a field is missing or not numeric.
    """
    # Read everything up front so the handle is closed before parsing.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))

    # split() without argument tolerates any amount of leading or
    # separating whitespace; split(" ") produced empty fields and made
    # int('') fail on headers such as "  5  4".
    counts = content[1].split()
    nb_nodes = int(counts[0])
    nb_edges = int(counts[1])

    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    first_edge_line = nb_nodes + 2
    for i in range(nb_edges):
        fields = content[first_edge_line + i].split()
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1,
                   label=int(fields[3]))
    return G
def loadGXL(filename):
    """Load a graph from a GXL (XML) file.

    Every ``node`` element becomes a graph node numbered in document
    order, keeping its original ``id`` attribute and taking its label
    from the first child of its first ``attr`` element. Every ``edge``
    element becomes an edge between the nodes referenced by its ``from``
    and ``to`` attributes, labelled the same way.

    Returns:
        A ``networkx.Graph`` with ``id``/``label`` node attributes and a
        ``label`` edge attribute.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()

    # Maps the GXL node id to the integer index used in the graph.
    index_of = {}
    for index, node in enumerate(root.iter('node')):
        label = node.find('attr')[0].text
        node_id = node.attrib['id']
        index_of[node_id] = index
        G.add_node(index, id=node_id, label=label)

    for edge in root.iter('edge'):
        label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        G.add_edge(source, target, label=label)
    return G
def loadDataset(filename):
    """Load every graph listed in a dataset index file.

    Two index formats are recognised from the file extension:

    * ``.ds``: one ``<ct-file> <target>`` pair per line; graphs are read
      with ``loadCT`` and targets converted to ``float``;
    * ``.cxl``: XML whose ``print`` elements carry ``file`` and ``class``
      attributes; graphs are read with ``loadGXL`` and the class is kept
      as a string.

    Graph file names are resolved relative to the directory of the index
    file. Any other extension yields two empty lists.

    Returns:
        A tuple ``(data, y)`` of graphs and their targets, in file order.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        # Close the index file before loading the individual graphs.
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            tmp = line.split()
            if not tmp:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            # join() keeps the path relative when the index has no
            # directory part; plain '/' concatenation produced "/name".
            data.append(loadCT(join(dirname_dataset, tmp[0])))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET
        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)
    return data, y
| @@ -0,0 +1,10 @@ | |||
| import networkx as nx | |||
| import numpy as np | |||
def getSPLengths(G1):
    """Return the matrix of shortest-path lengths between nodes of G1.

    Entry ``[i, j]`` is the number of edges on a shortest path from node
    ``i`` to node ``j``. Pairs with no connecting path keep the initial
    value 0.

    Node identifiers are used directly as matrix indices.
    NOTE(review): this presumes nodes are the integers 0..n-1; confirm
    against the graph loaders.
    """
    # dict() accepts both the mapping returned by older NetworkX releases
    # and the (node, paths) pairs yielded by newer ones.
    sp = dict(nx.shortest_path(G1))
    n = G1.number_of_nodes()
    distances = np.zeros((n, n))
    # Iterate the path dictionary itself: the original indexed the numpy
    # module (np.keys(), np[i]) and failed at runtime.
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # A path lists its nodes, so its length in edges is one less.
            distances[i, j] = len(path) - 1
    return distances