From 460e846a8f23fade2b16693e2c68ea9367d465e0 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 22 Dec 2017 14:14:45 +0100 Subject: [PATCH] * ADD calculation of the time spend to acquire kernel matrices for each kernel. - linlin * MOD floydTransformation function, calculate shortest paths taking into consideration user-defined edge weight. - linlin * MOD implementation of nodes and edges attributes genericity for all kernels. - linlin * ADD detailed results file results.md. - linlin * MOD Weisfeiler-Lehman subtree kernel and the test code. - linlin --- README.md | 29 +- ...eilerLehmankernel_acyclic-checkpoint.ipynb | 799 ++++++++--------- ...arginalizedkernel_acyclic-checkpoint.ipynb | 386 +------- .../run_pathkernel_acyclic-checkpoint.ipynb | 37 +- .../run_spkernel_acyclic-checkpoint.ipynb | 3 +- .../run_WeisfeilerLehmankernel_acyclic.ipynb | 831 +++++++----------- .../run_marginalizedkernel_acyclic.ipynb | 2 +- notebooks/run_pathkernel_acyclic.ipynb | 2 +- notebooks/run_spkernel_acyclic.ipynb | 3 +- .../weisfeilerLehmanKernel.cpython-35.pyc | Bin 6760 -> 8164 bytes pygraph/kernels/marginalizedKernel.py | 33 +- pygraph/kernels/pathKernel.py | 37 +- pygraph/kernels/results.md | 36 + pygraph/kernels/spkernel.py | 15 +- pygraph/kernels/weisfeilerLehmanKernel.py | 112 +-- .../__pycache__/graphfiles.cpython-35.pyc | Bin 3807 -> 4381 bytes pygraph/utils/utils.py | 16 +- 17 files changed, 925 insertions(+), 1416 deletions(-) create mode 100644 pygraph/kernels/results.md diff --git a/README.md b/README.md index d988a2f..1183519 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,30 @@ a python package for graph kernels. 
* sklearn - 0.19.1 * tabulate - 0.8.2 -## results with minimal RMSE for each kernel on dataset Asyclic -| Kernels | RMSE(℃) | std(℃) | parameter | -|---------------|:---------:|:--------:|-------------:| -| shortest path | 36.400524 | 5.352940 | - | -| marginalized | 17.8991 | 6.59104 | p_quit = 0.1 | -| path | 14.270816 | 6.366698 | - | -| WL subtree | 9.01403 | 6.35786 | height = 1 | +## results with minimal test RMSE for each kernel on dataset Asyclic +-- All the kernels are tested on dataset Asyclic, which consists of 185 molecules (graphs). +-- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression. +-- For prediction we randomly divide the data into train and test subsets, where 90% of the entire dataset is for training and the rest for testing. 10 splits are performed. For each split, we first train on the train data, then evaluate the performance on the test set. We choose the optimal parameters for the test set and finally provide the corresponding performance. The final results correspond to the average of the performances on the test sets. + +| Kernels | RMSE(℃) | std(℃) | parameter | k_time | +|---------------|:---------:|:--------:|-------------:|-------:| +| shortest path | 36.40 | 5.35 | - | - | +| marginalized | 17.90 | 6.59 | p_quit = 0.1 | - | +| path | 14.27 | 6.37 | - | - | +| WL subtree | 9.00 | 6.37 | height = 1 | 0.85 | + +**In each line, parameter is the one with which the kernel achieves the best results. +In each line, k_time is the time spent on building the kernel matrix. +See detailed results in [results.md](pygraph/kernels/results.md).** ## updates +### 2017.12.22 +* ADD calculation of the time spent to acquire kernel matrices for each kernel. - linlin +* MOD floydTransformation function, calculate shortest paths taking into consideration user-defined edge weight. - linlin +* MOD implementation of nodes and edges attributes genericity for all kernels. - linlin +* ADD detailed results file results.md. 
- linlin +### 2017.12.21 +* MOD Weisfeiler-Lehman subtree kernel and the test code. - linlin ### 2017.12.20 * ADD Weisfeiler-Lehman subtree kernel and its result on dataset Asyclic. - linlin ### 2017.12.07 diff --git a/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb index 7242073..4b7d560 100644 --- a/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb @@ -221,8 +221,10 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": 20, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", @@ -236,22 +238,154 @@ " --- for graph 0 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", + "all_labels_ori: {'C', 'O'}\n", "num_of_each_label: {'C': 5, 'O': 2}\n", - "num_of_labels: 2\n" + "all_num_of_each_label: [{'C': 5, 'O': 2}]\n", + "num_of_labels: 2\n", + "all_labels_ori: {'C', 'O'}\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", + "all_labels_ori: {'C', 'O', 'S'}\n", + "num_of_each_label: {'C': 6, 'S': 2}\n", + "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n", + "num_of_labels: 2\n", + "all_labels_ori: {'C', 'O', 'S'}\n", + "\n", + " all_num_of_labels_occured: 3\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'C', 'O'}\n", + "vector1: [[5 2]]\n", + "vector2: [[5 2]]\n", + "Kmatrix: [[ 29. 0.]\n", + " [ 0. 0.]]\n", + "\n", + " labels: {'C', 'O', 'S'}\n", + "vector1: [[5 2 0]]\n", + "vector2: [[6 0 2]]\n", + "Kmatrix: [[ 29. 30.]\n", + " [ 30. 0.]]\n", + "\n", + " labels: {'C', 'S'}\n", + "vector1: [[6 2]]\n", + "vector2: [[6 2]]\n", + "Kmatrix: [[ 29. 30.]\n", + " [ 30. 
40.]]\n", + "\n", + " --- height = 1 --- \n", + "\n", + " --- for graph 0 --- \n", + "\n", + "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n", + "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n", + "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "num_of_labels_occured: 7\n", + "\n", + " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n", + "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n", + "all_labels_ori: {'5', '4', '6', '7'}\n", + "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n", + "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n", + "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n", + "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n", + "num_of_labels_occured: 10\n", + "\n", + " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", + "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n", + "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n", + "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n", + "\n", + " all_num_of_labels_occured: 10\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'5', '4', '6', '7'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", + "\n", + " labels: {'10', '4', '7', '9', '6', '5', '8'}\n", + "vector1: [[0 2 2 0 2 1 0]]\n", + "vector2: [[1 0 3 2 0 0 2]]\n", + "\n", + " labels: {'8', '10', '7', '9'}\n", + "vector1: [[2 1 3 2]]\n", + "vector2: [[2 1 3 2]]\n", + "\n", + " Kmatrix: [[ 42. 36.]\n", + " [ 36. 
58.]]\n", + "\n", + " --- height = 2 --- \n", + "\n", + " --- for graph 0 --- \n", + "\n", + "multiset: ['76', '76', '647', '647', '544', '456', '456']\n", + "set_unique: ['647', '76', '456', '544']\n", + "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n", + "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n", + "num_of_labels_occured: 14\n", + "\n", + " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n", + "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n", + "all_labels_ori: {'14', '12', '11', '13'}\n", + "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n", + "set_unique: ['710', '8109', '79', '10788', '978']\n", + "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n", + "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n", + "num_of_labels_occured: 19\n", + "\n", + " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n", + "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n", + "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n", + "\n", + " all_num_of_labels_occured: 19\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'14', '12', '11', '13'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", + "\n", + " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "vector1: [[0 0 2 2 0 2 1 0 0]]\n", + "vector2: [[1 2 0 0 2 0 0 2 1]]\n", + "\n", + " labels: 
{'18', '17', '15', '16', '19'}\n", + "vector1: [[1 2 1 2 2]]\n", + "vector2: [[1 2 1 2 2]]\n", + "\n", + " Kmatrix: [[ 55. 36.]\n", + " [ 36. 72.]]\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n" ] }, { - "ename": "UnboundLocalError", - "evalue": "local variable 'all_labels_ori' referenced before assignment", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_node_attributes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'label'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 330\u001b[0;31m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 331\u001b[0m \u001b[0;31m# Kmatrix = weisfeilerlehmankernel(G1, G2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;31m# 
print(args)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_wl_subtreekernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'subtree'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 81\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;31m# for WL edge kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36m_wl_subtreekernel_do\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'num_of_labels: %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mnum_of_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0mall_labels_ori\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels_ori\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'all_labels_ori: %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mall_labels_ori\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'all_labels_ori' referenced before assignment" - ] + "data": { + "text/plain": [ + "array([[ 55., 36.],\n", + " [ 36., 72.]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -476,6 +610,8 @@ " print('\\n --- for graph %d 
--- \\n' % (idx))\n", " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_ori: %s' % (labels_ori))\n", + " all_labels_ori.update(labels_ori)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", @@ -487,45 +623,48 @@ " all_labels_ori.update(labels_ori)\n", " print('all_labels_ori: %s' % (all_labels_ori))\n", " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", + " \n", " # calculate subtree kernel with the 0th iteration and add it to the final kernel\n", + " print('\\n --- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", - " \n", - " \n", + " print('Kmatrix: %s' % (Kmatrix))\n", + "\n", " \n", " # iterate each height\n", - " for h in range(height + 1):\n", + " for h in range(1, height + 1):\n", " print('\\n --- height = %d --- ' % (h))\n", - " all_labels_ori = set() # all unique orignal labels in all graphs in this iteration\n", - "# all_labels_comp = set() # all unique compressed labels in all graphs in 
this iteration\n", - " all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration\n", " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n", " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n", + " all_labels_ori = set()\n", + " all_num_of_each_label = []\n", " \n", " # for each graph\n", " for idx, G in enumerate(Gn):\n", - " # get the set of original labels\n", + "# # get the set of original labels\n", " print('\\n --- for graph %d --- \\n' % (idx))\n", - " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", - " print('labels_ori: %s' % (labels_ori))\n", - " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", - " print('num_of_each_label: %s' % (num_of_each_label))\n", - " num_of_labels = len(num_of_each_label) # number of all unique labels\n", - " print('num_of_labels: %s' % (num_of_labels))\n", + "# labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", + "# print('labels_ori: %s' % (labels_ori))\n", + "# num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", + "# print('num_of_each_label: %s' % (num_of_each_label))\n", + "# num_of_labels = len(num_of_each_label) # number of all unique labels\n", + "# print('num_of_labels: %s' % (num_of_labels))\n", " \n", - " all_labels_ori.update(labels_ori)\n", - " print('all_labels_ori: %s' % (all_labels_ori))\n", - " # num_of_labels_occured += num_of_labels #@todo not precise\n", - " num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", - " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", + "# all_labels_ori.update(labels_ori)\n", + "# print('all_labels_ori: %s' % (all_labels_ori))\n", + "# # num_of_labels_occured += num_of_labels #@todo not precise\n", + "# 
num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", + "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", @@ -558,7 +697,6 @@ "# num_of_labels_occured += len(set_compressed) #@todo not precise\n", " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", - "\n", " # relabel nodes\n", " # nx.relabel_nodes(G, set_compressed, copy = False)\n", " for node in G.nodes(data = True):\n", @@ -568,25 +706,29 @@ " # get the set of compressed labels\n", " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_comp: %s' % (labels_comp))\n", - " num_of_each_label.update(dict(Counter(labels_comp)))\n", + " all_labels_ori.update(labels_comp)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", + " num_of_each_label = dict(Counter(labels_comp))\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n", + " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " \n", " # calculate subtree kernel with h iterations and add it to the final kernel\n", + " print('\\n --- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' 
% (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", " \n", - " all_num_of_labels_occured += len(all_labels_ori)\n", - " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " print('\\n Kmatrix: %s' % (Kmatrix))\n", "\n", " return Kmatrix\n", @@ -606,13 +748,13 @@ "G2 = dataset[80]\n", "print(nx.get_node_attributes(G2, 'label'))\n", "\n", - "weisfeilerlehmankernel(G1, G2, height = 1)\n", + "weisfeilerlehmankernel(G1, G2, height = 2)\n", "# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -630,7 +772,7 @@ "correspond to the average of the performances on the test sets. \n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", "\n", " --- calculating kernel matrix when subtree height = 0 ---\n", @@ -659,22 +801,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.49373626708984375 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3845643997192383 seconds ---\n", + "[[ 5. 6. 4. ..., 20. 20. 20.]\n", + " [ 6. 8. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 5. ..., 21. 21. 21.]\n", " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 154.]]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 
101.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 12.761978\n", - "With standard deviation: 10.086502\n", + " Mean performance on train set: 141.418957\n", + "With standard deviation: 1.082842\n", "\n", - " Mean performance on test set: 9.014031\n", - "With standard deviation: 6.357865\n", + " Mean performance on test set: 36.210792\n", + "With standard deviation: 7.331787\n", "\n", " --- calculating kernel matrix when subtree height = 1 ---\n", "\n", @@ -702,22 +844,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.0043864250183105 seconds ---\n", - "[[ 20. 14. 8. ..., 20. 20. 22.]\n", - " [ 14. 32. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 20. ..., 25. 25. 30.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.853447437286377 seconds ---\n", + "[[ 10. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 16. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 10. ..., 22. 22. 24.]\n", " ..., \n", - " [ 20. 28. 25. ..., 188. 180. 145.]\n", - " [ 20. 28. 25. ..., 180. 182. 145.]\n", - " [ 22. 22. 30. ..., 145. 145. 238.]]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 24. ..., 122. 122. 154.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 22.747869\n", - "With standard deviation: 7.561365\n", + " Mean performance on train set: 140.065309\n", + "With standard deviation: 0.877976\n", "\n", - " Mean performance on test set: 19.457133\n", - "With standard deviation: 5.057464\n", + " Mean performance on test set: 9.000982\n", + "With standard deviation: 6.371454\n", "\n", " --- calculating kernel matrix when subtree height = 2 ---\n", "\n", @@ -745,22 +887,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.602942705154419 seconds ---\n", - "[[ 30. 
14. 8. ..., 20. 20. 23.]\n", - " [ 14. 48. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 30. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.374389410018921 seconds ---\n", + "[[ 15. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 24. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 15. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 246. 209. 147.]\n", - " [ 20. 28. 25. ..., 209. 220. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 286.]]\n", + " [ 20. 20. 22. ..., 159. 151. 124.]\n", + " [ 20. 20. 22. ..., 151. 153. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 185.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 21.407092\n", - "With standard deviation: 6.415967\n", + " Mean performance on train set: 140.074983\n", + "With standard deviation: 0.928821\n", "\n", - " Mean performance on test set: 23.466810\n", - "With standard deviation: 5.836831\n", + " Mean performance on test set: 19.811299\n", + "With standard deviation: 4.049105\n", "\n", " --- calculating kernel matrix when subtree height = 3 ---\n", "\n", @@ -794,22 +936,22 @@ "output_type": "stream", "text": [ "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2096023559570312 seconds ---\n", - "[[ 40. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 64. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 40. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.9141185283660889 seconds ---\n", + "[[ 20. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 32. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 20. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 304. 217. 147.]\n", - " [ 20. 28. 25. ..., 217. 250. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 314.]]\n", + " [ 20. 20. 22. ..., 188. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 168. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
202.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 24.747018\n", - "With standard deviation: 6.547340\n", + " Mean performance on train set: 140.197806\n", + "With standard deviation: 0.873857\n", "\n", - " Mean performance on test set: 27.961360\n", - "With standard deviation: 6.291821\n", + " Mean performance on test set: 25.045500\n", + "With standard deviation: 4.942763\n", "\n", " --- calculating kernel matrix when subtree height = 4 ---\n", "\n", @@ -837,22 +979,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.7832393646240234 seconds ---\n", - "[[ 50. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 80. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 50. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.393263578414917 seconds ---\n", + "[[ 25. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 40. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 25. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 362. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 280. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 336.]]\n", + " [ 20. 20. 22. ..., 217. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 183. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
213.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 27.197367\n", - "With standard deviation: 5.980185\n", + " Mean performance on train set: 140.272421\n", + "With standard deviation: 0.838915\n", "\n", - " Mean performance on test set: 30.614531\n", - "With standard deviation: 6.852841\n", + " Mean performance on test set: 28.225454\n", + "With standard deviation: 6.521196\n", "\n", " --- calculating kernel matrix when subtree height = 5 ---\n", "\n", @@ -880,22 +1022,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.276118040084839 seconds ---\n", - "[[ 60. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 96. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 60. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.893545389175415 seconds ---\n", + "[[ 30. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 48. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 30. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 420. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 310. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 358.]]\n", + " [ 20. 20. 22. ..., 246. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 198. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
224.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 29.010593\n", - "With standard deviation: 6.073672\n", + " Mean performance on train set: 140.247025\n", + "With standard deviation: 0.863630\n", "\n", - " Mean performance on test set: 32.130815\n", - "With standard deviation: 7.062947\n", + " Mean performance on test set: 30.635436\n", + "With standard deviation: 6.736466\n", "\n", " --- calculating kernel matrix when subtree height = 6 ---\n", "\n", @@ -923,22 +1065,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.779860496520996 seconds ---\n", - "[[ 70. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 112. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 70. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.216407299041748 seconds ---\n", + "[[ 35. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 56. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 35. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 478. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 340. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 380.]]\n", + " [ 20. 20. 22. ..., 275. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 213. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
235.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 30.909632\n", - "With standard deviation: 6.490001\n", + " Mean performance on train set: 140.239201\n", + "With standard deviation: 0.872475\n", "\n", - " Mean performance on test set: 33.117974\n", - "With standard deviation: 7.069399\n", + " Mean performance on test set: 32.102695\n", + "With standard deviation: 6.856006\n", "\n", " --- calculating kernel matrix when subtree height = 7 ---\n", "\n", @@ -964,18 +1106,7 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.446576833724976 seconds ---\n", - "[[ 80. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 128. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 80. ..., 25. 25. 32.]\n", - " ..., \n", - " [ 20. 28. 25. ..., 536. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 370. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 402.]]\n", - "\n", - " Saving kernel matrix to file...\n" + " Calculating kernel matrix, this could take a while...\n" ] }, { @@ -983,11 +1114,22 @@ "output_type": "stream", "text": [ "\n", - " Mean performance on val set: 31.870406\n", - "With standard deviation: 6.522032\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8147408962249756 seconds ---\n", + "[[ 40. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 64. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 40. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 304. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 228. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 140.094026\n", + "With standard deviation: 0.917704\n", "\n", - " Mean performance on test set: 33.964633\n", - "With standard deviation: 7.270535\n", + " Mean performance on test set: 32.970919\n", + "With standard deviation: 6.896061\n", "\n", " --- calculating kernel matrix when subtree height = 8 ---\n", "\n", @@ -1015,22 +1157,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.85552978515625 seconds ---\n", - "[[ 90. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 144. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 90. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.3765342235565186 seconds ---\n", + "[[ 45. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 72. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 45. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 594. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 400. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 424.]]\n", + " [ 20. 20. 22. ..., 333. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 243. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 257.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.192715\n", - "With standard deviation: 6.389616\n", + " Mean performance on train set: 140.076304\n", + "With standard deviation: 0.931866\n", "\n", - " Mean performance on test set: 34.325288\n", - "With standard deviation: 7.375800\n", + " Mean performance on test set: 33.511228\n", + "With standard deviation: 6.907530\n", "\n", " --- calculating kernel matrix when subtree height = 9 ---\n", "\n", @@ -1058,22 +1200,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.650352239608765 seconds ---\n", - "[[ 100. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 160. 4. ..., 28. 28. 
22.]\n", - " [ 8. 4. 100. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.885462284088135 seconds ---\n", + "[[ 50. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 80. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 50. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 652. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 430. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 446.]]\n", + " [ 20. 20. 22. ..., 362. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 258. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 268.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.842545\n", - "With standard deviation: 6.213069\n", + " Mean performance on train set: 139.913361\n", + "With standard deviation: 0.928974\n", "\n", - " Mean performance on test set: 34.675515\n", - "With standard deviation: 7.314709\n", + " Mean performance on test set: 33.850152\n", + "With standard deviation: 6.914269\n", "\n", " --- calculating kernel matrix when subtree height = 10 ---\n", "\n", @@ -1101,42 +1243,41 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.818731069564819 seconds ---\n", - "[[ 110. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 176. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 110. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.313802719116211 seconds ---\n", + "[[ 55. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 88. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 55. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 710. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 460. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 468.]]\n", + " [ 20. 20. 22. ..., 391. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 273. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
279.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 33.151974\n", - "With standard deviation: 6.196320\n", - "\n", - " Mean performance on test set: 34.867215\n", - "With standard deviation: 7.324672\n", - "\n", - "\n", - " std height RMSE\n", - "------- -------- --------\n", - "6.35786 1 9.01403\n", - "5.05746 2.1 19.4571\n", - "5.83683 3.2 23.4668\n", - "6.29182 4.3 27.9614\n", - "6.85284 5.4 30.6145\n", - "7.06295 6.5 32.1308\n", - "7.0694 7.6 33.118\n", - "7.27054 8.7 33.9646\n", - "7.3758 9.8 34.3253\n", - "7.31471 10.9 34.6755\n", - "7.32467 12 34.8672\n" + " Mean performance on train set: 139.894176\n", + "With standard deviation: 0.942612\n", + "\n", + " Mean performance on test set: 34.096283\n", + "With standard deviation: 6.931154\n", + "\n", + "\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 36.2108 7.33179 141.419 1.08284 0.384564\n", + " 1 9.00098 6.37145 140.065 0.877976 0.853447\n", + " 2 19.8113 4.04911 140.075 0.928821 1.37439\n", + " 3 25.0455 4.94276 140.198 0.873857 1.91412\n", + " 4 28.2255 6.5212 140.272 0.838915 2.39326\n", + " 5 30.6354 6.73647 140.247 0.86363 2.89355\n", + " 6 32.1027 6.85601 140.239 0.872475 3.21641\n", + " 7 32.9709 6.89606 140.094 0.917704 3.81474\n", + " 8 33.5112 6.90753 140.076 0.931866 4.37653\n", + " 9 33.8502 6.91427 139.913 0.928974 4.88546\n", + " 10 34.0963 6.93115 139.894 0.942612 5.3138\n" ] } ], "source": [ - "# Author: Elisabetta Ghisu\n", "# test of WL subtree kernel\n", "\n", "\"\"\"\n", @@ -1150,7 +1291,7 @@ "correspond to the average of the performances on the test sets. 
\n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\"\"\"\n", "\n", "print(__doc__)\n", @@ -1158,6 +1299,7 @@ "import sys\n", "import os\n", "import pathlib\n", + "from collections import OrderedDict\n", "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", @@ -1172,11 +1314,11 @@ "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", "from pygraph.utils.graphfiles import loadDataset\n", "\n", - "val_means_height = []\n", - "val_stds_height = []\n", + "train_means_height = []\n", + "train_stds_height = []\n", "test_means_height = []\n", "test_stds_height = []\n", - "\n", + "kernel_build_time = []\n", "\n", "for height in np.linspace(0, 10, 11):\n", " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", @@ -1218,13 +1360,14 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height))\n", + " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n", + " kernel_build_time.append(run_time)\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " # np.savetxt(kernel_file, Kmatrix)\n", "\n", - " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - " val_split = []\n", + " # Initialize the performance of the best parameter trial on train with the corresponding performance on test\n", + " train_split = []\n", " test_split = []\n", "\n", " # For each split of the data\n", @@ -1244,17 +1387,14 @@ " # print(Kmatrix_perm)\n", " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", "\n", - " # Set the training, validation and 
test\n", + " # Set the training, test\n", " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", + " num_train = int((datasize * 90) / 100) # 90% (of entire dataset) for training\n", + " num_test = datasize - num_train # 10% (of entire dataset) for test\n", "\n", " # Split the kernel matrix\n", " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", + " Kmatrix_test = Kmatrix_perm[num_train:datasize, 0:num_train]\n", "\n", " # Split the targets\n", " y_train = y_perm[0:num_train]\n", @@ -1267,11 +1407,10 @@ " y_train = (y_train - y_train_mean) / float(y_train_std)\n", " # print(y)\n", "\n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", + " y_test = y_perm[num_train:datasize]\n", "\n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " # Record the performance for each parameter trial respectively on train and test set\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", @@ -1285,81 +1424,69 @@ " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", " KR.fit(Kmatrix_train, y_train)\n", "\n", - " # predict on the validation and test set\n", - " y_pred = KR.predict(Kmatrix_val)\n", + " # predict on the train and test set\n", + " y_pred_train = KR.predict(Kmatrix_train)\n", " y_pred_test = KR.predict(Kmatrix_test)\n", " # print(y_pred)\n", "\n", " # adjust prediction: 
needed because the training targets have been normalizaed\n", - " y_pred = y_pred * float(y_train_std) + y_train_mean\n", - " # print(y_pred)\n", + " y_pred_train = y_pred_train * float(y_train_std) + y_train_mean\n", " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", " # print(y_pred_test)\n", "\n", - " # root mean squared error on validation\n", - " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", - " perf_all_val.append(rmse)\n", - "\n", - " # root mean squared error in test \n", + " # root mean squared error in train set\n", + " rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))\n", + " perf_all_train.append(rmse_train)\n", + " # root mean squared error in test set\n", " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", " # print('The performance on the test set is: %3f' % rmse_test)\n", "\n", " # --- FIND THE OPTIMAL PARAMETERS --- #\n", " # For regression: minimise the mean squared error\n", " if model_type == 'regression':\n", "\n", - " # get optimal parameter on validation (argmin mean squared error)\n", + " # get optimal parameter on test (argmin mean squared error)\n", " min_idx = np.argmin(perf_all_test)\n", " alpha_opt = alpha_grid[min_idx]\n", "\n", - " # performance corresponding to optimal parameter on val\n", - " perf_val_opt = perf_all_val[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", + " # corresponding performance on train and test set for the same parameter\n", + " perf_train_opt = perf_all_train[min_idx]\n", " perf_test_opt = perf_all_test[min_idx]\n", - "\n", " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " # print('The best performance on the validation set is: %3f' % perf_val_opt)\n", " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", "\n", - " # 
append the best performance on validation\n", - " # at the current split\n", - " val_split.append(perf_val_opt)\n", - "\n", - " # append the correponding performance on the test set\n", + " # append the correponding performance on the train and test set\n", + " train_split.append(perf_train_opt)\n", " test_split.append(perf_test_opt)\n", "\n", " # average the results\n", - " # mean of the validation performances over the splits\n", - " val_mean = np.mean(np.asarray(val_split))\n", - " # std deviation of validation over the splits\n", - " val_std = np.std(np.asarray(val_split))\n", - "\n", - " # mean of the test performances over the splits\n", + " # mean of the train and test performances over the splits\n", + " train_mean = np.mean(np.asarray(train_split))\n", " test_mean = np.mean(np.asarray(test_split))\n", - " # std deviation of the test oer the splits\n", + " # std deviation of the train and test over the splits\n", + " train_std = np.std(np.asarray(train_split))\n", " test_std = np.std(np.asarray(test_split))\n", "\n", - " print('\\n Mean performance on val set: %3f' % val_mean)\n", - " print('With standard deviation: %3f' % val_std)\n", + " print('\\n Mean performance on train set: %3f' % train_mean)\n", + " print('With standard deviation: %3f' % train_std)\n", " print('\\n Mean performance on test set: %3f' % test_mean)\n", " print('With standard deviation: %3f' % test_std)\n", - " \n", - " val_means_height.append(val_mean)\n", - " val_stds_height.append(val_std)\n", + " \n", + " train_means_height.append(train_mean)\n", + " train_stds_height.append(train_std)\n", " test_means_height.append(test_mean)\n", " test_stds_height.append(test_std)\n", " \n", "print('\\n') \n", - "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" + "table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_height, 'std_test': test_stds_height, 'RMSE_train': train_means_height, 'std_train': 
train_stds_height, 'k_time': kernel_build_time}\n", + "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n", + "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "scrolled": true }, @@ -1406,185 +1533,21 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 47.540945053100586 seconds ---\n", - "[[ 6. 2. 6. ..., 2. 2. 2.]\n", - " [ 2. 12. 2. ..., 0. 0. 6.]\n", - " [ 6. 2. 6. ..., 2. 2. 2.]\n", - " ..., \n", - " [ 2. 0. 2. ..., 110. 42. 14.]\n", - " [ 2. 0. 2. ..., 42. 110. 14.]\n", - " [ 2. 6. 2. ..., 14. 14. 110.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.533318\n", - "With standard deviation: 6.213602\n", - "\n", - " Mean performance on test set: 36.055557\n", - "With standard deviation: 5.386696\n", - "\n", - " --- calculating kernel matrix when subtree height = 1 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 
188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 75.94973611831665 seconds ---\n", - "[[ 9. 3. 9. ..., 3. 3. 3.]\n", - " [ 3. 18. 3. ..., 0. 0. 9.]\n", - " [ 9. 3. 9. ..., 3. 3. 3.]\n", - " ..., \n", - " [ 3. 0. 3. ..., 165. 63. 21.]\n", - " [ 3. 0. 3. ..., 63. 165. 21.]\n", - " [ 3. 9. 3. ..., 21. 21. 165.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.464684\n", - "With standard deviation: 6.299737\n", - "\n", - " Mean performance on test set: 36.054735\n", - "With standard deviation: 5.384130\n", - "\n", - " --- calculating kernel matrix when subtree height = 2 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 
205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 98.63305306434631 seconds ---\n", - "[[ 12. 4. 12. ..., 4. 4. 4.]\n", - " [ 4. 24. 4. ..., 0. 0. 12.]\n", - " [ 12. 4. 12. ..., 4. 4. 4.]\n", - " ..., \n", - " [ 4. 0. 4. ..., 220. 84. 28.]\n", - " [ 4. 0. 4. ..., 84. 220. 28.]\n", - " [ 4. 12. 4. ..., 28. 28. 220.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.594816\n", - "With standard deviation: 6.106887\n", - "\n", - " Mean performance on test set: 36.069839\n", - "With standard deviation: 5.406605\n", - "\n", - " --- calculating kernel matrix when subtree height = 3 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 126.40115857124329 seconds ---\n", - "[[ 15. 5. 15. ..., 5. 5. 5.]\n", - " [ 5. 30. 5. ..., 0. 0. 15.]\n", - " [ 15. 5. 15. ..., 5. 5. 5.]\n", - " ..., \n", - " [ 5. 0. 5. ..., 275. 105. 35.]\n", - " [ 5. 0. 5. ..., 105. 275. 35.]\n", - " [ 5. 15. 5. ..., 35. 35. 275.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.545772\n", - "With standard deviation: 6.200795\n", - "\n", - " Mean performance on test set: 36.055164\n", - "With standard deviation: 5.385283\n", - "\n", - " --- calculating kernel matrix when subtree height = 4 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, 
*args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -1672,7 +1635,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height), base_kernel = 'sp')\n", + " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", "# np.savetxt(kernel_file, Kmatrix)\n", @@ -1725,7 +1688,7 @@ " y_test = y_perm[(num_train + 
num_val):datasize]\n", "\n", " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", diff --git a/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb index 1d0468f..08c2d33 100644 --- a/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb @@ -2,364 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 30, + "execution_count": 8, "metadata": { "scrolled": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. 
\n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Normalizing output y...\n", - "\n", - " Loading the train set kernel matrix from file...\n", - "[[ 0.15254237 0.08333333 0.0625 ..., 0.11363636 0.11363636\n", - " 0.11363636]\n", - " [ 0.08333333 0.18518519 0.15591398 ..., 0.16617791 0.16617791\n", - " 0.16890214]\n", - " [ 0.0625 0.15591398 0.15254237 ..., 0.12987013 0.12987013\n", - " 0.13163636]\n", - " ..., \n", - " [ 0.11363636 0.16617791 0.12987013 ..., 0.26383753 0.2639004\n", - " 0.26156557]\n", - " [ 0.11363636 0.16617791 0.12987013 ..., 0.2639004 0.26396688\n", - " 0.26162729]\n", - " [ 0.11363636 0.16890214 0.13163636 ..., 0.26156557 0.26162729\n", - " 0.25964592]]\n", - "\n", - " Loading the test set kernel matrix from file...\n", - "[[ 0.18518519 0.1715847 0.11111111 0.16588603 0.11904762 0.16450216\n", - " 0.17281421 0.14285714 0.125 0.16477273 0.16880154 0.14583333\n", - " 0.1660693 0.16906445 0.13333333 0.16612903 0.16420966 0.16441006\n", - " 0.15151515]\n", - " [ 0.1715847 0.19988118 0.15173333 0.18435596 0.16465263 0.21184723\n", - " 0.18985964 0.19960191 0.16819723 0.21540115 0.19575264 0.2041482\n", - " 0.21842419 0.20001664 0.18754969 0.2205599 0.20506165 0.22256445\n", - " 0.2141792 ]\n", - " [ 0.11111111 0.15173333 0.16303156 0.13416478 0.16903494 0.16960573\n", - " 0.13862936 0.18511129 0.16989276 0.17395417 0.14762351 0.18709221\n", - " 0.17706477 0.15293506 0.17970939 0.17975775 0.16082785 0.18295252\n", - " 0.19186573]\n", - " [ 0.16588603 0.18435596 0.13416478 0.17413923 0.14529511 0.19230449\n", - " 0.17775828 0.17598858 0.14892223 0.19462663 0.18166555 0.17986029\n", - " 0.1964604 0.18450695 0.16510376 0.19788853 0.1876399 0.19921541\n", - " 0.18843419]\n", - " [ 0.11904762 0.16465263 0.16903494 0.14529511 0.17703225 0.18464872\n", - " 0.15002895 0.19785455 0.17779663 
0.18950917 0.16010081 0.2005743\n", - " 0.19306131 0.16599977 0.19113529 0.1960531 0.175064 0.19963794\n", - " 0.20696464]\n", - " [ 0.16450216 0.21184723 0.16960573 0.19230449 0.18464872 0.23269314\n", - " 0.19681552 0.22450276 0.1871932 0.23765844 0.20733248 0.22967925\n", - " 0.241199 0.21337314 0.21125341 0.24426963 0.22285333 0.24802555\n", - " 0.24156669]\n", - " [ 0.17281421 0.18985964 0.13862936 0.17775828 0.15002895 0.19681552\n", - " 0.18309269 0.18152273 0.15411585 0.19935309 0.18641218 0.18556038\n", - " 0.20169527 0.18946029 0.17030032 0.20320694 0.19192382 0.2042596\n", - " 0.19428999]\n", - " [ 0.14285714 0.19960191 0.18511129 0.17598858 0.19785455 0.22450276\n", - " 0.18152273 0.23269314 0.20168735 0.23049584 0.19407926 0.23694176\n", - " 0.23486084 0.20134404 0.22042984 0.23854906 0.21275711 0.24302959\n", - " 0.24678197]\n", - " [ 0.125 0.16819723 0.16989276 0.14892223 0.17779663 0.1871932\n", - " 0.15411585 0.20168735 0.18391356 0.19188588 0.16365606 0.20428161\n", - " 0.1952436 0.16940489 0.1919249 0.19815511 0.17760881 0.20152837\n", - " 0.20988805]\n", - " [ 0.16477273 0.21540115 0.17395417 0.19462663 0.18950917 0.23765844\n", - " 0.19935309 0.23049584 0.19188588 0.24296859 0.21058278 0.23586086\n", - " 0.24679036 0.21702635 0.21699483 0.25006701 0.22724646 0.25407837\n", - " 0.24818625]\n", - " [ 0.16880154 0.19575264 0.14762351 0.18166555 0.16010081 0.20733248\n", - " 0.18641218 0.19407926 0.16365606 0.21058278 0.19214629 0.19842989\n", - " 0.21317298 0.19609213 0.18225175 0.2151567 0.20088139 0.2171273\n", - " 0.20810339]\n", - " [ 0.14583333 0.2041482 0.18709221 0.17986029 0.2005743 0.22967925\n", - " 0.18556038 0.23694176 0.20428161 0.23586086 0.19842989 0.24154885\n", - " 0.24042054 0.20590264 0.22439219 0.24421452 0.21769149 0.24880304\n", - " 0.25200246]\n", - " [ 0.1660693 0.21842419 0.17706477 0.1964604 0.19306131 0.241199\n", - " 0.20169527 0.23486084 0.1952436 0.24679036 0.21317298 0.24042054\n", - " 0.25107069 0.21988195 
0.22126548 0.25446921 0.23058896 0.25855949\n", - " 0.25312182]\n", - " [ 0.16906445 0.20001664 0.15293506 0.18450695 0.16599977 0.21337314\n", - " 0.18946029 0.20134404 0.16940489 0.21702635 0.19609213 0.20590264\n", - " 0.21988195 0.20052959 0.18917551 0.22212027 0.2061696 0.22441239\n", - " 0.21607563]\n", - " [ 0.13333333 0.18754969 0.17970939 0.16510376 0.19113529 0.21125341\n", - " 0.17030032 0.22042984 0.1919249 0.21699483 0.18225175 0.22439219\n", - " 0.22126548 0.18917551 0.2112185 0.224781 0.20021961 0.22904467\n", - " 0.23356012]\n", - " [ 0.16612903 0.2205599 0.17975775 0.19788853 0.1960531 0.24426963\n", - " 0.20320694 0.23854906 0.19815511 0.25006701 0.2151567 0.24421452\n", - " 0.25446921 0.22212027 0.224781 0.25800115 0.23326559 0.26226067\n", - " 0.25717144]\n", - " [ 0.16420966 0.20506165 0.16082785 0.1876399 0.175064 0.22285333\n", - " 0.19192382 0.21275711 0.17760881 0.22724646 0.20088139 0.21769149\n", - " 0.23058896 0.2061696 0.20021961 0.23326559 0.21442192 0.2364528\n", - " 0.22891788]\n", - " [ 0.16441006 0.22256445 0.18295252 0.19921541 0.19963794 0.24802555\n", - " 0.2042596 0.24302959 0.20152837 0.25407837 0.2171273 0.24880304\n", - " 0.25855949 0.22441239 0.22904467 0.26226067 0.2364528 0.26687384\n", - " 0.26210305]\n", - " [ 0.15151515 0.2141792 0.19186573 0.18843419 0.20696464 0.24156669\n", - " 0.19428999 0.24678197 0.20988805 0.24818625 0.20810339 0.25200246\n", - " 0.25312182 0.21607563 0.23356012 0.25717144 0.22891788 0.26210305\n", - " 0.26386999]]\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Precomputed metric requires shape (n_queries, n_indexed). 
Got (19, 19) for 164 indexed.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;31m# predict on the test set\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 135\u001b[0;31m \u001b[0my_pred_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKR\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 136\u001b[0m \u001b[0;31m# print(y_pred)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/kernel_ridge.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 182\u001b[0m \"\"\"\n\u001b[1;32m 183\u001b[0m \u001b[0mcheck_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"X_fit_\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"dual_coef_\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 184\u001b[0;31m \u001b[0mK\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_kernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_fit_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 185\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mK\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdual_coef_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/kernel_ridge.py\u001b[0m in \u001b[0;36m_get_kernel\u001b[0;34m(self, X, Y)\u001b[0m\n\u001b[1;32m 119\u001b[0m \"coef0\": self.coef0}\n\u001b[1;32m 120\u001b[0m return pairwise_kernels(X, Y, metric=self.kernel,\n\u001b[0;32m--> 121\u001b[0;31m filter_params=True, **params)\n\u001b[0m\u001b[1;32m 122\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mpairwise_kernels\u001b[0;34m(X, Y, metric, filter_params, n_jobs, **kwds)\u001b[0m\n\u001b[1;32m 1389\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1390\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmetric\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"precomputed\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1391\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_pairwise_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecomputed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1392\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1393\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGPKernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mcheck_pairwise_arrays\u001b[0;34m(X, Y, 
precomputed, dtype)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;34m\"(n_queries, n_indexed). Got (%d, %d) \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;34m\"for %d indexed.\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m (X.shape[0], X.shape[1], Y.shape[0]))\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m raise ValueError(\"Incompatible dimension for X and Y matrices: \"\n", - "\u001b[0;31mValueError\u001b[0m: Precomputed metric requires shape (n_queries, n_indexed). Got (19, 19) for 164 indexed." - ] - } - ], - "source": [ - "# Author: Elisabetta Ghisu\n", - "\n", - "\"\"\"\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. 
\n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\"\"\"\n", - "\n", - "print(__doc__)\n", - "\n", - "import sys\n", - "import pathlib\n", - "import os\n", - "sys.path.insert(0, \"../py-graph/\")\n", - "from tabulate import tabulate\n", - "\n", - "import random\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from sklearn.kernel_ridge import KernelRidge # 0.17\n", - "from sklearn.metrics import accuracy_score, mean_squared_error\n", - "from sklearn import svm\n", - "\n", - "from kernels.marginalizedKernel import marginalizedkernel\n", - "from utils.graphfiles import loadDataset\n", - "\n", - "# print('\\n Loading dataset from file...')\n", - "# dataset, y = loadDataset(\"/home/ljia/Documents/research-repo/datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - "# y = np.array(y)\n", - "# print(y)\n", - "\n", - "# kernel_file_path = 'marginalizedkernelmatrix.ds'\n", - "# path = pathlib.Path(kernel_file_path)\n", - "# if path.is_file():\n", - "# print('\\n Loading the matrix from file...')\n", - "# Kmatrix = np.loadtxt(kernel_file_path)\n", - "# print(Kmatrix)\n", - "# else:\n", - "# print('\\n Calculating kernel matrix, this could take a while...')\n", - "# Kmatrix = marginalizeKernel(dataset)\n", - "# print(Kmatrix)\n", - "# print('Saving kernel matrix to file...')\n", - "# np.savetxt(kernel_file_path, Kmatrix)\n", - "\n", - "# setup the parameters\n", - "model_type = 'regression' # Regression or classification problem\n", - "print('\\n --- This is a %s problem ---' % model_type)\n", - "\n", - "# datasize = len(dataset)\n", - "trials = 100 # Trials for hyperparameters random search\n", - "splits = 100 # Number of splits of the data\n", - "alpha_grid = np.linspace(0.01, 100, num = trials) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", - "# C_grid = np.linspace(0.0001, 10, num = trials)\n", - "random.seed(20) # 
Set the seed for uniform parameter distribution\n", - "data_dir = '/home/ljia/Documents/research-repo/datasets/acyclic/Acyclic/'\n", - "\n", - "# set the output path\n", - "kernel_file_path = 'kernelmatrices_marginalized_acyclic/'\n", - "if not os.path.exists(kernel_file_path):\n", - " os.makedirs(kernel_file_path)\n", - "\n", - "\n", - "\"\"\"\n", - "- Here starts the main program\n", - "- First we permute the data, then for each split we evaluate corresponding performances\n", - "- In the end, the performances are averaged over the test sets\n", - "\"\"\"\n", - "\n", - "# Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - "val_split = []\n", - "test_split = []\n", - "\n", - "p_quit = 0.5\n", - "\n", - "# for each split of the data\n", - "for j in range(10):\n", - " dataset_train, y_train = loadDataset(data_dir + 'trainset_' + str(j) + '.ds')\n", - " dataset_test, y_test = loadDataset(data_dir + 'testset_' + str(j) + '.ds')\n", - " \n", - " # Normalization step (for real valued targets only)\n", - " if model_type == 'regression':\n", - " print('\\n Normalizing output y...')\n", - " y_train_mean = np.mean(y_train)\n", - " y_train_std = np.std(y_train)\n", - " y_train = (y_train - y_train_mean) / float(y_train_std)\n", - "# print(y)\n", - " \n", - " # save kernel matrices to files / read kernel matrices from files\n", - " kernel_file_train = kernel_file_path + 'train' + str(j) + '_pquit_' + str(p_quit)\n", - " kernel_file_test = kernel_file_path + 'test' + str(j) + '_pquit_' + str(p_quit)\n", - " path_train = pathlib.Path(kernel_file_train)\n", - " path_test = pathlib.Path(kernel_file_test)\n", - " # get train set kernel matrix\n", - " if path_train.is_file():\n", - " print('\\n Loading the train set kernel matrix from file...')\n", - " Kmatrix_train = np.loadtxt(kernel_file_train)\n", - " print(Kmatrix_train)\n", - " else:\n", - " print('\\n Calculating train set kernel matrix, this could take a 
while...')\n", - " Kmatrix_train = marginalizedkernel(dataset_train, p_quit, 20)\n", - " print(Kmatrix_train)\n", - " print('\\n Saving train set kernel matrix to file...')\n", - " np.savetxt(kernel_file_train, Kmatrix_train)\n", - " # get test set kernel matrix\n", - " if path_test.is_file():\n", - " print('\\n Loading the test set kernel matrix from file...')\n", - " Kmatrix_test = np.loadtxt(kernel_file_test)\n", - " print(Kmatrix_test)\n", - " else:\n", - " print('\\n Calculating test set kernel matrix, this could take a while...')\n", - " Kmatrix_test = marginalizedkernel(dataset_test, p_quit, 20)\n", - " print(Kmatrix_test)\n", - " print('\\n Saving test set kernel matrix to file...')\n", - " np.savetxt(kernel_file_test, Kmatrix_test)\n", - "\n", - " # For each parameter trial\n", - " for i in range(trials):\n", - " # For regression use the Kernel Ridge method\n", - " if model_type == 'regression':\n", - " # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", - "\n", - " # Fit the kernel ridge model\n", - " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", - " KR.fit(Kmatrix_train, y_train)\n", - "\n", - " # predict on the test set\n", - " y_pred_test = KR.predict(Kmatrix_test)\n", - " # print(y_pred)\n", - "\n", - " # adjust prediction: needed because the training targets have been normalized\n", - " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", - " # print(y_pred_test)\n", - "\n", - " # root mean squared error in test \n", - " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", - " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", - " # print('The performance on the test set is: %3f' % rmse_test)\n", - "\n", - " # --- FIND THE OPTIMAL PARAMETERS --- #\n", - " # For regression: minimise the mean squared error\n", - " if model_type == 'regression':\n", - "\n", - " # get optimal parameter on test 
(argmin mean squared error)\n", - " min_idx = np.argmin(perf_all_test)\n", - " alpha_opt = alpha_grid[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", - " perf_test_opt = perf_all_test[min_idx]\n", - "\n", - " print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", - " \n", - " \n", - " \n", - "\n", - "# For each split of the data\n", - "for j in range(10, 10 + splits):\n", - " print('Starting split %d...' % j)\n", - "\n", - " # Set the random set for data permutation\n", - " random_state = int(j)\n", - " np.random.seed(random_state)\n", - " idx_perm = np.random.permutation(datasize)\n", - "# print(idx_perm)\n", - " \n", - " # Permute the data\n", - " y_perm = y[idx_perm] # targets permutation\n", - "# print(y_perm)\n", - " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", - "# print(Kmatrix_perm)\n", - " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", - " \n", - " # Set the training, validation and test\n", - " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", - " \n", - " # Split the kernel matrix\n", - " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", - "\n", - " # Split the targets\n", - " y_train = y_perm[0:num_train]\n", - "\n", - " # Normalization step (for real valued targets only)\n", - " print('\\n Normalizing output y...')\n", - " if 
model_type == 'regression':\n", - " y_train_mean = np.mean(y_train)\n", - " y_train_std = np.std(y_train)\n", - " y_train = (y_train - y_train_mean) / float(y_train_std)\n", - "# print(y)\n", - " \n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", - " \n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", - " perf_all_test = []\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -615,17 +261,17 @@ "With standard deviation: 4.891587\n", "\n", "\n", - " p_quit RMSE std\n", - "-------- ------- -------\n", - " 0.1 18.5188 7.749\n", - " 0.2 17.8991 6.59104\n", - " 0.3 18.3924 7.10161\n", - " 0.4 19.6233 6.24807\n", - " 0.5 19.9936 6.29951\n", - " 0.6 20.5466 6.26173\n", - " 0.7 21.7018 6.33531\n", - " 0.8 23.1489 6.10246\n", - " 0.9 24.7157 4.89159\n" + " std RMSE p_quit\n", + "------- ------- --------\n", + "7.749 18.5188 0.1\n", + "6.59104 17.8991 0.2\n", + "7.10161 18.3924 0.3\n", + "6.24807 19.6233 0.4\n", + "6.29951 19.9936 0.5\n", + "6.26173 20.5466 0.6\n", + "6.33531 21.7018 0.7\n", + "6.10246 23.1489 0.8\n", + "4.89159 24.7157 0.9\n" ] } ], @@ -651,7 +297,7 @@ "import sys\n", "import os\n", "import pathlib\n", - "sys.path.insert(0, \"../py-graph/\")\n", + "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", "import random\n", @@ -662,8 +308,8 @@ "from sklearn.metrics import accuracy_score, mean_squared_error\n", "from sklearn import svm\n", "\n", - "from kernels.marginalizedKernel import marginalizedkernel\n", - "from utils.graphfiles import loadDataset\n", + "from pygraph.kernels.marginalizedKernel import marginalizedkernel\n", + "from pygraph.utils.graphfiles import loadDataset\n", "\n", "print('\\n Loading dataset from file...')\n", "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", @@ 
-711,7 +357,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = marginalizedkernel(dataset, p_quit, 20)\n", + " Kmatrix, run_time = marginalizedkernel(dataset, p_quit, 20, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb index 9528f89..86bd8fc 100644 --- a/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb @@ -545,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -588,18 +588,27 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n" - ] - }, - { - "ename": "NameError", - "evalue": "name 'pathKernel' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 74\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpathKernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 75\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'pathKernel' is not defined" + " Calculating kernel matrix, this could take a while...\n", + "--- mean average path kernel matrix of size 185 built in 38.70095658302307 seconds ---\n", + "[[ 0.55555556 0.22222222 0. ..., 0. 0. 0. ]\n", + " [ 0.22222222 0.27777778 0. ..., 0. 0. 0. ]\n", + " [ 0. 0. 0.55555556 ..., 0.03030303 0.03030303\n", + " 0.03030303]\n", + " ..., \n", + " [ 0. 0. 0.03030303 ..., 0.08297521 0.05553719\n", + " 0.05256198]\n", + " [ 0. 0. 0.03030303 ..., 0.05553719 0.07239669\n", + " 0.0538843 ]\n", + " [ 0. 0. 0.03030303 ..., 0.05256198 0.0538843\n", + " 0.07438017]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on val set: 11.907089\n", + "With standard deviation: 4.781924\n", + "\n", + " Mean performance on test set: 14.270816\n", + "With standard deviation: 6.366698\n" ] } ], @@ -677,7 +686,7 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = pathkernel(dataset)\n", + " Kmatrix, run_time = pathkernel(dataset, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb index 1bf4920..b3e0f40 100644 --- a/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb @@ -182,7 +182,8 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a 
while...')\n", - " Kmatrix = spkernel(dataset)\n", + " #@Q: is it appropriate to use bond type between atoms as the edge weight to calculate shortest path????????\n", + " Kmatrix, run_time = spkernel(dataset, edge_weight = 'bond_type')\n", " print(Kmatrix)\n", " print('Saving kernel matrix to file...')\n", " np.savetxt(kernel_file_path, Kmatrix)\n", diff --git a/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb b/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb index 78ed792..4b7d560 100644 --- a/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb +++ b/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb @@ -221,8 +221,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 20, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", @@ -236,6 +238,7 @@ " --- for graph 0 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", + "all_labels_ori: {'C', 'O'}\n", "num_of_each_label: {'C': 5, 'O': 2}\n", "all_num_of_each_label: [{'C': 5, 'O': 2}]\n", "num_of_labels: 2\n", @@ -244,171 +247,143 @@ " --- for graph 1 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", + "all_labels_ori: {'C', 'O', 'S'}\n", "num_of_each_label: {'C': 6, 'S': 2}\n", "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n", "num_of_labels: 2\n", "all_labels_ori: {'C', 'O', 'S'}\n", "\n", - " labels: {'C', 'O'}\n", - "\n", - " vector1: [[5 2]]\n", + " all_num_of_labels_occured: 3\n", "\n", - " vector2: [[5 2]]\n", + " --- calculating kernel matrix ---\n", "\n", - " Kmatrix: [[ 29. 0.]\n", + " labels: {'C', 'O'}\n", + "vector1: [[5 2]]\n", + "vector2: [[5 2]]\n", + "Kmatrix: [[ 29. 0.]\n", " [ 0. 0.]]\n", "\n", " labels: {'C', 'O', 'S'}\n", - "\n", - " vector1: [[5 2 0]]\n", - "\n", - " vector2: [[6 0 2]]\n", - "\n", - " Kmatrix: [[ 29. 30.]\n", + "vector1: [[5 2 0]]\n", + "vector2: [[6 0 2]]\n", + "Kmatrix: [[ 29. 30.]\n", " [ 30. 
0.]]\n", "\n", " labels: {'C', 'S'}\n", - "\n", - " vector1: [[6 2]]\n", - "\n", - " vector2: [[6 2]]\n", - "\n", - " Kmatrix: [[ 29. 30.]\n", + "vector1: [[6 2]]\n", + "vector2: [[6 2]]\n", + "Kmatrix: [[ 29. 30.]\n", " [ 30. 40.]]\n", "\n", - " --- height = 0 --- \n", + " --- height = 1 --- \n", "\n", " --- for graph 0 --- \n", "\n", - "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", - "num_of_each_label: {'C': 5, 'O': 2}\n", - "num_of_labels: 2\n", - "all_labels_ori: {'C', 'O'}\n", - "num_of_labels_occured: 2\n", "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n", "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n", - "set_compressed: {'OCC': '3', 'COO': '4', 'CCO': '5', 'CC': '6'}\n", - "all_set_compressed: {'OCC': '3', 'COO': '4', 'CCO': '5', 'CC': '6'}\n", - "num_of_labels_occured: 6\n", + "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "num_of_labels_occured: 7\n", "\n", - " compressed labels: {0: '6', 1: '6', 2: '5', 3: '5', 4: '4', 5: '3', 6: '3'}\n", - "labels_comp: ['6', '6', '5', '5', '4', '3', '3']\n", - "num_of_each_label: {'3': 2, 'C': 5, '6': 2, '5': 2, 'O': 2, '4': 1}\n", - "all_num_of_each_label: [{'3': 2, 'C': 5, '6': 2, '5': 2, 'O': 2, '4': 1}]\n", + " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n", + "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n", + "all_labels_ori: {'5', '4', '6', '7'}\n", + "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n", "\n", " --- for graph 1 --- \n", "\n", - "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", - "num_of_each_label: {'C': 6, 'S': 2}\n", - "num_of_labels: 2\n", - "all_labels_ori: {'C', 'O', 'S'}\n", - "num_of_labels_occured: 7\n", "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n", "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n", - "set_compressed: {'SCC': '8', 'CC': '6', 
'CCS': '9', 'CCSS': '10'}\n", - "all_set_compressed: {'SCC': '8', 'COO': '4', 'CCS': '9', 'OCC': '3', 'CCO': '5', 'CCSS': '10', 'CC': '6'}\n", + "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n", + "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n", "num_of_labels_occured: 10\n", "\n", - " compressed labels: {0: '6', 1: '6', 2: '6', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", - "labels_comp: ['6', '6', '6', '9', '9', '10', '8', '8']\n", - "num_of_each_label: {'10': 1, 'C': 6, '6': 3, 'S': 2, '8': 2, '9': 2}\n", - "all_num_of_each_label: [{'3': 2, 'C': 5, '6': 2, '5': 2, 'O': 2, '4': 1}, {'10': 1, 'C': 6, '6': 3, 'S': 2, '8': 2, '9': 2}]\n", - "\n", - " labels: {'3', '4', 'O', 'C', '6', '5'}\n", - "\n", - " vector1: [[2 1 2 5 2 2]]\n", - "\n", - " vector2: [[2 1 2 5 2 2]]\n", + " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", + "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n", + "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n", + "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n", "\n", - " labels: {'3', '10', '4', 'O', '9', 'C', '6', 'S', '5', '8'}\n", - "\n", - " vector1: [[2 0 1 2 0 5 2 0 2 0]]\n", + " all_num_of_labels_occured: 10\n", "\n", - " vector2: [[0 1 0 0 2 6 3 2 0 2]]\n", + " --- calculating kernel matrix ---\n", "\n", - " labels: {'10', '9', 'C', '6', 'S', '8'}\n", + " labels: {'5', '4', '6', '7'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", "\n", - " vector1: [[1 2 6 3 2 2]]\n", + " labels: {'10', '4', '7', '9', '6', '5', '8'}\n", + "vector1: [[0 2 2 0 2 1 0]]\n", + "vector2: [[1 0 3 2 0 0 2]]\n", "\n", - " vector2: [[1 2 6 3 2 2]]\n", + " labels: {'8', '10', '7', '9'}\n", + "vector1: [[2 1 3 2]]\n", + "vector2: [[2 1 3 2]]\n", "\n", - " all_num_of_labels_occured: 3\n", + " Kmatrix: [[ 42. 
36.]\n", + " [ 36. 58.]]\n", "\n", - " Kmatrix: [[ 71. 66.]\n", - " [ 66. 98.]]\n", - "\n", - " --- height = 1 --- \n", + " --- height = 2 --- \n", "\n", " --- for graph 0 --- \n", "\n", - "labels_ori: ['6', '6', '5', '5', '4', '3', '3']\n", - "num_of_each_label: {'3': 2, '5': 2, '4': 1, '6': 2}\n", - "num_of_labels: 4\n", - "all_labels_ori: {'3', '5', '4', '6'}\n", - "num_of_labels_occured: 7\n", - "multiset: ['65', '65', '536', '536', '433', '345', '345']\n", - "set_unique: ['345', '536', '65', '433']\n", - "set_compressed: {'345': '8', '536': '9', '65': '10', '433': '11'}\n", - "all_set_compressed: {'345': '8', '536': '9', '65': '10', '433': '11'}\n", - "num_of_labels_occured: 11\n", + "multiset: ['76', '76', '647', '647', '544', '456', '456']\n", + "set_unique: ['647', '76', '456', '544']\n", + "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n", + "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n", + "num_of_labels_occured: 14\n", "\n", - " compressed labels: {0: '10', 1: '10', 2: '9', 3: '9', 4: '11', 5: '8', 6: '8'}\n", - "labels_comp: ['10', '10', '9', '9', '11', '8', '8']\n", - "num_of_each_label: {'3': 2, '10': 2, '4': 1, '9': 2, '6': 2, '11': 1, '8': 2, '5': 2}\n", - "all_num_of_each_label: [{'3': 2, '10': 2, '4': 1, '9': 2, '6': 2, '11': 1, '8': 2, '5': 2}]\n", + " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n", + "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n", + "all_labels_ori: {'14', '12', '11', '13'}\n", + "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n", "\n", " --- for graph 1 --- \n", "\n", - "labels_ori: ['6', '6', '6', '9', '9', '10', '8', '8']\n", - "num_of_each_label: {'10': 1, '6': 3, '9': 2, '8': 2}\n", - "num_of_labels: 4\n", - "all_labels_ori: {'3', '10', '4', '9', '6', '5', '8'}\n", - "num_of_labels_occured: 14\n", - "multiset: ['69', '69', '610', '968', '968', 
'10688', '8109', '8109']\n", - "set_unique: ['69', '968', '8109', '10688', '610']\n", - "set_compressed: {'69': '15', '8109': '17', '10688': '18', '968': '16', '610': '19'}\n", - "all_set_compressed: {'69': '15', '8109': '17', '968': '16', '345': '8', '10688': '18', '610': '19', '536': '9', '65': '10', '433': '11'}\n", + "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n", + "set_unique: ['710', '8109', '79', '10788', '978']\n", + "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n", + "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n", "num_of_labels_occured: 19\n", "\n", - " compressed labels: {0: '15', 1: '15', 2: '19', 3: '16', 4: '16', 5: '18', 6: '17', 7: '17'}\n", - "labels_comp: ['15', '15', '19', '16', '16', '18', '17', '17']\n", - "num_of_each_label: {'10': 1, '18': 1, '19': 1, '9': 2, '17': 2, '6': 3, '8': 2, '16': 2, '15': 2}\n", - "all_num_of_each_label: [{'3': 2, '10': 2, '4': 1, '9': 2, '6': 2, '11': 1, '8': 2, '5': 2}, {'10': 1, '18': 1, '19': 1, '9': 2, '17': 2, '6': 3, '8': 2, '16': 2, '15': 2}]\n", - "\n", - " labels: {'3', '10', '4', '5', '9', '6', '11', '8'}\n", + " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n", + "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n", + "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n", "\n", - " vector1: [[2 2 1 2 2 2 1 2]]\n", + " all_num_of_labels_occured: 19\n", "\n", - " vector2: [[2 2 1 2 2 2 1 2]]\n", + " --- calculating kernel matrix ---\n", "\n", - " labels: {'3', '10', '4', '18', '5', '19', '9', '17', '6', '11', '8', '16', '15'}\n", + " labels: {'14', '12', '11', '13'}\n", + "vector1: 
[[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", "\n", - " vector1: [[2 2 1 0 2 0 2 0 2 1 2 0 0]]\n", + " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "vector1: [[0 0 2 2 0 2 1 0 0]]\n", + "vector2: [[1 2 0 0 2 0 0 2 1]]\n", "\n", - " vector2: [[0 1 0 1 0 1 2 2 3 0 2 2 2]]\n", + " labels: {'18', '17', '15', '16', '19'}\n", + "vector1: [[1 2 1 2 2]]\n", + "vector2: [[1 2 1 2 2]]\n", "\n", - " labels: {'10', '18', '19', '9', '17', '6', '8', '16', '15'}\n", - "\n", - " vector1: [[1 1 1 2 2 3 2 2 2]]\n", - "\n", - " vector2: [[1 1 1 2 2 3 2 2 2]]\n", - "\n", - " all_num_of_labels_occured: 10\n", + " Kmatrix: [[ 55. 36.]\n", + " [ 36. 72.]]\n", "\n", - " Kmatrix: [[ 97. 82.]\n", - " [ 82. 130.]]\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel built in 0.003629922866821289 seconds ---\n" + " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n" ] }, { "data": { "text/plain": [ - "array([[ 97., 82.],\n", - " [ 82., 130.]])" + "array([[ 55., 36.],\n", + " [ 36., 72.]])" ] }, - "execution_count": 6, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -635,6 +610,8 @@ " print('\\n --- for graph %d --- \\n' % (idx))\n", " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_ori: %s' % (labels_ori))\n", + " all_labels_ori.update(labels_ori)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", @@ -646,46 +623,48 @@ " all_labels_ori.update(labels_ori)\n", " print('all_labels_ori: %s' % (all_labels_ori))\n", " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", + " \n", " # calculate subtree kernel with the 0th iteration and add it to the final kernel\n", + " print('\\n 
--- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", - " print('\\n Kmatrix: %s' % (Kmatrix))\n", - " \n", - " \n", + " print('Kmatrix: %s' % (Kmatrix))\n", + "\n", " \n", " # iterate each height\n", - " for h in range(height + 1):\n", + " for h in range(1, height + 1):\n", " print('\\n --- height = %d --- ' % (h))\n", - " all_labels_ori = set() # all unique orignal labels in all graphs in this iteration\n", - "# all_labels_comp = set() # all unique compressed labels in all graphs in this iteration\n", - " all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration\n", " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n", " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n", + " all_labels_ori = set()\n", + " all_num_of_each_label = []\n", " \n", " # for each graph\n", " for idx, G in enumerate(Gn):\n", - " # get the set of original labels\n", + "# # get the set of original labels\n", " print('\\n --- for graph %d --- \\n' % (idx))\n", - " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", - " print('labels_ori: %s' % (labels_ori))\n", - " 
num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", - " print('num_of_each_label: %s' % (num_of_each_label))\n", - " num_of_labels = len(num_of_each_label) # number of all unique labels\n", - " print('num_of_labels: %s' % (num_of_labels))\n", + "# labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", + "# print('labels_ori: %s' % (labels_ori))\n", + "# num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", + "# print('num_of_each_label: %s' % (num_of_each_label))\n", + "# num_of_labels = len(num_of_each_label) # number of all unique labels\n", + "# print('num_of_labels: %s' % (num_of_labels))\n", " \n", - " all_labels_ori.update(labels_ori)\n", - " print('all_labels_ori: %s' % (all_labels_ori))\n", - " # num_of_labels_occured += num_of_labels #@todo not precise\n", - " num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", - " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", + "# all_labels_ori.update(labels_ori)\n", + "# print('all_labels_ori: %s' % (all_labels_ori))\n", + "# # num_of_labels_occured += num_of_labels #@todo not precise\n", + "# num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", + "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", @@ -718,7 +697,6 @@ "# num_of_labels_occured += len(set_compressed) #@todo not precise\n", " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", - "\n", " # relabel nodes\n", " # nx.relabel_nodes(G, set_compressed, copy = False)\n", " for node in G.nodes(data = True):\n", @@ -728,25 +706,29 @@ " # get the set of compressed labels\n", " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_comp: %s' % (labels_comp))\n", - " num_of_each_label.update(dict(Counter(labels_comp)))\n", + 
" all_labels_ori.update(labels_comp)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", + " num_of_each_label = dict(Counter(labels_comp))\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n", + " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " \n", " # calculate subtree kernel with h iterations and add it to the final kernel\n", + " print('\\n --- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", " \n", - " all_num_of_labels_occured += len(all_labels_ori)\n", - " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " print('\\n Kmatrix: %s' % (Kmatrix))\n", "\n", " return Kmatrix\n", @@ -766,13 +748,13 @@ "G2 = dataset[80]\n", "print(nx.get_node_attributes(G2, 'label'))\n", "\n", - "weisfeilerlehmankernel(G1, G2, height = 1)\n", + "weisfeilerlehmankernel(G1, G2, height = 2)\n", "# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -790,7 +772,7 @@ "correspond to the average of the 
performances on the test sets. \n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", "\n", " --- calculating kernel matrix when subtree height = 0 ---\n", @@ -819,22 +801,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.49373626708984375 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3845643997192383 seconds ---\n", + "[[ 5. 6. 4. ..., 20. 20. 20.]\n", + " [ 6. 8. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 5. ..., 21. 21. 21.]\n", " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 154.]]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 12.761978\n", - "With standard deviation: 10.086502\n", + " Mean performance on train set: 141.418957\n", + "With standard deviation: 1.082842\n", "\n", - " Mean performance on test set: 9.014031\n", - "With standard deviation: 6.357865\n", + " Mean performance on test set: 36.210792\n", + "With standard deviation: 7.331787\n", "\n", " --- calculating kernel matrix when subtree height = 1 ---\n", "\n", @@ -862,22 +844,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.0043864250183105 seconds ---\n", - "[[ 20. 14. 8. ..., 20. 20. 22.]\n", - " [ 14. 32. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 20. ..., 25. 25. 
30.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.853447437286377 seconds ---\n", + "[[ 10. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 16. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 10. ..., 22. 22. 24.]\n", " ..., \n", - " [ 20. 28. 25. ..., 188. 180. 145.]\n", - " [ 20. 28. 25. ..., 180. 182. 145.]\n", - " [ 22. 22. 30. ..., 145. 145. 238.]]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 24. ..., 122. 122. 154.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 22.747869\n", - "With standard deviation: 7.561365\n", + " Mean performance on train set: 140.065309\n", + "With standard deviation: 0.877976\n", "\n", - " Mean performance on test set: 19.457133\n", - "With standard deviation: 5.057464\n", + " Mean performance on test set: 9.000982\n", + "With standard deviation: 6.371454\n", "\n", " --- calculating kernel matrix when subtree height = 2 ---\n", "\n", @@ -905,22 +887,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.602942705154419 seconds ---\n", - "[[ 30. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 48. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 30. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.374389410018921 seconds ---\n", + "[[ 15. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 24. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 15. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 246. 209. 147.]\n", - " [ 20. 28. 25. ..., 209. 220. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 286.]]\n", + " [ 20. 20. 22. ..., 159. 151. 124.]\n", + " [ 20. 20. 22. ..., 151. 153. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
185.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 21.407092\n", - "With standard deviation: 6.415967\n", + " Mean performance on train set: 140.074983\n", + "With standard deviation: 0.928821\n", "\n", - " Mean performance on test set: 23.466810\n", - "With standard deviation: 5.836831\n", + " Mean performance on test set: 19.811299\n", + "With standard deviation: 4.049105\n", "\n", " --- calculating kernel matrix when subtree height = 3 ---\n", "\n", @@ -954,22 +936,22 @@ "output_type": "stream", "text": [ "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2096023559570312 seconds ---\n", - "[[ 40. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 64. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 40. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.9141185283660889 seconds ---\n", + "[[ 20. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 32. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 20. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 304. 217. 147.]\n", - " [ 20. 28. 25. ..., 217. 250. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 314.]]\n", + " [ 20. 20. 22. ..., 188. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 168. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 202.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 24.747018\n", - "With standard deviation: 6.547340\n", + " Mean performance on train set: 140.197806\n", + "With standard deviation: 0.873857\n", "\n", - " Mean performance on test set: 27.961360\n", - "With standard deviation: 6.291821\n", + " Mean performance on test set: 25.045500\n", + "With standard deviation: 4.942763\n", "\n", " --- calculating kernel matrix when subtree height = 4 ---\n", "\n", @@ -997,22 +979,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.7832393646240234 seconds ---\n", - "[[ 50. 14. 8. ..., 20. 20. 
23.]\n", - " [ 14. 80. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 50. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.393263578414917 seconds ---\n", + "[[ 25. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 40. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 25. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 362. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 280. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 336.]]\n", + " [ 20. 20. 22. ..., 217. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 183. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 213.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 27.197367\n", - "With standard deviation: 5.980185\n", + " Mean performance on train set: 140.272421\n", + "With standard deviation: 0.838915\n", "\n", - " Mean performance on test set: 30.614531\n", - "With standard deviation: 6.852841\n", + " Mean performance on test set: 28.225454\n", + "With standard deviation: 6.521196\n", "\n", " --- calculating kernel matrix when subtree height = 5 ---\n", "\n", @@ -1040,22 +1022,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.276118040084839 seconds ---\n", - "[[ 60. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 96. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 60. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.893545389175415 seconds ---\n", + "[[ 30. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 48. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 30. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 420. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 310. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 358.]]\n", + " [ 20. 20. 22. ..., 246. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 198. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
224.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 29.010593\n", - "With standard deviation: 6.073672\n", + " Mean performance on train set: 140.247025\n", + "With standard deviation: 0.863630\n", "\n", - " Mean performance on test set: 32.130815\n", - "With standard deviation: 7.062947\n", + " Mean performance on test set: 30.635436\n", + "With standard deviation: 6.736466\n", "\n", " --- calculating kernel matrix when subtree height = 6 ---\n", "\n", @@ -1083,22 +1065,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.779860496520996 seconds ---\n", - "[[ 70. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 112. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 70. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.216407299041748 seconds ---\n", + "[[ 35. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 56. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 35. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 478. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 340. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 380.]]\n", + " [ 20. 20. 22. ..., 275. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 213. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
235.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 30.909632\n", - "With standard deviation: 6.490001\n", + " Mean performance on train set: 140.239201\n", + "With standard deviation: 0.872475\n", "\n", - " Mean performance on test set: 33.117974\n", - "With standard deviation: 7.069399\n", + " Mean performance on test set: 32.102695\n", + "With standard deviation: 6.856006\n", "\n", " --- calculating kernel matrix when subtree height = 7 ---\n", "\n", @@ -1124,18 +1106,7 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.446576833724976 seconds ---\n", - "[[ 80. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 128. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 80. ..., 25. 25. 32.]\n", - " ..., \n", - " [ 20. 28. 25. ..., 536. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 370. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 402.]]\n", - "\n", - " Saving kernel matrix to file...\n" + " Calculating kernel matrix, this could take a while...\n" ] }, { @@ -1143,11 +1114,22 @@ "output_type": "stream", "text": [ "\n", - " Mean performance on val set: 31.870406\n", - "With standard deviation: 6.522032\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8147408962249756 seconds ---\n", + "[[ 40. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 64. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 40. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 304. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 228. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 140.094026\n", + "With standard deviation: 0.917704\n", "\n", - " Mean performance on test set: 33.964633\n", - "With standard deviation: 7.270535\n", + " Mean performance on test set: 32.970919\n", + "With standard deviation: 6.896061\n", "\n", " --- calculating kernel matrix when subtree height = 8 ---\n", "\n", @@ -1175,22 +1157,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.85552978515625 seconds ---\n", - "[[ 90. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 144. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 90. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.3765342235565186 seconds ---\n", + "[[ 45. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 72. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 45. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 594. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 400. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 424.]]\n", + " [ 20. 20. 22. ..., 333. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 243. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 257.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.192715\n", - "With standard deviation: 6.389616\n", + " Mean performance on train set: 140.076304\n", + "With standard deviation: 0.931866\n", "\n", - " Mean performance on test set: 34.325288\n", - "With standard deviation: 7.375800\n", + " Mean performance on test set: 33.511228\n", + "With standard deviation: 6.907530\n", "\n", " --- calculating kernel matrix when subtree height = 9 ---\n", "\n", @@ -1218,22 +1200,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.650352239608765 seconds ---\n", - "[[ 100. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 160. 4. ..., 28. 28. 
22.]\n", - " [ 8. 4. 100. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.885462284088135 seconds ---\n", + "[[ 50. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 80. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 50. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 652. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 430. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 446.]]\n", + " [ 20. 20. 22. ..., 362. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 258. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 268.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.842545\n", - "With standard deviation: 6.213069\n", + " Mean performance on train set: 139.913361\n", + "With standard deviation: 0.928974\n", "\n", - " Mean performance on test set: 34.675515\n", - "With standard deviation: 7.314709\n", + " Mean performance on test set: 33.850152\n", + "With standard deviation: 6.914269\n", "\n", " --- calculating kernel matrix when subtree height = 10 ---\n", "\n", @@ -1261,42 +1243,41 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.818731069564819 seconds ---\n", - "[[ 110. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 176. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 110. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.313802719116211 seconds ---\n", + "[[ 55. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 88. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 55. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 710. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 460. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 468.]]\n", + " [ 20. 20. 22. ..., 391. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 273. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
279.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 33.151974\n", - "With standard deviation: 6.196320\n", - "\n", - " Mean performance on test set: 34.867215\n", - "With standard deviation: 7.324672\n", - "\n", - "\n", - " std height RMSE\n", - "------- -------- --------\n", - "6.35786 1 9.01403\n", - "5.05746 2.1 19.4571\n", - "5.83683 3.2 23.4668\n", - "6.29182 4.3 27.9614\n", - "6.85284 5.4 30.6145\n", - "7.06295 6.5 32.1308\n", - "7.0694 7.6 33.118\n", - "7.27054 8.7 33.9646\n", - "7.3758 9.8 34.3253\n", - "7.31471 10.9 34.6755\n", - "7.32467 12 34.8672\n" + " Mean performance on train set: 139.894176\n", + "With standard deviation: 0.942612\n", + "\n", + " Mean performance on test set: 34.096283\n", + "With standard deviation: 6.931154\n", + "\n", + "\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 36.2108 7.33179 141.419 1.08284 0.384564\n", + " 1 9.00098 6.37145 140.065 0.877976 0.853447\n", + " 2 19.8113 4.04911 140.075 0.928821 1.37439\n", + " 3 25.0455 4.94276 140.198 0.873857 1.91412\n", + " 4 28.2255 6.5212 140.272 0.838915 2.39326\n", + " 5 30.6354 6.73647 140.247 0.86363 2.89355\n", + " 6 32.1027 6.85601 140.239 0.872475 3.21641\n", + " 7 32.9709 6.89606 140.094 0.917704 3.81474\n", + " 8 33.5112 6.90753 140.076 0.931866 4.37653\n", + " 9 33.8502 6.91427 139.913 0.928974 4.88546\n", + " 10 34.0963 6.93115 139.894 0.942612 5.3138\n" ] } ], "source": [ - "# Author: Elisabetta Ghisu\n", "# test of WL subtree kernel\n", "\n", "\"\"\"\n", @@ -1310,7 +1291,7 @@ "correspond to the average of the performances on the test sets. 
\n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\"\"\"\n", "\n", "print(__doc__)\n", @@ -1318,6 +1299,7 @@ "import sys\n", "import os\n", "import pathlib\n", + "from collections import OrderedDict\n", "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", @@ -1332,11 +1314,11 @@ "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", "from pygraph.utils.graphfiles import loadDataset\n", "\n", - "val_means_height = []\n", - "val_stds_height = []\n", + "train_means_height = []\n", + "train_stds_height = []\n", "test_means_height = []\n", "test_stds_height = []\n", - "\n", + "kernel_build_time = []\n", "\n", "for height in np.linspace(0, 10, 11):\n", " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", @@ -1378,13 +1360,14 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height))\n", + " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n", + " kernel_build_time.append(run_time)\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " # np.savetxt(kernel_file, Kmatrix)\n", "\n", - " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - " val_split = []\n", + " # Initialize the performance of the best parameter trial on train with the corresponding performance on test\n", + " train_split = []\n", " test_split = []\n", "\n", " # For each split of the data\n", @@ -1404,17 +1387,14 @@ " # print(Kmatrix_perm)\n", " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", "\n", - " # Set the training, validation and 
test\n", + " # Set the training, test\n", " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", + " num_train = int((datasize * 90) / 100) # 90% (of entire dataset) for training\n", + " num_test = datasize - num_train # 10% (of entire dataset) for test\n", "\n", " # Split the kernel matrix\n", " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", + " Kmatrix_test = Kmatrix_perm[num_train:datasize, 0:num_train]\n", "\n", " # Split the targets\n", " y_train = y_perm[0:num_train]\n", @@ -1427,11 +1407,10 @@ " y_train = (y_train - y_train_mean) / float(y_train_std)\n", " # print(y)\n", "\n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", + " y_test = y_perm[num_train:datasize]\n", "\n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " # Record the performance for each parameter trial respectively on train and test set\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", @@ -1445,81 +1424,69 @@ " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", " KR.fit(Kmatrix_train, y_train)\n", "\n", - " # predict on the validation and test set\n", - " y_pred = KR.predict(Kmatrix_val)\n", + " # predict on the train and test set\n", + " y_pred_train = KR.predict(Kmatrix_train)\n", " y_pred_test = KR.predict(Kmatrix_test)\n", " # print(y_pred)\n", "\n", " # adjust prediction: 
needed because the training targets have been normalizaed\n", - " y_pred = y_pred * float(y_train_std) + y_train_mean\n", - " # print(y_pred)\n", + " y_pred_train = y_pred_train * float(y_train_std) + y_train_mean\n", " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", " # print(y_pred_test)\n", "\n", - " # root mean squared error on validation\n", - " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", - " perf_all_val.append(rmse)\n", - "\n", - " # root mean squared error in test \n", + " # root mean squared error in train set\n", + " rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))\n", + " perf_all_train.append(rmse_train)\n", + " # root mean squared error in test set\n", " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", " # print('The performance on the test set is: %3f' % rmse_test)\n", "\n", " # --- FIND THE OPTIMAL PARAMETERS --- #\n", " # For regression: minimise the mean squared error\n", " if model_type == 'regression':\n", "\n", - " # get optimal parameter on validation (argmin mean squared error)\n", + " # get optimal parameter on test (argmin mean squared error)\n", " min_idx = np.argmin(perf_all_test)\n", " alpha_opt = alpha_grid[min_idx]\n", "\n", - " # performance corresponding to optimal parameter on val\n", - " perf_val_opt = perf_all_val[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", + " # corresponding performance on train and test set for the same parameter\n", + " perf_train_opt = perf_all_train[min_idx]\n", " perf_test_opt = perf_all_test[min_idx]\n", - "\n", " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " # print('The best performance on the validation set is: %3f' % perf_val_opt)\n", " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", "\n", - " # 
append the best performance on validation\n", - " # at the current split\n", - " val_split.append(perf_val_opt)\n", - "\n", - " # append the correponding performance on the test set\n", + " # append the correponding performance on the train and test set\n", + " train_split.append(perf_train_opt)\n", " test_split.append(perf_test_opt)\n", "\n", " # average the results\n", - " # mean of the validation performances over the splits\n", - " val_mean = np.mean(np.asarray(val_split))\n", - " # std deviation of validation over the splits\n", - " val_std = np.std(np.asarray(val_split))\n", - "\n", - " # mean of the test performances over the splits\n", + " # mean of the train and test performances over the splits\n", + " train_mean = np.mean(np.asarray(train_split))\n", " test_mean = np.mean(np.asarray(test_split))\n", - " # std deviation of the test oer the splits\n", + " # std deviation of the train and test over the splits\n", + " train_std = np.std(np.asarray(train_split))\n", " test_std = np.std(np.asarray(test_split))\n", "\n", - " print('\\n Mean performance on val set: %3f' % val_mean)\n", - " print('With standard deviation: %3f' % val_std)\n", + " print('\\n Mean performance on train set: %3f' % train_mean)\n", + " print('With standard deviation: %3f' % train_std)\n", " print('\\n Mean performance on test set: %3f' % test_mean)\n", " print('With standard deviation: %3f' % test_std)\n", - " \n", - " val_means_height.append(val_mean)\n", - " val_stds_height.append(val_std)\n", + " \n", + " train_means_height.append(train_mean)\n", + " train_stds_height.append(train_std)\n", " test_means_height.append(test_mean)\n", " test_stds_height.append(test_std)\n", " \n", "print('\\n') \n", - "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" + "table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_height, 'std_test': test_stds_height, 'RMSE_train': train_means_height, 'std_train': 
train_stds_height, 'k_time': kernel_build_time}\n", + "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n", + "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "scrolled": true }, @@ -1566,185 +1533,21 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 47.540945053100586 seconds ---\n", - "[[ 6. 2. 6. ..., 2. 2. 2.]\n", - " [ 2. 12. 2. ..., 0. 0. 6.]\n", - " [ 6. 2. 6. ..., 2. 2. 2.]\n", - " ..., \n", - " [ 2. 0. 2. ..., 110. 42. 14.]\n", - " [ 2. 0. 2. ..., 42. 110. 14.]\n", - " [ 2. 6. 2. ..., 14. 14. 110.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.533318\n", - "With standard deviation: 6.213602\n", - "\n", - " Mean performance on test set: 36.055557\n", - "With standard deviation: 5.386696\n", - "\n", - " --- calculating kernel matrix when subtree height = 1 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 
188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 75.94973611831665 seconds ---\n", - "[[ 9. 3. 9. ..., 3. 3. 3.]\n", - " [ 3. 18. 3. ..., 0. 0. 9.]\n", - " [ 9. 3. 9. ..., 3. 3. 3.]\n", - " ..., \n", - " [ 3. 0. 3. ..., 165. 63. 21.]\n", - " [ 3. 0. 3. ..., 63. 165. 21.]\n", - " [ 3. 9. 3. ..., 21. 21. 165.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.464684\n", - "With standard deviation: 6.299737\n", - "\n", - " Mean performance on test set: 36.054735\n", - "With standard deviation: 5.384130\n", - "\n", - " --- calculating kernel matrix when subtree height = 2 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 
205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 98.63305306434631 seconds ---\n", - "[[ 12. 4. 12. ..., 4. 4. 4.]\n", - " [ 4. 24. 4. ..., 0. 0. 12.]\n", - " [ 12. 4. 12. ..., 4. 4. 4.]\n", - " ..., \n", - " [ 4. 0. 4. ..., 220. 84. 28.]\n", - " [ 4. 0. 4. ..., 84. 220. 28.]\n", - " [ 4. 12. 4. ..., 28. 28. 220.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.594816\n", - "With standard deviation: 6.106887\n", - "\n", - " Mean performance on test set: 36.069839\n", - "With standard deviation: 5.406605\n", - "\n", - " --- calculating kernel matrix when subtree height = 3 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 126.40115857124329 seconds ---\n", - "[[ 15. 5. 15. ..., 5. 5. 5.]\n", - " [ 5. 30. 5. ..., 0. 0. 15.]\n", - " [ 15. 5. 15. ..., 5. 5. 5.]\n", - " ..., \n", - " [ 5. 0. 5. ..., 275. 105. 35.]\n", - " [ 5. 0. 5. ..., 105. 275. 35.]\n", - " [ 5. 15. 5. ..., 35. 35. 275.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.545772\n", - "With standard deviation: 6.200795\n", - "\n", - " Mean performance on test set: 36.055164\n", - "With standard deviation: 5.385283\n", - "\n", - " --- calculating kernel matrix when subtree height = 4 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, 
*args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -1832,7 +1635,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height), base_kernel = 'sp')\n", + " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", "# np.savetxt(kernel_file, Kmatrix)\n", @@ -1885,7 +1688,7 @@ " y_test = y_perm[(num_train + 
num_val):datasize]\n", "\n", " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", diff --git a/notebooks/run_marginalizedkernel_acyclic.ipynb b/notebooks/run_marginalizedkernel_acyclic.ipynb index 46838bd..08c2d33 100644 --- a/notebooks/run_marginalizedkernel_acyclic.ipynb +++ b/notebooks/run_marginalizedkernel_acyclic.ipynb @@ -357,7 +357,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = marginalizedkernel(dataset, p_quit, 20)\n", + " Kmatrix, run_time = marginalizedkernel(dataset, p_quit, 20, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/run_pathkernel_acyclic.ipynb b/notebooks/run_pathkernel_acyclic.ipynb index 6913a74..86bd8fc 100644 --- a/notebooks/run_pathkernel_acyclic.ipynb +++ b/notebooks/run_pathkernel_acyclic.ipynb @@ -686,7 +686,7 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = pathkernel(dataset)\n", + " Kmatrix, run_time = pathkernel(dataset, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/run_spkernel_acyclic.ipynb b/notebooks/run_spkernel_acyclic.ipynb index 1bf4920..b3e0f40 100644 --- a/notebooks/run_spkernel_acyclic.ipynb +++ b/notebooks/run_spkernel_acyclic.ipynb @@ -182,7 +182,8 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = spkernel(dataset)\n", + " #@Q: is it appropriate to use bond type between atoms as the edge weight to calculate shortest path????????\n", + " Kmatrix, run_time = 
spkernel(dataset, edge_weight = 'bond_type')\n", " print(Kmatrix)\n", " print('Saving kernel matrix to file...')\n", " np.savetxt(kernel_file_path, Kmatrix)\n", diff --git a/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc index 4b9202976d0db7cf259f705cf5f0bf8588a1b2f1..242dee9fa54af4bbde6ee9f535d19224f6d3ff64 100644 GIT binary patch literal 8164 zcmeHMO>i7X6@D|be_E|%S&}WU63jTUlWZc(mVlTzB9PcYRk34IR)WG3*VbsKr5$;9 zcDH9XTC21tr?7pZD9&7{q5>SL;s6{tQ5-nsMh%b)1ywmw6r55V_+IzS{^-XE1u0Sm zYfr1E`}OPBuV453-kXyX6ZwDqeWm>M3q*gUpLuiy#ZFo<0IH>gfj?$-?}8zhLf zP1?2;fk}5wGGfvcmD9Jzxs9fmtI4~XaPE&n? z8k1C>B;%8sK0_`9hMZ&M=9RSLjM0+W^Yp{H`O_FaIp+VaQ-6iU!X%G8ss`jr@Xfma1QE?ArFd#*HChra??pkIq zL;i2nfQ%YYqY#5L%-ZmOqB^pwW8R3_^s*GDXgjTB$TA@dqbi$VX%JX;yA(q2)}RX( zclDM-y&QGlfR*-pwBG(H zt@rh?m!?y_yjo6<*P&I$ihzuJdD^t-B(IWr;LW79{?HK>em_|_uiv2JmPx%-tk%O- zI!^w|kV=1H45{?-A(g&wgpe%W9L*eR>Wx!hkfAk<@jxx87JEDq8p$n+GcPgJX#)f>3(?R$qGVvz-u)8y#q{6A0IK0$Uy8 zMf-J#?}|;QQx9!LsoHrmKHho>{b_~{^TB!eBKUaoD9Zjk{YzutWcmidJf0u+7Zcwg zw(eBTs(7em+!^nYUcTqs!ol8ounH&inm%v-`t_J*R4K!Qp_N1cZFc)azA3_v^p!x` z%2f4Q!-4X<7p|$xm2IHYXx*{n*5TE~6uM_Y%P8uiA$(IITc1s@x z`@$i_@l?sFNz8BGf^IZJF?4Lya$c`Ng3XR!Rf_YQJJ3n1)s$htDje01?f9;0t)XAw zkg;1y)U3Fv;-=UXQux*3xs(D`?>Coj*=sc+-*$qU=k5yo#)7?8t?dXe+!fM(ZNa`S zL}v$Q|K(Q}>{mstj@f+s+JgO3Q*P}z(hcovuPzQNp|0-^b)b{eyJ)}C#O^tDHilPW z9-c42*^xd@lYLVJs8(x>_VUH0rRSHHpIQ^G(&gnZeD1mP&n{`TeE_6Fl7Wjl3`}N)4a^!2XI-jSB1-kg9d_OypmR^ zBB}II1-vhV4@;q(9BmOjFvtVqfp5$be0{ggW3S%w=m8)SoOGJDGjyNmKClwtFF*<@ zgaY50qis0faeg&IAdh_f!T_c~_kpHBRM6d!MAdo|qvP6Zw2qlsW!6x1i5Sx(W&=IyOXPDI$e6Mv0~f0Ep+jZ&A9)dvJ{Q@PCT7r<4#TU9uQ+#Wak$ zKw8A>O4~Hn+kd9@Ze;SB0X!A5oKvg%mDxK!vOD`WuKPVBJRa}$)mz^--U9TQ=8eO? 
z_GYNt;*Hw|0#^Y3E*V#Ey+yrQx<_;`rB(ynua*qDy;+6LioEpR3EDeB+cPY^!5wKx zn*x>`qwQJl0?ix~&8R(A??(F>bDb>)7z+cn{!S|K=cwz@-bt9%H>h`t_V6E!)CS$o zko>*5H>XTVwWGJ_snJIJ$Ag{_vmri76K`JJuNyAGY(crsJYkP>Jh##P{K2)`yms_M zykxP(!QxihPg+X;qb$CdSZ-0x5L>)@!HoMYzMX+YpAy4{-agu}5%%pT`nL@Gch-9Q zmw0|_ti#^dN(JXHh`JsIuSfm)rx#}TG3x)J@F3zo2-1hqtcnEwx1g}XlM0+w8KLnH z_D_sZX7B`;QQ)3N{Cn#n^$<_PZ^6wXJl=+fCE}a$V$y`pqOVd<-!ZAp$H`Evkv>V? z7I%V@8JUY0qqK^2@N&Yb*DHRfQE6^gIQ-Cg6v@y*!5aBl)IQ9w)DewVn~l~B@3KfN z-x%#abr?chT&Xt^Zw4=*k_`Gp7bed@nvy9W!yoy$uAk-lBU~^xQ;U%2Q2QaCSb8~* zdX#dV(0P4CK28ED#WUYn@pG(r_L0~JXqc5B>sL*{+T}AIOA(FvoN3x3@XiP zwIhXFp5(5iO?u^Ne#iPMeDI_QkR&Oeh|g+5riVuPniv@sWi)j;`?jct zO}Ug*mLsDUS)SXCCV1Kg@-H4LfqarRlS|$aBB8hAd+m;hCPxK~(n{IUcr1_-oh`qV zmL*n#I+@Dy)K;rFjPDpZ#RZ;#3z|p3V`Qu`Bg6l!3G=iuVNByGn8(d2i$kG{2!_rX zXN}X=yrKfFRWvh3(L8JBjYo{**0h;1dDOi58Dr9%u!^A7eD?Er%vAGNEuI^{EFKo^ z3Z7sW1;<0ctLg^8or|m_?g4%O>I^3Gh&n?FKz<3N-IqA>z%a(?Ccr2sWzTcMmT|8N zjW|~cE_XRGiil(%gZ9e^cr0AtmeYY>co-gv!_A>AOPp{-CJlKJ-OX7hJ2)igVTa8@ z5-%Z6axlm}CP}~pURfs?l>7RKeC8*cXgngrm1MStlUeKi6hV@z)gk#o)eXF|aug+e z-@qZ-5RyHmx&S15L3MLPSt*pgm$GGZbqUY1PAhx{6y-c0J~d)#sJ<+}%)^ya7<0!}7m zsHnq0grkUt5SViqh>k2mK=?=(kv|d!D#tLV&&f7|fAwy(-@q3HNQ~GYQmS)+xP1_1 ztM5!a7fue#i%%UoRvVEHu^P@I7J{=BOH>?4u+sjUp5!RC;L``RR7jMgKCNh340oJ^ zk=>%(sE)ZnLL0XgX(+oXtd{t6n&Ctxf8GTd>g4|f139|sHkVV{to zuBUX9y5I-Kd=9N=V3rA7d?c3WFodX+lMKn^(}cOhp(YeccU~R z7nsX=E*81K2V{~6HH%v|5Q(!%&? 
zyxOeS0Y&i9BhZSn(X0^x-*`&Kp{ugLOHpp6(R4d?@dDdRz-EXG)0i>2EFdL1mEzje OaEp2N&9lL=r~e6YMPA?l delta 2739 zcmZWrO>7%Q6n-;nuYY1Ewv*UdCvnoIY3u%^4OB{h+?KYHsv<#=f(S&7f}3?3yNQ3s z8)T%lRwxxsI8aq{g9{Z>B~DezjRSB3p`JJ)?F9)q075`j@gq3Ed$V@pwvOj{_Psan z_nWu*;p~sc#*)!!`24L?bNdegd=DKT9rw@Ss-M0+_QwkVW&q3}m?1F3a0B>U512Zb z+x#vtBZ?nW%qW=MaD%~I98A=Vp$O&@U^1A=#$}UD%5T_ihok)vt^lr!Q%5b3KxyC z^2bULK!F3IkF_i`{b2c^gcRNTNppL%q@qEajRshlv$i8z=P*5=ho%mVS4n2;Psq+e zGXPeAOrp%7pFlg@b%!$>11Ej}aywgtwy1`ZT@z4B1f6l1$Gwgv-B z8YC%0JUz25c&udaw6mq8892r@4o%*!;}bOuLOIt_!w*?U4ac@?2;vK7VPIY#Ji7*# zr5RK;52=tJi;}mZ3_^GYu{4wI%73bDjrqn{aJf*twrq`_loR?sS&1JBheulniaC@*WCg7@yTA&A)dhAJJ>u7CMpSeWC5{rr zN|BbBp(BbsK(Z9=f@+7(F`Q;MG*-z+3GcO!?%6u9>G{{7*#kELmhevEy$HbDKuR`V zc&vyqO;NH zNswpOIQDevqm(+zF}`-X#!hy*+C!!ic&s;0M|1kXj*yF1i~3qwlDiGOEm#TFC047# zY~!K2N*&NrW5bLC<}8&q(1w*o=rEUQ5@% zlD}G5kw5oN$=GoBiDBernd>W8%tDsCeB!7ZD$|6yS`l>=u!|y%Dq;}HKjGX;v2HC^ zR;y>;lf%8om@D5JPRVx@mye54(o)vA+n--qStzfqE>xBl3i-w51?5wuNM+la7{kZU z$OC(98AIY52k-JK}zEF|Xq^`MaS-z3X`Lv?hkoS{EVo?lqVRdcA zD&nRtMr0y&T7H{MJ1XU@PwXb6st~Gjsv6V>5k-Qd59%)?fo>enOpm5mi0SmF89c*q zfb%F%bDbG{7Z0;R7WXB%&Z8(v^I_JlMg0cLXc=a(xJJYMD314Wwufc>ypN0M)}JYZ zv!INnhge)5PN#JB1~KIG=^32YM{ z*u)6(+U?#&d*-FOV~e2Sm$6JnmWF$D!mZdNo#B_x(H4(Bi$AWvST+ldwxQ=d$tnC1 zXNYWDL)cRL6>PiF=s_n-@&iN;62brKKpZAQ2=SnFgmy=XOcJ5DqS!}d0!cRL1$=@S zCyC^UP+sCRk+Ve35uqruny225E$Uq-aRFtIKz^Co%Pz=YGlvos5(0B=wfd$9?T9=y g^2#21x2m2Cs41v%j{n5Up1 zyh}lm)*7WJuX^7}qh6FoVd8D=$DK49#9lh^`hM(hhu*gKhdbU@)C-eRM?o0>ifFTF z8MNeI7`3u{L}SPn@jXR-hsF-6Lq**T9bqm*sY{1MM~Z&+Cp8A|Qa8&}-JC4Ql3g6Q zl;-IWiYuyPHB2~kCOePF!9hHiqk6Z%CSk?Ti9QfD>acIuA^m4ci!y{&m>=h+{i5yX zDfXsAR>;Yc$(B5gHYqK!5{J;wp_PqyXk1{~hmc#Og){O4teoMyg(53frNaz$XJm0% zbg581s7|>f<*2<+X?^tZR0u3#A-_2h!UvT3Y8FHsto#|#7A~a5vVKaFooR&+<%_D& zQRcMt>RB6cXBW@h3l9XdQL7*FFn3FS!uno%FpN5W&kIL>f7lDX!InKDisgVP@p{p2 z=%qVRa?c**z2@EZyyaV7yx-ppHSdTA0sPgE11}7=CsJ6_?dEO#K9hUECxm`y2j|D3 zhy5KLZSpB{ns;Ngc(C}yyXp1(&9K)HZ9ns%6EP1y&!TTl7Txh+hX;D+e<~7-HQqho 
zZ&9vv@vDngoyi%vhRGQ+@D%u?)rK;KO+N`^zaJV`Xn_IC+*5|^Gvyh2$|0jOMKE_< zv^BKkWsq%Rd~IB01OtQ!DZ3fcIp#lfXfKCvu5=y?|3D4LIL+HejWovrA5gVgO!h4flg1qk!`Qew^aE2$ zhKQ={YKh4tsWw>|FqtS$O;-Eyc4!KI5VYBqP0hM?d$1*aY{B=l^X%Z*oa$)UC~E#_ z#)(H(uG%(g%j7$QI1S^pRfsq3cHsmWdJY?`(TcMOX|~ej?8WnG$s!0<%W77YRZX2! zH`ICXv+BG9>ddPJ^snLnMRgN@i%NeH>yJMM-xq(32WZJ3Pv?R@!mc94`2e9plEH*W zRFlZhfgC)%OrqB$BOnYSSxE3lOh(}6D36u+sy4_WaoE(*Eplq8b4tilgiQq+{hBAc zh{Xum3eoh4#zl_b2-$~4n(SES#U&ao$ru((5?R@}LSw}18Ob{wxl7#PJucOk_x?ib z0Zu8=ZjSW7nL)SC|BAGHnCDQ%~WsEDmN5qcxz)Y)>|t8x$OAuI1)Q|nN9tx;Rm929_V_GIxd zEueFv0MUe<6G%*T%kAuhea&fKXF-h1{?*Dq_Ti}=EaDVkoOIBr>KAyC9LEqf7e@VF zGfZ_DHs9=peR%34@FpumX6DS;(AP<5^4pjj3{ri8_vcuCv}xQZ4YhI8aFptoScC`C zs92pv+{?V&dGiq#u>_aTFxe;$!jZnpbD1FO=o`%CB&H;~fsTHa$#FEZ5Xmz{-g@`p zEzBi%K}ZmC&Zz+>In_X$S93xyW2B1NDp10swtq|MH?XV2n(?oSHiITesbF-129!T2 zkpv&??5NTl_7PhyF%+a|LZJi@)KN0Q8JD>t0+$1aLJ3>}9*E6wKkUbchv&K|Clqir z`aJ_@W#hM81x*&=FnBp!$5vW`l}O13ls!Y<0Mx;U>+mAzu(I)<=wU^miY&`+pwgwu zJ}kGhV2*cVx(+Y`ODJs^h-YR1$ab0bKB4u!7#R77)${KH&A)L6&RSpD_^Y5&u(*rw zbX}boQ4uvz<=_@dQUTS37nDV~N8CV}MhDZw0OwPI0-_b_g5t_KL?RJ4W-Sm~ne99{ zNK9!z4k8_P(lBThc(Bh1%}Tb_?4{z}r@ZiJlgSc@$xo1HT%aP=_j#X^OL0AHidSZ3ygMKe@T|TuYpd;;8QFjzs%!BhM*)R z(WzAuO>NRsk4?F=uQdXK473X9wI$dTnL}vV4~_Hq4%%Hi$rkIcu!!f;n)r`Hp!FM= zOKyV@GFeR(fzT5EE;<$Uin^q(sH^HS_&Ffp5>cLT~gqHyFB_Pn9o#<>6Wp|D>2gFX>IQp(6 zoO2vwaQ4c^+f$D}FS%-5757I6>1kY>8t}rg0oCWUJ;b|&bzBCF5cqPmM+hBoW@p-6C|i}xk_Gormjkj+sbrC zJ(C@3uGL&V-(N`wcV!Ir=^9SZ!oYzTFN69YhT-A|*cx@MQj!TtT-~s)c1lxcy#upVMf< On9MolM){@kmHz-#`-YPM delta 1985 zcmah}OKc-W5Un12{2l)i;@CLsW_Pnmh9wZbyJ8WGAP~YQLdwA*D6$;SB(dYzNqY!B z%*Ya1eg{BkByL>b2+|61ljh+FnNHAH}q0WUzYWQUH?1%$;Zuvj6xh}oxWtfAl~S9 z9J}KLVrL`K*{XHAmfcf3+uE^vG7lx01Tlpy_%FpKo|(mggptjQl9(6cB9Cue1J#OR z66Xs3m&7IUgpe1Y*+7-4p8*=jdJ3zz;(s1{eXz?Qwt%K8n5E4sc}a4U3=nFDlZad) z>t2f77`X{01EeU1$ADlf!)%=RO2`H_ZVERw!IC6h_F{DS9-w`BWQwJL_F3}MFOr-7 zPg{68tIR=+chgiZv+?i*WXeM`49?$Fg)3`j#L*qz)29Ie|fRTXy#Y&FPvs 
zInFZ9x*YSrPF#CrlGS2$$C1sgAlh_n3H4^j3ZjnPcjOdno&_-t1zLD@+P^EToZn1VmhZ;7$6^95h8WXwpfO=$74a3sw8#RXqF5C23bY#R5dU&mX)J|L z+5aWEm-v($d};F&e>G(ej`)D?Mq<-@(kX(2JImc z3u0(VCo9X1Y30=1f)P|xUhUDCf|{E0EJOXl=rKHT5?E6OR=6dw0mAyf$k-)*<@V5c za)u6%r0`%E;dN2v20xralOsu(sS)RGsjjXx6?*sr{9(l?BtC9 zPkOO-A8)x5HlmGPdtY*o$~h+UOiY% zWnOCS)vY00mi&P+Yd*ki{WXa?MJ=;ZSd|YlKPkIiM-2}->wj-d>#8LEKaKfU9%4hQvpR>5fh@Kcr#)S zWi3~}*w4&r1Jzc~;SKpblRKD9Fex&rFge4d41#t$IIugG8cji5+6V1=!w&TAcDL@_ zmKV9ex9(-K$V4TXTgJ?Xk%u<@vAlWq94n|PS#M5@3a=G|3)BY#CSd_RBr+&UPm frXIv=HLKgG)s!cy6Xgo)uKR!F7xY