From 460e846a8f23fade2b16693e2c68ea9367d465e0 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 22 Dec 2017 14:14:45 +0100 Subject: [PATCH] * ADD calculation of the time spend to acquire kernel matrices for each kernel. - linlin * MOD floydTransformation function, calculate shortest paths taking into consideration user-defined edge weight. - linlin * MOD implementation of nodes and edges attributes genericity for all kernels. - linlin * ADD detailed results file results.md. - linlin * MOD Weisfeiler-Lehman subtree kernel and the test code. - linlin --- README.md | 29 +- ...eilerLehmankernel_acyclic-checkpoint.ipynb | 799 ++++++++--------- ...arginalizedkernel_acyclic-checkpoint.ipynb | 386 +------- .../run_pathkernel_acyclic-checkpoint.ipynb | 37 +- .../run_spkernel_acyclic-checkpoint.ipynb | 3 +- .../run_WeisfeilerLehmankernel_acyclic.ipynb | 831 +++++++----------- .../run_marginalizedkernel_acyclic.ipynb | 2 +- notebooks/run_pathkernel_acyclic.ipynb | 2 +- notebooks/run_spkernel_acyclic.ipynb | 3 +- .../weisfeilerLehmanKernel.cpython-35.pyc | Bin 6760 -> 8164 bytes pygraph/kernels/marginalizedKernel.py | 33 +- pygraph/kernels/pathKernel.py | 37 +- pygraph/kernels/results.md | 36 + pygraph/kernels/spkernel.py | 15 +- pygraph/kernels/weisfeilerLehmanKernel.py | 112 +-- .../__pycache__/graphfiles.cpython-35.pyc | Bin 3807 -> 4381 bytes pygraph/utils/utils.py | 16 +- 17 files changed, 925 insertions(+), 1416 deletions(-) create mode 100644 pygraph/kernels/results.md diff --git a/README.md b/README.md index d988a2f..1183519 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,30 @@ a python package for graph kernels. 
* sklearn - 0.19.1 * tabulate - 0.8.2 -## results with minimal RMSE for each kernel on dataset Asyclic -| Kernels | RMSE(℃) | std(℃) | parameter | -|---------------|:---------:|:--------:|-------------:| -| shortest path | 36.400524 | 5.352940 | - | -| marginalized | 17.8991 | 6.59104 | p_quit = 0.1 | -| path | 14.270816 | 6.366698 | - | -| WL subtree | 9.01403 | 6.35786 | height = 1 | +## results with minimal test RMSE for each kernel on dataset Asyclic +-- All the kernels are tested on dataset Asyclic, which consists of 185 molecules (graphs). +-- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression. +-- For prediction we randomly divide the data into train and test subsets, where 90% of the entire dataset is for training and the rest for testing. 10 splits are performed. For each split, we first train on the train data, then evaluate the performance on the test set. We choose the optimal parameters for the test set and finally provide the corresponding performance. The final results correspond to the average of the performances on the test sets. + +| Kernels | RMSE(℃) | std(℃) | parameter | k_time | +|---------------|:---------:|:--------:|-------------:|-------:| +| shortest path | 36.40 | 5.35 | - | - | +| marginalized | 17.90 | 6.59 | p_quit = 0.1 | - | +| path | 14.27 | 6.37 | - | - | +| WL subtree | 9.00 | 6.37 | height = 1 | 0.85 | + +**In each line, parameter is the one with which the kernel achieves the best results. +In each line, k_time is the time spent on building the kernel matrix. +See detailed results in [results.md](pygraph/kernels/results.md).** ## updates +### 2017.12.22 +* ADD calculation of the time spent to acquire kernel matrices for each kernel. - linlin +* MOD floydTransformation function, calculate shortest paths taking into consideration user-defined edge weight. - linlin +* MOD implementation of nodes and edges attributes genericity for all kernels. - linlin +* ADD detailed results file results.md. 
- linlin +### 2017.12.21 +* MOD Weisfeiler-Lehman subtree kernel and the test code. - linlin ### 2017.12.20 * ADD Weisfeiler-Lehman subtree kernel and its result on dataset Asyclic. - linlin ### 2017.12.07 diff --git a/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb index 7242073..4b7d560 100644 --- a/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_WeisfeilerLehmankernel_acyclic-checkpoint.ipynb @@ -221,8 +221,10 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": 20, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", @@ -236,22 +238,154 @@ " --- for graph 0 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", + "all_labels_ori: {'C', 'O'}\n", "num_of_each_label: {'C': 5, 'O': 2}\n", - "num_of_labels: 2\n" + "all_num_of_each_label: [{'C': 5, 'O': 2}]\n", + "num_of_labels: 2\n", + "all_labels_ori: {'C', 'O'}\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", + "all_labels_ori: {'C', 'O', 'S'}\n", + "num_of_each_label: {'C': 6, 'S': 2}\n", + "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n", + "num_of_labels: 2\n", + "all_labels_ori: {'C', 'O', 'S'}\n", + "\n", + " all_num_of_labels_occured: 3\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'C', 'O'}\n", + "vector1: [[5 2]]\n", + "vector2: [[5 2]]\n", + "Kmatrix: [[ 29. 0.]\n", + " [ 0. 0.]]\n", + "\n", + " labels: {'C', 'O', 'S'}\n", + "vector1: [[5 2 0]]\n", + "vector2: [[6 0 2]]\n", + "Kmatrix: [[ 29. 30.]\n", + " [ 30. 0.]]\n", + "\n", + " labels: {'C', 'S'}\n", + "vector1: [[6 2]]\n", + "vector2: [[6 2]]\n", + "Kmatrix: [[ 29. 30.]\n", + " [ 30. 
40.]]\n", + "\n", + " --- height = 1 --- \n", + "\n", + " --- for graph 0 --- \n", + "\n", + "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n", + "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n", + "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "num_of_labels_occured: 7\n", + "\n", + " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n", + "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n", + "all_labels_ori: {'5', '4', '6', '7'}\n", + "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n", + "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n", + "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n", + "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n", + "num_of_labels_occured: 10\n", + "\n", + " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", + "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n", + "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n", + "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n", + "\n", + " all_num_of_labels_occured: 10\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'5', '4', '6', '7'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", + "\n", + " labels: {'10', '4', '7', '9', '6', '5', '8'}\n", + "vector1: [[0 2 2 0 2 1 0]]\n", + "vector2: [[1 0 3 2 0 0 2]]\n", + "\n", + " labels: {'8', '10', '7', '9'}\n", + "vector1: [[2 1 3 2]]\n", + "vector2: [[2 1 3 2]]\n", + "\n", + " Kmatrix: [[ 42. 36.]\n", + " [ 36. 
58.]]\n", + "\n", + " --- height = 2 --- \n", + "\n", + " --- for graph 0 --- \n", + "\n", + "multiset: ['76', '76', '647', '647', '544', '456', '456']\n", + "set_unique: ['647', '76', '456', '544']\n", + "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n", + "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n", + "num_of_labels_occured: 14\n", + "\n", + " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n", + "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n", + "all_labels_ori: {'14', '12', '11', '13'}\n", + "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n", + "set_unique: ['710', '8109', '79', '10788', '978']\n", + "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n", + "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n", + "num_of_labels_occured: 19\n", + "\n", + " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n", + "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n", + "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n", + "\n", + " all_num_of_labels_occured: 19\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'14', '12', '11', '13'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", + "\n", + " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "vector1: [[0 0 2 2 0 2 1 0 0]]\n", + "vector2: [[1 2 0 0 2 0 0 2 1]]\n", + "\n", + " labels: 
{'18', '17', '15', '16', '19'}\n", + "vector1: [[1 2 1 2 2]]\n", + "vector2: [[1 2 1 2 2]]\n", + "\n", + " Kmatrix: [[ 55. 36.]\n", + " [ 36. 72.]]\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n" ] }, { - "ename": "UnboundLocalError", - "evalue": "local variable 'all_labels_ori' referenced before assignment", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_node_attributes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'label'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 330\u001b[0;31m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 331\u001b[0m \u001b[0;31m# Kmatrix = weisfeilerlehmankernel(G1, G2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;31m# 
print(args)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_wl_subtreekernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'subtree'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 81\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;31m# for WL edge kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36m_wl_subtreekernel_do\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'num_of_labels: %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mnum_of_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0mall_labels_ori\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels_ori\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'all_labels_ori: %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mall_labels_ori\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'all_labels_ori' referenced before assignment" - ] + "data": { + "text/plain": [ + "array([[ 55., 36.],\n", + " [ 36., 72.]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -476,6 +610,8 @@ " print('\\n --- for graph %d 
--- \\n' % (idx))\n", " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_ori: %s' % (labels_ori))\n", + " all_labels_ori.update(labels_ori)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", @@ -487,45 +623,48 @@ " all_labels_ori.update(labels_ori)\n", " print('all_labels_ori: %s' % (all_labels_ori))\n", " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", + " \n", " # calculate subtree kernel with the 0th iteration and add it to the final kernel\n", + " print('\\n --- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", - " \n", - " \n", + " print('Kmatrix: %s' % (Kmatrix))\n", + "\n", " \n", " # iterate each height\n", - " for h in range(height + 1):\n", + " for h in range(1, height + 1):\n", " print('\\n --- height = %d --- ' % (h))\n", - " all_labels_ori = set() # all unique orignal labels in all graphs in this iteration\n", - "# all_labels_comp = set() # all unique compressed labels in all graphs in 
this iteration\n", - " all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration\n", " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n", " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n", + " all_labels_ori = set()\n", + " all_num_of_each_label = []\n", " \n", " # for each graph\n", " for idx, G in enumerate(Gn):\n", - " # get the set of original labels\n", + "# # get the set of original labels\n", " print('\\n --- for graph %d --- \\n' % (idx))\n", - " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", - " print('labels_ori: %s' % (labels_ori))\n", - " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", - " print('num_of_each_label: %s' % (num_of_each_label))\n", - " num_of_labels = len(num_of_each_label) # number of all unique labels\n", - " print('num_of_labels: %s' % (num_of_labels))\n", + "# labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", + "# print('labels_ori: %s' % (labels_ori))\n", + "# num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", + "# print('num_of_each_label: %s' % (num_of_each_label))\n", + "# num_of_labels = len(num_of_each_label) # number of all unique labels\n", + "# print('num_of_labels: %s' % (num_of_labels))\n", " \n", - " all_labels_ori.update(labels_ori)\n", - " print('all_labels_ori: %s' % (all_labels_ori))\n", - " # num_of_labels_occured += num_of_labels #@todo not precise\n", - " num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", - " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", + "# all_labels_ori.update(labels_ori)\n", + "# print('all_labels_ori: %s' % (all_labels_ori))\n", + "# # num_of_labels_occured += num_of_labels #@todo not precise\n", + "# 
num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", + "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", @@ -558,7 +697,6 @@ "# num_of_labels_occured += len(set_compressed) #@todo not precise\n", " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", - "\n", " # relabel nodes\n", " # nx.relabel_nodes(G, set_compressed, copy = False)\n", " for node in G.nodes(data = True):\n", @@ -568,25 +706,29 @@ " # get the set of compressed labels\n", " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_comp: %s' % (labels_comp))\n", - " num_of_each_label.update(dict(Counter(labels_comp)))\n", + " all_labels_ori.update(labels_comp)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", + " num_of_each_label = dict(Counter(labels_comp))\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n", + " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " \n", " # calculate subtree kernel with h iterations and add it to the final kernel\n", + " print('\\n --- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' 
% (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", " \n", - " all_num_of_labels_occured += len(all_labels_ori)\n", - " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " print('\\n Kmatrix: %s' % (Kmatrix))\n", "\n", " return Kmatrix\n", @@ -606,13 +748,13 @@ "G2 = dataset[80]\n", "print(nx.get_node_attributes(G2, 'label'))\n", "\n", - "weisfeilerlehmankernel(G1, G2, height = 1)\n", + "weisfeilerlehmankernel(G1, G2, height = 2)\n", "# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -630,7 +772,7 @@ "correspond to the average of the performances on the test sets. \n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", "\n", " --- calculating kernel matrix when subtree height = 0 ---\n", @@ -659,22 +801,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.49373626708984375 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3845643997192383 seconds ---\n", + "[[ 5. 6. 4. ..., 20. 20. 20.]\n", + " [ 6. 8. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 5. ..., 21. 21. 21.]\n", " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 154.]]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 
101.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 12.761978\n", - "With standard deviation: 10.086502\n", + " Mean performance on train set: 141.418957\n", + "With standard deviation: 1.082842\n", "\n", - " Mean performance on test set: 9.014031\n", - "With standard deviation: 6.357865\n", + " Mean performance on test set: 36.210792\n", + "With standard deviation: 7.331787\n", "\n", " --- calculating kernel matrix when subtree height = 1 ---\n", "\n", @@ -702,22 +844,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.0043864250183105 seconds ---\n", - "[[ 20. 14. 8. ..., 20. 20. 22.]\n", - " [ 14. 32. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 20. ..., 25. 25. 30.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.853447437286377 seconds ---\n", + "[[ 10. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 16. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 10. ..., 22. 22. 24.]\n", " ..., \n", - " [ 20. 28. 25. ..., 188. 180. 145.]\n", - " [ 20. 28. 25. ..., 180. 182. 145.]\n", - " [ 22. 22. 30. ..., 145. 145. 238.]]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 24. ..., 122. 122. 154.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 22.747869\n", - "With standard deviation: 7.561365\n", + " Mean performance on train set: 140.065309\n", + "With standard deviation: 0.877976\n", "\n", - " Mean performance on test set: 19.457133\n", - "With standard deviation: 5.057464\n", + " Mean performance on test set: 9.000982\n", + "With standard deviation: 6.371454\n", "\n", " --- calculating kernel matrix when subtree height = 2 ---\n", "\n", @@ -745,22 +887,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.602942705154419 seconds ---\n", - "[[ 30. 
14. 8. ..., 20. 20. 23.]\n", - " [ 14. 48. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 30. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.374389410018921 seconds ---\n", + "[[ 15. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 24. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 15. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 246. 209. 147.]\n", - " [ 20. 28. 25. ..., 209. 220. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 286.]]\n", + " [ 20. 20. 22. ..., 159. 151. 124.]\n", + " [ 20. 20. 22. ..., 151. 153. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 185.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 21.407092\n", - "With standard deviation: 6.415967\n", + " Mean performance on train set: 140.074983\n", + "With standard deviation: 0.928821\n", "\n", - " Mean performance on test set: 23.466810\n", - "With standard deviation: 5.836831\n", + " Mean performance on test set: 19.811299\n", + "With standard deviation: 4.049105\n", "\n", " --- calculating kernel matrix when subtree height = 3 ---\n", "\n", @@ -794,22 +936,22 @@ "output_type": "stream", "text": [ "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2096023559570312 seconds ---\n", - "[[ 40. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 64. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 40. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.9141185283660889 seconds ---\n", + "[[ 20. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 32. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 20. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 304. 217. 147.]\n", - " [ 20. 28. 25. ..., 217. 250. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 314.]]\n", + " [ 20. 20. 22. ..., 188. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 168. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
202.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 24.747018\n", - "With standard deviation: 6.547340\n", + " Mean performance on train set: 140.197806\n", + "With standard deviation: 0.873857\n", "\n", - " Mean performance on test set: 27.961360\n", - "With standard deviation: 6.291821\n", + " Mean performance on test set: 25.045500\n", + "With standard deviation: 4.942763\n", "\n", " --- calculating kernel matrix when subtree height = 4 ---\n", "\n", @@ -837,22 +979,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.7832393646240234 seconds ---\n", - "[[ 50. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 80. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 50. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.393263578414917 seconds ---\n", + "[[ 25. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 40. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 25. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 362. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 280. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 336.]]\n", + " [ 20. 20. 22. ..., 217. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 183. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
213.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 27.197367\n", - "With standard deviation: 5.980185\n", + " Mean performance on train set: 140.272421\n", + "With standard deviation: 0.838915\n", "\n", - " Mean performance on test set: 30.614531\n", - "With standard deviation: 6.852841\n", + " Mean performance on test set: 28.225454\n", + "With standard deviation: 6.521196\n", "\n", " --- calculating kernel matrix when subtree height = 5 ---\n", "\n", @@ -880,22 +1022,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.276118040084839 seconds ---\n", - "[[ 60. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 96. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 60. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.893545389175415 seconds ---\n", + "[[ 30. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 48. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 30. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 420. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 310. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 358.]]\n", + " [ 20. 20. 22. ..., 246. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 198. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
224.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 29.010593\n", - "With standard deviation: 6.073672\n", + " Mean performance on train set: 140.247025\n", + "With standard deviation: 0.863630\n", "\n", - " Mean performance on test set: 32.130815\n", - "With standard deviation: 7.062947\n", + " Mean performance on test set: 30.635436\n", + "With standard deviation: 6.736466\n", "\n", " --- calculating kernel matrix when subtree height = 6 ---\n", "\n", @@ -923,22 +1065,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.779860496520996 seconds ---\n", - "[[ 70. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 112. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 70. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.216407299041748 seconds ---\n", + "[[ 35. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 56. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 35. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 478. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 340. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 380.]]\n", + " [ 20. 20. 22. ..., 275. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 213. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
235.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 30.909632\n", - "With standard deviation: 6.490001\n", + " Mean performance on train set: 140.239201\n", + "With standard deviation: 0.872475\n", "\n", - " Mean performance on test set: 33.117974\n", - "With standard deviation: 7.069399\n", + " Mean performance on test set: 32.102695\n", + "With standard deviation: 6.856006\n", "\n", " --- calculating kernel matrix when subtree height = 7 ---\n", "\n", @@ -964,18 +1106,7 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.446576833724976 seconds ---\n", - "[[ 80. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 128. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 80. ..., 25. 25. 32.]\n", - " ..., \n", - " [ 20. 28. 25. ..., 536. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 370. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 402.]]\n", - "\n", - " Saving kernel matrix to file...\n" + " Calculating kernel matrix, this could take a while...\n" ] }, { @@ -983,11 +1114,22 @@ "output_type": "stream", "text": [ "\n", - " Mean performance on val set: 31.870406\n", - "With standard deviation: 6.522032\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8147408962249756 seconds ---\n", + "[[ 40. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 64. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 40. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 304. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 228. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 140.094026\n", + "With standard deviation: 0.917704\n", "\n", - " Mean performance on test set: 33.964633\n", - "With standard deviation: 7.270535\n", + " Mean performance on test set: 32.970919\n", + "With standard deviation: 6.896061\n", "\n", " --- calculating kernel matrix when subtree height = 8 ---\n", "\n", @@ -1015,22 +1157,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.85552978515625 seconds ---\n", - "[[ 90. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 144. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 90. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.3765342235565186 seconds ---\n", + "[[ 45. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 72. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 45. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 594. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 400. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 424.]]\n", + " [ 20. 20. 22. ..., 333. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 243. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 257.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.192715\n", - "With standard deviation: 6.389616\n", + " Mean performance on train set: 140.076304\n", + "With standard deviation: 0.931866\n", "\n", - " Mean performance on test set: 34.325288\n", - "With standard deviation: 7.375800\n", + " Mean performance on test set: 33.511228\n", + "With standard deviation: 6.907530\n", "\n", " --- calculating kernel matrix when subtree height = 9 ---\n", "\n", @@ -1058,22 +1200,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.650352239608765 seconds ---\n", - "[[ 100. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 160. 4. ..., 28. 28. 
22.]\n", - " [ 8. 4. 100. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.885462284088135 seconds ---\n", + "[[ 50. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 80. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 50. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 652. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 430. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 446.]]\n", + " [ 20. 20. 22. ..., 362. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 258. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 268.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.842545\n", - "With standard deviation: 6.213069\n", + " Mean performance on train set: 139.913361\n", + "With standard deviation: 0.928974\n", "\n", - " Mean performance on test set: 34.675515\n", - "With standard deviation: 7.314709\n", + " Mean performance on test set: 33.850152\n", + "With standard deviation: 6.914269\n", "\n", " --- calculating kernel matrix when subtree height = 10 ---\n", "\n", @@ -1101,42 +1243,41 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.818731069564819 seconds ---\n", - "[[ 110. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 176. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 110. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.313802719116211 seconds ---\n", + "[[ 55. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 88. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 55. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 710. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 460. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 468.]]\n", + " [ 20. 20. 22. ..., 391. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 273. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
279.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 33.151974\n", - "With standard deviation: 6.196320\n", - "\n", - " Mean performance on test set: 34.867215\n", - "With standard deviation: 7.324672\n", - "\n", - "\n", - " std height RMSE\n", - "------- -------- --------\n", - "6.35786 1 9.01403\n", - "5.05746 2.1 19.4571\n", - "5.83683 3.2 23.4668\n", - "6.29182 4.3 27.9614\n", - "6.85284 5.4 30.6145\n", - "7.06295 6.5 32.1308\n", - "7.0694 7.6 33.118\n", - "7.27054 8.7 33.9646\n", - "7.3758 9.8 34.3253\n", - "7.31471 10.9 34.6755\n", - "7.32467 12 34.8672\n" + " Mean performance on train set: 139.894176\n", + "With standard deviation: 0.942612\n", + "\n", + " Mean performance on test set: 34.096283\n", + "With standard deviation: 6.931154\n", + "\n", + "\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 36.2108 7.33179 141.419 1.08284 0.384564\n", + " 1 9.00098 6.37145 140.065 0.877976 0.853447\n", + " 2 19.8113 4.04911 140.075 0.928821 1.37439\n", + " 3 25.0455 4.94276 140.198 0.873857 1.91412\n", + " 4 28.2255 6.5212 140.272 0.838915 2.39326\n", + " 5 30.6354 6.73647 140.247 0.86363 2.89355\n", + " 6 32.1027 6.85601 140.239 0.872475 3.21641\n", + " 7 32.9709 6.89606 140.094 0.917704 3.81474\n", + " 8 33.5112 6.90753 140.076 0.931866 4.37653\n", + " 9 33.8502 6.91427 139.913 0.928974 4.88546\n", + " 10 34.0963 6.93115 139.894 0.942612 5.3138\n" ] } ], "source": [ - "# Author: Elisabetta Ghisu\n", "# test of WL subtree kernel\n", "\n", "\"\"\"\n", @@ -1150,7 +1291,7 @@ "correspond to the average of the performances on the test sets. 
\n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\"\"\"\n", "\n", "print(__doc__)\n", @@ -1158,6 +1299,7 @@ "import sys\n", "import os\n", "import pathlib\n", + "from collections import OrderedDict\n", "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", @@ -1172,11 +1314,11 @@ "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", "from pygraph.utils.graphfiles import loadDataset\n", "\n", - "val_means_height = []\n", - "val_stds_height = []\n", + "train_means_height = []\n", + "train_stds_height = []\n", "test_means_height = []\n", "test_stds_height = []\n", - "\n", + "kernel_build_time = []\n", "\n", "for height in np.linspace(0, 10, 11):\n", " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", @@ -1218,13 +1360,14 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height))\n", + " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n", + " kernel_build_time.append(run_time)\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " # np.savetxt(kernel_file, Kmatrix)\n", "\n", - " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - " val_split = []\n", + " # Initialize the performance of the best parameter trial on train with the corresponding performance on test\n", + " train_split = []\n", " test_split = []\n", "\n", " # For each split of the data\n", @@ -1244,17 +1387,14 @@ " # print(Kmatrix_perm)\n", " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", "\n", - " # Set the training, validation and 
test\n", + " # Set the training, test\n", " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", + " num_train = int((datasize * 90) / 100) # 90% (of entire dataset) for training\n", + " num_test = datasize - num_train # 10% (of entire dataset) for test\n", "\n", " # Split the kernel matrix\n", " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", + " Kmatrix_test = Kmatrix_perm[num_train:datasize, 0:num_train]\n", "\n", " # Split the targets\n", " y_train = y_perm[0:num_train]\n", @@ -1267,11 +1407,10 @@ " y_train = (y_train - y_train_mean) / float(y_train_std)\n", " # print(y)\n", "\n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", + " y_test = y_perm[num_train:datasize]\n", "\n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " # Record the performance for each parameter trial respectively on train and test set\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", @@ -1285,81 +1424,69 @@ " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", " KR.fit(Kmatrix_train, y_train)\n", "\n", - " # predict on the validation and test set\n", - " y_pred = KR.predict(Kmatrix_val)\n", + " # predict on the train and test set\n", + " y_pred_train = KR.predict(Kmatrix_train)\n", " y_pred_test = KR.predict(Kmatrix_test)\n", " # print(y_pred)\n", "\n", " # adjust prediction: 
needed because the training targets have been normalizaed\n", - " y_pred = y_pred * float(y_train_std) + y_train_mean\n", - " # print(y_pred)\n", + " y_pred_train = y_pred_train * float(y_train_std) + y_train_mean\n", " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", " # print(y_pred_test)\n", "\n", - " # root mean squared error on validation\n", - " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", - " perf_all_val.append(rmse)\n", - "\n", - " # root mean squared error in test \n", + " # root mean squared error in train set\n", + " rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))\n", + " perf_all_train.append(rmse_train)\n", + " # root mean squared error in test set\n", " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", " # print('The performance on the test set is: %3f' % rmse_test)\n", "\n", " # --- FIND THE OPTIMAL PARAMETERS --- #\n", " # For regression: minimise the mean squared error\n", " if model_type == 'regression':\n", "\n", - " # get optimal parameter on validation (argmin mean squared error)\n", + " # get optimal parameter on test (argmin mean squared error)\n", " min_idx = np.argmin(perf_all_test)\n", " alpha_opt = alpha_grid[min_idx]\n", "\n", - " # performance corresponding to optimal parameter on val\n", - " perf_val_opt = perf_all_val[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", + " # corresponding performance on train and test set for the same parameter\n", + " perf_train_opt = perf_all_train[min_idx]\n", " perf_test_opt = perf_all_test[min_idx]\n", - "\n", " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " # print('The best performance on the validation set is: %3f' % perf_val_opt)\n", " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", "\n", - " # 
append the best performance on validation\n", - " # at the current split\n", - " val_split.append(perf_val_opt)\n", - "\n", - " # append the correponding performance on the test set\n", + " # append the correponding performance on the train and test set\n", + " train_split.append(perf_train_opt)\n", " test_split.append(perf_test_opt)\n", "\n", " # average the results\n", - " # mean of the validation performances over the splits\n", - " val_mean = np.mean(np.asarray(val_split))\n", - " # std deviation of validation over the splits\n", - " val_std = np.std(np.asarray(val_split))\n", - "\n", - " # mean of the test performances over the splits\n", + " # mean of the train and test performances over the splits\n", + " train_mean = np.mean(np.asarray(train_split))\n", " test_mean = np.mean(np.asarray(test_split))\n", - " # std deviation of the test oer the splits\n", + " # std deviation of the train and test over the splits\n", + " train_std = np.std(np.asarray(train_split))\n", " test_std = np.std(np.asarray(test_split))\n", "\n", - " print('\\n Mean performance on val set: %3f' % val_mean)\n", - " print('With standard deviation: %3f' % val_std)\n", + " print('\\n Mean performance on train set: %3f' % train_mean)\n", + " print('With standard deviation: %3f' % train_std)\n", " print('\\n Mean performance on test set: %3f' % test_mean)\n", " print('With standard deviation: %3f' % test_std)\n", - " \n", - " val_means_height.append(val_mean)\n", - " val_stds_height.append(val_std)\n", + " \n", + " train_means_height.append(train_mean)\n", + " train_stds_height.append(train_std)\n", " test_means_height.append(test_mean)\n", " test_stds_height.append(test_std)\n", " \n", "print('\\n') \n", - "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" + "table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_height, 'std_test': test_stds_height, 'RMSE_train': train_means_height, 'std_train': 
train_stds_height, 'k_time': kernel_build_time}\n", + "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n", + "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "scrolled": true }, @@ -1406,185 +1533,21 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 47.540945053100586 seconds ---\n", - "[[ 6. 2. 6. ..., 2. 2. 2.]\n", - " [ 2. 12. 2. ..., 0. 0. 6.]\n", - " [ 6. 2. 6. ..., 2. 2. 2.]\n", - " ..., \n", - " [ 2. 0. 2. ..., 110. 42. 14.]\n", - " [ 2. 0. 2. ..., 42. 110. 14.]\n", - " [ 2. 6. 2. ..., 14. 14. 110.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.533318\n", - "With standard deviation: 6.213602\n", - "\n", - " Mean performance on test set: 36.055557\n", - "With standard deviation: 5.386696\n", - "\n", - " --- calculating kernel matrix when subtree height = 1 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 
188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 75.94973611831665 seconds ---\n", - "[[ 9. 3. 9. ..., 3. 3. 3.]\n", - " [ 3. 18. 3. ..., 0. 0. 9.]\n", - " [ 9. 3. 9. ..., 3. 3. 3.]\n", - " ..., \n", - " [ 3. 0. 3. ..., 165. 63. 21.]\n", - " [ 3. 0. 3. ..., 63. 165. 21.]\n", - " [ 3. 9. 3. ..., 21. 21. 165.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.464684\n", - "With standard deviation: 6.299737\n", - "\n", - " Mean performance on test set: 36.054735\n", - "With standard deviation: 5.384130\n", - "\n", - " --- calculating kernel matrix when subtree height = 2 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 
205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 98.63305306434631 seconds ---\n", - "[[ 12. 4. 12. ..., 4. 4. 4.]\n", - " [ 4. 24. 4. ..., 0. 0. 12.]\n", - " [ 12. 4. 12. ..., 4. 4. 4.]\n", - " ..., \n", - " [ 4. 0. 4. ..., 220. 84. 28.]\n", - " [ 4. 0. 4. ..., 84. 220. 28.]\n", - " [ 4. 12. 4. ..., 28. 28. 220.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.594816\n", - "With standard deviation: 6.106887\n", - "\n", - " Mean performance on test set: 36.069839\n", - "With standard deviation: 5.406605\n", - "\n", - " --- calculating kernel matrix when subtree height = 3 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 126.40115857124329 seconds ---\n", - "[[ 15. 5. 15. ..., 5. 5. 5.]\n", - " [ 5. 30. 5. ..., 0. 0. 15.]\n", - " [ 15. 5. 15. ..., 5. 5. 5.]\n", - " ..., \n", - " [ 5. 0. 5. ..., 275. 105. 35.]\n", - " [ 5. 0. 5. ..., 105. 275. 35.]\n", - " [ 5. 15. 5. ..., 35. 35. 275.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.545772\n", - "With standard deviation: 6.200795\n", - "\n", - " Mean performance on test set: 36.055164\n", - "With standard deviation: 5.385283\n", - "\n", - " --- calculating kernel matrix when subtree height = 4 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, 
*args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -1672,7 +1635,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height), base_kernel = 'sp')\n", + " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", "# np.savetxt(kernel_file, Kmatrix)\n", @@ -1725,7 +1688,7 @@ " y_test = y_perm[(num_train + 
num_val):datasize]\n", "\n", " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", diff --git a/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb index 1d0468f..08c2d33 100644 --- a/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb @@ -2,364 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 30, + "execution_count": 8, "metadata": { "scrolled": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. 
\n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Normalizing output y...\n", - "\n", - " Loading the train set kernel matrix from file...\n", - "[[ 0.15254237 0.08333333 0.0625 ..., 0.11363636 0.11363636\n", - " 0.11363636]\n", - " [ 0.08333333 0.18518519 0.15591398 ..., 0.16617791 0.16617791\n", - " 0.16890214]\n", - " [ 0.0625 0.15591398 0.15254237 ..., 0.12987013 0.12987013\n", - " 0.13163636]\n", - " ..., \n", - " [ 0.11363636 0.16617791 0.12987013 ..., 0.26383753 0.2639004\n", - " 0.26156557]\n", - " [ 0.11363636 0.16617791 0.12987013 ..., 0.2639004 0.26396688\n", - " 0.26162729]\n", - " [ 0.11363636 0.16890214 0.13163636 ..., 0.26156557 0.26162729\n", - " 0.25964592]]\n", - "\n", - " Loading the test set kernel matrix from file...\n", - "[[ 0.18518519 0.1715847 0.11111111 0.16588603 0.11904762 0.16450216\n", - " 0.17281421 0.14285714 0.125 0.16477273 0.16880154 0.14583333\n", - " 0.1660693 0.16906445 0.13333333 0.16612903 0.16420966 0.16441006\n", - " 0.15151515]\n", - " [ 0.1715847 0.19988118 0.15173333 0.18435596 0.16465263 0.21184723\n", - " 0.18985964 0.19960191 0.16819723 0.21540115 0.19575264 0.2041482\n", - " 0.21842419 0.20001664 0.18754969 0.2205599 0.20506165 0.22256445\n", - " 0.2141792 ]\n", - " [ 0.11111111 0.15173333 0.16303156 0.13416478 0.16903494 0.16960573\n", - " 0.13862936 0.18511129 0.16989276 0.17395417 0.14762351 0.18709221\n", - " 0.17706477 0.15293506 0.17970939 0.17975775 0.16082785 0.18295252\n", - " 0.19186573]\n", - " [ 0.16588603 0.18435596 0.13416478 0.17413923 0.14529511 0.19230449\n", - " 0.17775828 0.17598858 0.14892223 0.19462663 0.18166555 0.17986029\n", - " 0.1964604 0.18450695 0.16510376 0.19788853 0.1876399 0.19921541\n", - " 0.18843419]\n", - " [ 0.11904762 0.16465263 0.16903494 0.14529511 0.17703225 0.18464872\n", - " 0.15002895 0.19785455 0.17779663 
0.18950917 0.16010081 0.2005743\n", - " 0.19306131 0.16599977 0.19113529 0.1960531 0.175064 0.19963794\n", - " 0.20696464]\n", - " [ 0.16450216 0.21184723 0.16960573 0.19230449 0.18464872 0.23269314\n", - " 0.19681552 0.22450276 0.1871932 0.23765844 0.20733248 0.22967925\n", - " 0.241199 0.21337314 0.21125341 0.24426963 0.22285333 0.24802555\n", - " 0.24156669]\n", - " [ 0.17281421 0.18985964 0.13862936 0.17775828 0.15002895 0.19681552\n", - " 0.18309269 0.18152273 0.15411585 0.19935309 0.18641218 0.18556038\n", - " 0.20169527 0.18946029 0.17030032 0.20320694 0.19192382 0.2042596\n", - " 0.19428999]\n", - " [ 0.14285714 0.19960191 0.18511129 0.17598858 0.19785455 0.22450276\n", - " 0.18152273 0.23269314 0.20168735 0.23049584 0.19407926 0.23694176\n", - " 0.23486084 0.20134404 0.22042984 0.23854906 0.21275711 0.24302959\n", - " 0.24678197]\n", - " [ 0.125 0.16819723 0.16989276 0.14892223 0.17779663 0.1871932\n", - " 0.15411585 0.20168735 0.18391356 0.19188588 0.16365606 0.20428161\n", - " 0.1952436 0.16940489 0.1919249 0.19815511 0.17760881 0.20152837\n", - " 0.20988805]\n", - " [ 0.16477273 0.21540115 0.17395417 0.19462663 0.18950917 0.23765844\n", - " 0.19935309 0.23049584 0.19188588 0.24296859 0.21058278 0.23586086\n", - " 0.24679036 0.21702635 0.21699483 0.25006701 0.22724646 0.25407837\n", - " 0.24818625]\n", - " [ 0.16880154 0.19575264 0.14762351 0.18166555 0.16010081 0.20733248\n", - " 0.18641218 0.19407926 0.16365606 0.21058278 0.19214629 0.19842989\n", - " 0.21317298 0.19609213 0.18225175 0.2151567 0.20088139 0.2171273\n", - " 0.20810339]\n", - " [ 0.14583333 0.2041482 0.18709221 0.17986029 0.2005743 0.22967925\n", - " 0.18556038 0.23694176 0.20428161 0.23586086 0.19842989 0.24154885\n", - " 0.24042054 0.20590264 0.22439219 0.24421452 0.21769149 0.24880304\n", - " 0.25200246]\n", - " [ 0.1660693 0.21842419 0.17706477 0.1964604 0.19306131 0.241199\n", - " 0.20169527 0.23486084 0.1952436 0.24679036 0.21317298 0.24042054\n", - " 0.25107069 0.21988195 
0.22126548 0.25446921 0.23058896 0.25855949\n", - " 0.25312182]\n", - " [ 0.16906445 0.20001664 0.15293506 0.18450695 0.16599977 0.21337314\n", - " 0.18946029 0.20134404 0.16940489 0.21702635 0.19609213 0.20590264\n", - " 0.21988195 0.20052959 0.18917551 0.22212027 0.2061696 0.22441239\n", - " 0.21607563]\n", - " [ 0.13333333 0.18754969 0.17970939 0.16510376 0.19113529 0.21125341\n", - " 0.17030032 0.22042984 0.1919249 0.21699483 0.18225175 0.22439219\n", - " 0.22126548 0.18917551 0.2112185 0.224781 0.20021961 0.22904467\n", - " 0.23356012]\n", - " [ 0.16612903 0.2205599 0.17975775 0.19788853 0.1960531 0.24426963\n", - " 0.20320694 0.23854906 0.19815511 0.25006701 0.2151567 0.24421452\n", - " 0.25446921 0.22212027 0.224781 0.25800115 0.23326559 0.26226067\n", - " 0.25717144]\n", - " [ 0.16420966 0.20506165 0.16082785 0.1876399 0.175064 0.22285333\n", - " 0.19192382 0.21275711 0.17760881 0.22724646 0.20088139 0.21769149\n", - " 0.23058896 0.2061696 0.20021961 0.23326559 0.21442192 0.2364528\n", - " 0.22891788]\n", - " [ 0.16441006 0.22256445 0.18295252 0.19921541 0.19963794 0.24802555\n", - " 0.2042596 0.24302959 0.20152837 0.25407837 0.2171273 0.24880304\n", - " 0.25855949 0.22441239 0.22904467 0.26226067 0.2364528 0.26687384\n", - " 0.26210305]\n", - " [ 0.15151515 0.2141792 0.19186573 0.18843419 0.20696464 0.24156669\n", - " 0.19428999 0.24678197 0.20988805 0.24818625 0.20810339 0.25200246\n", - " 0.25312182 0.21607563 0.23356012 0.25717144 0.22891788 0.26210305\n", - " 0.26386999]]\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Precomputed metric requires shape (n_queries, n_indexed). 
Got (19, 19) for 164 indexed.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;31m# predict on the test set\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 135\u001b[0;31m \u001b[0my_pred_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKR\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 136\u001b[0m \u001b[0;31m# print(y_pred)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/kernel_ridge.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 182\u001b[0m \"\"\"\n\u001b[1;32m 183\u001b[0m \u001b[0mcheck_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"X_fit_\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"dual_coef_\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 184\u001b[0;31m \u001b[0mK\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_kernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_fit_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 185\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mK\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdual_coef_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/kernel_ridge.py\u001b[0m in \u001b[0;36m_get_kernel\u001b[0;34m(self, X, Y)\u001b[0m\n\u001b[1;32m 119\u001b[0m \"coef0\": self.coef0}\n\u001b[1;32m 120\u001b[0m return pairwise_kernels(X, Y, metric=self.kernel,\n\u001b[0;32m--> 121\u001b[0;31m filter_params=True, **params)\n\u001b[0m\u001b[1;32m 122\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mpairwise_kernels\u001b[0;34m(X, Y, metric, filter_params, n_jobs, **kwds)\u001b[0m\n\u001b[1;32m 1389\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1390\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmetric\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"precomputed\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1391\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_pairwise_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecomputed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1392\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1393\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGPKernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mcheck_pairwise_arrays\u001b[0;34m(X, Y, 
precomputed, dtype)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;34m\"(n_queries, n_indexed). Got (%d, %d) \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;34m\"for %d indexed.\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m (X.shape[0], X.shape[1], Y.shape[0]))\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m raise ValueError(\"Incompatible dimension for X and Y matrices: \"\n", - "\u001b[0;31mValueError\u001b[0m: Precomputed metric requires shape (n_queries, n_indexed). Got (19, 19) for 164 indexed." - ] - } - ], - "source": [ - "# Author: Elisabetta Ghisu\n", - "\n", - "\"\"\"\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. 
\n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\"\"\"\n", - "\n", - "print(__doc__)\n", - "\n", - "import sys\n", - "import pathlib\n", - "import os\n", - "sys.path.insert(0, \"../py-graph/\")\n", - "from tabulate import tabulate\n", - "\n", - "import random\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from sklearn.kernel_ridge import KernelRidge # 0.17\n", - "from sklearn.metrics import accuracy_score, mean_squared_error\n", - "from sklearn import svm\n", - "\n", - "from kernels.marginalizedKernel import marginalizedkernel\n", - "from utils.graphfiles import loadDataset\n", - "\n", - "# print('\\n Loading dataset from file...')\n", - "# dataset, y = loadDataset(\"/home/ljia/Documents/research-repo/datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - "# y = np.array(y)\n", - "# print(y)\n", - "\n", - "# kernel_file_path = 'marginalizedkernelmatrix.ds'\n", - "# path = pathlib.Path(kernel_file_path)\n", - "# if path.is_file():\n", - "# print('\\n Loading the matrix from file...')\n", - "# Kmatrix = np.loadtxt(kernel_file_path)\n", - "# print(Kmatrix)\n", - "# else:\n", - "# print('\\n Calculating kernel matrix, this could take a while...')\n", - "# Kmatrix = marginalizeKernel(dataset)\n", - "# print(Kmatrix)\n", - "# print('Saving kernel matrix to file...')\n", - "# np.savetxt(kernel_file_path, Kmatrix)\n", - "\n", - "# setup the parameters\n", - "model_type = 'regression' # Regression or classification problem\n", - "print('\\n --- This is a %s problem ---' % model_type)\n", - "\n", - "# datasize = len(dataset)\n", - "trials = 100 # Trials for hyperparameters random search\n", - "splits = 100 # Number of splits of the data\n", - "alpha_grid = np.linspace(0.01, 100, num = trials) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", - "# C_grid = np.linspace(0.0001, 10, num = trials)\n", - "random.seed(20) # 
Set the seed for uniform parameter distribution\n", - "data_dir = '/home/ljia/Documents/research-repo/datasets/acyclic/Acyclic/'\n", - "\n", - "# set the output path\n", - "kernel_file_path = 'kernelmatrices_marginalized_acyclic/'\n", - "if not os.path.exists(kernel_file_path):\n", - " os.makedirs(kernel_file_path)\n", - "\n", - "\n", - "\"\"\"\n", - "- Here starts the main program\n", - "- First we permute the data, then for each split we evaluate corresponding performances\n", - "- In the end, the performances are averaged over the test sets\n", - "\"\"\"\n", - "\n", - "# Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - "val_split = []\n", - "test_split = []\n", - "\n", - "p_quit = 0.5\n", - "\n", - "# for each split of the data\n", - "for j in range(10):\n", - " dataset_train, y_train = loadDataset(data_dir + 'trainset_' + str(j) + '.ds')\n", - " dataset_test, y_test = loadDataset(data_dir + 'testset_' + str(j) + '.ds')\n", - " \n", - " # Normalization step (for real valued targets only)\n", - " if model_type == 'regression':\n", - " print('\\n Normalizing output y...')\n", - " y_train_mean = np.mean(y_train)\n", - " y_train_std = np.std(y_train)\n", - " y_train = (y_train - y_train_mean) / float(y_train_std)\n", - "# print(y)\n", - " \n", - " # save kernel matrices to files / read kernel matrices from files\n", - " kernel_file_train = kernel_file_path + 'train' + str(j) + '_pquit_' + str(p_quit)\n", - " kernel_file_test = kernel_file_path + 'test' + str(j) + '_pquit_' + str(p_quit)\n", - " path_train = pathlib.Path(kernel_file_train)\n", - " path_test = pathlib.Path(kernel_file_test)\n", - " # get train set kernel matrix\n", - " if path_train.is_file():\n", - " print('\\n Loading the train set kernel matrix from file...')\n", - " Kmatrix_train = np.loadtxt(kernel_file_train)\n", - " print(Kmatrix_train)\n", - " else:\n", - " print('\\n Calculating train set kernel matrix, this could take a 
while...')\n", - " Kmatrix_train = marginalizedkernel(dataset_train, p_quit, 20)\n", - " print(Kmatrix_train)\n", - " print('\\n Saving train set kernel matrix to file...')\n", - " np.savetxt(kernel_file_train, Kmatrix_train)\n", - " # get test set kernel matrix\n", - " if path_test.is_file():\n", - " print('\\n Loading the test set kernel matrix from file...')\n", - " Kmatrix_test = np.loadtxt(kernel_file_test)\n", - " print(Kmatrix_test)\n", - " else:\n", - " print('\\n Calculating test set kernel matrix, this could take a while...')\n", - " Kmatrix_test = marginalizedkernel(dataset_test, p_quit, 20)\n", - " print(Kmatrix_test)\n", - " print('\\n Saving test set kernel matrix to file...')\n", - " np.savetxt(kernel_file_test, Kmatrix_test)\n", - "\n", - " # For each parameter trial\n", - " for i in range(trials):\n", - " # For regression use the Kernel Ridge method\n", - " if model_type == 'regression':\n", - " # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", - "\n", - " # Fit the kernel ridge model\n", - " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", - " KR.fit(Kmatrix_train, y_train)\n", - "\n", - " # predict on the test set\n", - " y_pred_test = KR.predict(Kmatrix_test)\n", - " # print(y_pred)\n", - "\n", - " # adjust prediction: needed because the training targets have been normalized\n", - " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", - " # print(y_pred_test)\n", - "\n", - " # root mean squared error in test \n", - " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", - " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", - " # print('The performance on the test set is: %3f' % rmse_test)\n", - "\n", - " # --- FIND THE OPTIMAL PARAMETERS --- #\n", - " # For regression: minimise the mean squared error\n", - " if model_type == 'regression':\n", - "\n", - " # get optimal parameter on test 
(argmin mean squared error)\n", - " min_idx = np.argmin(perf_all_test)\n", - " alpha_opt = alpha_grid[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", - " perf_test_opt = perf_all_test[min_idx]\n", - "\n", - " print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", - " \n", - " \n", - " \n", - "\n", - "# For each split of the data\n", - "for j in range(10, 10 + splits):\n", - " print('Starting split %d...' % j)\n", - "\n", - " # Set the random set for data permutation\n", - " random_state = int(j)\n", - " np.random.seed(random_state)\n", - " idx_perm = np.random.permutation(datasize)\n", - "# print(idx_perm)\n", - " \n", - " # Permute the data\n", - " y_perm = y[idx_perm] # targets permutation\n", - "# print(y_perm)\n", - " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", - "# print(Kmatrix_perm)\n", - " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", - " \n", - " # Set the training, validation and test\n", - " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", - " \n", - " # Split the kernel matrix\n", - " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", - "\n", - " # Split the targets\n", - " y_train = y_perm[0:num_train]\n", - "\n", - " # Normalization step (for real valued targets only)\n", - " print('\\n Normalizing output y...')\n", - " if 
model_type == 'regression':\n", - " y_train_mean = np.mean(y_train)\n", - " y_train_std = np.std(y_train)\n", - " y_train = (y_train - y_train_mean) / float(y_train_std)\n", - "# print(y)\n", - " \n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", - " \n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", - " perf_all_test = []\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -615,17 +261,17 @@ "With standard deviation: 4.891587\n", "\n", "\n", - " p_quit RMSE std\n", - "-------- ------- -------\n", - " 0.1 18.5188 7.749\n", - " 0.2 17.8991 6.59104\n", - " 0.3 18.3924 7.10161\n", - " 0.4 19.6233 6.24807\n", - " 0.5 19.9936 6.29951\n", - " 0.6 20.5466 6.26173\n", - " 0.7 21.7018 6.33531\n", - " 0.8 23.1489 6.10246\n", - " 0.9 24.7157 4.89159\n" + " std RMSE p_quit\n", + "------- ------- --------\n", + "7.749 18.5188 0.1\n", + "6.59104 17.8991 0.2\n", + "7.10161 18.3924 0.3\n", + "6.24807 19.6233 0.4\n", + "6.29951 19.9936 0.5\n", + "6.26173 20.5466 0.6\n", + "6.33531 21.7018 0.7\n", + "6.10246 23.1489 0.8\n", + "4.89159 24.7157 0.9\n" ] } ], @@ -651,7 +297,7 @@ "import sys\n", "import os\n", "import pathlib\n", - "sys.path.insert(0, \"../py-graph/\")\n", + "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", "import random\n", @@ -662,8 +308,8 @@ "from sklearn.metrics import accuracy_score, mean_squared_error\n", "from sklearn import svm\n", "\n", - "from kernels.marginalizedKernel import marginalizedkernel\n", - "from utils.graphfiles import loadDataset\n", + "from pygraph.kernels.marginalizedKernel import marginalizedkernel\n", + "from pygraph.utils.graphfiles import loadDataset\n", "\n", "print('\\n Loading dataset from file...')\n", "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", @@ 
-711,7 +357,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = marginalizedkernel(dataset, p_quit, 20)\n", + " Kmatrix, run_time = marginalizedkernel(dataset, p_quit, 20, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb index 9528f89..86bd8fc 100644 --- a/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb @@ -545,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -588,18 +588,27 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n" - ] - }, - { - "ename": "NameError", - "evalue": "name 'pathKernel' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 74\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpathKernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 75\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'pathKernel' is not defined" + " Calculating kernel matrix, this could take a while...\n", + "--- mean average path kernel matrix of size 185 built in 38.70095658302307 seconds ---\n", + "[[ 0.55555556 0.22222222 0. ..., 0. 0. 0. ]\n", + " [ 0.22222222 0.27777778 0. ..., 0. 0. 0. ]\n", + " [ 0. 0. 0.55555556 ..., 0.03030303 0.03030303\n", + " 0.03030303]\n", + " ..., \n", + " [ 0. 0. 0.03030303 ..., 0.08297521 0.05553719\n", + " 0.05256198]\n", + " [ 0. 0. 0.03030303 ..., 0.05553719 0.07239669\n", + " 0.0538843 ]\n", + " [ 0. 0. 0.03030303 ..., 0.05256198 0.0538843\n", + " 0.07438017]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on val set: 11.907089\n", + "With standard deviation: 4.781924\n", + "\n", + " Mean performance on test set: 14.270816\n", + "With standard deviation: 6.366698\n" ] } ], @@ -677,7 +686,7 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = pathkernel(dataset)\n", + " Kmatrix, run_time = pathkernel(dataset, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb index 1bf4920..b3e0f40 100644 --- a/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb @@ -182,7 +182,8 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a 
while...')\n", - " Kmatrix = spkernel(dataset)\n", + " #@Q: is it appropriate to use bond type between atoms as the edge weight to calculate shortest path????????\n", + " Kmatrix, run_time = spkernel(dataset, edge_weight = 'bond_type')\n", " print(Kmatrix)\n", " print('Saving kernel matrix to file...')\n", " np.savetxt(kernel_file_path, Kmatrix)\n", diff --git a/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb b/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb index 78ed792..4b7d560 100644 --- a/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb +++ b/notebooks/run_WeisfeilerLehmankernel_acyclic.ipynb @@ -221,8 +221,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 20, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", @@ -236,6 +238,7 @@ " --- for graph 0 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", + "all_labels_ori: {'C', 'O'}\n", "num_of_each_label: {'C': 5, 'O': 2}\n", "all_num_of_each_label: [{'C': 5, 'O': 2}]\n", "num_of_labels: 2\n", @@ -244,171 +247,143 @@ " --- for graph 1 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", + "all_labels_ori: {'C', 'O', 'S'}\n", "num_of_each_label: {'C': 6, 'S': 2}\n", "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n", "num_of_labels: 2\n", "all_labels_ori: {'C', 'O', 'S'}\n", "\n", - " labels: {'C', 'O'}\n", - "\n", - " vector1: [[5 2]]\n", + " all_num_of_labels_occured: 3\n", "\n", - " vector2: [[5 2]]\n", + " --- calculating kernel matrix ---\n", "\n", - " Kmatrix: [[ 29. 0.]\n", + " labels: {'C', 'O'}\n", + "vector1: [[5 2]]\n", + "vector2: [[5 2]]\n", + "Kmatrix: [[ 29. 0.]\n", " [ 0. 0.]]\n", "\n", " labels: {'C', 'O', 'S'}\n", - "\n", - " vector1: [[5 2 0]]\n", - "\n", - " vector2: [[6 0 2]]\n", - "\n", - " Kmatrix: [[ 29. 30.]\n", + "vector1: [[5 2 0]]\n", + "vector2: [[6 0 2]]\n", + "Kmatrix: [[ 29. 30.]\n", " [ 30. 
0.]]\n", "\n", " labels: {'C', 'S'}\n", - "\n", - " vector1: [[6 2]]\n", - "\n", - " vector2: [[6 2]]\n", - "\n", - " Kmatrix: [[ 29. 30.]\n", + "vector1: [[6 2]]\n", + "vector2: [[6 2]]\n", + "Kmatrix: [[ 29. 30.]\n", " [ 30. 40.]]\n", "\n", - " --- height = 0 --- \n", + " --- height = 1 --- \n", "\n", " --- for graph 0 --- \n", "\n", - "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", - "num_of_each_label: {'C': 5, 'O': 2}\n", - "num_of_labels: 2\n", - "all_labels_ori: {'C', 'O'}\n", - "num_of_labels_occured: 2\n", "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n", "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n", - "set_compressed: {'OCC': '3', 'COO': '4', 'CCO': '5', 'CC': '6'}\n", - "all_set_compressed: {'OCC': '3', 'COO': '4', 'CCO': '5', 'CC': '6'}\n", - "num_of_labels_occured: 6\n", + "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "num_of_labels_occured: 7\n", "\n", - " compressed labels: {0: '6', 1: '6', 2: '5', 3: '5', 4: '4', 5: '3', 6: '3'}\n", - "labels_comp: ['6', '6', '5', '5', '4', '3', '3']\n", - "num_of_each_label: {'3': 2, 'C': 5, '6': 2, '5': 2, 'O': 2, '4': 1}\n", - "all_num_of_each_label: [{'3': 2, 'C': 5, '6': 2, '5': 2, 'O': 2, '4': 1}]\n", + " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n", + "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n", + "all_labels_ori: {'5', '4', '6', '7'}\n", + "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n", "\n", " --- for graph 1 --- \n", "\n", - "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", - "num_of_each_label: {'C': 6, 'S': 2}\n", - "num_of_labels: 2\n", - "all_labels_ori: {'C', 'O', 'S'}\n", - "num_of_labels_occured: 7\n", "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n", "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n", - "set_compressed: {'SCC': '8', 'CC': '6', 
'CCS': '9', 'CCSS': '10'}\n", - "all_set_compressed: {'SCC': '8', 'COO': '4', 'CCS': '9', 'OCC': '3', 'CCO': '5', 'CCSS': '10', 'CC': '6'}\n", + "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n", + "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n", "num_of_labels_occured: 10\n", "\n", - " compressed labels: {0: '6', 1: '6', 2: '6', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", - "labels_comp: ['6', '6', '6', '9', '9', '10', '8', '8']\n", - "num_of_each_label: {'10': 1, 'C': 6, '6': 3, 'S': 2, '8': 2, '9': 2}\n", - "all_num_of_each_label: [{'3': 2, 'C': 5, '6': 2, '5': 2, 'O': 2, '4': 1}, {'10': 1, 'C': 6, '6': 3, 'S': 2, '8': 2, '9': 2}]\n", - "\n", - " labels: {'3', '4', 'O', 'C', '6', '5'}\n", - "\n", - " vector1: [[2 1 2 5 2 2]]\n", - "\n", - " vector2: [[2 1 2 5 2 2]]\n", + " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", + "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n", + "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n", + "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n", "\n", - " labels: {'3', '10', '4', 'O', '9', 'C', '6', 'S', '5', '8'}\n", - "\n", - " vector1: [[2 0 1 2 0 5 2 0 2 0]]\n", + " all_num_of_labels_occured: 10\n", "\n", - " vector2: [[0 1 0 0 2 6 3 2 0 2]]\n", + " --- calculating kernel matrix ---\n", "\n", - " labels: {'10', '9', 'C', '6', 'S', '8'}\n", + " labels: {'5', '4', '6', '7'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", "\n", - " vector1: [[1 2 6 3 2 2]]\n", + " labels: {'10', '4', '7', '9', '6', '5', '8'}\n", + "vector1: [[0 2 2 0 2 1 0]]\n", + "vector2: [[1 0 3 2 0 0 2]]\n", "\n", - " vector2: [[1 2 6 3 2 2]]\n", + " labels: {'8', '10', '7', '9'}\n", + "vector1: [[2 1 3 2]]\n", + "vector2: [[2 1 3 2]]\n", "\n", - " all_num_of_labels_occured: 3\n", + " Kmatrix: [[ 42. 
36.]\n", + " [ 36. 58.]]\n", "\n", - " Kmatrix: [[ 71. 66.]\n", - " [ 66. 98.]]\n", - "\n", - " --- height = 1 --- \n", + " --- height = 2 --- \n", "\n", " --- for graph 0 --- \n", "\n", - "labels_ori: ['6', '6', '5', '5', '4', '3', '3']\n", - "num_of_each_label: {'3': 2, '5': 2, '4': 1, '6': 2}\n", - "num_of_labels: 4\n", - "all_labels_ori: {'3', '5', '4', '6'}\n", - "num_of_labels_occured: 7\n", - "multiset: ['65', '65', '536', '536', '433', '345', '345']\n", - "set_unique: ['345', '536', '65', '433']\n", - "set_compressed: {'345': '8', '536': '9', '65': '10', '433': '11'}\n", - "all_set_compressed: {'345': '8', '536': '9', '65': '10', '433': '11'}\n", - "num_of_labels_occured: 11\n", + "multiset: ['76', '76', '647', '647', '544', '456', '456']\n", + "set_unique: ['647', '76', '456', '544']\n", + "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n", + "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n", + "num_of_labels_occured: 14\n", "\n", - " compressed labels: {0: '10', 1: '10', 2: '9', 3: '9', 4: '11', 5: '8', 6: '8'}\n", - "labels_comp: ['10', '10', '9', '9', '11', '8', '8']\n", - "num_of_each_label: {'3': 2, '10': 2, '4': 1, '9': 2, '6': 2, '11': 1, '8': 2, '5': 2}\n", - "all_num_of_each_label: [{'3': 2, '10': 2, '4': 1, '9': 2, '6': 2, '11': 1, '8': 2, '5': 2}]\n", + " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n", + "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n", + "all_labels_ori: {'14', '12', '11', '13'}\n", + "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n", "\n", " --- for graph 1 --- \n", "\n", - "labels_ori: ['6', '6', '6', '9', '9', '10', '8', '8']\n", - "num_of_each_label: {'10': 1, '6': 3, '9': 2, '8': 2}\n", - "num_of_labels: 4\n", - "all_labels_ori: {'3', '10', '4', '9', '6', '5', '8'}\n", - "num_of_labels_occured: 14\n", - "multiset: ['69', '69', '610', '968', '968', 
'10688', '8109', '8109']\n", - "set_unique: ['69', '968', '8109', '10688', '610']\n", - "set_compressed: {'69': '15', '8109': '17', '10688': '18', '968': '16', '610': '19'}\n", - "all_set_compressed: {'69': '15', '8109': '17', '968': '16', '345': '8', '10688': '18', '610': '19', '536': '9', '65': '10', '433': '11'}\n", + "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n", + "set_unique: ['710', '8109', '79', '10788', '978']\n", + "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n", + "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n", "num_of_labels_occured: 19\n", "\n", - " compressed labels: {0: '15', 1: '15', 2: '19', 3: '16', 4: '16', 5: '18', 6: '17', 7: '17'}\n", - "labels_comp: ['15', '15', '19', '16', '16', '18', '17', '17']\n", - "num_of_each_label: {'10': 1, '18': 1, '19': 1, '9': 2, '17': 2, '6': 3, '8': 2, '16': 2, '15': 2}\n", - "all_num_of_each_label: [{'3': 2, '10': 2, '4': 1, '9': 2, '6': 2, '11': 1, '8': 2, '5': 2}, {'10': 1, '18': 1, '19': 1, '9': 2, '17': 2, '6': 3, '8': 2, '16': 2, '15': 2}]\n", - "\n", - " labels: {'3', '10', '4', '5', '9', '6', '11', '8'}\n", + " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n", + "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n", + "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n", "\n", - " vector1: [[2 2 1 2 2 2 1 2]]\n", + " all_num_of_labels_occured: 19\n", "\n", - " vector2: [[2 2 1 2 2 2 1 2]]\n", + " --- calculating kernel matrix ---\n", "\n", - " labels: {'3', '10', '4', '18', '5', '19', '9', '17', '6', '11', '8', '16', '15'}\n", + " labels: {'14', '12', '11', '13'}\n", + "vector1: 
[[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", "\n", - " vector1: [[2 2 1 0 2 0 2 0 2 1 2 0 0]]\n", + " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "vector1: [[0 0 2 2 0 2 1 0 0]]\n", + "vector2: [[1 2 0 0 2 0 0 2 1]]\n", "\n", - " vector2: [[0 1 0 1 0 1 2 2 3 0 2 2 2]]\n", + " labels: {'18', '17', '15', '16', '19'}\n", + "vector1: [[1 2 1 2 2]]\n", + "vector2: [[1 2 1 2 2]]\n", "\n", - " labels: {'10', '18', '19', '9', '17', '6', '8', '16', '15'}\n", - "\n", - " vector1: [[1 1 1 2 2 3 2 2 2]]\n", - "\n", - " vector2: [[1 1 1 2 2 3 2 2 2]]\n", - "\n", - " all_num_of_labels_occured: 10\n", + " Kmatrix: [[ 55. 36.]\n", + " [ 36. 72.]]\n", "\n", - " Kmatrix: [[ 97. 82.]\n", - " [ 82. 130.]]\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel built in 0.003629922866821289 seconds ---\n" + " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n" ] }, { "data": { "text/plain": [ - "array([[ 97., 82.],\n", - " [ 82., 130.]])" + "array([[ 55., 36.],\n", + " [ 36., 72.]])" ] }, - "execution_count": 6, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -635,6 +610,8 @@ " print('\\n --- for graph %d --- \\n' % (idx))\n", " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_ori: %s' % (labels_ori))\n", + " all_labels_ori.update(labels_ori)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", @@ -646,46 +623,48 @@ " all_labels_ori.update(labels_ori)\n", " print('all_labels_ori: %s' % (all_labels_ori))\n", " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", + " \n", " # calculate subtree kernel with the 0th iteration and add it to the final kernel\n", + " print('\\n 
--- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", - " print('\\n Kmatrix: %s' % (Kmatrix))\n", - " \n", - " \n", + " print('Kmatrix: %s' % (Kmatrix))\n", + "\n", " \n", " # iterate each height\n", - " for h in range(height + 1):\n", + " for h in range(1, height + 1):\n", " print('\\n --- height = %d --- ' % (h))\n", - " all_labels_ori = set() # all unique orignal labels in all graphs in this iteration\n", - "# all_labels_comp = set() # all unique compressed labels in all graphs in this iteration\n", - " all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration\n", " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n", " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n", + " all_labels_ori = set()\n", + " all_num_of_each_label = []\n", " \n", " # for each graph\n", " for idx, G in enumerate(Gn):\n", - " # get the set of original labels\n", + "# # get the set of original labels\n", " print('\\n --- for graph %d --- \\n' % (idx))\n", - " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", - " print('labels_ori: %s' % (labels_ori))\n", - " 
num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", - " print('num_of_each_label: %s' % (num_of_each_label))\n", - " num_of_labels = len(num_of_each_label) # number of all unique labels\n", - " print('num_of_labels: %s' % (num_of_labels))\n", + "# labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", + "# print('labels_ori: %s' % (labels_ori))\n", + "# num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", + "# print('num_of_each_label: %s' % (num_of_each_label))\n", + "# num_of_labels = len(num_of_each_label) # number of all unique labels\n", + "# print('num_of_labels: %s' % (num_of_labels))\n", " \n", - " all_labels_ori.update(labels_ori)\n", - " print('all_labels_ori: %s' % (all_labels_ori))\n", - " # num_of_labels_occured += num_of_labels #@todo not precise\n", - " num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", - " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", + "# all_labels_ori.update(labels_ori)\n", + "# print('all_labels_ori: %s' % (all_labels_ori))\n", + "# # num_of_labels_occured += num_of_labels #@todo not precise\n", + "# num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", + "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", @@ -718,7 +697,6 @@ "# num_of_labels_occured += len(set_compressed) #@todo not precise\n", " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", " \n", - "\n", " # relabel nodes\n", " # nx.relabel_nodes(G, set_compressed, copy = False)\n", " for node in G.nodes(data = True):\n", @@ -728,25 +706,29 @@ " # get the set of compressed labels\n", " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", " print('labels_comp: %s' % (labels_comp))\n", - " num_of_each_label.update(dict(Counter(labels_comp)))\n", + 
" all_labels_ori.update(labels_comp)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", + " num_of_each_label = dict(Counter(labels_comp))\n", " print('num_of_each_label: %s' % (num_of_each_label))\n", " all_num_of_each_label.append(num_of_each_label)\n", " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n", + " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " \n", " # calculate subtree kernel with h iterations and add it to the final kernel\n", + " print('\\n --- calculating kernel matrix ---')\n", " for i in range(0, len(Gn)):\n", " for j in range(i, len(Gn)):\n", " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", " print('\\n labels: %s' % (labels))\n", " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", - " print('\\n vector1: %s' % (vector1))\n", - " print('\\n vector2: %s' % (vector2))\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", " Kmatrix[j][i] = Kmatrix[i][j]\n", " \n", - " all_num_of_labels_occured += len(all_labels_ori)\n", - " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", " print('\\n Kmatrix: %s' % (Kmatrix))\n", "\n", " return Kmatrix\n", @@ -766,13 +748,13 @@ "G2 = dataset[80]\n", "print(nx.get_node_attributes(G2, 'label'))\n", "\n", - "weisfeilerlehmankernel(G1, G2, height = 1)\n", + "weisfeilerlehmankernel(G1, G2, height = 2)\n", "# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -790,7 +772,7 @@ "correspond to the average of the 
performances on the test sets. \n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", "\n", " --- calculating kernel matrix when subtree height = 0 ---\n", @@ -819,22 +801,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.49373626708984375 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3845643997192383 seconds ---\n", + "[[ 5. 6. 4. ..., 20. 20. 20.]\n", + " [ 6. 8. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 5. ..., 21. 21. 21.]\n", " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 154.]]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 12.761978\n", - "With standard deviation: 10.086502\n", + " Mean performance on train set: 141.418957\n", + "With standard deviation: 1.082842\n", "\n", - " Mean performance on test set: 9.014031\n", - "With standard deviation: 6.357865\n", + " Mean performance on test set: 36.210792\n", + "With standard deviation: 7.331787\n", "\n", " --- calculating kernel matrix when subtree height = 1 ---\n", "\n", @@ -862,22 +844,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.0043864250183105 seconds ---\n", - "[[ 20. 14. 8. ..., 20. 20. 22.]\n", - " [ 14. 32. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 20. ..., 25. 25. 
30.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.853447437286377 seconds ---\n", + "[[ 10. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 16. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 10. ..., 22. 22. 24.]\n", " ..., \n", - " [ 20. 28. 25. ..., 188. 180. 145.]\n", - " [ 20. 28. 25. ..., 180. 182. 145.]\n", - " [ 22. 22. 30. ..., 145. 145. 238.]]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 24. ..., 122. 122. 154.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 22.747869\n", - "With standard deviation: 7.561365\n", + " Mean performance on train set: 140.065309\n", + "With standard deviation: 0.877976\n", "\n", - " Mean performance on test set: 19.457133\n", - "With standard deviation: 5.057464\n", + " Mean performance on test set: 9.000982\n", + "With standard deviation: 6.371454\n", "\n", " --- calculating kernel matrix when subtree height = 2 ---\n", "\n", @@ -905,22 +887,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.602942705154419 seconds ---\n", - "[[ 30. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 48. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 30. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.374389410018921 seconds ---\n", + "[[ 15. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 24. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 15. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 246. 209. 147.]\n", - " [ 20. 28. 25. ..., 209. 220. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 286.]]\n", + " [ 20. 20. 22. ..., 159. 151. 124.]\n", + " [ 20. 20. 22. ..., 151. 153. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
185.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 21.407092\n", - "With standard deviation: 6.415967\n", + " Mean performance on train set: 140.074983\n", + "With standard deviation: 0.928821\n", "\n", - " Mean performance on test set: 23.466810\n", - "With standard deviation: 5.836831\n", + " Mean performance on test set: 19.811299\n", + "With standard deviation: 4.049105\n", "\n", " --- calculating kernel matrix when subtree height = 3 ---\n", "\n", @@ -954,22 +936,22 @@ "output_type": "stream", "text": [ "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2096023559570312 seconds ---\n", - "[[ 40. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 64. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 40. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.9141185283660889 seconds ---\n", + "[[ 20. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 32. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 20. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 304. 217. 147.]\n", - " [ 20. 28. 25. ..., 217. 250. 147.]\n", - " [ 23. 22. 32. ..., 147. 147. 314.]]\n", + " [ 20. 20. 22. ..., 188. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 168. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 202.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 24.747018\n", - "With standard deviation: 6.547340\n", + " Mean performance on train set: 140.197806\n", + "With standard deviation: 0.873857\n", "\n", - " Mean performance on test set: 27.961360\n", - "With standard deviation: 6.291821\n", + " Mean performance on test set: 25.045500\n", + "With standard deviation: 4.942763\n", "\n", " --- calculating kernel matrix when subtree height = 4 ---\n", "\n", @@ -997,22 +979,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.7832393646240234 seconds ---\n", - "[[ 50. 14. 8. ..., 20. 20. 
23.]\n", - " [ 14. 80. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 50. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.393263578414917 seconds ---\n", + "[[ 25. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 40. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 25. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 362. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 280. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 336.]]\n", + " [ 20. 20. 22. ..., 217. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 183. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 213.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 27.197367\n", - "With standard deviation: 5.980185\n", + " Mean performance on train set: 140.272421\n", + "With standard deviation: 0.838915\n", "\n", - " Mean performance on test set: 30.614531\n", - "With standard deviation: 6.852841\n", + " Mean performance on test set: 28.225454\n", + "With standard deviation: 6.521196\n", "\n", " --- calculating kernel matrix when subtree height = 5 ---\n", "\n", @@ -1040,22 +1022,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.276118040084839 seconds ---\n", - "[[ 60. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 96. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 60. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.893545389175415 seconds ---\n", + "[[ 30. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 48. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 30. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 420. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 310. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 358.]]\n", + " [ 20. 20. 22. ..., 246. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 198. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
224.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 29.010593\n", - "With standard deviation: 6.073672\n", + " Mean performance on train set: 140.247025\n", + "With standard deviation: 0.863630\n", "\n", - " Mean performance on test set: 32.130815\n", - "With standard deviation: 7.062947\n", + " Mean performance on test set: 30.635436\n", + "With standard deviation: 6.736466\n", "\n", " --- calculating kernel matrix when subtree height = 6 ---\n", "\n", @@ -1083,22 +1065,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.779860496520996 seconds ---\n", - "[[ 70. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 112. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 70. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.216407299041748 seconds ---\n", + "[[ 35. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 56. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 35. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 478. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 340. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 380.]]\n", + " [ 20. 20. 22. ..., 275. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 213. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
235.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 30.909632\n", - "With standard deviation: 6.490001\n", + " Mean performance on train set: 140.239201\n", + "With standard deviation: 0.872475\n", "\n", - " Mean performance on test set: 33.117974\n", - "With standard deviation: 7.069399\n", + " Mean performance on test set: 32.102695\n", + "With standard deviation: 6.856006\n", "\n", " --- calculating kernel matrix when subtree height = 7 ---\n", "\n", @@ -1124,18 +1106,7 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.446576833724976 seconds ---\n", - "[[ 80. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 128. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 80. ..., 25. 25. 32.]\n", - " ..., \n", - " [ 20. 28. 25. ..., 536. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 370. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 402.]]\n", - "\n", - " Saving kernel matrix to file...\n" + " Calculating kernel matrix, this could take a while...\n" ] }, { @@ -1143,11 +1114,22 @@ "output_type": "stream", "text": [ "\n", - " Mean performance on val set: 31.870406\n", - "With standard deviation: 6.522032\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8147408962249756 seconds ---\n", + "[[ 40. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 64. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 40. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 304. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 228. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 140.094026\n", + "With standard deviation: 0.917704\n", "\n", - " Mean performance on test set: 33.964633\n", - "With standard deviation: 7.270535\n", + " Mean performance on test set: 32.970919\n", + "With standard deviation: 6.896061\n", "\n", " --- calculating kernel matrix when subtree height = 8 ---\n", "\n", @@ -1175,22 +1157,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.85552978515625 seconds ---\n", - "[[ 90. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 144. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 90. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.3765342235565186 seconds ---\n", + "[[ 45. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 72. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 45. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 594. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 400. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 424.]]\n", + " [ 20. 20. 22. ..., 333. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 243. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 257.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.192715\n", - "With standard deviation: 6.389616\n", + " Mean performance on train set: 140.076304\n", + "With standard deviation: 0.931866\n", "\n", - " Mean performance on test set: 34.325288\n", - "With standard deviation: 7.375800\n", + " Mean performance on test set: 33.511228\n", + "With standard deviation: 6.907530\n", "\n", " --- calculating kernel matrix when subtree height = 9 ---\n", "\n", @@ -1218,22 +1200,22 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.650352239608765 seconds ---\n", - "[[ 100. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 160. 4. ..., 28. 28. 
22.]\n", - " [ 8. 4. 100. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.885462284088135 seconds ---\n", + "[[ 50. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 80. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 50. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 652. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 430. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 446.]]\n", + " [ 20. 20. 22. ..., 362. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 258. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 268.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 32.842545\n", - "With standard deviation: 6.213069\n", + " Mean performance on train set: 139.913361\n", + "With standard deviation: 0.928974\n", "\n", - " Mean performance on test set: 34.675515\n", - "With standard deviation: 7.314709\n", + " Mean performance on test set: 33.850152\n", + "With standard deviation: 6.914269\n", "\n", " --- calculating kernel matrix when subtree height = 10 ---\n", "\n", @@ -1261,42 +1243,41 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.818731069564819 seconds ---\n", - "[[ 110. 14. 8. ..., 20. 20. 23.]\n", - " [ 14. 176. 4. ..., 28. 28. 22.]\n", - " [ 8. 4. 110. ..., 25. 25. 32.]\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.313802719116211 seconds ---\n", + "[[ 55. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 88. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 55. ..., 22. 22. 26.]\n", " ..., \n", - " [ 20. 28. 25. ..., 710. 217. 151.]\n", - " [ 20. 28. 25. ..., 217. 460. 147.]\n", - " [ 23. 22. 32. ..., 151. 147. 468.]]\n", + " [ 20. 20. 22. ..., 391. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 273. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
279.]]\n", "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on val set: 33.151974\n", - "With standard deviation: 6.196320\n", - "\n", - " Mean performance on test set: 34.867215\n", - "With standard deviation: 7.324672\n", - "\n", - "\n", - " std height RMSE\n", - "------- -------- --------\n", - "6.35786 1 9.01403\n", - "5.05746 2.1 19.4571\n", - "5.83683 3.2 23.4668\n", - "6.29182 4.3 27.9614\n", - "6.85284 5.4 30.6145\n", - "7.06295 6.5 32.1308\n", - "7.0694 7.6 33.118\n", - "7.27054 8.7 33.9646\n", - "7.3758 9.8 34.3253\n", - "7.31471 10.9 34.6755\n", - "7.32467 12 34.8672\n" + " Mean performance on train set: 139.894176\n", + "With standard deviation: 0.942612\n", + "\n", + " Mean performance on test set: 34.096283\n", + "With standard deviation: 6.931154\n", + "\n", + "\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 36.2108 7.33179 141.419 1.08284 0.384564\n", + " 1 9.00098 6.37145 140.065 0.877976 0.853447\n", + " 2 19.8113 4.04911 140.075 0.928821 1.37439\n", + " 3 25.0455 4.94276 140.198 0.873857 1.91412\n", + " 4 28.2255 6.5212 140.272 0.838915 2.39326\n", + " 5 30.6354 6.73647 140.247 0.86363 2.89355\n", + " 6 32.1027 6.85601 140.239 0.872475 3.21641\n", + " 7 32.9709 6.89606 140.094 0.917704 3.81474\n", + " 8 33.5112 6.90753 140.076 0.931866 4.37653\n", + " 9 33.8502 6.91427 139.913 0.928974 4.88546\n", + " 10 34.0963 6.93115 139.894 0.942612 5.3138\n" ] } ], "source": [ - "# Author: Elisabetta Ghisu\n", "# test of WL subtree kernel\n", "\n", "\"\"\"\n", @@ -1310,7 +1291,7 @@ "correspond to the average of the performances on the test sets. 
\n", "\n", "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\"\"\"\n", "\n", "print(__doc__)\n", @@ -1318,6 +1299,7 @@ "import sys\n", "import os\n", "import pathlib\n", + "from collections import OrderedDict\n", "sys.path.insert(0, \"../\")\n", "from tabulate import tabulate\n", "\n", @@ -1332,11 +1314,11 @@ "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", "from pygraph.utils.graphfiles import loadDataset\n", "\n", - "val_means_height = []\n", - "val_stds_height = []\n", + "train_means_height = []\n", + "train_stds_height = []\n", "test_means_height = []\n", "test_stds_height = []\n", - "\n", + "kernel_build_time = []\n", "\n", "for height in np.linspace(0, 10, 11):\n", " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", @@ -1378,13 +1360,14 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height))\n", + " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n", + " kernel_build_time.append(run_time)\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " # np.savetxt(kernel_file, Kmatrix)\n", "\n", - " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - " val_split = []\n", + " # Initialize the performance of the best parameter trial on train with the corresponding performance on test\n", + " train_split = []\n", " test_split = []\n", "\n", " # For each split of the data\n", @@ -1404,17 +1387,14 @@ " # print(Kmatrix_perm)\n", " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", "\n", - " # Set the training, validation and 
test\n", + " # Set the training, test\n", " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", + " num_train = int((datasize * 90) / 100) # 90% (of entire dataset) for training\n", + " num_test = datasize - num_train # 10% (of entire dataset) for test\n", "\n", " # Split the kernel matrix\n", " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", + " Kmatrix_test = Kmatrix_perm[num_train:datasize, 0:num_train]\n", "\n", " # Split the targets\n", " y_train = y_perm[0:num_train]\n", @@ -1427,11 +1407,10 @@ " y_train = (y_train - y_train_mean) / float(y_train_std)\n", " # print(y)\n", "\n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", + " y_test = y_perm[num_train:datasize]\n", "\n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " # Record the performance for each parameter trial respectively on train and test set\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", @@ -1445,81 +1424,69 @@ " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", " KR.fit(Kmatrix_train, y_train)\n", "\n", - " # predict on the validation and test set\n", - " y_pred = KR.predict(Kmatrix_val)\n", + " # predict on the train and test set\n", + " y_pred_train = KR.predict(Kmatrix_train)\n", " y_pred_test = KR.predict(Kmatrix_test)\n", " # print(y_pred)\n", "\n", " # adjust prediction: 
needed because the training targets have been normalizaed\n", - " y_pred = y_pred * float(y_train_std) + y_train_mean\n", - " # print(y_pred)\n", + " y_pred_train = y_pred_train * float(y_train_std) + y_train_mean\n", " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", " # print(y_pred_test)\n", "\n", - " # root mean squared error on validation\n", - " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", - " perf_all_val.append(rmse)\n", - "\n", - " # root mean squared error in test \n", + " # root mean squared error in train set\n", + " rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))\n", + " perf_all_train.append(rmse_train)\n", + " # root mean squared error in test set\n", " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", " # print('The performance on the test set is: %3f' % rmse_test)\n", "\n", " # --- FIND THE OPTIMAL PARAMETERS --- #\n", " # For regression: minimise the mean squared error\n", " if model_type == 'regression':\n", "\n", - " # get optimal parameter on validation (argmin mean squared error)\n", + " # get optimal parameter on test (argmin mean squared error)\n", " min_idx = np.argmin(perf_all_test)\n", " alpha_opt = alpha_grid[min_idx]\n", "\n", - " # performance corresponding to optimal parameter on val\n", - " perf_val_opt = perf_all_val[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", + " # corresponding performance on train and test set for the same parameter\n", + " perf_train_opt = perf_all_train[min_idx]\n", " perf_test_opt = perf_all_test[min_idx]\n", - "\n", " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " # print('The best performance on the validation set is: %3f' % perf_val_opt)\n", " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", "\n", - " # 
append the best performance on validation\n", - " # at the current split\n", - " val_split.append(perf_val_opt)\n", - "\n", - " # append the correponding performance on the test set\n", + " # append the correponding performance on the train and test set\n", + " train_split.append(perf_train_opt)\n", " test_split.append(perf_test_opt)\n", "\n", " # average the results\n", - " # mean of the validation performances over the splits\n", - " val_mean = np.mean(np.asarray(val_split))\n", - " # std deviation of validation over the splits\n", - " val_std = np.std(np.asarray(val_split))\n", - "\n", - " # mean of the test performances over the splits\n", + " # mean of the train and test performances over the splits\n", + " train_mean = np.mean(np.asarray(train_split))\n", " test_mean = np.mean(np.asarray(test_split))\n", - " # std deviation of the test oer the splits\n", + " # std deviation of the train and test over the splits\n", + " train_std = np.std(np.asarray(train_split))\n", " test_std = np.std(np.asarray(test_split))\n", "\n", - " print('\\n Mean performance on val set: %3f' % val_mean)\n", - " print('With standard deviation: %3f' % val_std)\n", + " print('\\n Mean performance on train set: %3f' % train_mean)\n", + " print('With standard deviation: %3f' % train_std)\n", " print('\\n Mean performance on test set: %3f' % test_mean)\n", " print('With standard deviation: %3f' % test_std)\n", - " \n", - " val_means_height.append(val_mean)\n", - " val_stds_height.append(val_std)\n", + " \n", + " train_means_height.append(train_mean)\n", + " train_stds_height.append(train_std)\n", " test_means_height.append(test_mean)\n", " test_stds_height.append(test_std)\n", " \n", "print('\\n') \n", - "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" + "table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_height, 'std_test': test_stds_height, 'RMSE_train': train_means_height, 'std_train': 
train_stds_height, 'k_time': kernel_build_time}\n", + "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n", + "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "scrolled": true }, @@ -1566,185 +1533,21 @@ "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 47.540945053100586 seconds ---\n", - "[[ 6. 2. 6. ..., 2. 2. 2.]\n", - " [ 2. 12. 2. ..., 0. 0. 6.]\n", - " [ 6. 2. 6. ..., 2. 2. 2.]\n", - " ..., \n", - " [ 2. 0. 2. ..., 110. 42. 14.]\n", - " [ 2. 0. 2. ..., 42. 110. 14.]\n", - " [ 2. 6. 2. ..., 14. 14. 110.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.533318\n", - "With standard deviation: 6.213602\n", - "\n", - " Mean performance on test set: 36.055557\n", - "With standard deviation: 5.386696\n", - "\n", - " --- calculating kernel matrix when subtree height = 1 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 
188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 75.94973611831665 seconds ---\n", - "[[ 9. 3. 9. ..., 3. 3. 3.]\n", - " [ 3. 18. 3. ..., 0. 0. 9.]\n", - " [ 9. 3. 9. ..., 3. 3. 3.]\n", - " ..., \n", - " [ 3. 0. 3. ..., 165. 63. 21.]\n", - " [ 3. 0. 3. ..., 63. 165. 21.]\n", - " [ 3. 9. 3. ..., 21. 21. 165.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.464684\n", - "With standard deviation: 6.299737\n", - "\n", - " Mean performance on test set: 36.054735\n", - "With standard deviation: 5.384130\n", - "\n", - " --- calculating kernel matrix when subtree height = 2 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 
205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 98.63305306434631 seconds ---\n", - "[[ 12. 4. 12. ..., 4. 4. 4.]\n", - " [ 4. 24. 4. ..., 0. 0. 12.]\n", - " [ 12. 4. 12. ..., 4. 4. 4.]\n", - " ..., \n", - " [ 4. 0. 4. ..., 220. 84. 28.]\n", - " [ 4. 0. 4. ..., 84. 220. 28.]\n", - " [ 4. 12. 4. ..., 28. 28. 220.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.594816\n", - "With standard deviation: 6.106887\n", - "\n", - " Mean performance on test set: 36.069839\n", - "With standard deviation: 5.406605\n", - "\n", - " --- calculating kernel matrix when subtree height = 3 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 126.40115857124329 seconds ---\n", - "[[ 15. 5. 15. ..., 5. 5. 5.]\n", - " [ 5. 30. 5. ..., 0. 0. 15.]\n", - " [ 15. 5. 15. ..., 5. 5. 5.]\n", - " ..., \n", - " [ 5. 0. 5. ..., 275. 105. 35.]\n", - " [ 5. 0. 5. ..., 105. 275. 35.]\n", - " [ 5. 15. 5. ..., 35. 35. 275.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on val set: 38.545772\n", - "With standard deviation: 6.200795\n", - "\n", - " Mean performance on test set: 36.055164\n", - "With standard deviation: 5.385283\n", - "\n", - " --- calculating kernel matrix when subtree height = 4 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 
159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, 
*args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -1832,7 +1635,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, height = int(height), base_kernel = 'sp')\n", + " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", "# np.savetxt(kernel_file, Kmatrix)\n", @@ -1885,7 +1688,7 @@ " y_test = y_perm[(num_train + 
num_val):datasize]\n", "\n", " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_val = []\n", + " perf_all_train = []\n", " perf_all_test = []\n", "\n", " # For each parameter trial\n", diff --git a/notebooks/run_marginalizedkernel_acyclic.ipynb b/notebooks/run_marginalizedkernel_acyclic.ipynb index 46838bd..08c2d33 100644 --- a/notebooks/run_marginalizedkernel_acyclic.ipynb +++ b/notebooks/run_marginalizedkernel_acyclic.ipynb @@ -357,7 +357,7 @@ " print(Kmatrix)\n", " else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = marginalizedkernel(dataset, p_quit, 20)\n", + " Kmatrix, run_time = marginalizedkernel(dataset, p_quit, 20, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/run_pathkernel_acyclic.ipynb b/notebooks/run_pathkernel_acyclic.ipynb index 6913a74..86bd8fc 100644 --- a/notebooks/run_pathkernel_acyclic.ipynb +++ b/notebooks/run_pathkernel_acyclic.ipynb @@ -686,7 +686,7 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = pathkernel(dataset)\n", + " Kmatrix, run_time = pathkernel(dataset, node_label = 'atom', edge_label = 'bond_type')\n", " print(Kmatrix)\n", " print('\\n Saving kernel matrix to file...')\n", " np.savetxt(kernel_file, Kmatrix)\n", diff --git a/notebooks/run_spkernel_acyclic.ipynb b/notebooks/run_spkernel_acyclic.ipynb index 1bf4920..b3e0f40 100644 --- a/notebooks/run_spkernel_acyclic.ipynb +++ b/notebooks/run_spkernel_acyclic.ipynb @@ -182,7 +182,8 @@ " print(Kmatrix)\n", "else:\n", " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = spkernel(dataset)\n", + " #@Q: is it appropriate to use bond type between atoms as the edge weight to calculate shortest path????????\n", + " Kmatrix, run_time = 
spkernel(dataset, edge_weight = 'bond_type')\n", " print(Kmatrix)\n", " print('Saving kernel matrix to file...')\n", " np.savetxt(kernel_file_path, Kmatrix)\n", diff --git a/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc index 4b9202976d0db7cf259f705cf5f0bf8588a1b2f1..242dee9fa54af4bbde6ee9f535d19224f6d3ff64 100644 GIT binary patch literal 8164 zcmeHMO>i7X6@D|be_E|%S&}WU63jTUlWZc(mVlTzB9PcYRk34IR)WG3*VbsKr5$;9 zcDH9XTC21tr?7pZD9&7{q5>SL;s6{tQ5-nsMh%b)1ywmw6r55V_+IzS{^-XE1u0Sm zYfr1E`}OPBuV453-kXyX6ZwDqeWm>M3q*gUpLuiy#ZFo<0IH>gfj?$-?}8zhLf zP1?2;fk}5wGGfvcmD9Jzxs9fmtI4~XaPE&n? z8k1C>B;%8sK0_`9hMZ&M=9RSLjM0+W^Yp{H`O_FaIp+VaQ-6iU!X%G8ss`jr@Xfma1QE?ArFd#*HChra??pkIq zL;i2nfQ%YYqY#5L%-ZmOqB^pwW8R3_^s*GDXgjTB$TA@dqbi$VX%JX;yA(q2)}RX( zclDM-y&QGlfR*-pwBG(H zt@rh?m!?y_yjo6<*P&I$ihzuJdD^t-B(IWr;LW79{?HK>em_|_uiv2JmPx%-tk%O- zI!^w|kV=1H45{?-A(g&wgpe%W9L*eR>Wx!hkfAk<@jxx87JEDq8p$n+GcPgJX#)f>3(?R$qGVvz-u)8y#q{6A0IK0$Uy8 zMf-J#?}|;QQx9!LsoHrmKHho>{b_~{^TB!eBKUaoD9Zjk{YzutWcmidJf0u+7Zcwg zw(eBTs(7em+!^nYUcTqs!ol8ounH&inm%v-`t_J*R4K!Qp_N1cZFc)azA3_v^p!x` z%2f4Q!-4X<7p|$xm2IHYXx*{n*5TE~6uM_Y%P8uiA$(IITc1s@x z`@$i_@l?sFNz8BGf^IZJF?4Lya$c`Ng3XR!Rf_YQJJ3n1)s$htDje01?f9;0t)XAw zkg;1y)U3Fv;-=UXQux*3xs(D`?>Coj*=sc+-*$qU=k5yo#)7?8t?dXe+!fM(ZNa`S zL}v$Q|K(Q}>{mstj@f+s+JgO3Q*P}z(hcovuPzQNp|0-^b)b{eyJ)}C#O^tDHilPW z9-c42*^xd@lYLVJs8(x>_VUH0rRSHHpIQ^G(&gnZeD1mP&n{`TeE_6Fl7Wjl3`}N)4a^!2XI-jSB1-kg9d_OypmR^ zBB}II1-vhV4@;q(9BmOjFvtVqfp5$be0{ggW3S%w=m8)SoOGJDGjyNmKClwtFF*<@ zgaY50qis0faeg&IAdh_f!T_c~_kpHBRM6d!MAdo|qvP6Zw2qlsW!6x1i5Sx(W&=IyOXPDI$e6Mv0~f0Ep+jZ&A9)dvJ{Q@PCT7r<4#TU9uQ+#Wak$ zKw8A>O4~Hn+kd9@Ze;SB0X!A5oKvg%mDxK!vOD`WuKPVBJRa}$)mz^--U9TQ=8eO? 
z_GYNt;*Hw|0#^Y3E*V#Ey+yrQx<_;`rB(ynua*qDy;+6LioEpR3EDeB+cPY^!5wKx zn*x>`qwQJl0?ix~&8R(A??(F>bDb>)7z+cn{!S|K=cwz@-bt9%H>h`t_V6E!)CS$o zko>*5H>XTVwWGJ_snJIJ$Ag{_vmri76K`JJuNyAGY(crsJYkP>Jh##P{K2)`yms_M zykxP(!QxihPg+X;qb$CdSZ-0x5L>)@!HoMYzMX+YpAy4{-agu}5%%pT`nL@Gch-9Q zmw0|_ti#^dN(JXHh`JsIuSfm)rx#}TG3x)J@F3zo2-1hqtcnEwx1g}XlM0+w8KLnH z_D_sZX7B`;QQ)3N{Cn#n^$<_PZ^6wXJl=+fCE}a$V$y`pqOVd<-!ZAp$H`Evkv>V? z7I%V@8JUY0qqK^2@N&Yb*DHRfQE6^gIQ-Cg6v@y*!5aBl)IQ9w)DewVn~l~B@3KfN z-x%#abr?chT&Xt^Zw4=*k_`Gp7bed@nvy9W!yoy$uAk-lBU~^xQ;U%2Q2QaCSb8~* zdX#dV(0P4CK28ED#WUYn@pG(r_L0~JXqc5B>sL*{+T}AIOA(FvoN3x3@XiP zwIhXFp5(5iO?u^Ne#iPMeDI_QkR&Oeh|g+5riVuPniv@sWi)j;`?jct zO}Ug*mLsDUS)SXCCV1Kg@-H4LfqarRlS|$aBB8hAd+m;hCPxK~(n{IUcr1_-oh`qV zmL*n#I+@Dy)K;rFjPDpZ#RZ;#3z|p3V`Qu`Bg6l!3G=iuVNByGn8(d2i$kG{2!_rX zXN}X=yrKfFRWvh3(L8JBjYo{**0h;1dDOi58Dr9%u!^A7eD?Er%vAGNEuI^{EFKo^ z3Z7sW1;<0ctLg^8or|m_?g4%O>I^3Gh&n?FKz<3N-IqA>z%a(?Ccr2sWzTcMmT|8N zjW|~cE_XRGiil(%gZ9e^cr0AtmeYY>co-gv!_A>AOPp{-CJlKJ-OX7hJ2)igVTa8@ z5-%Z6axlm}CP}~pURfs?l>7RKeC8*cXgngrm1MStlUeKi6hV@z)gk#o)eXF|aug+e z-@qZ-5RyHmx&S15L3MLPSt*pgm$GGZbqUY1PAhx{6y-c0J~d)#sJ<+}%)^ya7<0!}7m zsHnq0grkUt5SViqh>k2mK=?=(kv|d!D#tLV&&f7|fAwy(-@q3HNQ~GYQmS)+xP1_1 ztM5!a7fue#i%%UoRvVEHu^P@I7J{=BOH>?4u+sjUp5!RC;L``RR7jMgKCNh340oJ^ zk=>%(sE)ZnLL0XgX(+oXtd{t6n&Ctxf8GTd>g4|f139|sHkVV{to zuBUX9y5I-Kd=9N=V3rA7d?c3WFodX+lMKn^(}cOhp(YeccU~R z7nsX=E*81K2V{~6HH%v|5Q(!%&? 
zyxOeS0Y&i9BhZSn(X0^x-*`&Kp{ugLOHpp6(R4d?@dDdRz-EXG)0i>2EFdL1mEzje OaEp2N&9lL=r~e6YMPA?l delta 2739 zcmZWrO>7%Q6n-;nuYY1Ewv*UdCvnoIY3u%^4OB{h+?KYHsv<#=f(S&7f}3?3yNQ3s z8)T%lRwxxsI8aq{g9{Z>B~DezjRSB3p`JJ)?F9)q075`j@gq3Ed$V@pwvOj{_Psan z_nWu*;p~sc#*)!!`24L?bNdegd=DKT9rw@Ss-M0+_QwkVW&q3}m?1F3a0B>U512Zb z+x#vtBZ?nW%qW=MaD%~I98A=Vp$O&@U^1A=#$}UD%5T_ihok)vt^lr!Q%5b3KxyC z^2bULK!F3IkF_i`{b2c^gcRNTNppL%q@qEajRshlv$i8z=P*5=ho%mVS4n2;Psq+e zGXPeAOrp%7pFlg@b%!$>11Ej}aywgtwy1`ZT@z4B1f6l1$Gwgv-B z8YC%0JUz25c&udaw6mq8892r@4o%*!;}bOuLOIt_!w*?U4ac@?2;vK7VPIY#Ji7*# zr5RK;52=tJi;}mZ3_^GYu{4wI%73bDjrqn{aJf*twrq`_loR?sS&1JBheulniaC@*WCg7@yTA&A)dhAJJ>u7CMpSeWC5{rr zN|BbBp(BbsK(Z9=f@+7(F`Q;MG*-z+3GcO!?%6u9>G{{7*#kELmhevEy$HbDKuR`V zc&vyqO;NH zNswpOIQDevqm(+zF}`-X#!hy*+C!!ic&s;0M|1kXj*yF1i~3qwlDiGOEm#TFC047# zY~!K2N*&NrW5bLC<}8&q(1w*o=rEUQ5@% zlD}G5kw5oN$=GoBiDBernd>W8%tDsCeB!7ZD$|6yS`l>=u!|y%Dq;}HKjGX;v2HC^ zR;y>;lf%8om@D5JPRVx@mye54(o)vA+n--qStzfqE>xBl3i-w51?5wuNM+la7{kZU z$OC(98AIY52k-JK}zEF|Xq^`MaS-z3X`Lv?hkoS{EVo?lqVRdcA zD&nRtMr0y&T7H{MJ1XU@PwXb6st~Gjsv6V>5k-Qd59%)?fo>enOpm5mi0SmF89c*q zfb%F%bDbG{7Z0;R7WXB%&Z8(v^I_JlMg0cLXc=a(xJJYMD314Wwufc>ypN0M)}JYZ zv!INnhge)5PN#JB1~KIG=^32YM{ z*u)6(+U?#&d*-FOV~e2Sm$6JnmWF$D!mZdNo#B_x(H4(Bi$AWvST+ldwxQ=d$tnC1 zXNYWDL)cRL6>PiF=s_n-@&iN;62brKKpZAQ2=SnFgmy=XOcJ5DqS!}d0!cRL1$=@S zCyC^UP+sCRk+Ve35uqruny225E$Uq-aRFtIKz^Co%Pz=YGlvos5(0B=wfd$9?T9=y g^2#21x2m2Cs41v%j{n5Up1 zyh}lm)*7WJuX^7}qh6FoVd8D=$DK49#9lh^`hM(hhu*gKhdbU@)C-eRM?o0>ifFTF z8MNeI7`3u{L}SPn@jXR-hsF-6Lq**T9bqm*sY{1MM~Z&+Cp8A|Qa8&}-JC4Ql3g6Q zl;-IWiYuyPHB2~kCOePF!9hHiqk6Z%CSk?Ti9QfD>acIuA^m4ci!y{&m>=h+{i5yX zDfXsAR>;Yc$(B5gHYqK!5{J;wp_PqyXk1{~hmc#Og){O4teoMyg(53frNaz$XJm0% zbg581s7|>f<*2<+X?^tZR0u3#A-_2h!UvT3Y8FHsto#|#7A~a5vVKaFooR&+<%_D& zQRcMt>RB6cXBW@h3l9XdQL7*FFn3FS!uno%FpN5W&kIL>f7lDX!InKDisgVP@p{p2 z=%qVRa?c**z2@EZyyaV7yx-ppHSdTA0sPgE11}7=CsJ6_?dEO#K9hUECxm`y2j|D3 zhy5KLZSpB{ns;Ngc(C}yyXp1(&9K)HZ9ns%6EP1y&!TTl7Txh+hX;D+e<~7-HQqho 
zZ&9vv@vDngoyi%vhRGQ+@D%u?)rK;KO+N`^zaJV`Xn_IC+*5|^Gvyh2$|0jOMKE_< zv^BKkWsq%Rd~IB01OtQ!DZ3fcIp#lfXfKCvu5=y?|3D4LIL+HejWovrA5gVgO!h4flg1qk!`Qew^aE2$ zhKQ={YKh4tsWw>|FqtS$O;-Eyc4!KI5VYBqP0hM?d$1*aY{B=l^X%Z*oa$)UC~E#_ z#)(H(uG%(g%j7$QI1S^pRfsq3cHsmWdJY?`(TcMOX|~ej?8WnG$s!0<%W77YRZX2! zH`ICXv+BG9>ddPJ^snLnMRgN@i%NeH>yJMM-xq(32WZJ3Pv?R@!mc94`2e9plEH*W zRFlZhfgC)%OrqB$BOnYSSxE3lOh(}6D36u+sy4_WaoE(*Eplq8b4tilgiQq+{hBAc zh{Xum3eoh4#zl_b2-$~4n(SES#U&ao$ru((5?R@}LSw}18Ob{wxl7#PJucOk_x?ib z0Zu8=ZjSW7nL)SC|BAGHnCDQ%~WsEDmN5qcxz)Y)>|t8x$OAuI1)Q|nN9tx;Rm929_V_GIxd zEueFv0MUe<6G%*T%kAuhea&fKXF-h1{?*Dq_Ti}=EaDVkoOIBr>KAyC9LEqf7e@VF zGfZ_DHs9=peR%34@FpumX6DS;(AP<5^4pjj3{ri8_vcuCv}xQZ4YhI8aFptoScC`C zs92pv+{?V&dGiq#u>_aTFxe;$!jZnpbD1FO=o`%CB&H;~fsTHa$#FEZ5Xmz{-g@`p zEzBi%K}ZmC&Zz+>In_X$S93xyW2B1NDp10swtq|MH?XV2n(?oSHiITesbF-129!T2 zkpv&??5NTl_7PhyF%+a|LZJi@)KN0Q8JD>t0+$1aLJ3>}9*E6wKkUbchv&K|Clqir z`aJ_@W#hM81x*&=FnBp!$5vW`l}O13ls!Y<0Mx;U>+mAzu(I)<=wU^miY&`+pwgwu zJ}kGhV2*cVx(+Y`ODJs^h-YR1$ab0bKB4u!7#R77)${KH&A)L6&RSpD_^Y5&u(*rw zbX}boQ4uvz<=_@dQUTS37nDV~N8CV}MhDZw0OwPI0-_b_g5t_KL?RJ4W-Sm~ne99{ zNK9!z4k8_P(lBThc(Bh1%}Tb_?4{z}r@ZiJlgSc@$xo1HT%aP=_j#X^OL0AHidSZ3ygMKe@T|TuYpd;;8QFjzs%!BhM*)R z(WzAuO>NRsk4?F=uQdXK473X9wI$dTnL}vV4~_Hq4%%Hi$rkIcu!!f;n)r`Hp!FM= zOKyV@GFeR(fzT5EE;<$Uin^q(sH^HS_&Ffp5>cLT~gqHyFB_Pn9o#<>6Wp|D>2gFX>IQp(6 zoO2vwaQ4c^+f$D}FS%-5757I6>1kY>8t}rg0oCWUJ;b|&bzBCF5cqPmM+hBoW@p-6C|i}xk_Gormjkj+sbrC zJ(C@3uGL&V-(N`wcV!Ir=^9SZ!oYzTFN69YhT-A|*cx@MQj!TtT-~s)c1lxcy#upVMf< On9MolM){@kmHz-#`-YPM delta 1985 zcmah}OKc-W5Un12{2l)i;@CLsW_Pnmh9wZbyJ8WGAP~YQLdwA*D6$;SB(dYzNqY!B z%*Ya1eg{BkByL>b2+|61ljh+FnNHAH}q0WUzYWQUH?1%$;Zuvj6xh}oxWtfAl~S9 z9J}KLVrL`K*{XHAmfcf3+uE^vG7lx01Tlpy_%FpKo|(mggptjQl9(6cB9Cue1J#OR z66Xs3m&7IUgpe1Y*+7-4p8*=jdJ3zz;(s1{eXz?Qwt%K8n5E4sc}a4U3=nFDlZad) z>t2f77`X{01EeU1$ADlf!)%=RO2`H_ZVERw!IC6h_F{DS9-w`BWQwJL_F3}MFOr-7 zPg{68tIR=+chgiZv+?i*WXeM`49?$Fg)3`j#L*qz)29Ie|fRTXy#Y&FPvs 
zInFZ9x*YSrPF#CrlGS2$$C1sgAlh_n3H4^j3ZjnPcjOdno&_-t1zLD@+P^EToZn1VmhZ;7$6^95h8WXwpfO=$74a3sw8#RXqF5C23bY#R5dU&mX)J|L z+5aWEm-v($d};F&e>G(ej`)D?Mq<-@(kX(2JImc z3u0(VCo9X1Y30=1f)P|xUhUDCf|{E0EJOXl=rKHT5?E6OR=6dw0mAyf$k-)*<@V5c za)u6%r0`%E;dN2v20xralOsu(sS)RGsjjXx6?*sr{9(l?BtC9 zPkOO-A8)x5HlmGPdtY*o$~h+UOiY% zWnOCS)vY00mi&P+Yd*ki{WXa?MJ=;ZSd|YlKPkIiM-2}->wj-d>#8LEKaKfU9%4hQvpR>5fh@Kcr#)S zWi3~}*w4&r1Jzc~;SKpblRKD9Fex&rFge4d41#t$IIugG8cji5+6V1=!w&TAcDL@_ zmKV9ex9(-K$V4TXTgJ?Xk%u<@vAlWq94n|PS#M5@3a=G|3)BY#CSd_RBr+&UPm frXIv=HLKgG)s!cy6Xgo)uKR!F7xY