| @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt | |||
| from numpy.linalg import eig | |||
| # read gram matrices from file. | |||
| results_dir = 'results/structuralspkernel/' | |||
| results_dir = 'results/untilhpathkernel/myria' | |||
| ds_name = 'Letter-med' | |||
| gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | |||
| #print('gm time: ', gmfile['gmtime']) | |||
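Since eig is imported above, a natural follow-up is to inspect the spectrum of each loaded Gram matrix and confirm it is (numerically) positive semi-definite before reusing it. A minimal sketch, assuming the .gm.npz archive stores the matrices under a key such as 'gms' (the key name is an assumption, not something the diff confirms):

    import numpy as np
    from numpy.linalg import eig

    gmfile = np.load('results/untilhpathkernel/myria/Letter-med.gm.npz')
    gram_matrices = gmfile['gms']   # hypothetical key holding the stacked Gram matrices

    for idx, gm in enumerate(gram_matrices):
        eigvals, _ = eig(gm)                      # eigendecomposition of one Gram matrix
        min_eig = np.min(np.real(eigvals))        # most negative eigenvalue, up to round-off
        print('matrix %d: smallest eigenvalue %g, PSD: %s'
              % (idx, min_eig, min_eig > -1e-10))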
| @@ -6,94 +6,116 @@ | |||
| "metadata": { | |||
| "scrolled": false | |||
| }, | |||
| "outputs": [], | |||
| "outputs": [ | |||
| { | |||
| "name": "stdout", | |||
| "output_type": "stream", | |||
| "text": [ | |||
| "\n", | |||
| "MAO\n", | |||
| "\n", | |||
| "--- This is a classification problem ---\n", | |||
| "\n", | |||
| "\n", | |||
| "1. Loading dataset from file...\n", | |||
| "\n", | |||
| "2. Calculating gram matrices. This could take a while...\n", | |||
| "\n", | |||
| " None edge weight specified. Set all weight to 1.\n", | |||
| "\n", | |||
| "getting sp graphs: 68it [00:00, 692.11it/s]\n", | |||
| "calculating kernels: 2346it [00:05, 399.28it/s]\n", | |||
| "\n", | |||
| " --- shortest path kernel matrix of size 68 built in 6.345669507980347 seconds ---\n", | |||
| "\n", | |||
| "the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7fe240afd620>, 'nsymb': <function gaussiankernel at 0x7fe240afd9d8>, 'mix': functools.partial(<function kernelproduct at 0x7fe240aaf0d0>, <function deltakernel at 0x7fe240afd620>, <function gaussiankernel at 0x7fe240afd9d8>)}, 'n_jobs': 8} is: \n", | |||
| "\n", | |||
| "1 gram matrices are calculated, 0 of which are ignored.\n", | |||
| "\n", | |||
| "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
| "cross validation: 7it [00:09, 4.67s/it]" | |||
| ] | |||
| } | |||
| ], | |||
| "source": [ | |||
| "%load_ext line_profiler\n", | |||
| "%matplotlib inline\n", | |||
| "import functools\n", | |||
| "from libs import *\n", | |||
| "import multiprocessing\n", | |||
| "from sklearn.metrics.pairwise import rbf_kernel\n", | |||
| "\n", | |||
| "from pygraph.kernels.spKernel import spkernel, spkernel_do\n", | |||
| "from pygraph.utils.kernels import deltakernel, kernelsum\n", | |||
| "from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
| "\n", | |||
| "dslist = [ \n", | |||
| " {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', 'task': 'regression'}, # node symb\n", | |||
| "# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', \n", | |||
| "# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',}, # contains single node graph, node symb\n", | |||
| "# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',}, # node/edge symb\n", | |||
| "# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',}, # unlabeled\n", | |||
| "# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
| "# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
| " {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
| " {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, # node symb/nsymb\n", | |||
| " {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, # node/edge symb\n", | |||
| " {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
| " 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
| "\n", | |||
| "# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
| "# # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
| "# # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
| "# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
| "# \n", | |||
| "# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
| "# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
| "# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
| "# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
| "# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
| "\n", | |||
| "# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
| "# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
| "# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
| "# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
| "# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
| "# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
| "# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
| "# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
| "# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
| " \n", | |||
| "# # not working below\n", | |||
| "# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
| "# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
| "# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
| "# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
| "from pygraph.kernels.spKernel import spkernel\n", | |||
| "from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
| "#from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
| "\n", | |||
| "dslist = [\n", | |||
| "# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
| "# 'task': 'regression'}, # node symb\n", | |||
| "# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
| "# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
| "# # contains single node graph, node symb\n", | |||
| " {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
| "# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
| "# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
| "# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
| "# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
| "# # node nsymb\n", | |||
| "# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
| "# # node symb/nsymb\n", | |||
| "# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
| " # node/edge symb\n", | |||
| "# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
| "# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
| "\n", | |||
| " # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
| " # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
| " # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
| " # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
| " #\n", | |||
| " # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
| " # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
| " # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
| " # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
| " # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
| "\n", | |||
| " # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
| " # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
| " # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
| " # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
| " # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
| " # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
| " # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
| " # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
| " # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
| "\n", | |||
| " # # not working below\n", | |||
| " # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
| " # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
| " # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
| " # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
| "]\n", | |||
| "estimator = spkernel\n", | |||
| "mixkernel = functools.partial(kernelsum, deltakernel, rbf_kernel)\n", | |||
| "param_grid_precomputed = {'node_kernels': [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}\n", | |||
| "param_grid = [{'C': np.logspace(-10, 10, num = 41, base = 10)}, \n", | |||
| " {'alpha': np.logspace(-10, 10, num = 41, base = 10)}]\n", | |||
| "mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n", | |||
| "param_grid_precomputed = {'node_kernels': [\n", | |||
| " {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
| "param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
| " {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
| "\n", | |||
| "for ds in dslist:\n", | |||
| " print()\n", | |||
| " print(ds['name'])\n", | |||
| " model_selection_for_precomputed_kernel(\n", | |||
| " ds['dataset'], \n", | |||
| " estimator, \n", | |||
| " param_grid_precomputed, \n", | |||
| " (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \n", | |||
| " (ds['task'] if 'task' in ds else 'classification'), \n", | |||
| " ds['dataset'],\n", | |||
| " estimator,\n", | |||
| " param_grid_precomputed,\n", | |||
| " (param_grid[1] if ('task' in ds and ds['task']\n", | |||
| " == 'regression') else param_grid[0]),\n", | |||
| " (ds['task'] if 'task' in ds else 'classification'),\n", | |||
| " NUM_TRIALS=30,\n", | |||
| " datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n", | |||
| " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | |||
| " ds_name=ds['name'],\n", | |||
| " n_jobs=multiprocessing.cpu_count())\n", | |||
| " \n", | |||
| "# %lprun -f trial_do -f spkernel -f spkernel_do -f model_selection_for_precomputed_kernel \\\n", | |||
| "# model_selection_for_precomputed_kernel( \\\n", | |||
| "# ds['dataset'], \\\n", | |||
| "# estimator, \\\n", | |||
| "# param_grid_precomputed, \\\n", | |||
| "# (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \\\n", | |||
| "# (ds['task'] if 'task' in ds else 'classification'), \\\n", | |||
| "# NUM_TRIALS=30, \\\n", | |||
| "# datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), \\\n", | |||
| "# extra_params=(ds['extra_params'] if 'extra_params' in ds else None), \\\n", | |||
| "# ds_name=ds['name'], \\\n", | |||
| "# n_jobs=multiprocessing.cpu_count()) \n", | |||
| " print()" | |||
| " n_jobs=multiprocessing.cpu_count(),\n", | |||
| " read_gm_from_file=False)\n", | |||
| " print()\n" | |||
| ] | |||
| }, | |||
| { | |||
| @@ -713,8 +735,8 @@ | |||
| ], | |||
| "metadata": { | |||
| "kernelspec": { | |||
| "display_name": "Python 3 (Spyder)", | |||
| "language": "python3", | |||
| "display_name": "Python 3", | |||
| "language": "python", | |||
| "name": "python3" | |||
| }, | |||
| "language_info": { | |||
| @@ -727,7 +749,7 @@ | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.5.2" | |||
| "version": "3.6.6" | |||
| } | |||
| }, | |||
| "nbformat": 4, | |||
| @@ -7,21 +7,21 @@ from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
| #from pygraph.utils.model_selection_precomputed import trial_do | |||
| dslist = [ | |||
| # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
| # 'task': 'regression'}, # node symb | |||
| # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
| # 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
| # # contains single node graph, node symb | |||
| # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
| # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
| # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
| # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
| {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
| 'task': 'regression'}, # node symb | |||
| {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
| 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
| # contains single node graph, node symb | |||
| {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
| {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
| {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
| 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
| {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
| # node nsymb | |||
| {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
| # node symb/nsymb | |||
| # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
| # # node/edge symb | |||
| # node/edge symb | |||
| # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||
| # 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | |||
| @@ -56,7 +56,7 @@ estimator = spkernel | |||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
| param_grid_precomputed = {'node_kernels': [ | |||
| {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]} | |||
| param_grid = [{'C': np.logspace(-10, 3, num=27, base=10)}, | |||
| param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
| {'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
| for ds in dslist: | |||
| @@ -23,10 +23,10 @@ dslist = [ | |||
| # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
| # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
| # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
| {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
| # node nsymb | |||
| # {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
| # # node symb/nsymb | |||
| # {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
| # # node nsymb | |||
| {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
| # node symb/nsymb | |||
| # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
| # # node/edge symb | |||
| # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||
| @@ -39,8 +39,8 @@ dslist = [ | |||
| # | |||
| # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb | |||
| # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb | |||
| # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values | |||
| # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values | |||
| # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values | |||
| # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values | |||
| # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb | |||
| # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb | |||
| @@ -53,8 +53,8 @@ dslist = [ | |||
| # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf', | |||
| # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb | |||
| # # not working below | |||
| # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',}, | |||
| # # not working below | |||
| # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',}, | |||
| # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',}, | |||
| # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',}, | |||
| # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',}, | |||
| @@ -62,7 +62,7 @@ dslist = [ | |||
| ] | |||
| estimator = untilhpathkernel | |||
| mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel) | |||
| param_grid_precomputed = {'depth': np.linspace(7, 10, 10), | |||
| param_grid_precomputed = {'depth': np.linspace(1, 10, 10), | |||
| 'k_func': ['tanimoto', 'MinMax']} | |||
| param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
| {'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
| @@ -1,77 +0,0 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Fri Sep 28 16:37:29 2018 | |||
| @author: ljia | |||
| """ | |||
| import functools | |||
| from libs import * | |||
| import multiprocessing | |||
| from sklearn.metrics.pairwise import rbf_kernel | |||
| from pygraph.kernels.structuralspKernel import structuralspkernel | |||
| from pygraph.utils.kernels import deltakernel, kernelproduct | |||
| dslist = [ | |||
| # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
| # 'task': 'regression'}, # node symb | |||
| # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
| # 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||
| {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
| # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb | |||
| # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb | |||
| # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb | |||
| # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'}, | |||
| # | |||
| # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb | |||
| # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb | |||
| # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb | |||
| # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb | |||
| # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb | |||
| # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb | |||
| # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb | |||
| # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb | |||
| # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat', | |||
| # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb | |||
| # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat', | |||
| # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb | |||
| # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf', | |||
| # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb | |||
| # # not working below | |||
| # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',}, | |||
| # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',}, | |||
| # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',}, | |||
| # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',}, | |||
| ] | |||
| estimator = structuralspkernel | |||
| mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel) | |||
| param_grid_precomputed = {'node_kernels': | |||
| [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}], | |||
| 'edge_kernels': | |||
| [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]} | |||
| param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
| {'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
| for ds in dslist: | |||
| print() | |||
| print(ds['name']) | |||
| model_selection_for_precomputed_kernel( | |||
| ds['dataset'], | |||
| estimator, | |||
| param_grid_precomputed, | |||
| (param_grid[1] if ('task' in ds and ds['task'] | |||
| == 'regression') else param_grid[0]), | |||
| (ds['task'] if 'task' in ds else 'classification'), | |||
| NUM_TRIALS=30, | |||
| datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), | |||
| extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | |||
| ds_name=ds['name'], | |||
| n_jobs=multiprocessing.cpu_count(), | |||
| read_gm_from_file=False) | |||
| print() | |||
| @@ -85,21 +85,20 @@ def commonwalkkernel(*args, | |||
| # ---- use pool.imap_unordered to parallel and track progress. ---- | |||
| pool = Pool(n_jobs) | |||
| itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
| itr = zip(combinations_with_replacement(Gn, 2), | |||
| combinations_with_replacement(range(0, len(Gn)), 2)) | |||
| len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
| if len_itr < 1000 * n_jobs: | |||
| chunksize = int(len_itr / n_jobs) + 1 | |||
| else: | |||
| chunksize = 100 | |||
| chunksize = 1000 | |||
| # direct product graph method - exponential | |||
| if compute_method == 'exp': | |||
| do_partial = partial(_commonwalkkernel_exp, Gn, node_label, edge_label, | |||
| weight) | |||
| do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight) | |||
| # direct product graph method - geometric | |||
| elif compute_method == 'geo': | |||
| do_partial = partial(_commonwalkkernel_geo, Gn, node_label, edge_label, | |||
| weight) | |||
| do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight) | |||
| for i, j, kernel in tqdm( | |||
| pool.imap_unordered(do_partial, itr, chunksize), | |||
| @@ -153,7 +152,7 @@ def commonwalkkernel(*args, | |||
| return Kmatrix, run_time | |||
| def _commonwalkkernel_exp(Gn, node_label, edge_label, beta, ij): | |||
| def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta): | |||
| """Calculate walk graph kernels up to n between 2 graphs using exponential | |||
| series. | |||
| @@ -175,10 +174,6 @@ def _commonwalkkernel_exp(Gn, node_label, edge_label, beta, ij): | |||
| kernel : float | |||
| The common walk Kernel between 2 graphs. | |||
| """ | |||
| iglobal = ij[0] | |||
| jglobal = ij[1] | |||
| g1 = Gn[iglobal] | |||
| g2 = Gn[jglobal] | |||
| # get tensor product / direct product | |||
| gp = direct_product(g1, g2, node_label, edge_label) | |||
| @@ -219,10 +214,18 @@ def _commonwalkkernel_exp(Gn, node_label, edge_label, beta, ij): | |||
| # print(np.exp(weight * A)) | |||
| # print('-------') | |||
| return iglobal, jglobal, exp_D.sum() | |||
| return exp_D.sum() | |||
| def _commonwalkkernel_geo(Gn, node_label, edge_label, gamma, ij): | |||
| def wrapper_cw_exp(node_label, edge_label, beta, itr_item): | |||
| g1 = itr_item[0][0] | |||
| g2 = itr_item[0][1] | |||
| i = itr_item[1][0] | |||
| j = itr_item[1][1] | |||
| return i, j, _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta) | |||
| def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma): | |||
| """Calculate common walk graph kernels up to n between 2 graphs using | |||
| geometric series. | |||
| @@ -244,19 +247,22 @@ def _commonwalkkernel_geo(Gn, node_label, edge_label, gamma, ij): | |||
| kernel : float | |||
| The common walk Kernel between 2 graphs. | |||
| """ | |||
| iglobal = ij[0] | |||
| jglobal = ij[1] | |||
| g1 = Gn[iglobal] | |||
| g2 = Gn[jglobal] | |||
| # get tensor product / direct product | |||
| gp = direct_product(g1, g2, node_label, edge_label) | |||
| A = nx.adjacency_matrix(gp).todense() | |||
| mat = np.identity(len(A)) - gamma * A | |||
| try: | |||
| return iglobal, jglobal, mat.I.sum() | |||
| return mat.I.sum() | |||
| except np.linalg.LinAlgError: | |||
| return iglobal, jglobal, np.nan | |||
| return np.nan | |||
| def wrapper_cw_geo(node_label, edge_label, gama, itr_item): | |||
| g1 = itr_item[0][0] | |||
| g2 = itr_item[0][1] | |||
| i = itr_item[1][0] | |||
| j = itr_item[1][1] | |||
| return i, j, _commonwalkkernel_geo(g1, g2, node_label, edge_label, gama) | |||
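For reference, the geometric variant above sums the entries of (I - gamma*A)^-1, where A is the adjacency matrix of the direct product graph; this is the closed form of the series sum_k gamma^k * A^k and it only converges when gamma is below the reciprocal of A's largest eigenvalue. A self-contained sketch of that step on a toy matrix (not data from the library):

    import numpy as np

    # toy adjacency matrix standing in for the direct product graph returned by
    # direct_product(g1, g2, node_label, edge_label) in the code above
    A = np.array([[0., 1., 0.],
                  [1., 0., 1.],
                  [0., 1., 0.]])
    gamma = 0.2   # must be < 1 / largest eigenvalue of A for the series to converge

    mat = np.identity(len(A)) - gamma * A
    kernel = np.linalg.inv(mat).sum()    # closed form of the geometric series, summed over entries
    series = sum(np.linalg.matrix_power(gamma * A, k) for k in range(50)).sum()
    print(kernel, series)                # the two values agree up to truncation error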
| def _commonwalkkernel_brute(walks1, | |||
| @@ -8,7 +8,6 @@ import sys | |||
| import time | |||
| from itertools import combinations_with_replacement, product | |||
| from functools import partial | |||
| from joblib import Parallel, delayed | |||
| from multiprocessing import Pool | |||
| from tqdm import tqdm | |||
| @@ -89,7 +88,8 @@ def spkernel(*args, | |||
| pool = Pool(n_jobs) | |||
| # get shortest path graphs of Gn | |||
| getsp_partial = partial(wrap_getSPGraph, Gn, weight) | |||
| getsp_partial = partial(wrapper_getSPGraph, weight) | |||
| itr = zip(Gn, range(0, len(Gn))) | |||
| if len(Gn) < 1000 * n_jobs: | |||
| # # use default chunksize as pool.map when iterable is less than 100 | |||
| # chunksize, extra = divmod(len(Gn), n_jobs * 4) | |||
| @@ -98,9 +98,8 @@ def spkernel(*args, | |||
| chunksize = int(len(Gn) / n_jobs) + 1 | |||
| else: | |||
| chunksize = 1000 | |||
| # chunksize = 300 # int(len(list(itr)) / n_jobs) | |||
| for i, g in tqdm( | |||
| pool.imap_unordered(getsp_partial, range(0, len(Gn)), chunksize), | |||
| pool.imap_unordered(getsp_partial, itr, chunksize), | |||
| desc='getting sp graphs', file=sys.stdout): | |||
| Gn[i] = g | |||
| pool.close() | |||
| @@ -144,8 +143,9 @@ def spkernel(*args, | |||
| # ---- use pool.imap_unordered to parallel and track progress. ---- | |||
| pool = Pool(n_jobs) | |||
| do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels) | |||
| itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
| do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels) | |||
| itr = zip(combinations_with_replacement(Gn, 2), | |||
| combinations_with_replacement(range(0, len(Gn)), 2)) | |||
| len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
| if len_itr < 1000 * n_jobs: | |||
| chunksize = int(len_itr / n_jobs) + 1 | |||
| @@ -200,15 +200,10 @@ def spkernel(*args, | |||
| return Kmatrix, run_time, idx | |||
| def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij): | |||
| i = ij[0] | |||
| j = ij[1] | |||
| g1 = Gn[i] | |||
| g2 = Gn[j] | |||
| def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels): | |||
| kernel = 0 | |||
| # try: | |||
| # compute shortest path matrices first, method borrowed from FCSP. | |||
| if ds_attrs['node_labeled']: | |||
| # node symb and non-symb labeled | |||
| @@ -243,7 +238,7 @@ def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij): | |||
| g1.edges(data=True), g2.edges(data=True)): | |||
| if e1[2]['cost'] == e2[2]['cost']: | |||
| kernel += 1 | |||
| return i, j, kernel | |||
| return kernel | |||
| # compute graph kernels | |||
| if ds_attrs['is_directed']: | |||
| @@ -293,12 +288,20 @@ def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij): | |||
| # kn1 = vk_mat[x1][x2] * vk_mat[y1][y2] | |||
| # kn2 = vk_mat[x1][y2] * vk_mat[y1][x2] | |||
| # kernel += kn1 + kn2 | |||
| # except KeyError: # missing labels or attributes | |||
| # pass | |||
| return i, j, kernel | |||
| return kernel | |||
| def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr_item): | |||
| g1 = itr_item[0][0] | |||
| g2 = itr_item[0][1] | |||
| i = itr_item[1][0] | |||
| j = itr_item[1][1] | |||
| return i, j, spkernel_do(g1, g2, ds_attrs, node_label, node_kernels) | |||
| def wrap_getSPGraph(Gn, weight, i): | |||
| return i, getSPGraph(Gn[i], edge_weight=weight) | |||
| # return i, nx.floyd_warshall_numpy(Gn[i], weight=weight) | |||
| def wrapper_getSPGraph(weight, itr_item): | |||
| g = itr_item[0] | |||
| i = itr_item[1] | |||
| return i, getSPGraph(g, edge_weight=weight) | |||
| # return i, nx.floyd_warshall_numpy(g, weight=weight) | |||
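The refactoring above repeats one pattern: instead of binding the whole graph list into the partial and passing indices, the iterable fed to pool.imap_unordered zips each work item with its index, and the wrapper returns (index, result) so that results arriving in arbitrary order can still be written back to the right slot. A minimal, library-independent sketch of that pattern (names are illustrative only):

    from functools import partial
    from multiprocessing import Pool

    def compute(scale, item):
        # placeholder for the per-graph work, e.g. getSPGraph(g, edge_weight=weight)
        return item * scale

    def wrapper_compute(scale, itr_item):
        item, i = itr_item                # (data, index) pair produced by zip() below
        return i, compute(scale, item)    # return the index so unordered results can be placed

    if __name__ == '__main__':
        data = [3, 1, 4, 1, 5, 9, 2, 6]
        results = [None] * len(data)
        do_partial = partial(wrapper_compute, 10)
        itr = zip(data, range(len(data)))
        n_jobs = 4
        # chunksize heuristic mirroring the one used by the kernels above
        chunksize = int(len(data) / n_jobs) + 1 if len(data) < 1000 * n_jobs else 1000
        with Pool(n_jobs) as pool:
            for i, res in pool.imap_unordered(do_partial, itr, chunksize):
                results[i] = res
        print(results)

This avoids pickling the full dataset into every task while keeping the result order recoverable.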
| @@ -12,7 +12,6 @@ import sys | |||
| import time | |||
| from itertools import combinations, combinations_with_replacement, product | |||
| from functools import partial | |||
| from joblib import Parallel, delayed | |||
| from multiprocessing import Pool | |||
| from tqdm import tqdm | |||
| @@ -71,7 +70,6 @@ def structuralspkernel(*args, | |||
| """ | |||
| # pre-process | |||
| Gn = args[0] if len(args) == 1 else [args[0], args[1]] | |||
| weight = None | |||
| if edge_weight is None: | |||
| print('\n None edge weight specified. Set all weight to 1.\n') | |||
| @@ -98,34 +96,61 @@ def structuralspkernel(*args, | |||
| start_time = time.time() | |||
| # get shortest paths of each graph in Gn | |||
| splist = [[] for _ in range(len(Gn))] | |||
| splist = [None] * len(Gn) | |||
| pool = Pool(n_jobs) | |||
| # get shortest path graphs of Gn | |||
| getsp_partial = partial(wrap_getSP, Gn, weight, ds_attrs['is_directed']) | |||
| getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed']) | |||
| itr = zip(Gn, range(0, len(Gn))) | |||
| if len(Gn) < 1000 * n_jobs: | |||
| chunksize = int(len(Gn) / n_jobs) + 1 | |||
| else: | |||
| chunksize = 1000 | |||
| # chunksize = 300 # int(len(list(itr)) / n_jobs) | |||
| for i, sp in tqdm( | |||
| pool.imap_unordered(getsp_partial, range(0, len(Gn)), chunksize), | |||
| pool.imap_unordered(getsp_partial, itr, chunksize), | |||
| desc='getting shortest paths', | |||
| file=sys.stdout): | |||
| splist[i] = sp | |||
| # time.sleep(10) | |||
| pool.close() | |||
| pool.join() | |||
| # # ---- use pool.map to parallel ---- | |||
| # result_sp = pool.map(getsp_partial, range(0, len(Gn))) | |||
| # for i in result_sp: | |||
| # Gn[i[0]] = i[1] | |||
| # or | |||
| # getsp_partial = partial(wrap_getSP, Gn, weight) | |||
| # for i, g in tqdm( | |||
| # pool.map(getsp_partial, range(0, len(Gn))), | |||
| # desc='getting sp graphs', | |||
| # file=sys.stdout): | |||
| # Gn[i] = g | |||
| # # get shortest paths of each graph in Gn | |||
| # splist = [[] for _ in range(len(Gn))] | |||
| # # get shortest path graphs of Gn | |||
| # getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed']) | |||
| # itr = zip(Gn, range(0, len(Gn))) | |||
| # if len(Gn) < 1000 * n_jobs: | |||
| # chunksize = int(len(Gn) / n_jobs) + 1 | |||
| # else: | |||
| # chunksize = 1000 | |||
| # # chunksize = 300 # int(len(list(itr)) / n_jobs) | |||
| # from contextlib import closing | |||
| # with closing(Pool(n_jobs)) as pool: | |||
| ## for i, sp in tqdm( | |||
| # res = pool.imap_unordered(getsp_partial, itr, 10) | |||
| ## desc='getting shortest paths', | |||
| ## file=sys.stdout): | |||
| ## splist[i] = sp | |||
| ## time.sleep(10) | |||
| # pool.close() | |||
| # pool.join() | |||
| # ss = 0 | |||
| # ss += sys.getsizeof(splist) | |||
| # for spss in splist: | |||
| # ss += sys.getsizeof(spss) | |||
| # for spp in spss: | |||
| # ss += sys.getsizeof(spp) | |||
| # time.sleep(20) | |||
| # # ---- direct running, normally use single CPU core. ---- | |||
| # splist = [] | |||
| # for g in tqdm(Gn, desc='getting sp graphs', file=sys.stdout): | |||
| # splist.append(get_shortest_paths(g, weight, ds_attrs['is_directed'])) | |||
| # # ---- only for the Fast Computation of Shortest Path Kernel (FCSP) | |||
| # sp_ml = [0] * len(Gn) # shortest path matrices | |||
| @@ -149,9 +174,11 @@ def structuralspkernel(*args, | |||
| # ---- use pool.imap_unordered to parallel and track progress. ---- | |||
| pool = Pool(n_jobs) | |||
| do_partial = partial(structuralspkernel_do, Gn, splist, ds_attrs, | |||
| node_label, edge_label, node_kernels, edge_kernels) | |||
| itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
| do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||
| node_kernels, edge_kernels) | |||
| itr = zip(combinations_with_replacement(Gn, 2), | |||
| combinations_with_replacement(splist, 2), | |||
| combinations_with_replacement(range(0, len(Gn)), 2)) | |||
| len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
| if len_itr < 1000 * n_jobs: | |||
| chunksize = int(len_itr / n_jobs) + 1 | |||
| @@ -166,36 +193,36 @@ def structuralspkernel(*args, | |||
| pool.close() | |||
| pool.join() | |||
| # # ---- use pool.map to parallel. ---- | |||
| # # result_perf = pool.map(do_partial, itr) | |||
| # do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels) | |||
| # itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
| # for i, j, kernel in tqdm( | |||
| # pool.map(do_partial, itr), desc='calculating kernels', | |||
| # file=sys.stdout): | |||
| # Kmatrix[i][j] = kernel | |||
| # Kmatrix[j][i] = kernel | |||
| # pool.close() | |||
| # pool.join() | |||
| # # ---- use joblib.Parallel to parallel and track progress. ---- | |||
| # result_perf = Parallel( | |||
| # n_jobs=n_jobs, verbose=10)( | |||
| # delayed(do_partial)(ij) | |||
| # for ij in combinations_with_replacement(range(0, len(Gn)), 2)) | |||
| # result_perf = [ | |||
| # do_partial(ij) | |||
| # for ij in combinations_with_replacement(range(0, len(Gn)), 2) | |||
| # ] | |||
| # for i in result_perf: | |||
| # Kmatrix[i[0]][i[1]] = i[2] | |||
| # Kmatrix[i[1]][i[0]] = i[2] | |||
| # # ---- use pool.imap_unordered to parallel and track progress. ---- | |||
| # do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||
| # node_kernels, edge_kernels) | |||
| # itr = zip(combinations_with_replacement(Gn, 2), | |||
| # combinations_with_replacement(splist, 2), | |||
| # combinations_with_replacement(range(0, len(Gn)), 2)) | |||
| # len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
| # if len_itr < 1000 * n_jobs: | |||
| # chunksize = int(len_itr / n_jobs) + 1 | |||
| # else: | |||
| # chunksize = 1000 | |||
| # from contextlib import closing | |||
| # with closing(Pool(n_jobs)) as pool: | |||
| # for i, j, kernel in tqdm( | |||
| # pool.imap_unordered(do_partial, itr, 1000), | |||
| # desc='calculating kernels', | |||
| # file=sys.stdout): | |||
| # Kmatrix[i][j] = kernel | |||
| # Kmatrix[j][i] = kernel | |||
| # pool.close() | |||
| # pool.join() | |||
| # # ---- direct running, normally use single CPU core. ---- | |||
| # itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
| # itr = zip(combinations_with_replacement(Gn, 2), | |||
| # combinations_with_replacement(splist, 2), | |||
| # combinations_with_replacement(range(0, len(Gn)), 2)) | |||
| # for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||
| # i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs, | |||
| # node_label, edge_label, node_kernels, edge_kernels, gs) | |||
| # i, j, kernel = wrapper_ssp_do(ds_attrs, node_label, edge_label, | |||
| # node_kernels, edge_kernels, gs) | |||
| # if(kernel > 1): | |||
| # print("error here ") | |||
| # Kmatrix[i][j] = kernel | |||
| @@ -209,18 +236,11 @@ def structuralspkernel(*args, | |||
| return Kmatrix, run_time | |||
| def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label, | |||
| node_kernels, edge_kernels, ij): | |||
| iglobal = ij[0] | |||
| jglobal = ij[1] | |||
| g1 = Gn[iglobal] | |||
| g2 = Gn[jglobal] | |||
| spl1 = splist[iglobal] | |||
| spl2 = splist[jglobal] | |||
| def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label, | |||
| node_kernels, edge_kernels): | |||
| kernel = 0 | |||
| #try: | |||
| # First, compute shortest path matrices, method borrowed from FCSP. | |||
| if ds_attrs['node_labeled']: | |||
| # node symb and non-symb labeled | |||
| @@ -369,11 +389,19 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label, | |||
| # kn1 = vk_mat[x1][x2] * vk_mat[y1][y2] | |||
| # kn2 = vk_mat[x1][y2] * vk_mat[y1][x2] | |||
| # Kmatrix += kn1 + kn2 | |||
| #except KeyError: # missing labels or attributes | |||
| # print("toto") | |||
| # pass | |||
| return kernel | |||
| return iglobal, jglobal, kernel | |||
| def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels, | |||
| edge_kernels, itr_item): | |||
| g1 = itr_item[0][0] | |||
| g2 = itr_item[0][1] | |||
| spl1 = itr_item[1][0] | |||
| spl2 = itr_item[1][1] | |||
| i = itr_item[2][0] | |||
| j = itr_item[2][1] | |||
| return i, j, structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, | |||
| node_label, edge_label, node_kernels, edge_kernels) | |||
| def get_shortest_paths(G, weight, directed): | |||
| @@ -397,17 +425,21 @@ def get_shortest_paths(G, weight, directed): | |||
| for n1, n2 in combinations(G.nodes(), 2): | |||
| try: | |||
| spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight)) | |||
| except nx.NetworkXNoPath: # nodes not connected | |||
| # sp.append([]) | |||
| pass | |||
| else: | |||
| sp += spltemp | |||
| # each path is counted twice, starting from both of its end nodes. | |||
| if not directed: | |||
| sp += [sptemp[::-1] for sptemp in spltemp] | |||
| except nx.NetworkXNoPath: # nodes not connected | |||
| # sp.append([]) | |||
| pass | |||
| # add single nodes as length 0 paths. | |||
| sp += [[n] for n in G.nodes()] | |||
| return sp | |||
| def wrap_getSP(Gn, weight, directed, i): | |||
| return i, get_shortest_paths(Gn[i], weight, directed) | |||
| def wrapper_getSP(weight, directed, itr_item): | |||
| g = itr_item[0] | |||
| i = itr_item[1] | |||
| return i, get_shortest_paths(g, weight, directed) | |||
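get_shortest_paths above collects, for every node pair, all shortest paths (added in both directions when the graph is undirected) and finally appends each single node as a length-0 path. A toy illustration of the same idea written directly against networkx (not the library function itself):

    import networkx as nx
    from itertools import combinations

    G = nx.path_graph(3)   # 0 - 1 - 2
    sp = []
    for n1, n2 in combinations(G.nodes(), 2):
        try:
            spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=None))
        except nx.NetworkXNoPath:         # nodes not connected
            continue
        sp += spltemp
        sp += [p[::-1] for p in spltemp]  # undirected: count each path from both end nodes
    sp += [[n] for n in G.nodes()]        # single nodes as length-0 paths
    print(sp)
    # [[0, 1], [1, 0], [0, 1, 2], [2, 1, 0], [1, 2], [2, 1], [0], [1], [2]]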
| @@ -13,7 +13,6 @@ from itertools import chain, combinations_with_replacement | |||
| from functools import partial | |||
| from multiprocessing import Pool | |||
| from tqdm import tqdm | |||
| import traceback | |||
| import networkx as nx | |||
| import numpy as np | |||
| @@ -77,15 +76,15 @@ def untilhpathkernel(*args, | |||
| # but this may cost a lot of memory for large datasets. | |||
| pool = Pool(n_jobs) | |||
| all_paths = [[] for _ in range(len(Gn))] | |||
| getps_partial = partial(wrap_find_all_paths_until_length, Gn, depth, | |||
| getps_partial = partial(wrapper_find_all_paths_until_length, depth, | |||
| ds_attrs, node_label, edge_label) | |||
| itr = zip(Gn, range(0, len(Gn))) | |||
| if len(Gn) < 1000 * n_jobs: | |||
| chunksize = int(len(Gn) / n_jobs) + 1 | |||
| else: | |||
| chunksize = 1000 | |||
| # chunksize = 300 # int(len(list(itr)) / n_jobs) | |||
| for i, ps in tqdm( | |||
| pool.imap_unordered(getps_partial, range(0, len(Gn)), chunksize), | |||
| pool.imap_unordered(getps_partial, itr, chunksize), | |||
| desc='getting paths', file=sys.stdout): | |||
| all_paths[i] = ps | |||
| pool.close() | |||
| @@ -110,8 +109,9 @@ def untilhpathkernel(*args, | |||
| pass | |||
| else: | |||
| pool = Pool(n_jobs) | |||
| do_partial = partial(_untilhpathkernel_do_naive, all_paths, k_func) | |||
| itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
| do_partial = partial(wrapper_uhpath_do_naive, k_func) | |||
| itr = zip(combinations_with_replacement(all_paths, 2), | |||
| combinations_with_replacement(range(0, len(Gn)), 2)) | |||
| len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
| if len_itr < 1000 * n_jobs: | |||
| chunksize = int(len_itr / n_jobs) + 1 | |||
| @@ -216,7 +216,7 @@ def _untilhpathkernel_do_gst(gst1, gst2, paths1, paths2, k_func): | |||
| return kernel | |||
| def _untilhpathkernel_do_naive(paths_list, k_func, ij): | |||
| def _untilhpathkernel_do_naive(paths1, paths2, k_func): | |||
| """Calculate path graph kernels up to depth d between 2 graphs naively. | |||
| Parameters | |||
| @@ -235,10 +235,6 @@ def _untilhpathkernel_do_naive(paths_list, k_func, ij): | |||
| kernel : float | |||
| Path kernel up to h between 2 graphs. | |||
| """ | |||
| iglobal = ij[0] | |||
| jglobal = ij[1] | |||
| paths1 = paths_list[iglobal] | |||
| paths2 = paths_list[jglobal] | |||
| all_paths = list(set(paths1 + paths2)) | |||
| if k_func == 'tanimoto': | |||
| @@ -260,12 +256,18 @@ def _untilhpathkernel_do_naive(paths_list, k_func, ij): | |||
| kernel = np.sum(np.minimum(vector1, vector2)) / \ | |||
| np.sum(np.maximum(vector1, vector2)) | |||
| return iglobal, jglobal, kernel | |||
| return kernel | |||
| # @todo: (can be removed maybe) this method finds paths repetitively; it could be faster. | |||
| def wrapper_uhpath_do_naive(k_func, itr_item): | |||
| plist1 = itr_item[0][0] | |||
| plist2 = itr_item[0][1] | |||
| i = itr_item[1][0] | |||
| j = itr_item[1][1] | |||
| return i, j, _untilhpathkernel_do_naive(plist1, plist2, k_func) | |||
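The two k_func options compare the graphs' path sets in different ways: 'tanimoto' uses binary occurrence vectors over the union of paths (intersection over union), while 'MinMax' uses path-count vectors (sum of element-wise minima over sum of element-wise maxima), which is what the vector1/vector2 code above computes. A small worked sketch under that reading; the exact feature construction in the library may differ in detail:

    import numpy as np

    paths1 = [('C',), ('C', 'O'), ('C', 'O'), ('O',)]   # paths of graph 1, as label tuples
    paths2 = [('C',), ('C', 'O'), ('N',)]               # paths of graph 2
    all_paths = sorted(set(paths1 + paths2))

    # Tanimoto: binary occurrence vectors, |intersection| / |union|
    v1 = np.array([1 if p in paths1 else 0 for p in all_paths])
    v2 = np.array([1 if p in paths2 else 0 for p in all_paths])
    tanimoto = np.sum(v1 * v2) / (np.sum(v1) + np.sum(v2) - np.sum(v1 * v2))

    # MinMax: path-count vectors, sum of minima / sum of maxima
    c1 = np.array([paths1.count(p) for p in all_paths])
    c2 = np.array([paths2.count(p) for p in all_paths])
    minmax = np.sum(np.minimum(c1, c2)) / np.sum(np.maximum(c1, c2))

    print(tanimoto, minmax)   # 0.5 and 0.4 for this toy example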
| # @todo: (can be removed maybe) this method finds paths repetitively; it could be faster. | |||
| def find_all_paths_until_length(G, | |||
| length, | |||
| ds_attrs, | |||
| @@ -368,15 +370,12 @@ def find_all_paths_until_length(G, | |||
| return [tuple([len(path)]) for path in all_paths] | |||
| def wrap_find_all_paths_until_length(Gn, length, ds_attrs, node_label, | |||
| edge_label, i): | |||
| try: | |||
| return i, find_all_paths_until_length(Gn[i], length, ds_attrs, | |||
| def wrapper_find_all_paths_until_length(length, ds_attrs, node_label, | |||
| edge_label, itr_item): | |||
| g = itr_item[0] | |||
| i = itr_item[1] | |||
| return i, find_all_paths_until_length(g, length, ds_attrs, | |||
| node_label=node_label, edge_label=edge_label) | |||
| except Exception as e: | |||
| traceback.print_exc() | |||
| print('') | |||
| raise e | |||
| def paths2GSuffixTree(paths): | |||
| @@ -206,54 +206,50 @@ def model_selection_for_precomputed_kernel(datafile, | |||
| '3. Fitting and predicting using nested cross validation. This could really take a while...' | |||
| ) | |||
| # pool = Pool(n_jobs) | |||
| # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type) | |||
| # train_pref = [] | |||
| # val_pref = [] | |||
| # test_pref = [] | |||
| ## if NUM_TRIALS < 1000 * n_jobs: | |||
| ## chunksize = int(NUM_TRIALS / n_jobs) + 1 | |||
| ## else: | |||
| ## chunksize = 1000 | |||
| # chunksize = 1 | |||
| # for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): | |||
| # train_pref.append(o1) | |||
| # val_pref.append(o2) | |||
| # test_pref.append(o3) | |||
| # pool.close() | |||
| # pool.join() | |||
| # ---- use pool.map to parallel. ---- | |||
| pool = Pool(n_jobs) | |||
| trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type) | |||
| train_pref = [] | |||
| val_pref = [] | |||
| test_pref = [] | |||
| # if NUM_TRIALS < 100: | |||
| # chunksize, extra = divmod(NUM_TRIALS, n_jobs * 4) | |||
| # if extra: | |||
| # chunksize += 1 | |||
| # else: | |||
| # chunksize = 100 | |||
| chunksize = 1 | |||
| for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): | |||
| train_pref.append(o1) | |||
| val_pref.append(o2) | |||
| test_pref.append(o3) | |||
| pool.close() | |||
| pool.join() | |||
| # # ---- use pool.map to parallel. ---- | |||
| # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS)) | |||
| # train_pref = [item[0] for item in result_perf] | |||
| # val_pref = [item[1] for item in result_perf] | |||
| # test_pref = [item[2] for item in result_perf] | |||
| result_perf = pool.map(trial_do_partial, range(NUM_TRIALS)) | |||
| train_pref = [item[0] for item in result_perf] | |||
| val_pref = [item[1] for item in result_perf] | |||
| test_pref = [item[2] for item in result_perf] | |||
| # # ---- use joblib.Parallel to parallel and track progress. ---- | |||
| # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type) | |||
| # result_perf = Parallel(n_jobs=n_jobs, verbose=10)(delayed(trial_do_partial)(trial) for trial in range(NUM_TRIALS)) | |||
| # train_pref = [item[0] for item in result_perf] | |||
| # val_pref = [item[1] for item in result_perf] | |||
| # test_pref = [item[2] for item in result_perf] | |||
| # # ---- direct running, normally use a single CPU core. ---- | |||
| # train_pref = [] | |||
| # val_pref = [] | |||
| # test_pref = [] | |||
| # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout): | |||
| # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i) | |||
| # train_pref.append(o1) | |||
| # val_pref.append(o2) | |||
| # test_pref.append(o3) | |||
| # # ---- direct running, normally use a single CPU core. ---- | |||
| # train_pref = [] | |||
| # val_pref = [] | |||
| # test_pref = [] | |||
| # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout): | |||
| # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i) | |||
| # train_pref.append(o1) | |||
| # val_pref.append(o2) | |||
| # test_pref.append(o3) | |||
| # print() | |||
| print() | |||
| print('4. Getting final performance...') | |||
| str_fw += '\nIII. Performance.\n\n' | |||
| # averages and confidences of performances on outer trials for each combination of parameters | |||
| average_train_scores = np.mean(train_pref, axis=0) | |||
| # print('val_pref: ', val_pref[0][0]) | |||
| average_val_scores = np.mean(val_pref, axis=0) | |||
| # print('test_pref: ', test_pref[0][0]) | |||
| average_perf_scores = np.mean(test_pref, axis=0) | |||
| # sample std is used here | |||
| std_train_scores = np.std(train_pref, axis=0, ddof=1) | |||
| @@ -264,6 +260,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||
| best_val_perf = np.amin(average_val_scores) | |||
| else: | |||
| best_val_perf = np.amax(average_val_scores) | |||
| # print('average_val_scores: ', average_val_scores) | |||
| # print('best_val_perf: ', best_val_perf) | |||
| # print() | |||
| best_params_index = np.where(average_val_scores == best_val_perf) | |||
| # find smallest val std with best val perf. | |||
| best_val_stds = [ | |||
| @@ -286,6 +285,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||
| str_fw += 'best_val_perf: %s\n' % best_val_perf | |||
| str_fw += 'best_val_std: %s\n' % min_val_std | |||
| # print(best_params_index) | |||
| # print(best_params_index[0]) | |||
| # print(average_perf_scores) | |||
| final_performance = [ | |||
| average_perf_scores[value][best_params_index[1][idx]] | |||
| for idx, value in enumerate(best_params_index[0]) | |||
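For clarity, the selection above works on 2-D score arrays whose rows index the precomputed (gram-matrix) parameters and whose columns index the estimator parameters; np.where returns every (row, column) pair attaining the best averaged validation score, and those pairs are used to read off the matching test performance. A compact sketch with made-up numbers:

    import numpy as np

    # rows: param_list_pre_revised, columns: param_list (e.g. values of C or alpha)
    average_val_scores  = np.array([[0.80, 0.92, 0.85],
                                    [0.92, 0.70, 0.60]])
    average_perf_scores = np.array([[0.78, 0.90, 0.83],
                                    [0.88, 0.69, 0.58]])

    best_val_perf = np.amax(average_val_scores)   # classification: larger is better
    best_params_index = np.where(average_val_scores == best_val_perf)
    # -> (array([0, 1]), array([1, 0])): two tied parameter combinations

    final_performance = [
        average_perf_scores[row][best_params_index[1][idx]]
        for idx, row in enumerate(best_params_index[0])
    ]
    print(final_performance)   # test scores of the tied combinations: 0.9 and 0.88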
| @@ -429,23 +431,23 @@ def model_selection_for_precomputed_kernel(datafile, | |||
| '3. Fitting and predicting using nested cross validation. This could really take a while...' | |||
| ) | |||
| # pool = Pool(n_jobs) | |||
| # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type) | |||
| # train_pref = [] | |||
| # val_pref = [] | |||
| # test_pref = [] | |||
| # if NUM_TRIALS < 100: | |||
| # chunksize, extra = divmod(NUM_TRIALS, n_jobs * 4) | |||
| # if extra: | |||
| # chunksize += 1 | |||
| # else: | |||
| # chunksize = 100 | |||
| # for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): | |||
| # train_pref.append(o1) | |||
| # val_pref.append(o2) | |||
| # test_pref.append(o3) | |||
| # pool.close() | |||
| # pool.join() | |||
| pool = Pool(n_jobs) | |||
| trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type) | |||
| train_pref = [] | |||
| val_pref = [] | |||
| test_pref = [] | |||
| if NUM_TRIALS < 100: | |||
| chunksize, extra = divmod(NUM_TRIALS, n_jobs * 4) | |||
| if extra: | |||
| chunksize += 1 | |||
| else: | |||
| chunksize = 100 | |||
| for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): | |||
| train_pref.append(o1) | |||
| val_pref.append(o2) | |||
| test_pref.append(o3) | |||
| pool.close() | |||
| pool.join() | |||
| # # ---- use pool.map to parallel. ---- | |||
| # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS)) | |||
| @@ -460,15 +462,15 @@ def model_selection_for_precomputed_kernel(datafile, | |||
| # val_pref = [item[1] for item in result_perf] | |||
| # test_pref = [item[2] for item in result_perf] | |||
| # ---- direct running, normally use a single CPU core. ---- | |||
| train_pref = [] | |||
| val_pref = [] | |||
| test_pref = [] | |||
| for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout): | |||
| o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i) | |||
| train_pref.append(o1) | |||
| val_pref.append(o2) | |||
| test_pref.append(o3) | |||
| # # ---- direct running, normally use a single CPU core. ---- | |||
| # train_pref = [] | |||
| # val_pref = [] | |||
| # test_pref = [] | |||
| # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout): | |||
| # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i) | |||
| # train_pref.append(o1) | |||
| # val_pref.append(o2) | |||
| # test_pref.append(o3) | |||
| print() | |||
| print('4. Getting final performance...') | |||
| @@ -623,89 +625,142 @@ def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, t | |||
| val_pref = np.zeros((len(param_list_pre_revised), len(param_list))) | |||
| test_pref = np.zeros((len(param_list_pre_revised), len(param_list))) | |||
| # randomness added to seeds of the split function below. "high" is "size" times | |||
| # 10 so that at least 10 different random outputs will be yielded. Remove | |||
| # these lines if identical outputs are required. | |||
| rdm_out = np.random.RandomState(seed=None) | |||
| rdm_seed_out_l = rdm_out.uniform(high=len(param_list_pre_revised) * 10, | |||
| size=len(param_list_pre_revised)) | |||
| # print(trial, rdm_seed_out_l) | |||
| # print() | |||
| # loop for each outer param tuple | |||
| for index_out, params_out in enumerate(param_list_pre_revised): | |||
| # split gram matrix and y to app and test sets. | |||
| indices = range(len(y)) | |||
| # The argument "random_state" in function "train_test_split" can not be | |||
| # set to None, because it will use RandomState instance used by | |||
| # np.random, which is possible for multiple subprocesses to inherit the | |||
| # same seed if they forked at the same time, leading to identical | |||
| # random variates for different subprocesses. Instead, we use "trial" | |||
| # and "index_out" parameters to generate different seeds for different | |||
| # trials/subprocesses and outer loops. "rdm_seed_out_l" is used to add | |||
| # randomness into seeds, so that it yields a different output every | |||
| # time the program is run. To yield identical outputs every time, | |||
| # remove the second line below. Same method is used to the "KFold" | |||
| # function in the inner loop. | |||
| rdm_seed_out = (trial + 1) * (index_out + 1) | |||
| rdm_seed_out = (rdm_seed_out + int(rdm_seed_out_l[index_out])) % (2 ** 32 - 1) | |||
| # print(trial, rdm_seed_out) | |||
| X_app, X_test, y_app, y_test, idx_app, idx_test = train_test_split( | |||
| gram_matrices[index_out], y, indices, test_size=0.1, | |||
| random_state=None, shuffle=True) | |||
| random_state=rdm_seed_out, shuffle=True) | |||
| # print(trial, idx_app, idx_test) | |||
| # print() | |||
| X_app = X_app[:, idx_app] | |||
| X_test = X_test[:, idx_app] | |||
| y_app = np.array(y_app) | |||
| y_test = np.array(y_test) | |||
| rdm_seed_in_l = rdm_out.uniform(high=len(param_list) * 10, | |||
| size=len(param_list)) | |||
| # loop for each inner param tuple | |||
| for index_in, params_in in enumerate(param_list): | |||
| # print(index_in, params_in) | |||
| # if trial == 0: | |||
| # print(index_out, index_in) | |||
| # print('params_in: ', params_in) | |||
| # st = time.time() | |||
| inner_cv = KFold(n_splits=10, shuffle=True, random_state=trial) | |||
| rdm_seed_in = (trial + 1) * (index_out + 1) * (index_in + 1) | |||
| # print("rdm_seed_in1: ", trial, index_in, rdm_seed_in) | |||
| rdm_seed_in = (rdm_seed_in + int(rdm_seed_in_l[index_in])) % (2 ** 32 - 1) | |||
| # print("rdm_seed_in2: ", trial, index_in, rdm_seed_in) | |||
| inner_cv = KFold(n_splits=10, shuffle=True, random_state=rdm_seed_in) | |||
| current_train_perf = [] | |||
| current_valid_perf = [] | |||
| current_test_perf = [] | |||
| # For regression use the Kernel Ridge method | |||
| try: | |||
| if model_type == 'regression': | |||
| kr = KernelRidge(kernel='precomputed', **params_in) | |||
| # loop for each split on validation set level | |||
| # validation set level | |||
| for train_index, valid_index in inner_cv.split(X_app): | |||
| kr.fit(X_app[train_index, :][:, train_index], | |||
| y_app[train_index]) | |||
| # try: | |||
| if model_type == 'regression': | |||
| kr = KernelRidge(kernel='precomputed', **params_in) | |||
| # loop for each split on validation set level | |||
| # validation set level | |||
| for train_index, valid_index in inner_cv.split(X_app): | |||
| # print("train_index, valid_index: ", trial, index_in, train_index, valid_index) | |||
| # if trial == 0: | |||
| # print('train_index: ', train_index) | |||
| # print('valid_index: ', valid_index) | |||
| # print('idx_test: ', idx_test) | |||
| # print('y_app[train_index]: ', y_app[train_index]) | |||
| # print('X_app[train_index, :][:, train_index]: ', X_app[train_index, :][:, train_index]) | |||
| # print('X_app[valid_index, :][:, train_index]: ', X_app[valid_index, :][:, train_index]) | |||
| kr.fit(X_app[train_index, :][:, train_index], | |||
| y_app[train_index]) | |||
| # predict on the train, validation and test set | |||
| y_pred_train = kr.predict( | |||
| X_app[train_index, :][:, train_index]) | |||
| y_pred_valid = kr.predict( | |||
| X_app[valid_index, :][:, train_index]) | |||
| y_pred_test = kr.predict( | |||
| X_test[:, train_index]) | |||
| # predict on the train, validation and test set | |||
| y_pred_train = kr.predict( | |||
| X_app[train_index, :][:, train_index]) | |||
| y_pred_valid = kr.predict( | |||
| X_app[valid_index, :][:, train_index]) | |||
| # if trial == 0: | |||
| # print('y_pred_valid: ', y_pred_valid) | |||
| # print() | |||
| y_pred_test = kr.predict( | |||
| X_test[:, train_index]) | |||
| # root mean squared errors | |||
| current_train_perf.append( | |||
| np.sqrt( | |||
| mean_squared_error( | |||
| y_app[train_index], y_pred_train))) | |||
| current_valid_perf.append( | |||
| np.sqrt( | |||
| mean_squared_error( | |||
| y_app[valid_index], y_pred_valid))) | |||
| current_test_perf.append( | |||
| np.sqrt( | |||
| mean_squared_error( | |||
| y_test, y_pred_test))) | |||
| # For classification use SVM | |||
| else: | |||
| svc = SVC(kernel='precomputed', cache_size=200, | |||
| verbose=False, **params_in) | |||
| # loop for each split on validation set level | |||
| # validation set level | |||
| for train_index, valid_index in inner_cv.split(X_app): | |||
| # root mean squared errors | |||
| current_train_perf.append( | |||
| np.sqrt( | |||
| mean_squared_error( | |||
| y_app[train_index], y_pred_train))) | |||
| current_valid_perf.append( | |||
| np.sqrt( | |||
| mean_squared_error( | |||
| y_app[valid_index], y_pred_valid))) | |||
| # if trial == 0: | |||
| # print(mean_squared_error( | |||
| # y_app[valid_index], y_pred_valid)) | |||
| current_test_perf.append( | |||
| np.sqrt( | |||
| mean_squared_error( | |||
| y_test, y_pred_test))) | |||
| # For classification use SVM | |||
| else: | |||
| svc = SVC(kernel='precomputed', cache_size=200, | |||
| verbose=False, **params_in) | |||
| # loop for each split on validation set level | |||
| # validation set level | |||
| for train_index, valid_index in inner_cv.split(X_app): | |||
| # np.savez("bug.npy",X_app[train_index, :][:, train_index],y_app[train_index]) | |||
| svc.fit(X_app[train_index, :][:, train_index], | |||
| y_app[train_index]) | |||
| # predict on the train, validation and test set | |||
| y_pred_train = svc.predict( | |||
| X_app[train_index, :][:, train_index]) | |||
| y_pred_valid = svc.predict( | |||
| X_app[valid_index, :][:, train_index]) | |||
| y_pred_test = svc.predict( | |||
| X_test[:, train_index]) | |||
| # if trial == 0: | |||
| # print('train_index: ', train_index) | |||
| # print('valid_index: ', valid_index) | |||
| # print('idx_test: ', idx_test) | |||
| # print('y_app[train_index]: ', y_app[train_index]) | |||
| # print('X_app[train_index, :][:, train_index]: ', X_app[train_index, :][:, train_index]) | |||
| # print('X_app[valid_index, :][:, train_index]: ', X_app[valid_index, :][:, train_index]) | |||
| svc.fit(X_app[train_index, :][:, train_index], | |||
| y_app[train_index]) | |||
| # predict on the train, validation and test set | |||
| y_pred_train = svc.predict( | |||
| X_app[train_index, :][:, train_index]) | |||
| y_pred_valid = svc.predict( | |||
| X_app[valid_index, :][:, train_index]) | |||
| y_pred_test = svc.predict( | |||
| X_test[:, train_index]) | |||
| # classification accuracies | |||
| current_train_perf.append( | |||
| accuracy_score(y_app[train_index], | |||
| y_pred_train)) | |||
| current_valid_perf.append( | |||
| accuracy_score(y_app[valid_index], | |||
| y_pred_valid)) | |||
| current_test_perf.append( | |||
| accuracy_score(y_test, y_pred_test)) | |||
| except ValueError: | |||
| print(sys.exc_info()[0]) | |||
| print(params_out, params_in) | |||
| # classification accuracies | |||
| current_train_perf.append( | |||
| accuracy_score(y_app[train_index], | |||
| y_pred_train)) | |||
| current_valid_perf.append( | |||
| accuracy_score(y_app[valid_index], | |||
| y_pred_valid)) | |||
| current_test_perf.append( | |||
| accuracy_score(y_test, y_pred_test)) | |||
| # except ValueError: | |||
| # print(sys.exc_info()[0]) | |||
| # print(params_out, params_in) | |||
| # average performance on inner splits | |||
| train_pref[index_out][index_in] = np.mean( | |||
| @@ -715,5 +770,8 @@ def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, t | |||
| test_pref[index_out][index_in] = np.mean( | |||
| current_test_perf) | |||
| # print(time.time() - st) | |||
| # if trial == 0: | |||
| # print('val_pref: ', val_pref) | |||
| # print('test_pref: ', test_pref) | |||
| return train_pref, val_pref, test_pref | |||
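To restate the seeding scheme used throughout trial_do: each (trial, outer-parameter, inner-parameter) combination derives its own seed from a deterministic product plus a per-run random offset, so parallel trials never share a seed and repeated runs of the program differ; removing the offset line restores reproducibility. A standalone sketch of the same scheme with illustrative sizes:

    import numpy as np

    n_params_out, num_trials = 3, 30
    rdm_out = np.random.RandomState(seed=None)   # fresh randomness for this run
    rdm_seed_out_l = rdm_out.uniform(high=n_params_out * 10, size=n_params_out)

    seeds = []
    for trial in range(num_trials):
        for index_out in range(n_params_out):
            # deterministic part: distinguishes trials and outer loops even when
            # worker processes were forked at the same instant
            seed = (trial + 1) * (index_out + 1)
            # per-run random offset: remove this line for reproducible output
            seed = (seed + int(rdm_seed_out_l[index_out])) % (2 ** 32 - 1)
            seeds.append(seed)   # passed as random_state to train_test_split / KFold

    print(len(seeds), 'seeds generated for', num_trials, 'trials')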