2. modify the treelet kernel: use tuples instead of strings to store canonkeys, in case some label strings contain more than one character. (tag: v0.1)
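Why tuples: when canonical keys are built by joining label strings, two different label sequences can collapse into the same key as soon as one label has more than one character; tuples preserve the boundaries between labels and are still hashable. A minimal sketch of the failure mode (the label values are made up for illustration):

    # Two different label sequences along hypothetical treelet paths.
    path_a = ['C', 'Cl']   # e.g. carbon, chlorine
    path_b = ['CC', 'l']   # a different sequence of (made-up) labels

    # String concatenation merges both into the same canonical key...
    assert ''.join(path_a) == ''.join(path_b) == 'CCl'

    # ...while tuples keep them distinct and still work as dict keys.
    assert tuple(path_a) != tuple(path_b)
    canonkeys = {tuple(path_a): 1, tuple(path_b): 2}
    print(canonkeys)  # {('C', 'Cl'): 1, ('CC', 'l'): 2}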
@@ -9,6 +9,10 @@ datasets/*
 !datasets/MUTAG/
 !datasets/Letter-med/
 !datasets/ENZYMES_txt/
+!datasets/DD/
+!datasets/NCI1/
+!datasets/NCI109/
+!datasets/AIDS/
 notebooks/results/*
 notebooks/check_gm/*
 notebooks/test_parallel/*
@@ -12,22 +12,25 @@ import multiprocessing
 from pygraph.kernels.commonWalkKernel import commonwalkkernel
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
-{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# node nsymb
-{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-# node symb/nsymb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
+# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# # node nsymb
+# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+# # node symb/nsymb
+# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
 # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
 # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
 # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
@@ -41,11 +44,6 @@ dslist = [
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
 # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
 # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
@@ -56,10 +54,12 @@ dslist = [
 # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
 ]
 estimator = commonwalkkernel
+#param_grid_precomputed = [{'compute_method': ['geo'],
+# 'weight': np.linspace(0.01, 0.15, 15)},
+## 'weight': np.logspace(-1, -10, num=10, base=10)},
+# {'compute_method': ['exp'], 'weight': range(0, 15)}]
 param_grid_precomputed = [{'compute_method': ['geo'],
-'weight': np.linspace(0.01, 0.15, 15)},
-# 'weight': np.logspace(-1, -10, num=10, base=10)},
-{'compute_method': ['exp'], 'weight': range(0, 15)}]
+'weight': np.linspace(0.01, 0.15, 15)}]
 param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
 {'alpha': np.logspace(-10, 10, num=41, base=10)}]
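Throughout these scripts, param_grid_precomputed is a list of dicts whose values are lists of candidate settings; each dict expands to the cross-product of its values, with one Gram matrix precomputed per combination. The runner presumably expands it the same way scikit-learn's ParameterGrid does, which makes the search cost easy to estimate:

    import numpy as np
    from sklearn.model_selection import ParameterGrid

    # The grid kept by this change: 'geo' only, 15 candidate weights.
    param_grid_precomputed = [{'compute_method': ['geo'],
                               'weight': np.linspace(0.01, 0.15, 15)}]
    combos = list(ParameterGrid(param_grid_precomputed))
    print(len(combos))  # 15 Gram matrices to precompute
    print(combos[0])    # {'compute_method': 'geo', 'weight': 0.01}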
@@ -12,22 +12,25 @@ import multiprocessing
 from pygraph.kernels.marginalizedKernel import marginalizedkernel
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
-{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# node nsymb
-{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-# node symb/nsymb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
+# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# # node nsymb
+# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+# # node symb/nsymb
+# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
 # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
 # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
 # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
@@ -41,11 +44,6 @@ dslist = [
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
 # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
 # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
@@ -59,7 +57,7 @@ estimator = marginalizedkernel
 #param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.3, 3),
 # 'n_iteration': np.linspace(1, 1, 1),
 param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
-'n_iteration': np.linspace(1, 19, 7),
+'n_iteration': np.linspace(5, 20, 4),
 'remove_totters': [False]}
 param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
 {'alpha': np.logspace(-10, 10, num=41, base=10)}]
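For intuition about the widened p_quit grid: in the marginalized kernel, p_quit is the per-step probability that a random walk terminates, so walk lengths are geometrically distributed with mean 1/p_quit, and the grid spans expected walk lengths from 10 down to roughly 1.1. A quick check in plain NumPy:

    import numpy as np

    p_quit = np.linspace(0.1, 0.9, 9)  # the new termination-probability grid
    for p in p_quit:
        print(f"p_quit={p:.1f} -> expected walk length ~{1 / p:.2f}")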
@@ -17,22 +17,25 @@ import numpy as np
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
-{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# node nsymb
-{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-# node symb/nsymb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
+# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+# # node symb/nsymb
+# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# # node nsymb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
 # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
 # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
 # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
@@ -40,22 +43,17 @@ dslist = [
 #
 # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values
-# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values
+# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
+# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
 # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
 # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
 # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
-# # not working below
-# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
+# # not working below
+# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
 # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
 # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
 # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
@@ -63,12 +61,25 @@
 estimator = randomwalkkernel
 param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
 {'alpha': np.logspace(-10, 10, num=41, base=10)}]
-gaussiankernel = functools.partial(gaussiankernel, gamma=0.5)
+## for non-symbolic labels.
+#gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
+# for ga in np.logspace(0, 10, num=11, base=10)]
+#mixkernels = [functools.partial(kernelproduct, deltakernel, gk) for gk in gkernels]
+#sub_kernels = [{'symb': deltakernel, 'nsymb': gkernels[i], 'mix': mixkernels[i]}
+# for i in range(len(gkernels))]
+# for symbolic labels only.
+#gaussiankernel = functools.partial(gaussiankernel, gamma=0.5)
+mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
 for ds in dslist:
 print()
 print(ds['name'])
-for compute_method in ['sylvester', 'conjugate', 'fp', 'spectral']:
+# for compute_method in ['sylvester', 'conjugate', 'fp', 'spectral']:
+for compute_method in ['conjugate', 'fp']:
 if compute_method == 'sylvester':
 param_grid_precomputed = {'compute_method': ['sylvester'],
 # 'weight': np.linspace(0.01, 0.10, 10)}
@@ -76,18 +87,12 @@ for ds in dslist:
 elif compute_method == 'conjugate':
 mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
 param_grid_precomputed = {'compute_method': ['conjugate'],
-'node_kernels':
-[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
-'edge_kernels':
-[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
+'node_kernels': sub_kernels, 'edge_kernels': sub_kernels,
 'weight': np.logspace(-1, -10, num=10, base=10)}
 elif compute_method == 'fp':
 mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
 param_grid_precomputed = {'compute_method': ['fp'],
-'node_kernels':
-[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
-'edge_kernels':
-[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
+'node_kernels': sub_kernels, 'edge_kernels': sub_kernels,
 'weight': np.logspace(-3, -10, num=8, base=10)}
 elif compute_method == 'spectral':
 param_grid_precomputed = {'compute_method': ['spectral'],
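The refactor above replaces two copies of an inline kernel-dict literal with one shared sub_kernels list. Each dict maps a label type to a base kernel: 'symb' for symbolic labels, 'nsymb' for non-symbolic (vector) attributes, and 'mix' for nodes or edges carrying both. A self-contained sketch of that contract with stand-in kernels (the function bodies and the kernelproduct signature here are assumptions for illustration, not pygraph's exact implementations):

    import functools
    import numpy as np

    def deltakernel(x, y):
        # Kronecker delta on symbolic labels: 1 if equal, else 0.
        return 1.0 if x == y else 0.0

    def gaussiankernel(x, y, gamma=0.5):
        # RBF kernel on non-symbolic (vector) attributes.
        d = np.asarray(x, float) - np.asarray(y, float)
        return float(np.exp(-gamma * d.dot(d)))

    def kernelproduct(k1, k2, x1, x2, y1, y2):
        # Product kernel for items carrying both label types.
        return k1(x1, x2) * k2(y1, y2)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]

    # One list now serves both parameters:
    grid = {'compute_method': ['conjugate'],
            'node_kernels': sub_kernels, 'edge_kernels': sub_kernels}
    print(sub_kernels[0]['symb']('C', 'C'))                 # 1.0
    print(sub_kernels[0]['nsymb']([0.0, 1.0], [0.0, 0.5]))  # exp(-0.125)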
@@ -8,41 +8,40 @@ from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
 # datasets
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
 {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
 # node nsymb
 {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
 # node symb/nsymb
+{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
-#
-# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
-# {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
-# {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
-# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
-# {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb
-#
-# {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
-# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
+# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
+# # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
+# # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
+# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
+#
+# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
+# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
+# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
+# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
+# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb
+# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
+# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
+# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
+# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
 # # not working below
 # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
@@ -52,6 +51,7 @@ dslist = [
 ]
 estimator = spkernel
 # hyper-parameters
+#gaussiankernel = functools.partial(gaussiankernel, gamma=0.5)
 mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
 param_grid_precomputed = {'node_kernels': [
 {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
@@ -14,22 +14,25 @@ from pygraph.kernels.structuralspKernel import structuralspkernel
 from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
-{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# node nsymb
-{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-# node symb/nsymb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
+# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# # node nsymb
+# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+# # node symb/nsymb
+{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
 # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
 # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
 # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
@@ -37,33 +40,37 @@ dslist = [
 #
 # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values
-# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values
+# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
+# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
 # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
 # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
 # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
-# # not working below
-# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
+# # not working below
+# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
 # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
 # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
 # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
 ]
 estimator = structuralspkernel
+## for non-symbolic labels.
+#gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
+# for ga in np.logspace(0, 10, num=11, base=10)]
+#mixkernels = [functools.partial(kernelproduct, deltakernel, gk) for gk in gkernels]
+#sub_kernels = [{'symb': deltakernel, 'nsymb': gkernels[i], 'mix': mixkernels[i]}
+# for i in range(len(gkernels))]
+# for symbolic labels only.
+#gaussiankernel = functools.partial(gaussiankernel, gamma=0.5)
 mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-param_grid_precomputed = {'node_kernels':
-[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
-'edge_kernels':
-[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
-'compute_method': ['naive']}
+sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
+param_grid_precomputed = {'node_kernels': sub_kernels, 'edge_kernels': sub_kernels,
+'compute_method': ['naive']}
 param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
 {'alpha': np.logspace(-10, 10, num=41, base=10)}]
@@ -8,27 +8,31 @@ Created on Mon Mar 21 11:19:33 2019
 from libs import *
 import multiprocessing
+import functools
 from pygraph.kernels.treeletKernel import treeletkernel
-from pygraph.utils.kernels import gaussiankernel, linearkernel, polynomialkernel
+from pygraph.utils.kernels import gaussiankernel, polynomialkernel
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
 {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
 # contains single node graph, node symb
 {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
 {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
 {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
-# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# # node nsymb
 {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
 # node symb/nsymb
+{'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+{'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# node nsymb
+{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
 # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
 # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
 # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
@@ -42,11 +46,6 @@ dslist = [
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
 # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
 # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
@@ -57,7 +56,12 @@ dslist = [
 # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
 ]
 estimator = treeletkernel
-param_grid_precomputed = {'sub_kernel': [gaussiankernel, linearkernel, polynomialkernel]}
+gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
+# for ga in np.linspace(1, 10, 10)]
+for ga in np.logspace(0, 10, num=11, base=10)]
+pkernels = [functools.partial(polynomialkernel, d=d, c=c) for d in range(1, 11)
+for c in np.logspace(0, 10, num=11, base=10)]
+param_grid_precomputed = {'sub_kernel': pkernels + gkernels}
 param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
 {'alpha': np.logspace(-10, 10, num=41, base=10)}]
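For scale: the new treelet grid builds 10 degrees x 11 offsets = 110 polynomial sub-kernels plus 11 Gaussian ones, so 121 candidate sub-kernels enter the precomputed grid search. The same construction with stand-in scalar kernels (pygraph's real gaussiankernel and polynomialkernel are assumed here to take keyword parameters gamma, and d and c, as the diff suggests):

    import functools
    import numpy as np

    def gaussiankernel(x, y, gamma=1.0):
        return np.exp(-gamma * (x - y) ** 2)

    def polynomialkernel(x, y, d=1, c=0):
        return (x * y + c) ** d

    gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
                for ga in np.logspace(0, 10, num=11, base=10)]
    pkernels = [functools.partial(polynomialkernel, d=d, c=c)
                for d in range(1, 11)
                for c in np.logspace(0, 10, num=11, base=10)]

    param_grid_precomputed = {'sub_kernel': pkernels + gkernels}
    print(len(pkernels), len(gkernels))               # 110 11
    print(len(param_grid_precomputed['sub_kernel']))  # 121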
@@ -12,22 +12,25 @@ import multiprocessing
 from pygraph.kernels.untilHPathKernel import untilhpathkernel
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
-{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# node nsymb
-{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-# node symb/nsymb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
+# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# # node nsymb
+# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+# # node symb/nsymb
+# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
 # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
 # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
 # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
@@ -41,11 +44,6 @@ dslist = [
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
-# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
-# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
 # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
 # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
@@ -57,7 +55,7 @@ dslist = [
 ]
 estimator = untilhpathkernel
 param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
-'k_func': ['MinMax', 'tanimoto'],
+'k_func': ['MinMax'], # ['MinMax', 'tanimoto'],
 'compute_method': ['trie']} # ['MinMax']}
 param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
 {'alpha': np.logspace(-10, 10, num=41, base=10)}]
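On the k_func options narrowed above: for path-based kernels these are two standard similarities over per-graph path statistics. MinMax compares multisets of path counts (sum of elementwise minima over sum of maxima), while Tanimoto compares the sets of paths that occur at all (a Jaccard index). A sketch on plain count dictionaries, as an illustration of the two measures rather than pygraph's internal code:

    def minmax_similarity(c1, c2):
        # Sum of elementwise minima over sum of maxima of two count maps.
        paths = set(c1) | set(c2)
        mins = sum(min(c1.get(p, 0), c2.get(p, 0)) for p in paths)
        maxs = sum(max(c1.get(p, 0), c2.get(p, 0)) for p in paths)
        return mins / maxs if maxs else 1.0

    def tanimoto_similarity(c1, c2):
        # Jaccard index on the sets of paths occurring at least once.
        s1 = {p for p, n in c1.items() if n}
        s2 = {p for p, n in c2.items() if n}
        union = s1 | s2
        return len(s1 & s2) / len(union) if union else 1.0

    # Path counts keyed by label tuples (cf. the canonkey change above).
    g1 = {('C',): 4, ('C', 'C'): 3, ('C', 'O'): 1}
    g2 = {('C',): 3, ('C', 'C'): 2, ('O',): 1}
    print(minmax_similarity(g1, g2))    # 5/9 ~ 0.556
    print(tanimoto_similarity(g1, g2))  # 2/4 = 0.5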
@@ -10,26 +10,29 @@ from libs import *
 import multiprocessing
 from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
+from pygraph.utils.kernels import gaussiankernel, polynomialkernel
 dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
-{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# node nsymb
-{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-# node symb/nsymb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'}, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'}, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt'}, # node/edge symb
+# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# # node nsymb
+# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+# # node symb/nsymb
+# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
+# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
+# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
+{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
+#
 # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
 # # node/edge symb
-{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb
-#
 # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
 # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
 # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
@@ -43,9 +46,6 @@ dslist = [
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
-# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
-{'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb
-{'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb
 # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
 # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb
@@ -13,7 +13,7 @@
 "text": [
 "\n",
 "Acyclic:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
@@ -38,7 +38,7 @@
 "\n",
 "\n",
 "Alkane:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : False\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
@@ -63,7 +63,7 @@
 "\n",
 "\n",
 "MAO:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : True\n",
 "is_directed : False\n",
@@ -88,7 +88,7 @@
 "\n",
 "\n",
 "PAH:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : False\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
@@ -113,7 +113,7 @@
 "\n",
 "\n",
 "MUTAG:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : True\n",
 "is_directed : False\n",
@@ -131,14 +131,14 @@
 "min_fill_factor : 0.039540816326530615\n",
 "max_fill_factor : 0.1\n",
 "node_label_num : 7\n",
-"edge_label_num : 11\n",
+"edge_label_num : 4\n",
 "node_attr_dim : 0\n",
 "edge_attr_dim : 0\n",
 "class_number : 2\n",
 "\n",
 "\n",
 "Letter-med:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : False\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
@@ -163,7 +163,7 @@
 "\n",
 "\n",
 "ENZYMES:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
@@ -187,33 +187,8 @@
 "class_number : 6\n",
 "\n",
 "\n",
-"Mutagenicity:\n",
-"substructures : {'non linear', 'linear'}\n",
-"node_labeled : True\n",
-"edge_labeled : True\n",
-"is_directed : False\n",
-"dataset_size : 4337\n",
-"ave_node_num : 30.317731150564907\n",
-"min_node_num : 4\n",
-"max_node_num : 417\n",
-"ave_edge_num : 30.76942587041734\n",
-"min_edge_num : 3\n",
-"max_edge_num : 112\n",
-"ave_node_degree : 2.0379886162441148\n",
-"min_node_degree : 0.47961630695443647\n",
-"max_node_degree : 2.3703703703703702\n",
-"ave_fill_factor : 0.0431047931997047\n",
-"min_fill_factor : 0.0005750795047415305\n",
-"max_fill_factor : 0.1875\n",
-"node_label_num : 14\n",
-"edge_label_num : 3\n",
-"node_attr_dim : 0\n",
-"edge_attr_dim : 0\n",
-"class_number : 2\n",
-"\n",
-"\n",
 "D&D:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
@@ -237,8 +212,58 @@
 "class_number : 2\n",
 "\n",
 "\n",
+"NCI1:\n",
+"substructures : {'linear', 'non linear'}\n",
+"node_labeled : True\n",
+"edge_labeled : False\n",
+"is_directed : False\n",
+"dataset_size : 4110\n",
+"ave_node_num : 29.8654501216545\n",
+"min_node_num : 3\n",
+"max_node_num : 111\n",
+"ave_edge_num : 32.3\n",
+"min_edge_num : 2\n",
+"max_edge_num : 119\n",
+"ave_node_degree : 2.155013792267071\n",
+"min_node_degree : 0.8\n",
+"max_node_degree : 2.769230769230769\n",
+"ave_fill_factor : 0.04239828192835043\n",
+"min_fill_factor : 0.009522961908152367\n",
+"max_fill_factor : 0.2222222222222222\n",
+"node_label_num : 37\n",
+"edge_label_num : 0\n",
+"node_attr_dim : 0\n",
+"edge_attr_dim : 0\n",
+"class_number : 2\n",
+"\n",
+"\n",
+"NCI109:\n",
+"substructures : {'linear', 'non linear'}\n",
+"node_labeled : True\n",
+"edge_labeled : False\n",
+"is_directed : False\n",
+"dataset_size : 4127\n",
+"ave_node_num : 29.681124303368065\n",
+"min_node_num : 4\n",
+"max_node_num : 111\n",
+"ave_edge_num : 32.13084565059365\n",
+"min_edge_num : 3\n",
+"max_edge_num : 119\n",
+"ave_node_degree : 2.156446168619097\n",
+"min_node_degree : 1.0909090909090908\n",
+"max_node_degree : 2.769230769230769\n",
+"ave_fill_factor : 0.04263668408405519\n",
+"min_fill_factor : 0.009522961908152367\n",
+"max_fill_factor : 0.1875\n",
+"node_label_num : 38\n",
+"edge_label_num : 0\n",
+"node_attr_dim : 0\n",
+"edge_attr_dim : 0\n",
+"class_number : 2\n",
+"\n",
+"\n",
 "AIDS:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : True\n",
 "is_directed : False\n",
@@ -262,6 +287,31 @@
 "class_number : 2\n",
 "\n",
 "\n",
+"Mutagenicity:\n",
+"substructures : {'linear', 'non linear'}\n",
+"node_labeled : True\n",
+"edge_labeled : True\n",
+"is_directed : False\n",
+"dataset_size : 4337\n",
+"ave_node_num : 30.317731150564907\n",
+"min_node_num : 4\n",
+"max_node_num : 417\n",
+"ave_edge_num : 30.76942587041734\n",
+"min_edge_num : 3\n",
+"max_edge_num : 112\n",
+"ave_node_degree : 2.0379886162441148\n",
+"min_node_degree : 0.47961630695443647\n",
+"max_node_degree : 2.3703703703703702\n",
+"ave_fill_factor : 0.0431047931997047\n",
+"min_fill_factor : 0.0005750795047415305\n",
+"max_fill_factor : 0.1875\n",
+"node_label_num : 14\n",
+"edge_label_num : 3\n",
+"node_attr_dim : 0\n",
+"edge_attr_dim : 0\n",
+"class_number : 2\n",
+"\n",
+"\n",
 "FIRSTMM_DB:\n",
 "substructures : {'non linear'}\n",
 "node_labeled : True\n",
@@ -288,7 +338,7 @@
 "\n",
 "\n",
 "MSRC9:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
@@ -313,7 +363,7 @@
 "\n",
 "\n",
 "MSRC21:\n",
-"substructures : {'non linear', 'linear'}\n",
+"substructures : {'linear', 'non linear'}\n",
 "node_labeled : True\n",
 "edge_labeled : False\n",
 "is_directed : False\n",
| @@ -335,10 +385,16 @@ | |||||
| "node_attr_dim : 0\n", | "node_attr_dim : 0\n", | ||||
| "edge_attr_dim : 0\n", | "edge_attr_dim : 0\n", | ||||
| "class_number : 20\n", | "class_number : 20\n", | ||||
| "\n", | |||||
| "\n" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "stdout", | |||||
| "output_type": "stream", | |||||
| "text": [ | |||||
| "\n", | "\n", | ||||
| "SYNTHETIC:\n", | "SYNTHETIC:\n", | ||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "substructures : {'linear', 'non linear'}\n", | |||||
| "node_labeled : True\n", | "node_labeled : True\n", | ||||
| "edge_labeled : False\n", | "edge_labeled : False\n", | ||||
| "is_directed : False\n", | "is_directed : False\n", | ||||
| @@ -363,7 +419,7 @@ | |||||
| "\n", | "\n", | ||||
| "\n", | "\n", | ||||
| "BZR:\n", | "BZR:\n", | ||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "substructures : {'linear', 'non linear'}\n", | |||||
| "node_labeled : True\n", | "node_labeled : True\n", | ||||
| "edge_labeled : False\n", | "edge_labeled : False\n", | ||||
| "is_directed : False\n", | "is_directed : False\n", | ||||
| @@ -385,16 +441,10 @@ | |||||
| "node_attr_dim : 3\n", | "node_attr_dim : 3\n", | ||||
| "edge_attr_dim : 0\n", | "edge_attr_dim : 0\n", | ||||
| "class_number : 2\n", | "class_number : 2\n", | ||||
| "\n" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "stdout", | |||||
| "output_type": "stream", | |||||
| "text": [ | |||||
| "\n", | |||||
| "\n", | "\n", | ||||
| "COX2:\n", | "COX2:\n", | ||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "substructures : {'linear', 'non linear'}\n", | |||||
| "node_labeled : True\n", | "node_labeled : True\n", | ||||
| "edge_labeled : False\n", | "edge_labeled : False\n", | ||||
| "is_directed : False\n", | "is_directed : False\n", | ||||
| @@ -419,7 +469,7 @@ | |||||
| "\n", | "\n", | ||||
| "\n", | "\n", | ||||
| "DHFR:\n", | "DHFR:\n", | ||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "substructures : {'linear', 'non linear'}\n", | |||||
| "node_labeled : True\n", | "node_labeled : True\n", | ||||
| "edge_labeled : False\n", | "edge_labeled : False\n", | ||||
| "is_directed : False\n", | "is_directed : False\n", | ||||
| @@ -444,7 +494,7 @@ | |||||
| "\n", | "\n", | ||||
| "\n", | "\n", | ||||
| "PROTEINS:\n", | "PROTEINS:\n", | ||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "substructures : {'linear', 'non linear'}\n", | |||||
| "node_labeled : True\n", | "node_labeled : True\n", | ||||
| "edge_labeled : False\n", | "edge_labeled : False\n", | ||||
| "is_directed : False\n", | "is_directed : False\n", | ||||
| @@ -469,7 +519,7 @@ | |||||
| "\n", | "\n", | ||||
| "\n", | "\n", | ||||
| "PROTEINS_full:\n", | "PROTEINS_full:\n", | ||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "substructures : {'linear', 'non linear'}\n", | |||||
| "node_labeled : True\n", | "node_labeled : True\n", | ||||
| "edge_labeled : False\n", | "edge_labeled : False\n", | ||||
| "is_directed : False\n", | "is_directed : False\n", | ||||
| @@ -492,61 +542,11 @@ | |||||
| "edge_attr_dim : 0\n", | "edge_attr_dim : 0\n", | ||||
| "class_number : 2\n", | "class_number : 2\n", | ||||
| "\n", | "\n", | ||||
| "\n", | |||||
| "NCI1:\n", | |||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "node_labeled : True\n", | |||||
| "edge_labeled : False\n", | |||||
| "is_directed : False\n", | |||||
| "dataset_size : 4110\n", | |||||
| "ave_node_num : 29.8654501216545\n", | |||||
| "min_node_num : 3\n", | |||||
| "max_node_num : 111\n", | |||||
| "ave_edge_num : 32.3\n", | |||||
| "min_edge_num : 2\n", | |||||
| "max_edge_num : 119\n", | |||||
| "ave_node_degree : 2.155013792267071\n", | |||||
| "min_node_degree : 0.8\n", | |||||
| "max_node_degree : 2.769230769230769\n", | |||||
| "ave_fill_factor : 0.04239828192835043\n", | |||||
| "min_fill_factor : 0.009522961908152367\n", | |||||
| "max_fill_factor : 0.2222222222222222\n", | |||||
| "node_label_num : 37\n", | |||||
| "edge_label_num : 0\n", | |||||
| "node_attr_dim : 0\n", | |||||
| "edge_attr_dim : 0\n", | |||||
| "class_number : 2\n", | |||||
| "\n", | |||||
| "\n", | |||||
| "NCI109:\n", | |||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "node_labeled : True\n", | |||||
| "edge_labeled : False\n", | |||||
| "is_directed : False\n", | |||||
| "dataset_size : 4127\n", | |||||
| "ave_node_num : 29.681124303368065\n", | |||||
| "min_node_num : 4\n", | |||||
| "max_node_num : 111\n", | |||||
| "ave_edge_num : 32.13084565059365\n", | |||||
| "min_edge_num : 3\n", | |||||
| "max_edge_num : 119\n", | |||||
| "ave_node_degree : 2.156446168619097\n", | |||||
| "min_node_degree : 1.0909090909090908\n", | |||||
| "max_node_degree : 2.769230769230769\n", | |||||
| "ave_fill_factor : 0.04263668408405519\n", | |||||
| "min_fill_factor : 0.009522961908152367\n", | |||||
| "max_fill_factor : 0.1875\n", | |||||
| "node_label_num : 38\n", | |||||
| "edge_label_num : 0\n", | |||||
| "node_attr_dim : 0\n", | |||||
| "edge_attr_dim : 0\n", | |||||
| "class_number : 2\n", | |||||
| "\n", | |||||
| "load SDF: 100%|██████████| 4457424/4457424 [00:08<00:00, 497346.72it/s]\n", | |||||
| "ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4689.76it/s] \n", | |||||
| "load SDF: 100%|██████████| 4457424/4457424 [00:09<00:00, 489414.03it/s]\n", | |||||
| "ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4562.13it/s] \n", | |||||
| "\n", | "\n", | ||||
| "NCI-HIV:\n", | "NCI-HIV:\n", | ||||
| "substructures : {'non linear', 'linear'}\n", | |||||
| "substructures : {'linear', 'non linear'}\n", | |||||
| "node_labeled : True\n", | "node_labeled : True\n", | ||||
| "edge_labeled : True\n", | "edge_labeled : True\n", | ||||
| "is_directed : False\n", | "is_directed : False\n", | ||||
| @@ -584,14 +584,15 @@ | |||||
| " 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt',},\n", | " 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt',},\n", | ||||
| " {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds',},\n", | " {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds',},\n", | ||||
| " {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds',},\n", | " {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds',},\n", | ||||
| " {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',\n", | |||||
| " 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},\n", | |||||
| " {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG_A.txt'},\n", | |||||
| " {'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'},\n", | " {'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'},\n", | ||||
| " {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | " {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | ||||
| " {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||||
| " {'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat',\n", | |||||
| " 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},\n", | |||||
| " {'name': 'D&D', 'dataset': '../../datasets/DD/DD_A.txt'},\n", | |||||
| " {'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1_A.txt'},\n", | |||||
| " {'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109_A.txt'},\n", | |||||
| " {'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'},\n", | " {'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'},\n", | ||||
| " \n", | |||||
| " {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||||
| " {'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n", | " {'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n", | ||||
| " {'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n", | " {'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n", | ||||
| " {'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n", | " {'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n", | ||||
| @@ -601,10 +602,6 @@ | |||||
| " {'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n", | " {'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n", | ||||
| " {'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n", | " {'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n", | ||||
| " {'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n", | " {'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n", | ||||
| " {'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1.mat',\n", | |||||
| " 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n", | |||||
| " {'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109.mat',\n", | |||||
| " 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n", | |||||
| " {'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf',\n", | " {'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf',\n", | ||||
| " 'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',},\n", | " 'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',},\n", | ||||
| "\n", | "\n", | ||||
| @@ -646,7 +643,7 @@ | |||||
| "name": "python", | "name": "python", | ||||
| "nbconvert_exporter": "python", | "nbconvert_exporter": "python", | ||||
| "pygments_lexer": "ipython3", | "pygments_lexer": "ipython3", | ||||
| "version": "3.6.7" | |||||
| "version": "3.6.8" | |||||
| } | } | ||||
| }, | }, | ||||
| "nbformat": 4, | "nbformat": 4, | ||||
| @@ -22,6 +22,11 @@ from iam import iam, test_iam_with_more_graphs_as_init, test_iam_moreGraphsAsIni | |||||
| sys.path.insert(0, "../") | sys.path.insert(0, "../") | ||||
| from pygraph.kernels.marginalizedKernel import marginalizedkernel | from pygraph.kernels.marginalizedKernel import marginalizedkernel | ||||
| from pygraph.kernels.untilHPathKernel import untilhpathkernel | from pygraph.kernels.untilHPathKernel import untilhpathkernel | ||||
| from pygraph.kernels.spKernel import spkernel | |||||
| import functools | |||||
| from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||||
| from pygraph.kernels.structuralspKernel import structuralspkernel | |||||
| from median import draw_Letter_graph | |||||
| def gk_iam(Gn, alpha): | def gk_iam(Gn, alpha): | ||||
| @@ -119,6 +124,8 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max): | |||||
| for gi in Gk: | for gi in Gk: | ||||
| nx.draw_networkx(gi) | nx.draw_networkx(gi) | ||||
| plt.show() | plt.show() | ||||
| print(gi.nodes(data=True)) | |||||
| print(gi.edges(data=True)) | |||||
| Gs_nearest = Gk.copy() | Gs_nearest = Gk.copy() | ||||
| # gihat_list = [] | # gihat_list = [] | ||||
| @@ -132,6 +139,8 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max): | |||||
| g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1) | g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1) | ||||
| nx.draw_networkx(g_tmp) | nx.draw_networkx(g_tmp) | ||||
| plt.show() | plt.show() | ||||
| print(g_tmp.nodes(data=True)) | |||||
| print(g_tmp.edges(data=True)) | |||||
| # compute distance between phi and the new generated graph. | # compute distance between phi and the new generated graph. | ||||
| gi_list = [Gn[i] for i in idx_gi] | gi_list = [Gn[i] for i in idx_gi] | ||||
| @@ -166,28 +175,249 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max): | |||||
| return dhat, ghat | return dhat, ghat | ||||
| def dis_gstar(idx_g, idx_gi, alpha, Kmatrix): | |||||
| #def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max): | |||||
| # """This function constructs graph pre-image by the iterative pre-image | |||||
| # framework in reference [1], algorithm 1, where the step of generating new | |||||
| # graphs randomly is replaced by the IAM algorithm in reference [2]. | |||||
| # | |||||
| # notes | |||||
| # ----- | |||||
| # Every time a set of n better graphs is acquired, their distances in kernel space are | |||||
| # compared with the k nearest ones, and the k nearest distances from the k+n | |||||
| # distances will be used as the new ones. | |||||
| # """ | |||||
| # Gn_median = [Gn[idx].copy() for idx in idx_gi] | |||||
| # # compute k nearest neighbors of phi in DN. | |||||
| # dis_list = [] # distance between g_star and each graph. | |||||
| # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): | |||||
| # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix) | |||||
| ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * | |||||
| ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * | |||||
| ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * | |||||
| ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) | |||||
| # dis_list.append(dtemp) | |||||
| # | |||||
| # # sort | |||||
| # sort_idx = np.argsort(dis_list) | |||||
| # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances | |||||
| # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | |||||
| # g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN | |||||
| # if dis_gs[0] == 0: # the exact pre-image. | |||||
| # print('The exact pre-image is found from the input dataset.') | |||||
| # return 0, g0hat_list | |||||
| # dhat = dis_gs[0] # the nearest distance | |||||
| # ghat_list = [g.copy() for g in g0hat_list] | |||||
| # for g in ghat_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors | |||||
| # for gi in Gk: | |||||
| # nx.draw_networkx(gi) | |||||
| # plt.show() | |||||
| # print(gi.nodes(data=True)) | |||||
| # print(gi.edges(data=True)) | |||||
| # Gs_nearest = Gk.copy() | |||||
| ## gihat_list = [] | |||||
| # | |||||
| ## i = 1 | |||||
| # r = 1 | |||||
| # while r < r_max: | |||||
| # print('r =', r) | |||||
| ## found = False | |||||
| ## Gs_nearest = Gk + gihat_list | |||||
| ## g_tmp = iam(Gs_nearest) | |||||
| # g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| # Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1) | |||||
| # for g in g_tmp_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # | |||||
| # # compute distance between phi and the new generated graphs. | |||||
| # gi_list = [Gn[i] for i in idx_gi] | |||||
| # knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False) | |||||
| # dnew_list = [] | |||||
| # for idx, g_tmp in enumerate(g_tmp_list): | |||||
| # dnew_list.append(dis_gstar(idx, range(len(g_tmp_list), | |||||
| # len(g_tmp_list) + len(gi_list) + 1), alpha, knew)) | |||||
| # | |||||
| ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * | |||||
| ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * | |||||
| ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * | |||||
| ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) | |||||
| # | |||||
| # # find the new k nearest graphs. | |||||
| # dis_gs = dnew_list + dis_gs # add the new nearest distances. | |||||
| # Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs. | |||||
| # sort_idx = np.argsort(dis_gs) | |||||
| # if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0: | |||||
| # print('We got better k nearest neighbors! Hurray!') | |||||
| # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances. | |||||
| # print(dis_gs[-1]) | |||||
| # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] | |||||
| # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | |||||
| # if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0: | |||||
| # print('I have smaller or equal distance!') | |||||
| # dhat = dis_gs[0] | |||||
| # print(str(dhat) + '->' + str(dhat)) | |||||
| # idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist() | |||||
| # ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list] | |||||
| # for g in ghat_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # r = 0 | |||||
| # else: | |||||
| # r += 1 | |||||
| # | |||||
| # return dhat, ghat_list | |||||
| def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, gkernel): | |||||
| """This function constructs graph pre-image by the iterative pre-image | |||||
| framework in reference [1], algorithm 1, where the step of generating new | |||||
| graphs randomly is replaced by the IAM algorithm in reference [2]. | |||||
| notes | |||||
| ----- | |||||
| Every time a set of n better graphs is acquired, their distances in kernel space are | |||||
| compared with the k nearest ones, and the k nearest distances from the k+n | |||||
| distances will be used as the new ones. | |||||
| """ | |||||
| # compute k nearest neighbors of phi in DN. | |||||
| dis_list = [] # distance between g_star and each graph. | |||||
| term3 = 0 | |||||
| for i1, a1 in enumerate(alpha): | |||||
| for i2, a2 in enumerate(alpha): | |||||
| term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | |||||
| for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout): | |||||
| dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3) | |||||
| # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * | |||||
| # k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * | |||||
| # (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * | |||||
| # k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) | |||||
| dis_list.append(dtemp) | |||||
| # sort | |||||
| sort_idx = np.argsort(dis_list) | |||||
| dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances | |||||
| nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | |||||
| g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN | |||||
| if dis_gs[0] == 0: # the exact pre-image. | |||||
| print('The exact pre-image is found from the input dataset.') | |||||
| return 0, g0hat_list | |||||
| dhat = dis_gs[0] # the nearest distance | |||||
| ghat_list = [g.copy() for g in g0hat_list] | |||||
| for g in ghat_list: | |||||
| draw_Letter_graph(g) | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors | |||||
| for gi in Gk: | |||||
| # nx.draw_networkx(gi) | |||||
| # plt.show() | |||||
draw_Letter_graph(gi) | |||||
| print(gi.nodes(data=True)) | |||||
| print(gi.edges(data=True)) | |||||
| Gs_nearest = Gk.copy() | |||||
| # gihat_list = [] | |||||
| # i = 1 | |||||
| r = 1 | |||||
| while r < r_max: | |||||
| print('r =', r) | |||||
| # found = False | |||||
| # Gs_nearest = Gk + gihat_list | |||||
| # g_tmp = iam(Gs_nearest) | |||||
| g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1) | |||||
| for g in g_tmp_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| draw_Letter_graph(g) | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| # compute distance between phi and the new generated graphs. | |||||
| knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False) | |||||
| dnew_list = [] | |||||
| for idx, g_tmp in enumerate(g_tmp_list): | |||||
| dnew_list.append(dis_gstar(idx, range(len(g_tmp_list), | |||||
| len(g_tmp_list) + len(Gn_median) + 1), alpha, knew, | |||||
| withterm3=False)) | |||||
| # dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * | |||||
| # knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * | |||||
| # alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * | |||||
| # k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) | |||||
| # find the new k nearest graphs. | |||||
| dis_gs = dnew_list + dis_gs # add the new nearest distances. | |||||
| Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs. | |||||
| sort_idx = np.argsort(dis_gs) | |||||
| if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0: | |||||
| print('We got better k nearest neighbors! Hurray!') | |||||
| dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances. | |||||
| print(dis_gs[-1]) | |||||
| Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] | |||||
| nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | |||||
| if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0: | |||||
| print('I have smaller or equal distance!') | |||||
| print(str(dhat) + '->' + str(dis_gs[0])) | |||||
| dhat = dis_gs[0] | |||||
| idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist() | |||||
| ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list] | |||||
| for g in ghat_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| draw_Letter_graph(g) | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| r = 0 | |||||
| else: | |||||
| r += 1 | |||||
| return dhat, ghat_list | |||||
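The while-loop above interleaves IAM with a k-nearest update, and the bookkeeping is easy to get wrong, so here is a condensed, self-contained sketch of just that update step. The names dis_gs, Gs_nearest and dnew_list mirror the function above; update_k_nearest itself is a hypothetical helper, not part of the module:

```python
import numpy as np

def update_k_nearest(dis_gs, Gs_nearest, dnew_list, G_new_list, k):
    """Merge n new candidates into the current k nearest; keep the k smallest."""
    dis_all = dnew_list + dis_gs     # new candidates go first, so after sorting
    G_all = G_new_list + Gs_nearest  # an index < len(dnew_list) means "new graph"
    sort_idx = np.argsort(dis_all)
    improved = any(i < len(dnew_list) for i in sort_idx[:k])
    dis_k = [dis_all[i] for i in sort_idx[:k]]
    G_k = [G_all[i] for i in sort_idx[:k]]
    return dis_k, G_k, improved
```

Prepending the new candidates is what lets the test `i < len(dnew_list)` detect an improvement after sorting, exactly as in the loop above.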
| def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||||
| term1 = Kmatrix[idx_g, idx_g] | term1 = Kmatrix[idx_g, idx_g] | ||||
| term2 = 0 | term2 = 0 | ||||
| for i, a in enumerate(alpha): | for i, a in enumerate(alpha): | ||||
| term2 += a * Kmatrix[idx_g, idx_gi[i]] | term2 += a * Kmatrix[idx_g, idx_gi[i]] | ||||
| term2 *= 2 | term2 *= 2 | ||||
| term3 = 0 | |||||
| for i1, a1 in enumerate(alpha): | |||||
| for i2, a2 in enumerate(alpha): | |||||
| term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | |||||
if not withterm3: | |||||
| for i1, a1 in enumerate(alpha): | |||||
| for i2, a2 in enumerate(alpha): | |||||
| term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | |||||
| return np.sqrt(term1 - term2 + term3) | return np.sqrt(term1 - term2 + term3) | ||||
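For reference, the three terms assembled by dis_gstar implement the kernel-space distance between a graph g and the weighted point defined by the median graphs g_i with weights alpha_i:

```latex
d(g, g^{\ast}) = \sqrt{\,\underbrace{k(g, g)}_{\texttt{term1}}
  - \underbrace{2 \sum_{i} \alpha_i \, k(g, g_i)}_{\texttt{term2}}
  + \underbrace{\sum_{i_1} \sum_{i_2} \alpha_{i_1} \alpha_{i_2} \, k(g_{i_1}, g_{i_2})}_{\texttt{term3}}\,}
```

Since term3 does not depend on g, gk_iam_nearest_multi precomputes it once per call and passes it in; callers whose Kmatrix already contains the median graphs set withterm3=False and rebuild it from the matrix instead.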
| def compute_kernel(Gn, graph_kernel, verbose): | def compute_kernel(Gn, graph_kernel, verbose): | ||||
| if graph_kernel == 'marginalizedkernel': | if graph_kernel == 'marginalizedkernel': | ||||
| Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, | Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, | ||||
| p_quit=0.3, n_iteration=19, remove_totters=False, | |||||
| p_quit=0.03, n_iteration=20, remove_totters=False, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| elif graph_kernel == 'untilhpathkernel': | elif graph_kernel == 'untilhpathkernel': | ||||
| Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type', | Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type', | ||||
| depth=2, k_func='MinMax', compute_method='trie', | |||||
| depth=10, k_func='MinMax', compute_method='trie', | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| elif graph_kernel == 'spkernel': | |||||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
| Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels= | |||||
| {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
| elif graph_kernel == 'structuralspkernel': | |||||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
| Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels= | |||||
| {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
| # normalization | # normalization | ||||
| Kmatrix_diag = Kmatrix.diagonal().copy() | Kmatrix_diag = Kmatrix.diagonal().copy() | ||||
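The normalization loop that follows is cut off by the hunk boundary; judging from the commented-out copy of the same loop in the second compute_kernel further down, it rescales the Gram matrix to unit diagonal (cosine normalization). A vectorized equivalent, assuming Kmatrix is a dense numpy array:

```python
import numpy as np

def normalize_gram(Kmatrix):
    # K[i, j] / sqrt(K[i, i] * K[j, j]): every graph gets unit self-similarity.
    d = np.sqrt(Kmatrix.diagonal())
    return Kmatrix / np.outer(d, d)
```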
| @@ -204,170 +434,4 @@ def gram2distances(Kmatrix): | |||||
| for i2 in range(len(Kmatrix)): | for i2 in range(len(Kmatrix)): | ||||
| dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2] | dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2] | ||||
| dmatrix = np.sqrt(dmatrix) | dmatrix = np.sqrt(dmatrix) | ||||
| return dmatrix | |||||
| # --------------------------- These are tests --------------------------------# | |||||
| def test_who_is_the_closest_in_kernel_space(Gn): | |||||
| idx_gi = [0, 6] | |||||
| g1 = Gn[idx_gi[0]] | |||||
| g2 = Gn[idx_gi[1]] | |||||
| # create the "median" graph. | |||||
| gnew = g2.copy() | |||||
| gnew.remove_node(0) | |||||
| nx.draw_networkx(gnew) | |||||
| plt.show() | |||||
| print(gnew.nodes(data=True)) | |||||
| Gn = [gnew] + Gn | |||||
| # compute gram matrix | |||||
| Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True) | |||||
| # the distance matrix | |||||
| dmatrix = gram2distances(Kmatrix) | |||||
| print(np.sort(dmatrix[idx_gi[0] + 1])) | |||||
| print(np.argsort(dmatrix[idx_gi[0] + 1])) | |||||
| print(np.sort(dmatrix[idx_gi[1] + 1])) | |||||
| print(np.argsort(dmatrix[idx_gi[1] + 1])) | |||||
| # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2 | |||||
| dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))] | |||||
| print(np.sort(dis_median)) | |||||
| print(np.argsort(dis_median)) | |||||
| return | |||||
| def test_who_is_the_closest_in_GED_space(Gn): | |||||
| from iam import GED | |||||
| idx_gi = [0, 6] | |||||
| g1 = Gn[idx_gi[0]] | |||||
| g2 = Gn[idx_gi[1]] | |||||
| # create the "median" graph. | |||||
| gnew = g2.copy() | |||||
| gnew.remove_node(0) | |||||
| nx.draw_networkx(gnew) | |||||
| plt.show() | |||||
| print(gnew.nodes(data=True)) | |||||
| Gn = [gnew] + Gn | |||||
| # compute GEDs | |||||
| ged_matrix = np.zeros((len(Gn), len(Gn))) | |||||
| for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): | |||||
| for i2 in range(len(Gn)): | |||||
| dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib') | |||||
| ged_matrix[i1, i2] = dis | |||||
| print(np.sort(ged_matrix[idx_gi[0] + 1])) | |||||
| print(np.argsort(ged_matrix[idx_gi[0] + 1])) | |||||
| print(np.sort(ged_matrix[idx_gi[1] + 1])) | |||||
| print(np.argsort(ged_matrix[idx_gi[1] + 1])) | |||||
| # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2 | |||||
| dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))] | |||||
| print(np.sort(dis_median)) | |||||
| print(np.argsort(dis_median)) | |||||
| return | |||||
| def test_will_IAM_give_the_median_graph_we_wanted(Gn): | |||||
| idx_gi = [0, 6] | |||||
| g1 = Gn[idx_gi[0]].copy() | |||||
| g2 = Gn[idx_gi[1]].copy() | |||||
| # del Gn[idx_gi[0]] | |||||
| # del Gn[idx_gi[1] - 1] | |||||
| g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1) | |||||
| # g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1) | |||||
| nx.draw_networkx(g_median) | |||||
| plt.show() | |||||
| print(g_median.nodes(data=True)) | |||||
| print(g_median.edges(data=True)) | |||||
| def test_new_IAM_allGraph_deleteNodes(Gn): | |||||
| idx_gi = [0, 6] | |||||
| # g1 = Gn[idx_gi[0]].copy() | |||||
| # g2 = Gn[idx_gi[1]].copy() | |||||
| g1 = nx.Graph(name='haha') | |||||
| g1.add_nodes_from([(2, {'atom': 'C'}), (3, {'atom': 'O'}), (4, {'atom': 'C'})]) | |||||
| g1.add_edges_from([(2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})]) | |||||
| g2 = nx.Graph(name='hahaha') | |||||
| g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}), | |||||
| (3, {'atom': 'O'}), (4, {'atom': 'C'})]) | |||||
| g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}), | |||||
| (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})]) | |||||
| # g2 = g1.copy() | |||||
| # g2.add_nodes_from([(3, {'atom': 'O'})]) | |||||
| # g2.add_nodes_from([(4, {'atom': 'C'})]) | |||||
| # g2.add_edges_from([(1, 3, {'bond_type': '1'})]) | |||||
| # g2.add_edges_from([(3, 4, {'bond_type': '1'})]) | |||||
| # del Gn[idx_gi[0]] | |||||
| # del Gn[idx_gi[1] - 1] | |||||
| g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1) | |||||
| # g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1) | |||||
| nx.draw_networkx(g_median) | |||||
| plt.show() | |||||
| print(g_median.nodes(data=True)) | |||||
| print(g_median.edges(data=True)) | |||||
| if __name__ == '__main__': | |||||
| from pygraph.utils.graphfiles import loadDataset | |||||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||||
| # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb | |||||
| # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| # 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', | |||||
| # 'extra_params': {}} | |||||
| ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||||
| 'extra_params': {}} # node symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # Gn = Gn[0:20] | |||||
| test_new_IAM_allGraph_deleteNodes(Gn) | |||||
| test_will_IAM_give_the_median_graph_we_wanted(Gn) | |||||
| test_who_is_the_closest_in_GED_space(Gn) | |||||
| test_who_is_the_closest_in_kernel_space(Gn) | |||||
lmbda = 0.03 # termination probability | |||||
| r_max = 10 # recursions | |||||
| l = 500 | |||||
| alpha_range = np.linspace(0.5, 0.5, 1) | |||||
| k = 20 # k nearest neighbors | |||||
| # randomly select two molecules | |||||
| np.random.seed(1) | |||||
| idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2) | |||||
| g1 = Gn[idx_gi[0]] | |||||
| g2 = Gn[idx_gi[1]] | |||||
| # g_tmp = iam([g1, g2]) | |||||
| # nx.draw_networkx(g_tmp) | |||||
| # plt.show() | |||||
| # compute | |||||
| # k_list = [] # kernel between each graph and itself. | |||||
| # k_g1_list = [] # kernel between each graph and g1 | |||||
| # k_g2_list = [] # kernel between each graph and g2 | |||||
| # for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout): | |||||
| # ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False) | |||||
| # k_list.append(ktemp[0][0, 0]) | |||||
| # k_g1_list.append(ktemp[0][0, 1]) | |||||
| # k_g2_list.append(ktemp[0][0, 2]) | |||||
| km = compute_kernel(Gn, 'untilhpathkernel', True) | |||||
| # k_list = np.diag(km) # kernel between each graph and itself. | |||||
| # k_g1_list = km[idx_gi[0]] # kernel between each graph and g1 | |||||
| # k_g2_list = km[idx_gi[1]] # kernel between each graph and g2 | |||||
| g_best = [] | |||||
| dis_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('alpha =', alpha) | |||||
| dhat, ghat = gk_iam_nearest(Gn, [alpha, 1 - alpha], idx_gi, km, k, r_max) | |||||
| dis_best.append(dhat) | |||||
| g_best.append(ghat) | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_best[idx]) | |||||
| print('the corresponding pre-image is') | |||||
| nx.draw_networkx(g_best[idx]) | |||||
| plt.show() | |||||
| return dmatrix | |||||
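gram2distances converts a Gram matrix to the kernel-induced metric entrywise, i.e. the Euclidean distance between the feature-space images of the two graphs:

```latex
d_{i_1 i_2} = \sqrt{K_{i_1 i_1} + K_{i_2 i_2} - 2\, K_{i_1 i_2}}
```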
| @@ -158,7 +158,7 @@ def GED(g1, g2, lib='gedlib'): | |||||
| script.PyRestartEnv() | script.PyRestartEnv() | ||||
| script.PyLoadGXLGraph('ged_tmp/', 'ged_tmp/tmp.xml') | script.PyLoadGXLGraph('ged_tmp/', 'ged_tmp/tmp.xml') | ||||
| listID = script.PyGetGraphIds() | listID = script.PyGetGraphIds() | ||||
| script.PySetEditCost("CHEM_1") | |||||
| script.PySetEditCost("LETTER") #("CHEM_1") | |||||
| script.PyInitEnv() | script.PyInitEnv() | ||||
| script.PySetMethod("IPFP", "") | script.PySetMethod("IPFP", "") | ||||
| script.PyInitMethod() | script.PyInitMethod() | ||||
| @@ -168,7 +168,15 @@ def GED(g1, g2, lib='gedlib'): | |||||
| pi_forward, pi_backward = script.PyGetAllMap(g, h) | pi_forward, pi_backward = script.PyGetAllMap(g, h) | ||||
| upper = script.PyGetUpperBound(g, h) | upper = script.PyGetUpperBound(g, h) | ||||
| lower = script.PyGetLowerBound(g, h) | lower = script.PyGetLowerBound(g, h) | ||||
| dis = (upper + lower) / 2 | |||||
| dis = upper | |||||
# convert positional maps to node labels (removed nodes map to np.inf) | |||||
| nodes1 = [n for n in g1.nodes()] | |||||
| nodes2 = [n for n in g2.nodes()] | |||||
| nb1 = nx.number_of_nodes(g1) | |||||
| nb2 = nx.number_of_nodes(g2) | |||||
| pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] | |||||
| pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] | |||||
| return dis, pi_forward, pi_backward | return dis, pi_forward, pi_backward | ||||
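The block added above turns gedlib's positional assignment vectors into node-keyed maps, with np.inf standing for node insertion or removal. A small worked example of that convention (the graphs and the assignment are made up for illustration):

```python
import numpy as np

# Say g1 has nodes [0, 1, 2] and g2 has nodes [0, 1], and gedlib returns
# pi_forward = [1, 0, 2]; a position >= |V(g2)| means the node is removed.
nodes2 = [0, 1]
nb2 = len(nodes2)
pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in [1, 0, 2]]
print(pi_forward)  # [1, 0, inf]: node 2 of g1 has no image in g2
```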
| @@ -319,7 +327,7 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| # Gn_median = Gn_median[0:10] | # Gn_median = Gn_median[0:10] | ||||
| # Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median] | # Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median] | ||||
| node_ir = sys.maxsize * 2 # Max number for c++, corresponding to the node remove and insertion. | |||||
node_ir = np.inf # marks node removal and insertion. | |||||
| label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable. | label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable. | ||||
| ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate, | ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate, | ||||
| attr_names=['edge_labeled', 'node_attr_dim'], | attr_names=['edge_labeled', 'node_attr_dim'], | ||||
| @@ -347,7 +355,7 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| h_i0 = 0 | h_i0 = 0 | ||||
| for idx, g in enumerate(Gn_median): | for idx, g in enumerate(Gn_median): | ||||
| pi_i = pi_p_forward[idx][ndi] | pi_i = pi_p_forward[idx][ndi] | ||||
| if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label: | |||||
| if pi_i != node_ir and g.nodes[pi_i][node_label] == label: | |||||
| h_i0 += 1 | h_i0 += 1 | ||||
| h_i0_list.append(h_i0) | h_i0_list.append(h_i0) | ||||
| label_list.append(label) | label_list.append(label) | ||||
| @@ -364,7 +372,7 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| nlabel_best = [label_list[idx] for idx in idx_max] | nlabel_best = [label_list[idx] for idx in idx_max] | ||||
| # generate "best" graphs with regard to "best" node labels. | # generate "best" graphs with regard to "best" node labels. | ||||
| G_new_list_nd = [] | G_new_list_nd = [] | ||||
| for g in G_new_list: | |||||
| for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now. | |||||
| for nl in nlabel_best: | for nl in nlabel_best: | ||||
| g_tmp = g.copy() | g_tmp = g.copy() | ||||
| if nl == label_r: | if nl == label_r: | ||||
| @@ -380,16 +388,16 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| G_new_list = G_new_list_nd[:] | G_new_list = G_new_list_nd[:] | ||||
| else: # labels are non-symbolic | else: # labels are non-symbolic | ||||
| for nd in G.nodes(): | |||||
| for ndi, (nd, _) in enumerate(G.nodes(data=True)): | |||||
| Si_norm = 0 | Si_norm = 0 | ||||
| phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])]) | phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])]) | ||||
| for idx, g in enumerate(Gn_median): | for idx, g in enumerate(Gn_median): | ||||
| pi_i = pi_p_forward[idx][nd] | |||||
| pi_i = pi_p_forward[idx][ndi] | |||||
| if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0? | if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0? | ||||
| Si_norm += 1 | Si_norm += 1 | ||||
| phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']]) | phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']]) | ||||
| phi_i_bar /= Si_norm | phi_i_bar /= Si_norm | ||||
| G_new.nodes[nd]['attributes'] = phi_i_bar | |||||
| G_new_list[0].nodes[nd]['attributes'] = phi_i_bar | |||||
| # update edge labels and adjacency matrix. | # update edge labels and adjacency matrix. | ||||
| if ds_attrs['edge_labeled']: | if ds_attrs['edge_labeled']: | ||||
| @@ -467,12 +475,12 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| # pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list] | # pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list] | ||||
| # G_new_list = [G_new_list[idx] for idx in idx_min_list] | # G_new_list = [G_new_list[idx] for idx in idx_min_list] | ||||
| for g in G_new_list: | |||||
| import matplotlib.pyplot as plt | |||||
| nx.draw_networkx(g) | |||||
| plt.show() | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| # for g in G_new_list: | |||||
| # import matplotlib.pyplot as plt | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| return G_new_list, pi_forward_list | return G_new_list, pi_forward_list | ||||
| @@ -504,7 +512,7 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| G_list = [G] | G_list = [G] | ||||
| pi_forward_list = [pi_p_forward] | pi_forward_list = [pi_p_forward] | ||||
| # iterations. | # iterations. | ||||
| for itr in range(0, 10): # @todo: the convergence condition? | |||||
| for itr in range(0, 5): # @todo: the convergence condition? | |||||
| # print('itr is', itr) | # print('itr is', itr) | ||||
| G_new_list = [] | G_new_list = [] | ||||
| pi_forward_new_list = [] | pi_forward_new_list = [] | ||||
| @@ -562,7 +570,7 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
# phase 1: initialize. | # phase 1: initialize. | ||||
| # compute set-median. | # compute set-median. | ||||
| dis_min = np.inf | dis_min = np.inf | ||||
| dis_all, pi_all_forward = median_distance(Gn_candidate[::-1], Gn_median) | |||||
| dis_all, pi_all_forward = median_distance(Gn_candidate, Gn_median) | |||||
| # find all smallest distances. | # find all smallest distances. | ||||
| idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist() | idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist() | ||||
| dis_min = dis_all[idx_min_list[0]] | dis_min = dis_all[idx_min_list[0]] | ||||
| @@ -580,24 +588,27 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| G_list, _ = remove_duplicates(G_list) | G_list, _ = remove_duplicates(G_list) | ||||
| if connected == True: | if connected == True: | ||||
| G_list, _ = remove_disconnected(G_list) | |||||
| G_list_con, _ = remove_disconnected(G_list) | |||||
# if there are no connected graphs at all, keep the disconnected ones. | |||||
| if len(G_list_con) > 0: # @todo: ?????????????????????????? | |||||
| G_list = G_list_con | |||||
| import matplotlib.pyplot as plt | |||||
| for g in G_list: | |||||
| nx.draw_networkx(g) | |||||
| plt.show() | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| # import matplotlib.pyplot as plt | |||||
| # for g in G_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # get the best median graphs | # get the best median graphs | ||||
| dis_all, pi_all_forward = median_distance(G_list, Gn_median) | dis_all, pi_all_forward = median_distance(G_list, Gn_median) | ||||
| G_min_list, pi_forward_min_list, dis_min = best_median_graphs( | G_min_list, pi_forward_min_list, dis_min = best_median_graphs( | ||||
| G_list, dis_all, pi_all_forward) | G_list, dis_all, pi_all_forward) | ||||
| for g in G_min_list: | |||||
| nx.draw_networkx(g) | |||||
| plt.show() | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| # for g in G_min_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| return G_min_list | return G_min_list | ||||
| @@ -9,6 +9,7 @@ pre-image | |||||
| import sys | import sys | ||||
| import numpy as np | import numpy as np | ||||
| import random | |||||
| import multiprocessing | import multiprocessing | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| import networkx as nx | import networkx as nx | ||||
| @@ -16,127 +17,190 @@ import matplotlib.pyplot as plt | |||||
| sys.path.insert(0, "../") | sys.path.insert(0, "../") | ||||
| from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||||
| from pygraph.utils.graphfiles import loadDataset | from pygraph.utils.graphfiles import loadDataset | ||||
| from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||||
| from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||||
| from pygraph.kernels.spKernel import spkernel | |||||
| import functools | |||||
| from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||||
| from pygraph.kernels.structuralspKernel import structuralspkernel | |||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||||
| 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb | |||||
| DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| DN = DN[0:10] | |||||
lmbda = 0.03 # termination probability | |||||
| r_max = 10 # recursions | |||||
| l = 500 | |||||
| alpha_range = np.linspace(0.1, 0.9, 9) | |||||
| k = 5 # k nearest neighbors | |||||
| # randomly select two molecules | |||||
| np.random.seed(1) | |||||
| idx1, idx2 = np.random.randint(0, len(DN), 2) | |||||
| g1 = DN[idx1] | |||||
| g2 = DN[idx2] | |||||
| def compute_kernel(Gn, graph_kernel, verbose): | |||||
| if graph_kernel == 'marginalizedkernel': | |||||
| Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, | |||||
| p_quit=0.03, n_iteration=20, remove_totters=False, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
| elif graph_kernel == 'untilhpathkernel': | |||||
| Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type', | |||||
| depth=10, k_func='MinMax', compute_method='trie', | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
| elif graph_kernel == 'spkernel': | |||||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
| Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels= | |||||
| {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
| elif graph_kernel == 'structuralspkernel': | |||||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
| Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels= | |||||
| {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
| # normalization | |||||
| # Kmatrix_diag = Kmatrix.diagonal().copy() | |||||
| # for i in range(len(Kmatrix)): | |||||
| # for j in range(i, len(Kmatrix)): | |||||
| # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||||
| return Kmatrix | |||||
| # compute | |||||
| k_list = [] # kernel between each graph and itself. | |||||
| k_g1_list = [] # kernel between each graph and g1 | |||||
| k_g2_list = [] # kernel between each graph and g2 | |||||
| for ig, g in tqdm(enumerate(DN), desc='computing self kernels', file=sys.stdout): | |||||
| ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None, | |||||
| p_quit=lmbda, n_iteration=20, remove_totters=False, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=False) | |||||
| k_list.append(ktemp[0][0, 0]) | |||||
| k_g1_list.append(ktemp[0][0, 1]) | |||||
| k_g2_list.append(ktemp[0][0, 2]) | |||||
| g_best = [] | |||||
| dis_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('alpha =', alpha) | |||||
| # compute k nearest neighbors of phi in DN. | |||||
| dis_list = [] # distance between g_star and each graph. | |||||
| for ig, g in tqdm(enumerate(DN), desc='computing distances', file=sys.stdout): | |||||
| dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * | |||||
| k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * | |||||
| (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * | |||||
| k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) | |||||
| dis_list.append(dtemp) | |||||
| if __name__ == '__main__': | |||||
| # sort | |||||
| sort_idx = np.argsort(dis_list) | |||||
| dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] | |||||
| g0hat = DN[sort_idx[0]] # the nearest neighbor of phi in DN | |||||
| if dis_gs[0] == 0: # the exact pre-image. | |||||
| print('The exact pre-image is found from the input dataset.') | |||||
| g_pimg = g0hat | |||||
| break | |||||
| dhat = dis_gs[0] # the nearest distance | |||||
| Dk = [DN[ig] for ig in sort_idx[0:k]] # the k nearest neighbors | |||||
| gihat_list = [] | |||||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| # 'extra_params': {}} # node/edge symb | |||||
| # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| # 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', | |||||
| # 'extra_params': {}} | |||||
| ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||||
| 'extra_params': {}} # node symb | |||||
| i = 1 | |||||
| r = 1 | |||||
| while r < r_max: | |||||
| print('r =', r) | |||||
| found = False | |||||
| for ig, gs in enumerate(Dk + gihat_list): | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| fdgs = int(np.abs(np.ceil(np.log(alpha * dis_gs[ig])))) # @todo ??? | |||||
for trial in tqdm(range(0, l), desc='l loop', file=sys.stdout): | |||||
| # add and delete edges. | |||||
| gtemp = gs.copy() | |||||
| np.random.seed() | |||||
| # which edges to change. | |||||
| idx_change = np.random.randint(0, nx.number_of_nodes(gs) * | |||||
| (nx.number_of_nodes(gs) - 1), fdgs) | |||||
| for item in idx_change: | |||||
| node1 = int(item / (nx.number_of_nodes(gs) - 1)) | |||||
| node2 = (item - node1 * (nx.number_of_nodes(gs) - 1)) | |||||
| if node2 >= node1: | |||||
| node2 += 1 | |||||
| # @todo: is the randomness correct? | |||||
| if not gtemp.has_edge(node1, node2): | |||||
| gtemp.add_edges_from([(node1, node2, {'bond_type': 0})]) | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| else: | |||||
| gtemp.remove_edge(node1, node2) | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| # compute distance between phi and the new generated graph. | |||||
| knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None, | |||||
| p_quit=lmbda, n_iteration=20, remove_totters=False, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=False) | |||||
| dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) * | |||||
| knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha * | |||||
| (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * | |||||
| k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) | |||||
| if dnew <= dhat: # the new distance is smaller | |||||
| print('I am smaller!') | |||||
| dhat = dnew | |||||
| gnew = gtemp.copy() | |||||
| found = True # found better graph. | |||||
| r = 0 | |||||
| if found: | |||||
| gihat_list = [gnew] | |||||
| dis_gs.append(dhat) | |||||
| else: | |||||
| r += 1 | |||||
| dis_best.append(dhat) | |||||
| g_best += ([g0hat] if len(gihat_list) == 0 else gihat_list) | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_best[idx]) | |||||
| print('the corresponding pre-image is') | |||||
| nx.draw_networkx(g_best[idx]) | |||||
| plt.show() | |||||
| DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| #DN = DN[0:10] | |||||
lmbda = 0.03 # termination probability | |||||
| r_max = 10 # recursions | |||||
| l = 500 | |||||
| alpha_range = np.linspace(0.5, 0.5, 1) | |||||
| #alpha_range = np.linspace(0.1, 0.9, 9) | |||||
| k = 5 # k nearest neighbors | |||||
| # randomly select two molecules | |||||
| #np.random.seed(1) | |||||
| #idx1, idx2 = np.random.randint(0, len(DN), 2) | |||||
| #g1 = DN[idx1] | |||||
| #g2 = DN[idx2] | |||||
| idx1 = 0 | |||||
| idx2 = 6 | |||||
| g1 = DN[idx1] | |||||
| g2 = DN[idx2] | |||||
| # compute | |||||
| k_list = [] # kernel between each graph and itself. | |||||
| k_g1_list = [] # kernel between each graph and g1 | |||||
| k_g2_list = [] # kernel between each graph and g2 | |||||
| for ig, g in tqdm(enumerate(DN), desc='computing self kernels', file=sys.stdout): | |||||
| # ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None, | |||||
| # p_quit=lmbda, n_iteration=20, remove_totters=False, | |||||
| # n_jobs=multiprocessing.cpu_count(), verbose=False) | |||||
| ktemp = compute_kernel([g, g1, g2], 'untilhpathkernel', verbose=False) | |||||
| k_list.append(ktemp[0, 0]) | |||||
| k_g1_list.append(ktemp[0, 1]) | |||||
| k_g2_list.append(ktemp[0, 2]) | |||||
| g_best = [] | |||||
| dis_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('alpha =', alpha) | |||||
| # compute k nearest neighbors of phi in DN. | |||||
| dis_list = [] # distance between g_star and each graph. | |||||
| for ig, g in tqdm(enumerate(DN), desc='computing distances', file=sys.stdout): | |||||
| dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * | |||||
| k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * | |||||
| (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * | |||||
| k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) | |||||
| dis_list.append(np.sqrt(dtemp)) | |||||
| # sort | |||||
| sort_idx = np.argsort(dis_list) | |||||
| dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] | |||||
| g0hat = DN[sort_idx[0]] # the nearest neighbor of phi in DN | |||||
| if dis_gs[0] == 0: # the exact pre-image. | |||||
| print('The exact pre-image is found from the input dataset.') | |||||
| g_pimg = g0hat | |||||
| break | |||||
| dhat = dis_gs[0] # the nearest distance | |||||
| Dk = [DN[ig] for ig in sort_idx[0:k]] # the k nearest neighbors | |||||
| gihat_list = [] | |||||
| i = 1 | |||||
| r = 1 | |||||
| while r < r_max: | |||||
| print('r =', r) | |||||
| found = False | |||||
| for ig, gs in enumerate(Dk + gihat_list): | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
# @todo: what if the log is negative? | |||||
| fdgs = int(np.abs(np.ceil(np.log(alpha * dis_gs[ig])))) | |||||
for trial in tqdm(range(0, l), desc='l loop', file=sys.stdout): | |||||
| # add and delete edges. | |||||
| gtemp = gs.copy() | |||||
| np.random.seed() | |||||
| # which edges to change. | |||||
| # @todo: should we use just half of the adjacency matrix for undirected graphs? | |||||
| nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1) | |||||
| # @todo: what if fdgs is bigger than nb_vpairs? | |||||
| idx_change = random.sample(range(nb_vpairs), fdgs if fdgs < nb_vpairs else nb_vpairs) | |||||
| # idx_change = np.random.randint(0, nx.number_of_nodes(gs) * | |||||
| # (nx.number_of_nodes(gs) - 1), fdgs) | |||||
| for item in idx_change: | |||||
| node1 = int(item / (nx.number_of_nodes(gs) - 1)) | |||||
| node2 = (item - node1 * (nx.number_of_nodes(gs) - 1)) | |||||
| if node2 >= node1: # skip the self pair. | |||||
| node2 += 1 | |||||
| # @todo: is the randomness correct? | |||||
| if not gtemp.has_edge(node1, node2): | |||||
| # @todo: how to update the bond_type? 0 or 1? | |||||
| gtemp.add_edges_from([(node1, node2, {'bond_type': 1})]) | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| else: | |||||
| gtemp.remove_edge(node1, node2) | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| # compute distance between phi and the new generated graph. | |||||
| # knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None, | |||||
| # p_quit=lmbda, n_iteration=20, remove_totters=False, | |||||
| # n_jobs=multiprocessing.cpu_count(), verbose=False) | |||||
| knew = compute_kernel([gtemp, g1, g2], 'untilhpathkernel', verbose=False) | |||||
| dnew = np.sqrt(knew[0, 0] - 2 * (alpha * knew[0, 1] + (1 - alpha) * | |||||
| knew[0, 2]) + (alpha * alpha * k_list[idx1] + alpha * | |||||
| (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * | |||||
| k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])) | |||||
| if dnew < dhat: # @todo: the new distance is smaller or also equal? | |||||
| print('I am smaller!') | |||||
| print(dhat, '->', dnew) | |||||
| nx.draw_networkx(gtemp) | |||||
| plt.show() | |||||
| print(gtemp.nodes(data=True)) | |||||
| print(gtemp.edges(data=True)) | |||||
| dhat = dnew | |||||
| gnew = gtemp.copy() | |||||
| found = True # found better graph. | |||||
| r = 0 | |||||
| elif dnew == dhat: | |||||
| print('I am equal!') | |||||
| if found: | |||||
| gihat_list = [gnew] | |||||
| dis_gs.append(dhat) | |||||
| else: | |||||
| r += 1 | |||||
| dis_best.append(dhat) | |||||
| g_best += ([g0hat] if len(gihat_list) == 0 else gihat_list) | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_best[idx]) | |||||
| print('the corresponding pre-image is') | |||||
| nx.draw_networkx(g_best[idx]) | |||||
| plt.show() | |||||
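For reference, the acceptance test above compares feature-space distances to the weighted mean embedding phi = alpha * phi(g1) + (1 - alpha) * phi(g2). A minimal sketch of the expanded expression (the helper name is hypothetical; it assumes a symmetric kernel, which lets the two cross terms of the inline formula merge, and that knew is the 3 x 3 Gram matrix of [gtemp, g1, g2]):

    import numpy as np

    def dis_to_mean_embedding(k_new, k_g1g1, k_g1g2, k_g2g2, alpha):
        # ||phi(g) - (alpha * phi(g1) + (1 - alpha) * phi(g2))|| expanded
        # into kernel evaluations only; k_new is the 3 x 3 Gram matrix
        # of [g, g1, g2].
        cross = alpha * k_new[0, 1] + (1 - alpha) * k_new[0, 2]
        mean_sq = (alpha ** 2 * k_g1g1
                   + 2 * alpha * (1 - alpha) * k_g1g2
                   + (1 - alpha) ** 2 * k_g2g2)
        return np.sqrt(k_new[0, 0] - 2 * cross + mean_sq)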
| @@ -24,6 +24,7 @@ def treeletkernel(*args, | |||||
| sub_kernel, | sub_kernel, | ||||
| node_label='atom', | node_label='atom', | ||||
| edge_label='bond_type', | edge_label='bond_type', | ||||
| parallel='imap_unordered', | |||||
| n_jobs=None, | n_jobs=None, | ||||
| verbose=True): | verbose=True): | ||||
| """Calculate treelet graph kernels between graphs. | """Calculate treelet graph kernels between graphs. | ||||
| @@ -70,34 +71,55 @@ def treeletkernel(*args, | |||||
| start_time = time.time() | start_time = time.time() | ||||
| # ---- use pool.imap_unordered to parallel and track progress. ---- | # ---- use pool.imap_unordered to parallel and track progress. ---- | ||||
| # get all canonical keys of all graphs before calculating kernels to save | |||||
| # time, but this may cost a lot of memory for large dataset. | |||||
| pool = Pool(n_jobs) | |||||
| itr = zip(Gn, range(0, len(Gn))) | |||||
| if len(Gn) < 100 * n_jobs: | |||||
| chunksize = int(len(Gn) / n_jobs) + 1 | |||||
| else: | |||||
| chunksize = 100 | |||||
| canonkeys = [[] for _ in range(len(Gn))] | |||||
| get_partial = partial(wrapper_get_canonkeys, node_label, edge_label, | |||||
| labeled, ds_attrs['is_directed']) | |||||
| if verbose: | |||||
| iterator = tqdm(pool.imap_unordered(get_partial, itr, chunksize), | |||||
| desc='getting canonkeys', file=sys.stdout) | |||||
| if parallel == 'imap_unordered': | |||||
| # get all canonical keys of all graphs before calculating kernels to save | |||||
| # time, but this may cost a lot of memory for large datasets. | |||||
| pool = Pool(n_jobs) | |||||
| itr = zip(Gn, range(0, len(Gn))) | |||||
| if len(Gn) < 100 * n_jobs: | |||||
| chunksize = int(len(Gn) / n_jobs) + 1 | |||||
| else: | |||||
| chunksize = 100 | |||||
| canonkeys = [[] for _ in range(len(Gn))] | |||||
| get_partial = partial(wrapper_get_canonkeys, node_label, edge_label, | |||||
| labeled, ds_attrs['is_directed']) | |||||
| if verbose: | |||||
| iterator = tqdm(pool.imap_unordered(get_partial, itr, chunksize), | |||||
| desc='getting canonkeys', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(get_partial, itr, chunksize) | |||||
| for i, ck in iterator: | |||||
| canonkeys[i] = ck | |||||
| pool.close() | |||||
| pool.join() | |||||
| # compute kernels. | |||||
| def init_worker(canonkeys_toshare): | |||||
| global G_canonkeys | |||||
| G_canonkeys = canonkeys_toshare | |||||
| do_partial = partial(wrapper_treeletkernel_do, sub_kernel) | |||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||||
| glbv=(canonkeys,), n_jobs=n_jobs, verbose=verbose) | |||||
| # ---- do not use parallelization. ---- | |||||
| elif parallel is None: | |||||
| # get all canonical keys of all graphs before calculating kernels to save | |||||
| # time, but this may cost a lot of memory for large datasets. | |||||
| canonkeys = [] | |||||
| for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): | |||||
| canonkeys.append(get_canonkeys(g, node_label, edge_label, labeled, | |||||
| ds_attrs['is_directed'])) | |||||
| # compute kernels. | |||||
| from itertools import combinations_with_replacement | |||||
| itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||||
| for i, j in (tqdm(itr, desc='calculating kernels', file=sys.stdout) if verbose else itr): | |||||
| Kmatrix[i][j] = _treeletkernel_do(canonkeys[i], canonkeys[j], sub_kernel) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] # @todo: directed graphs not considered? | |||||
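The serial path fills only the upper triangle of the Gram matrix (diagonal included) and mirrors it, which is valid as long as the kernel is symmetric, the same assumption the @todo above questions for directed graphs. A quick illustration of the index iterator:

    from itertools import combinations_with_replacement

    # upper-triangular index pairs (diagonal included) for three graphs:
    print(list(combinations_with_replacement(range(3), 2)))
    # [(0, 0), (0, 1), (0, 2), (1, 1), (1, 2), (2, 2)]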
| else: | else: | ||||
| iterator = pool.imap_unordered(get_partial, itr, chunksize) | |||||
| for i, ck in iterator: | |||||
| canonkeys[i] = ck | |||||
| pool.close() | |||||
| pool.join() | |||||
| # compute kernels. | |||||
| def init_worker(canonkeys_toshare): | |||||
| global G_canonkeys | |||||
| G_canonkeys = canonkeys_toshare | |||||
| do_partial = partial(wrapper_treeletkernel_do, sub_kernel) | |||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||||
| glbv=(canonkeys,), n_jobs=n_jobs, verbose=verbose) | |||||
| raise Exception('No valid parallelization method specified.') | |||||
| run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
| if verbose: | if verbose: | ||||
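A sketch of how the new parallel flag might be driven (assumptions: the graph list is accepted as the first positional argument, the function returns the Gram matrix together with its runtime as other kernels in this repository do, and pygraph.kernels.treeletKernel is the correct module path):

    import multiprocessing
    import networkx as nx
    import numpy as np
    from pygraph.kernels.treeletKernel import treeletkernel  # assumed path

    # two toy labeled graphs (nx 1.x attribute access, as used in this file)
    Gn = [nx.path_graph(4), nx.star_graph(3)]
    for g in Gn:
        for n in g.nodes():
            g.node[n]['atom'] = 'C'
        for u, v in g.edges():
            g[u][v]['bond_type'] = 1

    sub_kernel = lambda x, y: np.sum(np.exp(-np.square(x - y) / 2))

    # parallel path (the default):
    Kmatrix, run_time = treeletkernel(Gn, sub_kernel=sub_kernel,
                                      parallel='imap_unordered',
                                      n_jobs=multiprocessing.cpu_count())
    # serial path, handy for debugging:
    Kmatrix, run_time = treeletkernel(Gn, sub_kernel=sub_kernel, parallel=None)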
| @@ -123,8 +145,7 @@ def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): | |||||
| keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | ||||
| vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | ||||
| vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | ||||
| kernel = np.sum(np.exp(-np.square(vector1 - vector2) / 2)) | |||||
| # kernel = sub_kernel(vector1, vector2) | |||||
| kernel = sub_kernel(vector1, vector2) | |||||
| return kernel | return kernel | ||||
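Since the Gaussian is no longer hard-coded, the caller now supplies the vector-level kernel. A sketch of a drop-in equivalent of the removed inline expression (gaussiankernel here is illustrative, not necessarily the repository's own helper):

    import functools
    import numpy as np

    def gaussiankernel(x, y, gamma=0.5):
        # sum over the shared treelet keys of exp(-gamma * (x_i - y_i)^2);
        # gamma = 0.5 reproduces the inline expression removed above.
        return np.sum(np.exp(-gamma * np.square(x - y)))

    sub_kernel = functools.partial(gaussiankernel, gamma=0.5)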
| @@ -266,7 +287,7 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed): | |||||
| # linear patterns | # linear patterns | ||||
| canonkey_t = Counter(list(nx.get_node_attributes(G, node_label).values())) | canonkey_t = Counter(list(nx.get_node_attributes(G, node_label).values())) | ||||
| for key in canonkey_t: | for key in canonkey_t: | ||||
| canonkey_l['0' + key] = canonkey_t[key] | |||||
| canonkey_l[('0', key)] = canonkey_t[key] | |||||
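This tuple switch is the point of the commit: concatenated strings become ambiguous as soon as a label has more than one character, while tuples preserve the field boundaries. A two-line demonstration:

    # with string concatenation, ('C', 'l', 'O') and ('Cl', 'O') both
    # collapse to the same key, conflating two different treelets:
    assert ''.join(['C', 'l', 'O']) == ''.join(['Cl', 'O'])  # both 'ClO'
    # tuples keep the field boundaries intact:
    assert ('C', 'l', 'O') != ('Cl', 'O')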
| for i in range(1, 6): # for i in range(1, 6): | for i in range(1, 6): # for i in range(1, 6): | ||||
| treelet = [] | treelet = [] | ||||
| @@ -274,93 +295,111 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed): | |||||
| canonlist = list(chain.from_iterable((G.node[node][node_label], \ | canonlist = list(chain.from_iterable((G.node[node][node_label], \ | ||||
| G[node][pattern[idx+1]][edge_label]) for idx, node in enumerate(pattern[:-1]))) | G[node][pattern[idx+1]][edge_label]) for idx, node in enumerate(pattern[:-1]))) | ||||
| canonlist.append(G.node[pattern[-1]][node_label]) | canonlist.append(G.node[pattern[-1]][node_label]) | ||||
| canonkey_t = ''.join(canonlist) | |||||
| canonkey_t = canonkey_t if canonkey_t < canonkey_t[::-1] else canonkey_t[::-1] | |||||
| treelet.append(str(i) + canonkey_t) | |||||
| canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | |||||
| treelet.append(tuple([str(i)] + canonkey_t)) | |||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # n-star patterns | # n-star patterns | ||||
| for i in range(3, 6): | for i in range(3, 6): | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns[str(i) + 'star']: | for pattern in patterns[str(i) + 'star']: | ||||
| canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:] ] | |||||
| canonlist = [tuple((G.node[leaf][node_label], | |||||
| G[leaf][pattern[0]][edge_label])) for leaf in pattern[1:]] | |||||
| canonlist.sort() | canonlist.sort() | ||||
| canonkey_t = ('d' if i == 5 else str(i * 2)) + G.node[pattern[0]][node_label] + ''.join(canonlist) | |||||
| canonlist = list(chain.from_iterable(canonlist)) | |||||
| canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | |||||
| [G.node[pattern[0]][node_label]] + canonlist) | |||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 7 | # pattern 7 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['7']: | for pattern in patterns['7']: | ||||
| canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] | |||||
| canonlist = [tuple((G.node[leaf][node_label], | |||||
| G[leaf][pattern[0]][edge_label])) for leaf in pattern[1:3]] | |||||
| canonlist.sort() | canonlist.sort() | ||||
| canonkey_t = '7' + G.node[pattern[0]][node_label] + ''.join(canonlist) \ | |||||
| + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \ | |||||
| + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label] | |||||
| canonlist = list(chain.from_iterable(canonlist)) | |||||
| canonkey_t = tuple(['7'] + [G.node[pattern[0]][node_label]] + canonlist | |||||
| + [G.node[pattern[3]][node_label]] | |||||
| + [G[pattern[3]][pattern[0]][edge_label]] | |||||
| + [G.node[pattern[4]][node_label]] | |||||
| + [G[pattern[4]][pattern[3]][edge_label]]) | |||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 11 | # pattern 11 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['11']: | for pattern in patterns['11']: | ||||
| canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:4] ] | |||||
| canonlist = [tuple((G.node[leaf][node_label], | |||||
| G[leaf][pattern[0]][edge_label])) for leaf in pattern[1:4]] | |||||
| canonlist.sort() | canonlist.sort() | ||||
| canonkey_t = 'b' + G.node[pattern[0]][node_label] + ''.join(canonlist) \ | |||||
| + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[0]][edge_label] \ | |||||
| + G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label] | |||||
| canonlist = list(chain.from_iterable(canonlist)) | |||||
| canonkey_t = tuple(['b'] + [G.node[pattern[0]][node_label]] + canonlist | |||||
| + [G.node[pattern[4]][node_label]] | |||||
| + [G[pattern[4]][pattern[0]][edge_label]] | |||||
| + [G.node[pattern[5]][node_label]] | |||||
| + [G[pattern[5]][pattern[4]][edge_label]]) | |||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 10 | # pattern 10 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['10']: | for pattern in patterns['10']: | ||||
| canonkey4 = G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label] | |||||
| canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] | |||||
| canonkey4 = [G.node[pattern[5]][node_label], G[pattern[5]][pattern[4]][edge_label]] | |||||
| canonlist = [tuple((G.node[leaf][node_label], | |||||
| G[leaf][pattern[0]][edge_label])) for leaf in pattern[1:3]] | |||||
| canonlist.sort() | canonlist.sort() | ||||
| canonkey0 = ''.join(canonlist) | |||||
| canonkey_t = 'a' + G.node[pattern[3]][node_label] \ | |||||
| + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label] \ | |||||
| + G.node[pattern[0]][node_label] + G[pattern[0]][pattern[3]][edge_label] \ | |||||
| + canonkey4 + canonkey0 | |||||
| canonkey0 = list(chain.from_iterable(canonlist)) | |||||
| canonkey_t = tuple(['a'] + [G.node[pattern[3]][node_label]] | |||||
| + [G.node[pattern[4]][node_label]] | |||||
| + [G[pattern[4]][pattern[3]][edge_label]] | |||||
| + [G.node[pattern[0]][node_label]] | |||||
| + [G[pattern[0]][pattern[3]][edge_label]] | |||||
| + canonkey4 + canonkey0) | |||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 12 | # pattern 12 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['12']: | for pattern in patterns['12']: | ||||
| canonlist0 = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] | |||||
| canonlist0 = [tuple((G.node[leaf][node_label], | |||||
| G[leaf][pattern[0]][edge_label])) for leaf in pattern[1:3]] | |||||
| canonlist0.sort() | canonlist0.sort() | ||||
| canonlist3 = [ G.node[leaf][node_label] + G[leaf][pattern[3]][edge_label] for leaf in pattern[4:6] ] | |||||
| canonlist0 = list(chain.from_iterable(canonlist0)) | |||||
| canonlist3 = [tuple((G.node[leaf][node_label], | |||||
| G[leaf][pattern[3]][edge_label])) for leaf in pattern[4:6]] | |||||
| canonlist3.sort() | canonlist3.sort() | ||||
| canonlist3 = list(chain.from_iterable(canonlist3)) | |||||
| # 2 possible key can be generated from 2 nodes with extended label 3, select the one with lower lexicographic order. | |||||
| canonkey_t1 = 'c' + G.node[pattern[0]][node_label] \ | |||||
| + ''.join(canonlist0) \ | |||||
| + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \ | |||||
| + ''.join(canonlist3) | |||||
| canonkey_t2 = 'c' + G.node[pattern[3]][node_label] \ | |||||
| + ''.join(canonlist3) \ | |||||
| + G.node[pattern[0]][node_label] + G[pattern[0]][pattern[3]][edge_label] \ | |||||
| + ''.join(canonlist0) | |||||
| # Two possible keys can be generated from the two nodes with extended label 3; | |||||
| # select the one with the lower lexicographic order. | |||||
| canonkey_t1 = tuple(['c'] + [G.node[pattern[0]][node_label]] + canonlist0 | |||||
| + [G.node[pattern[3]][node_label]] | |||||
| + [G[pattern[3]][pattern[0]][edge_label]] | |||||
| + canonlist3) | |||||
| canonkey_t2 = tuple(['c'] + [G.node[pattern[3]][node_label]] + canonlist3 | |||||
| + [G.node[pattern[0]][node_label]] | |||||
| + [G[pattern[0]][pattern[3]][edge_label]] | |||||
| + canonlist0) | |||||
| treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 9 | # pattern 9 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['9']: | for pattern in patterns['9']: | ||||
| canonkey2 = G.node[pattern[4]][node_label] + G[pattern[4]][pattern[2]][edge_label] | |||||
| canonkey3 = G.node[pattern[5]][node_label] + G[pattern[5]][pattern[3]][edge_label] | |||||
| prekey2 = G.node[pattern[2]][node_label] + G[pattern[2]][pattern[0]][edge_label] | |||||
| prekey3 = G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] | |||||
| canonkey2 = [G.node[pattern[4]][node_label], G[pattern[4]][pattern[2]][edge_label]] | |||||
| canonkey3 = [G.node[pattern[5]][node_label], G[pattern[5]][pattern[3]][edge_label]] | |||||
| prekey2 = [G.node[pattern[2]][node_label], G[pattern[2]][pattern[0]][edge_label]] | |||||
| prekey3 = [G.node[pattern[3]][node_label], G[pattern[3]][pattern[0]][edge_label]] | |||||
| if prekey2 + canonkey2 < prekey3 + canonkey3: | if prekey2 + canonkey2 < prekey3 + canonkey3: | ||||
| canonkey_t = G.node[pattern[1]][node_label] + G[pattern[1]][pattern[0]][edge_label] \ | |||||
| + prekey2 + prekey3 + canonkey2 + canonkey3 | |||||
| canonkey_t = [G.node[pattern[1]][node_label]] \ | |||||
| + [G[pattern[1]][pattern[0]][edge_label]] \ | |||||
| + prekey2 + prekey3 + canonkey2 + canonkey3 | |||||
| else: | else: | ||||
| canonkey_t = G.node[pattern[1]][node_label] + G[pattern[1]][pattern[0]][edge_label] \ | |||||
| + prekey3 + prekey2 + canonkey3 + canonkey2 | |||||
| treelet.append('9' + G.node[pattern[0]][node_label] + canonkey_t) | |||||
| canonkey_t = [G.node[pattern[1]][node_label]] \ | |||||
| + [G[pattern[1]][pattern[0]][edge_label]] \ | |||||
| + prekey3 + prekey2 + canonkey3 + canonkey2 | |||||
| treelet.append(tuple(['9'] + [G.node[pattern[0]][node_label]] + canonkey_t)) | |||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| return canonkey_l | return canonkey_l | ||||
| @@ -84,7 +84,7 @@ def loadGXL(filename): | |||||
| return g | return g | ||||
| def saveGXL(graph, filename, method='gedlib'): | |||||
| def saveGXL(graph, filename, method='gedlib-letter'): | |||||
| if method == 'benoit': | if method == 'benoit': | ||||
| import xml.etree.ElementTree as ET | import xml.etree.ElementTree as ET | ||||
| root_node = ET.Element('gxl') | root_node = ET.Element('gxl') | ||||
| @@ -142,6 +142,24 @@ def saveGXL(graph, filename, method='gedlib'): | |||||
| gxl_file.write("</graph>\n") | gxl_file.write("</graph>\n") | ||||
| gxl_file.write("</gxl>\n") | gxl_file.write("</gxl>\n") | ||||
| gxl_file.close() | gxl_file.close() | ||||
| elif method == 'gedlib-letter': | |||||
| # reference: https://github.com/dbblumenthal/gedlib/blob/master/data/generate_molecules.py#L22 | |||||
| # and https://github.com/dbblumenthal/gedlib/blob/master/data/datasets/Letter/HIGH/AP1_0000.gxl | |||||
| gxl_file = open(filename, 'w') | |||||
| gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | |||||
| gxl_file.write("<!DOCTYPE gxl SYSTEM \"http://www.gupro.de/GXL/gxl-1.0.dtd\">\n") | |||||
| gxl_file.write("<gxl xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n") | |||||
| gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"false\" edgemode=\"undirected\">") | |||||
| for v, attrs in graph.nodes(data=True): | |||||
| gxl_file.write("<node id=\"_" + str(v) + "\">") | |||||
| gxl_file.write("<attr name=\"x\"><float>" + str(attrs['attributes'][0]) + "</float></attr>") | |||||
| gxl_file.write("<attr name=\"y\"><float>" + str(attrs['attributes'][1]) + "</float></attr>") | |||||
| gxl_file.write("</node>") | |||||
| for v1, v2, attrs in graph.edges(data=True): | |||||
| gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\"/>") | |||||
| gxl_file.write("</graph>") | |||||
| gxl_file.write("</gxl>") | |||||
| gxl_file.close() | |||||
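A sketch of the input this new branch expects (assumptions: planar coordinates live in a list under the node key 'attributes' and the graph carries a 'name' attribute, following the Letter datasets referenced above; the import path is a guess):

    import networkx as nx
    # from pygraph.utils.graphfiles import saveGXL  # assumed module path

    g = nx.Graph(name='AP1_0000')
    g.add_node(0, attributes=[0.8, 1.6])   # x, y coordinates
    g.add_node(1, attributes=[2.4, 0.4])
    g.add_edge(0, 1)
    saveGXL(g, 'AP1_0000.gxl', method='gedlib-letter')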
| def loadSDF(filename): | def loadSDF(filename): | ||||
| @@ -227,9 +227,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| str_fw += '\nall gram matrices are ignored, no results obtained.\n\n' | str_fw += '\nall gram matrices are ignored, no results obtained.\n\n' | ||||
| else: | else: | ||||
| # save gram matrices to file. | # save gram matrices to file. | ||||
| np.savez(results_dir + '/' + ds_name + '.gm', | |||||
| gms=gram_matrices, params=param_list_pre_revised, y=y, | |||||
| gmtime=gram_matrix_time) | |||||
| # np.savez(results_dir + '/' + ds_name + '.gm', | |||||
| # gms=gram_matrices, params=param_list_pre_revised, y=y, | |||||
| # gmtime=gram_matrix_time) | |||||
| if verbose: | if verbose: | ||||
| print( | print( | ||||
| '3. Fitting and predicting using nested cross validation. This could really take a while...' | '3. Fitting and predicting using nested cross validation. This could really take a while...' | ||||