| @@ -12,6 +12,70 @@ from gklearn.preimage.utils import generate_median_preimages_by_class | |||
| from gklearn.utils import compute_gram_matrices_by_class | |||
def xp_median_preimage_9_1():
    """xp 9_1: Acyclic, sspkernel, using CONSTANT.

    Builds the option dictionaries for the experiment, prints them, and then
    calls ``generate_median_preimages_by_class`` once for every entry of the
    fit-method schedule defined at the end of this function.
    """
    # set parameters.
    ds_name = 'Acyclic'
    mpg_options = {
        # 'fit_method' is overwritten before each run in the loop below.
        'fit_method': 'k-graphs',
        'init_ecc': [4, 4, 2, 1, 1, 1],
        'ds_name': ds_name,
        'parallel': True,  # False
        'time_limit_in_sec': 0,
        'max_itrs': 100,
        'max_itrs_without_update': 3,
        'epsilon_residual': 0.01,
        'epsilon_ec': 0.1,
        'verbose': 2,
    }

    # The same kernel table is used for both nodes and edges.
    sub_kernels = {
        'symb': deltakernel,
        'nsymb': gaussiankernel,
        'mix': functools.partial(kernelproduct, deltakernel, gaussiankernel),
    }
    kernel_options = {
        'name': 'structuralspkernel',
        'edge_weight': None,
        'node_kernels': sub_kernels,
        'edge_kernels': sub_kernels,
        'compute_method': 'naive',
        'parallel': 'imap_unordered',  # None
        'n_jobs': multiprocessing.cpu_count(),
        'normalize': True,
        'verbose': 2,
    }

    ged_options = {
        'method': 'IPFP',
        'initialization_method': 'RANDOM',  # 'NODE'
        'initial_solutions': 10,  # 1
        'edit_cost': 'CONSTANT',
        'attr_distance': 'euclidean',
        'ratio_runs_from_initial_solutions': 1,
        'threads': multiprocessing.cpu_count(),
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
    }

    mge_options = {
        'init_type': 'MEDOID',
        'random_inits': 10,
        'time_limit': 600,
        'verbose': 2,
        'refine': False,
    }

    save_results = True
    dir_save = '../results/xp_median_preimage/'
    irrelevant_labels = {
        'node_attrs': ['x', 'y', 'z'],
        'edge_labels': ['bond_stereo'],
    }
    edge_required = False

    # print settings.
    print('parameters:')
    reported_settings = (
        ('dataset name:', ds_name),
        ('mpg_options:', mpg_options),
        ('kernel_options:', kernel_options),
        ('ged_options:', ged_options),
        ('mge_options:', mge_options),
        ('save_results:', save_results),
        ('irrelevant_labels:', irrelevant_labels),
    )
    for label, value in reported_settings:
        print(label, value)
    print()

    # generate preimages.
    # 'random' appears three times in the schedule, so it is run three times.
    fit_method_schedule = ['k-graphs', 'expert'] + ['random'] * 3
    for fit_method in fit_method_schedule:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        # The one mpg_options dict is updated in place and reused for each run.
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(
            ds_name,
            mpg_options,
            kernel_options,
            ged_options,
            mge_options,
            save_results=save_results,
            save_medians=True,
            plot_medians=True,
            load_gm='auto',
            dir_save=dir_save,
            irrelevant_labels=irrelevant_labels,
            edge_required=edge_required,
        )
| def xp_median_preimage_8_1(): | |||
| """xp 8_1: Monoterpenoides, sspkernel, using CONSTANT. | |||
| """ | |||
| @@ -546,4 +610,7 @@ if __name__ == "__main__": | |||
| # xp_median_preimage_7_1() | |||
| #### xp 8_1: Monoterpenoides, sspkernel, using CONSTANT. | |||
| xp_median_preimage_8_1() | |||
| # xp_median_preimage_8_1() | |||
| #### xp 9_1: Acyclic, sspkernel, using CONSTANT. | |||
| xp_median_preimage_9_1() | |||
| @@ -68,7 +68,8 @@ class Dataset(object): | |||
| def load_predefined_dataset(self, ds_name): | |||
| current_path = os.path.dirname(os.path.realpath(__file__)) + '/' | |||
| if ds_name == 'Acyclic': | |||
| pass | |||
| ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| elif ds_name == 'COIL-DEL': | |||
| ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' | |||
| self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
| @@ -720,38 +720,26 @@ def load_from_ds(filename, filename_targets): | |||
| label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []} | |||
| content = open(filename).read().splitlines() | |||
| extension = splitext(content[0].split(' ')[0])[1][1:] | |||
| if extension == 'ct': | |||
| load_file_fun = load_ct | |||
| elif extension == 'gxl': | |||
| load_file_fun = load_gxl | |||
| if filename_targets is None or filename_targets == '': | |||
| if extension == 'ct': | |||
| for i in range(0, len(content)): | |||
| tmp = content[i].split(' ') | |||
| # remove the '#'s in file names | |||
| g, l_names = load_ct(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | |||
| data.append(g) | |||
| __append_label_names(label_names, l_names) | |||
| y.append(float(tmp[1])) | |||
| elif extension == 'gxl': | |||
| for i in range(0, len(content)): | |||
| tmp = content[i].split(' ') | |||
| # remove the '#'s in file names | |||
| g, l_names = load_gxl(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | |||
| data.append(g) | |||
| __append_label_names(label_names, l_names) | |||
| y.append(float(tmp[1])) | |||
| else: # y in a seperate file | |||
| if extension == 'ct': | |||
| for i in range(0, len(content)): | |||
| tmp = content[i] | |||
| # remove the '#'s in file names | |||
| g, l_names = load_ct(dirname_dataset + '/' + tmp.replace('#', '', 1)) | |||
| data.append(g) | |||
| __append_label_names(label_names, l_names) | |||
| elif extension == 'gxl': | |||
| for i in range(0, len(content)): | |||
| tmp = content[i] | |||
| # remove the '#'s in file names | |||
| g, l_names = load_gxl(dirname_dataset + '/' + tmp.replace('#', '', 1)) | |||
| data.append(g) | |||
| __append_label_names(label_names, l_names) | |||
| for i in range(0, len(content)): | |||
| tmp = content[i].split(' ') | |||
| # remove the '#'s in file names | |||
| g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | |||
| data.append(g) | |||
| __append_label_names(label_names, l_names) | |||
| y.append(float(tmp[1])) | |||
| else: # targets in a separate file | |||
| for i in range(0, len(content)): | |||
| tmp = content[i] | |||
| # remove the '#'s in file names | |||
| g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1)) | |||
| data.append(g) | |||
| __append_label_names(label_names, l_names) | |||
| content_y = open(filename_targets).read().splitlines() | |||
| # assume entries in filename and filename_targets have the same order. | |||
| @@ -774,16 +762,16 @@ if __name__ == '__main__': | |||
| # ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', | |||
| # 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt'} | |||
| # Gn, y = loadDataset(ds['dataset'], filename_y=ds['dataset_y']) | |||
| ds_file = '../../datasets/acyclic/dataset_bps.ds' # node symb | |||
| Gn, targets, label_names = load_dataset(ds_file) | |||
| # ds_file = '../../datasets/Acyclic/dataset_bps.ds' # node symb | |||
| # Gn, targets, label_names = load_dataset(ds_file) | |||
| ## ds = {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds'} # node/edge symb | |||
| ## Gn, y = loadDataset(ds['dataset']) | |||
| ## ds = {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds'} # unlabeled | |||
| ## Gn, y = loadDataset(ds['dataset']) | |||
| print(Gn[1].graph) | |||
| print(Gn[1].nodes(data=True)) | |||
| print(Gn[1].edges(data=True)) | |||
| print(targets[1]) | |||
| # print(Gn[1].graph) | |||
| # print(Gn[1].nodes(data=True)) | |||
| # print(Gn[1].edges(data=True)) | |||
| # print(targets[1]) | |||
| # # .gxl file. | |||
| # ds_file = '../../datasets/monoterpenoides/dataset_10+.ds' # node/edge symb | |||