@@ -12,10 +12,10 @@ import multiprocessing
 import pickle
 import logging
 from gklearn.ged.util import compute_geds
-import numpy as np
 import time
 from utils import get_dataset
 import sys
+from group_results import group_trials


 def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):
@@ -92,11 +92,12 @@ def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
         ged_mats.append(ged_mat)
         runtimes.append(runtime)

-    # save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio)
-    # with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
-    #     np.save(f, np.array(ged_mats))
-    # with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
-    #     pickle.dump(runtime, f)
+    # Group trials and Remove single files.
+    name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
+    name_prefix = 'ged_matrix' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)
+    name_prefix = 'runtime' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
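For reference, the three positional booleans in the group_trials calls above line up with the override, clear and backup parameters of the function's signature shown later in this diff. The annotated call below is only an illustrative sketch: the flag meanings are inferred from that signature and the surrounding comments, and the concrete save_dir and name_middle values are made-up examples in the same format the script builds.

    from group_results import group_trials

    save_dir = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/'  # example path
    name_middle = '.MAO.mnum_sols_20.ratio_0.10.'  # example value, built as in the code above

    group_trials(save_dir, 'ged_matrix' + name_middle,
                 True,    # override: presumably regroup even if a group file exists
                 True,    # clear: presumably delete the per-trial files once grouped
                 False)   # backup: presumably skip copying per-trial files to a backup folder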
@@ -12,15 +12,19 @@ import multiprocessing
 import pickle
 import logging
 from gklearn.ged.util import compute_geds
-import numpy as np
 import time
 from utils import get_dataset
 import sys
+from group_results import group_trials


 def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):

     save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

+    # Return if the file exists.
+    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
+        return None, None

     """**2. Set parameters.**"""
@@ -39,8 +43,8 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
     }

     edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
-    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
-    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))
+    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
+    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

     options = ged_options.copy()
     options['edit_cost_constants'] = edit_cost_constants
@@ -75,6 +79,12 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):

 def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
+    # Return if the group file exists.
+    name_middle = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
+    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
+    if os.path.isfile(name_group):
+        return
+
     ged_mats = []
     runtimes = []
     for trial in range(1, 101):
@@ -84,24 +94,35 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
         ged_mats.append(ged_mat)
         runtimes.append(runtime)

-    # save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio)
-    # with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
-    #     np.save(f, np.array(ged_mats))
-    # with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
-    #     pickle.dump(runtime, f)
+    # Group trials and Remove single files.
+    name_prefix = 'ged_matrix' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)
+    name_prefix = 'runtime' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
     """**1. Get dataset.**"""
     dataset = get_dataset(ds_name)

-    for num_solutions in [1, 20, 40, 60, 80, 100]:
+    for num_solutions in num_solutions_list:
         print()
         print('# of solutions:', num_solutions)
-        for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
+        for ratio in ratio_list:
             print()
             print('Ratio:', ratio)
             save_trials_as_group(dataset, ds_name, num_solutions, ratio)

+
+def get_param_lists(ds_name):
+    if ds_name == 'AIDS_symb':
+        num_solutions_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    else:
+        num_solutions_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    return num_solutions_list, ratio_list


 if __name__ == '__main__':
@@ -117,4 +138,5 @@ if __name__ == '__main__':
     for ds_name in ds_name_list:
         print()
         print('Dataset:', ds_name)
+        num_solutions_list, ratio_list = get_param_lists(ds_name)
         results_for_a_dataset(ds_name)
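As a side note, both early-return checks added in this script key on file names assembled from the experiment parameters. The self-contained sketch below only shows what those names look like; the parameter values (MAO, 20 solutions, ratio 0.1, trial 3) are illustrative choices, not values fixed by the script.

    # Illustration of the file names the caching checks rely on (example values).
    ds_name, num_solutions, ratio, trial = 'MAO', 20, 0.1, 3

    # Per-trial result checked at the top of xp_compute_ged_matrix():
    save_file_suffix = ('.' + ds_name + '.num_sols_' + str(num_solutions)
                        + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial))
    print('ged_matrix' + save_file_suffix + '.pkl')
    # -> ged_matrix.MAO.num_sols_20.ratio_0.10.trial_3.pkl

    # Grouped result checked at the top of save_trials_as_group():
    name_middle = ('.' + ds_name + '.num_sols_' + str(num_solutions)
                   + '.ratio_' + "{:.2f}".format(ratio) + '.')
    print('groups/ged_mats' + name_middle + 'npy')
    # -> groups/ged_mats.MAO.num_sols_20.ratio_0.10.npy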
@@ -12,10 +12,10 @@ import multiprocessing
 import pickle
 import logging
 from gklearn.ged.util import compute_geds
-import numpy as np
 import time
 from utils import get_dataset
 import sys
+from group_results import group_trials


 def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
@@ -87,11 +87,12 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
         ged_mats.append(ged_mat)
         runtimes.append(runtime)

-    # save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio)
-    # with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
-    #     np.save(f, np.array(ged_mats))
-    # with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
-    #     pickle.dump(runtime, f)
+    # Group trials and Remove single files.
+    name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
+    name_prefix = 'ged_matrix' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)
+    name_prefix = 'runtime' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
@@ -12,10 +12,10 @@ import multiprocessing
 import pickle
 import logging
 from gklearn.ged.util import compute_geds
-import numpy as np
 import time
 from utils import get_dataset
 import sys
+from group_results import group_trials


 def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
@@ -92,11 +92,12 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
         ged_mats.append(ged_mat)
         runtimes.append(runtime)

-    # save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio)
-    # with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
-    #     np.save(f, np.array(ged_mats))
-    # with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
-    #     pickle.dump(runtime, f)
+    # Group trials and Remove single files.
+    name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
+    name_prefix = 'ged_matrix' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)
+    name_prefix = 'runtime' + name_middle
+    group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
@@ -16,6 +16,7 @@ from tqdm import tqdm
 import sys


+# This function is used by other scripts. Modify it carefully.
 def group_trials(dir_folder, name_prefix, override, clear, backup):

     # Get group name.
@@ -47,8 +48,20 @@ def group_trials(dir_folder, name_prefix, override, clear, backup):
         file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
         if os.path.isfile(file_name):
             with open(file_name, 'rb') as f:
-                data = pickle.load(f)
+                try:
+                    data = pickle.load(f)
+                except EOFError:
+                    print('EOF Error occurred.')
+                    return
             data_group.append(data)
+#             unpickler = pickle.Unpickler(f)
+#             data = unpickler.load()
+#             if not isinstance(data, np.array):
+#                 return
+#             else:
+#                 data_group.append(data)

         else: # Not all trials are completed.
             return
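The try/except added above tolerates per-trial files that cannot be unpickled to the end, such as files left empty or truncated by an interrupted run; in that case group_trials simply gives up on the group. A minimal standalone sketch of the failure mode it catches (the temporary file here is purely illustrative):

    import pickle
    import tempfile

    # An empty .pkl file stands in for a truncated per-trial result.
    with tempfile.NamedTemporaryFile(suffix='.pkl', delete=False) as f:
        path = f.name

    try:
        with open(path, 'rb') as f:
            data = pickle.load(f)
    except EOFError:
        print('EOF Error occurred.')  # same message the updated group_trials() prints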
@@ -81,11 +94,9 @@ def group_trials(dir_folder, name_prefix, override, clear, backup):

 def group_all_in_folder(dir_folder, override=False, clear=True, backup=True):

     # Create folders.
-    if not os.path.exists(dir_folder + 'groups/'):
-        os.makedirs(dir_folder + 'groups/')
+    os.makedirs(dir_folder + 'groups/', exist_ok=True)
     if backup:
-        if not os.path.exists(dir_folder + 'backups'):
-            os.makedirs(dir_folder + 'backups')
+        os.makedirs(dir_folder + 'backups', exist_ok=True)

     # Iterate all files.
     cur_file_prefix = ''
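The exist_ok=True form is a drop-in replacement for the check-then-create pattern it removes: the call simply does nothing when the directory is already there instead of raising FileExistsError. A minimal illustration, reusing one of the output folders listed below as an example path:

    import os

    path = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/groups/'
    os.makedirs(path, exist_ok=True)
    os.makedirs(path, exist_ok=True)  # second call is a no-op, no FileExistsError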
@@ -105,4 +116,10 @@ if __name__ == '__main__':
     group_all_in_folder(dir_folder)

     dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/'
+    group_all_in_folder(dir_folder)
+
+    dir_folder = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/'
+    group_all_in_folder(dir_folder)
+
+    dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.bipartite/'
     group_all_in_folder(dir_folder)
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Nov 3 20:23:25 2020
+
+@author: ljia
+"""
+import os
+import re
+
+
+def get_job_script(arg):
+    script = r"""
+#!/bin/bash
+#SBATCH --exclusive
+#SBATCH --job-name="st.""" + arg + r""".IPFP"
+#SBATCH --partition=tlong
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=jajupmochi@gmail.com
+#SBATCH --output="outputs/output_edit_costs.nums_sols.ratios.IPFP.""" + arg + """.txt"
+#SBATCH --error="errors/error_edit_costs.nums_sols.ratios.IPFP.""" + arg + """.txt"
+#
+#SBATCH --ntasks=1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=1
+#SBATCH --time=300:00:00
+#SBATCH --mem-per-cpu=4000
+
+srun hostname
+srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
+srun python3 edit_costs.nums_sols.ratios.IPFP.py """ + arg
+    script = script.strip()
+    script = re.sub('\n\t+', '\n', script)
+    script = re.sub('\n +', '\n', script)
+
+    return script
+
+
+if __name__ == '__main__':
+    ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
+    for ds_name in [ds_list[i] for i in [0, 3]]:
+        job_script = get_job_script(ds_name)
+        command = 'sbatch <<EOF\n' + job_script + '\nEOF'
+#         print(command)
+        os.system(command)
+#         os.popen(command)
+#         output = stream.readlines()
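The __main__ block pipes each generated script to sbatch through a shell heredoc, so no job file is ever written to disk. For comparison only, the sketch below shows an equivalent submission path that writes the script to a temporary file first; this is an assumption for illustration, not something the script above does.

    import os
    import tempfile

    def submit_via_file(job_script):
        # Write the generated SBATCH script to a temporary .sh file and submit that file.
        with tempfile.NamedTemporaryFile('w', suffix='.sh', delete=False) as f:
            f.write(job_script)
            path = f.name
        os.system('sbatch ' + path)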