@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the maximum number of solutions and the number N
# of graphs on the stability of GED computation with the BIPARTITE method, for several
# ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys

from group_results import group_trials


def generate_graphs():
    from gklearn.utils.graph_synthesizer import GraphSynthesizer
    gsyzer = GraphSynthesizer()
    graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
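    # Note: the synthesized graphs carry no node/edge labels or attributes, so the GED
    # between them is determined entirely by the CONSTANT edit cost values set below.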
    return graphs


def xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial):

    save_file_suffix = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

    # Return if the file exists.
    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
        return None, None

    """**2. Set parameters.**"""

    # Parameters for GED computation.
    ged_options = {'method': 'BIPARTITE',  # use the BIPARTITE heuristic.
                   # 'initialization_method': 'RANDOM',  # or 'NODE', etc. (for GEDEnv)
                   'lsape_model': 'ECBP',
                   # (?) When bigger than 1, the method is considered mIPFP.
                   # The actual number of computed solutions might be smaller than the specified value.
                   'max_num_solutions': max_num_solutions,
                   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
                   'greedy_method': 'BASIC',
                   # The distance between non-symbolic node/edge labels is computed as the Euclidean distance.
                   'attr_distance': 'euclidean',
                   'optimal': True,  # if True, the option --greedy-method has no effect.
                   # Number of parallel threads; has no effect if parallel computation is disabled.
                   'threads': multiprocessing.cpu_count(),
                   'centrality_method': 'NONE',
                   'centrality_weight': 0.7,
                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
                   }

    edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
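    # Assumption on the 'CONSTANT' cost ordering: the six constants are
    # [node_ins, node_del, node_sub, edge_ins, edge_del, edge_sub], so the three node
    # costs are scaled by `ratio` while the edge costs stay at 1, making `ratio` the
    # node-to-edge cost ratio under study.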
    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

    options = ged_options.copy()
    options['edit_cost_constants'] = edit_cost_constants
    options['node_labels'] = []
    options['edge_labels'] = []
    options['node_attrs'] = []
    options['edge_attrs'] = []
    parallel = True  # if num_solutions == 1 else False

    """**5. Compute GED matrix.**"""
    ged_mat = 'error'
    runtime = 0
    try:
        time0 = time.time()
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True)
        runtime = time.time() - time0
    except Exception as exp:
        print('An exception occurred when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    """**6. Get results.**"""
    with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime


def save_trials_as_group(graphs, N, max_num_solutions, ratio):
    # Return if the group file exists.
    name_middle = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
    if os.path.isfile(name_group):
        return

    ged_mats = []
    runtimes = []
    for trial in range(1, 101):
        print()
        print('Trial:', trial)
        ged_mat, runtime = xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial)
        ged_mats.append(ged_mat)
        runtimes.append(runtime)

    # Group trials and remove single files.
    name_prefix = 'ged_matrix' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)
    name_prefix = 'runtime' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
    for N in N_list:
        print()
        print('# of graphs:', N)
        for max_num_solutions in [1, 20, 40, 60, 80, 100]:
            print()
            print('Max # of solutions:', max_num_solutions)
            save_trials_as_group(graphs[:N], N, max_num_solutions, ratio)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        N_list = [int(i) for i in sys.argv[1:]]
    else:
        N_list = [10, 50, 100]

    # Generate graphs.
    graphs = generate_graphs()

    save_dir = 'outputs/edit_costs.max_num_sols.N.bipartite/'
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(save_dir + 'groups/', exist_ok=True)

    for ratio in [10, 1, 0.1]:
        print()
        print('Ratio:', ratio)
        results_for_a_ratio(ratio)
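
# A minimal sketch (not executed here) of how the grouped results could be inspected
# afterwards, assuming group_trials stacks the 100 per-trial GED matrices into the
# 'groups/ged_mats...npy' file referenced in save_trials_as_group:
#
#     import numpy as np
#     mats = np.load(save_dir + 'groups/ged_mats.100.mnum_sols_40.ratio_10.00.npy',
#                    allow_pickle=True)  # hypothetical file name following the pattern above
#     print('Mean per-pair std over trials:', np.mean(np.std(mats, axis=0)))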
@@ -22,8 +22,9 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):
     save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

-    """**1. Get dataset.**"""
-    dataset = get_dataset(ds_name)
+    # Return if the file exists.
+    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
+        return None, None

     """**2. Set parameters.**"""
@@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):

 def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
+    # Return if the group file exists.
+    name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
+    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
+    if os.path.isfile(name_group):
+        return
+
     ged_mats = []
     runtimes = []
     for trial in range(1, 101):
@@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
         runtimes.append(runtime)

     # Group trials and Remove single files.
-    name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
     name_prefix = 'ged_matrix' + name_middle
     group_trials(save_dir, name_prefix, True, True, False)
     name_prefix = 'runtime' + name_middle
     group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
     """**1. Get dataset.**"""
     dataset = get_dataset(ds_name)

-    for max_num_solutions in [1, 20, 40, 60, 80, 100]:
+    for max_num_solutions in mnum_solutions_list:
         print()
         print('Max # of solutions:', max_num_solutions)
-        for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
+        for ratio in ratio_list:
             print()
             print('Ratio:', ratio)
             save_trials_as_group(dataset, ds_name, max_num_solutions, ratio)
+
+
+def get_param_lists(ds_name):
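+    # Both branches are currently identical; the split just leaves room to give
+    # 'AIDS_symb' its own parameter grid later.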
+    if ds_name == 'AIDS_symb':
+        mnum_solutions_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    else:
+        mnum_solutions_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    return mnum_solutions_list, ratio_list
+
+
 if __name__ == '__main__':
     if len(sys.argv) > 1:
@@ -126,4 +143,5 @@ if __name__ == '__main__':
     for ds_name in ds_name_list:
         print()
         print('Dataset:', ds_name)
+        mnum_solutions_list, ratio_list = get_param_lists(ds_name)
         results_for_a_dataset(ds_name)
@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of solutions and the number N of graphs
# on the stability of GED computation with the IPFP method, for several ratios between
# node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys

from group_results import group_trials


def generate_graphs():
    from gklearn.utils.graph_synthesizer import GraphSynthesizer
    gsyzer = GraphSynthesizer()
    graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
    return graphs


def xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial):

    save_file_suffix = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

    # Return if the file exists.
    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
        return None, None

    """**2. Set parameters.**"""

    # Parameters for GED computation.
    ged_options = {'method': 'IPFP',  # use the IPFP heuristic.
                   'initialization_method': 'RANDOM',  # or 'NODE', etc.
                   # When bigger than 1, the method is considered mIPFP.
                   'initial_solutions': int(num_solutions * 4),
                   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
                   # The distance between non-symbolic node/edge labels is computed as the Euclidean distance.
                   'attr_distance': 'euclidean',
                   'ratio_runs_from_initial_solutions': 0.25,
                   # Number of parallel threads; has no effect if parallel computation is disabled.
                   'threads': multiprocessing.cpu_count(),
                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
                   }
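
    # With 'initial_solutions' set to 4 * num_solutions and
    # 'ratio_runs_from_initial_solutions' set to 0.25, roughly num_solutions of the
    # initial solutions are actually refined by IPFP per graph pair (assuming the ratio
    # option selects the fraction of initial solutions that are run).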
    edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

    options = ged_options.copy()
    options['edit_cost_constants'] = edit_cost_constants
    options['node_labels'] = []
    options['edge_labels'] = []
    options['node_attrs'] = []
    options['edge_attrs'] = []
    parallel = True  # if num_solutions == 1 else False

    """**5. Compute GED matrix.**"""
    ged_mat = 'error'
    runtime = 0
    try:
        time0 = time.time()
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True)
        runtime = time.time() - time0
    except Exception as exp:
        print('An exception occurred when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    """**6. Get results.**"""
    with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime


def save_trials_as_group(graphs, N, num_solutions, ratio):
    # Return if the group file exists.
    name_middle = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
    if os.path.isfile(name_group):
        return

    ged_mats = []
    runtimes = []
    for trial in range(1, 101):
        print()
        print('Trial:', trial)
        ged_mat, runtime = xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial)
        ged_mats.append(ged_mat)
        runtimes.append(runtime)

    # Group trials and remove single files.
    name_prefix = 'ged_matrix' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)
    name_prefix = 'runtime' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
    for N in N_list:
        print()
        print('# of graphs:', N)
        for num_solutions in [1, 20, 40, 60, 80, 100]:
            print()
            print('# of solutions:', num_solutions)
            save_trials_as_group(graphs[:N], N, num_solutions, ratio)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        N_list = [int(i) for i in sys.argv[1:]]
    else:
        N_list = [10, 50, 100]

    # Generate graphs.
    graphs = generate_graphs()

    save_dir = 'outputs/edit_costs.num_sols.N.IPFP/'
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(save_dir + 'groups/', exist_ok=True)

    for ratio in [10, 1, 0.1]:
        print()
        print('Ratio:', ratio)
        results_for_a_ratio(ratio)
@@ -59,7 +59,7 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
     runtime = 0
     try:
         time0 = time.time()
-        ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, parallel=parallel, verbose=True)
+        ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, repeats=1, parallel=parallel, verbose=True)
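+        # repeats is pinned to 1 so that, in this script, only num_solutions varies;
+        # the repetition-based experiments live in the edit_costs.repeats.* scripts.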
         runtime = time.time() - time0
     except Exception as exp:
         print('An exception occured when running this experiment:')
@@ -74,9 +74,9 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
         pickle.dump(ged_mat, f)
     with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
         pickle.dump(runtime, f)

     return ged_mat, runtime


 def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
     # Return if the group file exists.
@@ -99,8 +99,8 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
     group_trials(save_dir, name_prefix, True, True, False)
     name_prefix = 'runtime' + name_middle
     group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
     """**1. Get dataset.**"""
     dataset = get_dataset(ds_name)
@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of repeats and the number N of graphs
# on the stability of GED computation with the IPFP method, for several ratios between
# node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys

from group_results import group_trials


def generate_graphs():
    from gklearn.utils.graph_synthesizer import GraphSynthesizer
    gsyzer = GraphSynthesizer()
    graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
    return graphs


def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial):

    save_file_suffix = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

    # Return if the file exists.
    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
        return None, None

    """**2. Set parameters.**"""

    # Parameters for GED computation.
    ged_options = {'method': 'IPFP',  # use the IPFP heuristic.
                   'initialization_method': 'RANDOM',  # or 'NODE', etc.
                   # When bigger than 1, the method is considered mIPFP.
                   'initial_solutions': 1,
                   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
                   # The distance between non-symbolic node/edge labels is computed as the Euclidean distance.
                   'attr_distance': 'euclidean',
                   'ratio_runs_from_initial_solutions': 1,
                   # Number of parallel threads; has no effect if parallel computation is disabled.
                   'threads': multiprocessing.cpu_count(),
                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
                   }
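
    # A single initial solution is used here ('initial_solutions': 1, ratio 1); the
    # variability studied in this experiment comes from the `repeats` argument passed
    # to compute_geds below, which is assumed to rerun each pairwise computation and
    # keep the best distance found.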
    edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

    options = ged_options.copy()
    options['edit_cost_constants'] = edit_cost_constants
    options['node_labels'] = []
    options['edge_labels'] = []
    options['node_attrs'] = []
    options['edge_attrs'] = []
    parallel = True  # if num_solutions == 1 else False

    """**5. Compute GED matrix.**"""
    ged_mat = 'error'
    runtime = 0
    try:
        time0 = time.time()
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
        runtime = time.time() - time0
    except Exception as exp:
        print('An exception occurred when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    """**6. Get results.**"""
    with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime


def save_trials_as_group(graphs, N, repeats, ratio):
    # Return if the group file exists.
    name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
    if os.path.isfile(name_group):
        return

    ged_mats = []
    runtimes = []
    for trial in range(1, 101):
        print()
        print('Trial:', trial)
        ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial)
        ged_mats.append(ged_mat)
        runtimes.append(runtime)

    # Group trials and remove single files.
    name_prefix = 'ged_matrix' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)
    name_prefix = 'runtime' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
    for N in N_list:
        print()
        print('# of graphs:', N)
        for repeats in [1, 20, 40, 60, 80, 100]:
            print()
            print('Repeats:', repeats)
            save_trials_as_group(graphs[:N], N, repeats, ratio)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        N_list = [int(i) for i in sys.argv[1:]]
    else:
        N_list = [10, 50, 100]

    # Generate graphs.
    graphs = generate_graphs()

    save_dir = 'outputs/edit_costs.repeats.N.IPFP/'
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(save_dir + 'groups/', exist_ok=True)

    for ratio in [10, 1, 0.1]:
        print()
        print('Ratio:', ratio)
        results_for_a_ratio(ratio)
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of repeats and the number N of graphs
# on the stability of GED computation with the BIPARTITE method, for several ratios
# between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys

from group_results import group_trials


def generate_graphs():
    from gklearn.utils.graph_synthesizer import GraphSynthesizer
    gsyzer = GraphSynthesizer()
    graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
    return graphs


def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial):

    save_file_suffix = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

    # Return if the file exists.
    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
        return None, None

    """**2. Set parameters.**"""

    # Parameters for GED computation.
    ged_options = {'method': 'BIPARTITE',  # use the BIPARTITE heuristic.
                   # 'initialization_method': 'RANDOM',  # or 'NODE', etc. (for GEDEnv)
                   'lsape_model': 'ECBP',
                   # (?) When bigger than 1, the method is considered mIPFP.
                   # The actual number of computed solutions might be smaller than the specified value.
                   'max_num_solutions': 1,
                   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
                   'greedy_method': 'BASIC',
                   # The distance between non-symbolic node/edge labels is computed as the Euclidean distance.
                   'attr_distance': 'euclidean',
                   'optimal': True,  # if True, the option --greedy-method has no effect.
                   # Number of parallel threads; has no effect if parallel computation is disabled.
                   'threads': multiprocessing.cpu_count(),
                   'centrality_method': 'NONE',
                   'centrality_weight': 0.7,
                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
                   }

    edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

    options = ged_options.copy()
    options['edit_cost_constants'] = edit_cost_constants
    options['node_labels'] = []
    options['edge_labels'] = []
    options['node_attrs'] = []
    options['edge_attrs'] = []
    parallel = True  # if num_solutions == 1 else False

    """**5. Compute GED matrix.**"""
    ged_mat = 'error'
    runtime = 0
    try:
        time0 = time.time()
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
        runtime = time.time() - time0
    except Exception as exp:
        print('An exception occurred when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    """**6. Get results.**"""
    with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime


def save_trials_as_group(graphs, N, repeats, ratio):
    # Return if the group file exists.
    name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
    if os.path.isfile(name_group):
        return

    ged_mats = []
    runtimes = []
    for trial in range(1, 101):
        print()
        print('Trial:', trial)
        ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial)
        ged_mats.append(ged_mat)
        runtimes.append(runtime)

    # Group trials and remove single files.
    name_prefix = 'ged_matrix' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)
    name_prefix = 'runtime' + name_middle
    group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
    for N in N_list:
        print()
        print('# of graphs:', N)
        for repeats in [1, 20, 40, 60, 80, 100]:
            print()
            print('Repeats:', repeats)
            save_trials_as_group(graphs[:N], N, repeats, ratio)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        N_list = [int(i) for i in sys.argv[1:]]
    else:
        N_list = [10, 50, 100]

    # Generate graphs.
    graphs = generate_graphs()

    save_dir = 'outputs/edit_costs.repeats.N.bipartite/'
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(save_dir + 'groups/', exist_ok=True)

    for ratio in [10, 1, 0.1]:
        print()
        print('Ratio:', ratio)
        results_for_a_ratio(ratio)
@@ -19,11 +19,12 @@ from group_results import group_trials

 def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

     save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

-    """**1. Get dataset.**"""
-    dataset = get_dataset(ds_name)
+    # Return if the file exists.
+    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
+        return None, None

     """**2. Set parameters.**"""
@@ -78,6 +79,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

 def save_trials_as_group(dataset, ds_name, repeats, ratio):
+    # Return if the group file exists.
+    name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
+    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
+    if os.path.isfile(name_group):
+        return
+
     ged_mats = []
     runtimes = []
     for trial in range(1, 101):
@@ -88,25 +95,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
         runtimes.append(runtime)

     # Group trials and Remove single files.
-    name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
     name_prefix = 'ged_matrix' + name_middle
     group_trials(save_dir, name_prefix, True, True, False)
     name_prefix = 'runtime' + name_middle
     group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
     """**1. Get dataset.**"""
     dataset = get_dataset(ds_name)

-    for repeats in [1, 20, 40, 60, 80, 100]:
+    for repeats in repeats_list:
         print()
         print('Repeats:', repeats)
-        for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
+        for ratio in ratio_list:
             print()
             print('Ratio:', ratio)
             save_trials_as_group(dataset, ds_name, repeats, ratio)
+
+
+def get_param_lists(ds_name):
+    if ds_name == 'AIDS_symb':
+        repeats_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    else:
+        repeats_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    return repeats_list, ratio_list
+
+
 if __name__ == '__main__':
     if len(sys.argv) > 1:
@@ -121,4 +138,5 @@ if __name__ == '__main__':
     for ds_name in ds_name_list:
         print()
         print('Dataset:', ds_name)
+        repeats_list, ratio_list = get_param_lists(ds_name)
         results_for_a_dataset(ds_name)
@@ -19,11 +19,12 @@ from group_results import group_trials

 def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

     save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

-    """**1. Get dataset.**"""
-    dataset = get_dataset(ds_name)
+    # Return if the file exists.
+    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
+        return None, None

     """**2. Set parameters.**"""
@@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

 def save_trials_as_group(dataset, ds_name, repeats, ratio):
+    # Return if the group file exists.
+    name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
+    name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
+    if os.path.isfile(name_group):
+        return
+
     ged_mats = []
     runtimes = []
     for trial in range(1, 101):
@@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
         runtimes.append(runtime)

     # Group trials and Remove single files.
-    name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
     name_prefix = 'ged_matrix' + name_middle
     group_trials(save_dir, name_prefix, True, True, False)
     name_prefix = 'runtime' + name_middle
     group_trials(save_dir, name_prefix, True, True, False)


 def results_for_a_dataset(ds_name):
     """**1. Get dataset.**"""
     dataset = get_dataset(ds_name)

-    for repeats in [1, 20, 40, 60, 80, 100]:
+    for repeats in repeats_list:
         print()
         print('Repeats:', repeats)
-        for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
+        for ratio in ratio_list:
             print()
             print('Ratio:', ratio)
             save_trials_as_group(dataset, ds_name, repeats, ratio)
+
+
+def get_param_lists(ds_name):
+    if ds_name == 'AIDS_symb':
+        repeats_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    else:
+        repeats_list = [1, 20, 40, 60, 80, 100]
+        ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+    return repeats_list, ratio_list
+
+
 if __name__ == '__main__':
     if len(sys.argv) > 1:
@@ -126,4 +143,5 @@ if __name__ == '__main__':
     for ds_name in ds_name_list:
         print()
         print('Dataset:', ds_name)
+        repeats_list, ratio_list = get_param_lists(ds_name)
         results_for_a_dataset(ds_name)
@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg, params):
    ged_method = params[0]
    multi_method = params[1]
    job_name_label = r"rep." if multi_method == 'repeats' else r""
    script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.""" + job_name_label + r"N" + arg + r"." + ged_method + r""""
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt"
#SBATCH --error="errors/error_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.""" + multi_method + r".N." + ged_method + r".py " + arg
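    # Normalize the embedded batch script: drop surrounding blank lines and strip any
    # indentation inherited from the Python source.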
    script = script.strip()
    script = re.sub('\n\t+', '\n', script)
    script = re.sub('\n +', '\n', script)

    return script


if __name__ == '__main__':
    params_list = [('IPFP', 'nums_sols'),
                   ('IPFP', 'repeats'),
                   ('bipartite', 'max_num_sols'),
                   ('bipartite', 'repeats')]
    N_list = [10, 50, 100]
    for params in params_list[1:]:
        for N in [N_list[i] for i in [0, 1, 2]]:
            job_script = get_job_script(str(N), params)
            command = 'sbatch <<EOF\n' + job_script + '\nEOF'
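            # The generated script is fed to sbatch through a shell here-document,
            # so no intermediate .sh file needs to be written.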
            # print(command)
            os.system(command)
            # os.popen(command)
            # output = stream.readlines()
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
    script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.""" + arg + r""".bp"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.max_nums_sols.ratios.bipartite.py """ + arg
    script = script.strip()
    script = re.sub('\n\t+', '\n', script)
    script = re.sub('\n +', '\n', script)

    return script


if __name__ == '__main__':
    ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
    for ds_name in [ds_list[i] for i in [0, 1, 2, 3]]:
        job_script = get_job_script(ds_name)
        command = 'sbatch <<EOF\n' + job_script + '\nEOF'
        # print(command)
        os.system(command)
        # os.popen(command)
        # output = stream.readlines()
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
    script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.rep.""" + arg + r""".IPFP"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.repeats.ratios.IPFP.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.repeats.ratios.IPFP.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.repeats.ratios.IPFP.py """ + arg
    script = script.strip()
    script = re.sub('\n\t+', '\n', script)
    script = re.sub('\n +', '\n', script)

    return script


if __name__ == '__main__':
    ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
    for ds_name in [ds_list[i] for i in [0, 3]]:
        job_script = get_job_script(ds_name)
        command = 'sbatch <<EOF\n' + job_script + '\nEOF'
        # print(command)
        os.system(command)
        # os.popen(command)
        # output = stream.readlines()
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
    script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.rep.""" + arg + r""".bp"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.repeats.ratios.bipartite.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.repeats.ratios.bipartite.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.repeats.ratios.bipartite.py """ + arg
    script = script.strip()
    script = re.sub('\n\t+', '\n', script)
    script = re.sub('\n +', '\n', script)

    return script


if __name__ == '__main__':
    ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
    for ds_name in [ds_list[i] for i in [0, 1, 2, 3]]:
        job_script = get_job_script(ds_name)
        command = 'sbatch <<EOF\n' + job_script + '\nEOF'
        # print(command)
        os.system(command)
        # os.popen(command)
        # output = stream.readlines()