#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 16:08:33 2020
@author: ljia

This script computes the classification accuracy of each graph kernel on
datasets with different entropies of degree distribution.
"""
from utils import Graph_Kernel_List, cross_validate
import numpy as np
import logging

num_nodes = 40
half_num_graphs = 100
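
# Each class contains half_num_graphs graphs. Set 1 uses num_nodes nodes per
# graph; the degree sequences of set 2 below fix the node count at 60 instead.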


def generate_graphs():
    # from gklearn.utils.graph_synthesizer import GraphSynthesizer
    # gsyzer = GraphSynthesizer()
    # graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
    # return graphs
    import networkx as nx

    degrees11 = [5] * num_nodes
    # degrees12 = [2] * num_nodes
    degrees12 = [5] * num_nodes
    degrees21 = list(range(1, 11)) * 6
    # degrees22 = [5 * i for i in list(range(1, 11)) * 6]
    degrees22 = list(range(1, 11)) * 6

    # method 1
    graphs11 = [nx.configuration_model(degrees11, create_using=nx.Graph) for i in range(half_num_graphs)]
    graphs12 = [nx.configuration_model(degrees12, create_using=nx.Graph) for i in range(half_num_graphs)]
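    # NB: nx.configuration_model samples a multigraph by default; building into
    # a simple nx.Graph merges parallel edges (self-loops may remain), so the
    # realized degrees can fall slightly below the target sequence.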

    # method 2: can easily generate isomorphic graphs.
    # graphs11 = [nx.random_regular_graph(2, num_nodes, seed=None) for i in range(half_num_graphs)]
    # graphs12 = [nx.random_regular_graph(10, num_nodes, seed=None) for i in range(half_num_graphs)]

    # Add node labels.
    for g in graphs11:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs12:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs1 = graphs11 + graphs12

    # method 1: the entropy of the two classes is not the same.
    graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
    graphs22 = [nx.configuration_model(degrees22, create_using=nx.Graph) for i in range(half_num_graphs)]
    # # method 2: too slow, and may fail.
    # graphs21 = [nx.random_degree_sequence_graph(degrees21, seed=None, tries=100) for i in range(half_num_graphs)]
    # graphs22 = [nx.random_degree_sequence_graph(degrees22, seed=None, tries=100) for i in range(half_num_graphs)]

    # # method 3: no randomness.
    # graphs21 = [nx.havel_hakimi_graph(degrees21, create_using=None) for i in range(half_num_graphs)]
    # graphs22 = [nx.havel_hakimi_graph(degrees22, create_using=None) for i in range(half_num_graphs)]

    # # method 4:
    # graphs21 = [nx.configuration_model(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]
    # graphs22 = [nx.degree_sequence_tree(degrees21, create_using=nx.Graph) for i in range(half_num_graphs)]

    # # method 5: the entropy of the two classes is not the same.
    # graphs21 = [nx.expected_degree_graph(degrees21, seed=None, selfloops=False) for i in range(half_num_graphs)]
    # graphs22 = [nx.expected_degree_graph(degrees22, seed=None, selfloops=False) for i in range(half_num_graphs)]

    # # method 6: seems there is no randomness.
    # graphs21 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]
    # graphs22 = [nx.random_powerlaw_tree(num_nodes, gamma=3, seed=None, tries=10000) for i in range(half_num_graphs)]
    # Add node labels.
    for g in graphs21:
        for n in g.nodes():
            g.nodes[n]['atom'] = 0
    for g in graphs22:
        for n in g.nodes():
            g.nodes[n]['atom'] = 1

    graphs2 = graphs21 + graphs22
    # # check for isomorphism.
    # iso_mat1 = np.zeros((len(graphs1), len(graphs1)))
    # num1 = 0
    # num2 = 0
    # for i in range(len(graphs1)):
    #     for j in range(i + 1, len(graphs1)):
    #         if nx.is_isomorphic(graphs1[i], graphs1[j]):
    #             iso_mat1[i, j] = 1
    #             iso_mat1[j, i] = 1
    #             num1 += 1
    #             print('iso:', num1, ':', i, ',', j)
    #         else:
    #             num2 += 1
    #             print('not iso:', num2, ':', i, ',', j)
    #
    # iso_mat2 = np.zeros((len(graphs2), len(graphs2)))
    # num1 = 0
    # num2 = 0
    # for i in range(len(graphs2)):
    #     for j in range(i + 1, len(graphs2)):
    #         if nx.is_isomorphic(graphs2[i], graphs2[j]):
    #             iso_mat2[i, j] = 1
    #             iso_mat2[j, i] = 1
    #             num1 += 1
    #             print('iso:', num1, ':', i, ',', j)
    #         else:
    #             num2 += 1
    #             print('not iso:', num2, ':', i, ',', j)

    return graphs1, graphs2
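
# By construction, every graph in set 1 targets a constant degree sequence
# (degree-distribution entropy ~0 per graph), while set 2 targets a sequence
# uniform over degrees 1..10 (entropy up to ln(10), about 2.30 nats, if the
# sequence is realized exactly).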


def get_infos(graph):
    from gklearn.utils import Dataset
    ds = Dataset()
    ds.load_graphs(graph)
    infos = ds.get_dataset_infos(keys=['all_degree_entropy', 'ave_node_degree'])
    infos['ave_degree_entropy'] = np.mean(infos['all_degree_entropy'])
    print(infos['ave_degree_entropy'], ',', infos['ave_node_degree'])
    return infos
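
# For reference, a minimal sketch of a per-graph degree-distribution entropy,
# assuming Shannon entropy of the normalized degree histogram; the
# 'all_degree_entropy' computed by gklearn's Dataset may differ in detail.
def degree_entropy(g):
    # Histogram of node degrees, normalized to a probability distribution.
    _, counts = np.unique([d for _, d in g.degree()], return_counts=True)
    p = counts / counts.sum()
    # Shannon entropy in nats; zero for degree-regular graphs.
    return float(-np.sum(p * np.log(p)))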


def xp_accuracy_diff_entropy():
    # Generate graphs.
    graphs1, graphs2 = generate_graphs()

    # Compute the entropy of the degree distribution of the generated graphs.
    info11 = get_infos(graphs1[0:half_num_graphs])
    info12 = get_infos(graphs1[half_num_graphs:])
    info21 = get_infos(graphs2[0:half_num_graphs])
    info22 = get_infos(graphs2[half_num_graphs:])

    # Run and save.
    import pickle
    import os
    save_dir = 'outputs/accuracy_diff_entropy/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    accuracies = {}
    confidences = {}
    for kernel_name in Graph_Kernel_List:
        print()
        print('Kernel:', kernel_name)

        accuracies[kernel_name] = []
        confidences[kernel_name] = []
        for set_i, graphs in enumerate([graphs1, graphs2]):
            print()
            print('Graph set', set_i)

            tmp_graphs = [g.copy() for g in graphs]
            targets = [0] * half_num_graphs + [1] * half_num_graphs

            accuracy = 'error'
            confidence = 'error'
            try:
                accuracy, confidence = cross_validate(tmp_graphs, targets, kernel_name, ds_name=str(set_i), output_dir=save_dir)  # , n_jobs=1)
            except Exception as exp:
                print('An exception occurred when running this experiment:')
                LOG_FILENAME = save_dir + 'error.txt'
                # Note: basicConfig only takes effect on its first call.
                logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
                logging.exception('\n' + kernel_name + ', ' + str(set_i) + ':')
                print(repr(exp))
            accuracies[kernel_name].append(accuracy)
            confidences[kernel_name].append(confidence)

            pickle.dump(accuracy, open(save_dir + 'accuracy.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))
            pickle.dump(confidence, open(save_dir + 'confidence.' + kernel_name + '.' + str(set_i) + '.pkl', 'wb'))

    # Save all.
    pickle.dump(accuracies, open(save_dir + 'accuracies.pkl', 'wb'))
    pickle.dump(confidences, open(save_dir + 'confidences.pkl', 'wb'))

    return


if __name__ == '__main__':
    xp_accuracy_diff_entropy()
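
# To inspect the saved results afterwards, e.g.:
#
#     import pickle
#     with open('outputs/accuracy_diff_entropy/accuracies.pkl', 'rb') as f:
#         accuracies = pickle.load(f)
#     print(accuracies)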