| @@ -0,0 +1,2 @@ | |||
| [run] | |||
| omit = gklearn/tests/* | |||
| @@ -15,6 +15,7 @@ datasets/* | |||
| !datasets/AIDS/ | |||
| !datasets/monoterpenoides/ | |||
| !datasets/Fingerprint/*.txt | |||
| !datasets/Cuneiform/*.txt | |||
| notebooks/results/* | |||
| notebooks/check_gm/* | |||
| notebooks/test_parallel/* | |||
| @@ -41,3 +42,4 @@ dist/ | |||
| build/ | |||
| .coverage | |||
| htmlcov | |||
| @@ -22,7 +22,7 @@ install: | |||
| script: | |||
| - python setup.py bdist_wheel | |||
| - pytest -v --cov-report term --cov=gklearn gklearn/tests/ | |||
| - pytest -v --cov-config=.coveragerc --cov-report term --cov=gklearn gklearn/tests/ | |||
| after_success: | |||
| - codecov | |||
| @@ -0,0 +1,267 @@ | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| 0 | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | |||
| 5 | |||
| 6 | |||
| 7 | |||
| 8 | |||
| 9 | |||
| 10 | |||
| 11 | |||
| 12 | |||
| 13 | |||
| 14 | |||
| 15 | |||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | |||
| 28 | |||
| 29 | |||
| @@ -0,0 +1,119 @@ | |||
| README for dataset Cuneiform | |||
| === Usage === | |||
| This folder contains the following comma-separated text files | |||
| (replace DS with the name of the dataset); a minimal Python loading sketch follows the file list: | |||
| n = total number of nodes | |||
| m = total number of edges | |||
| N = number of graphs | |||
| (1) DS_A.txt (m lines) | |||
| sparse (block diagonal) adjacency matrix for all graphs, | |||
| each line corresponds to (row, col) resp. (node_id, node_id) | |||
| (2) DS_graph_indicator.txt (n lines) | |||
| column vector of graph identifiers for all nodes of all graphs, | |||
| the value in the i-th line is the graph_id of the node with node_id i | |||
| (3) DS_graph_labels.txt (N lines) | |||
| class labels for all graphs in the dataset, | |||
| the value in the i-th line is the class label of the graph with graph_id i | |||
| (4) DS_node_labels.txt (n lines) | |||
| column vector of node labels, | |||
| the value in the i-th line corresponds to the node with node_id i | |||
| There are OPTIONAL files if the respective information is available: | |||
| (5) DS_edge_labels.txt (m lines; same size as DS_A.txt) | |||
| labels for the edges in DS_A.txt | |||
| (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt) | |||
| attributes for the edges in DS_A.txt | |||
| (7) DS_node_attributes.txt (n lines) | |||
| matrix of node attributes, | |||
| the comma-separated values in the i-th line form the attribute vector of the node with node_id i | |||
| (8) DS_graph_attributes.txt (N lines) | |||
| regression values for all graphs in the dataset, | |||
| the value in the i-th line is the attribute of the graph with graph_id i | |||
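For illustration only (this sketch is not part of the original dataset description): assuming the file prefix for this dataset is Cuneiform and that networkx is available, the files above can be assembled into per-graph structures roughly as follows; attribute files and error handling are deliberately omitted.

import networkx as nx

def load_tu_format(prefix):
    # graph_indicator: the i-th line holds the graph_id of the node with node_id i (ids are 1-based).
    graph_of_node = [int(x) for x in open(prefix + '_graph_indicator.txt')]
    graph_labels = [int(x) for x in open(prefix + '_graph_labels.txt')]
    node_labels = [line.strip() for line in open(prefix + '_node_labels.txt')]
    graphs = [nx.Graph(label=y) for y in graph_labels]
    for node_id, gid in enumerate(graph_of_node, start=1):
        graphs[gid - 1].add_node(node_id, label=node_labels[node_id - 1])
    # DS_A.txt: one "row, col" pair per line, i.e. one entry of the block-diagonal adjacency matrix.
    for line in open(prefix + '_A.txt'):
        u, v = (int(x) for x in line.split(','))
        graphs[graph_of_node[u - 1] - 1].add_edge(u, v)
    return graphs

# e.g.: graphs = load_tu_format('Cuneiform')

Within graphkit-learn itself, gklearn.utils.graphfiles.loadDataset reads this format directly (it is called with the *_A.txt file elsewhere in this changeset); the sketch above only serves to make the file layout concrete.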
| === Description === | |||
| The Cuneiform dataset contains graphs representing 30 different Hittite cuneiform signs. | |||
| The data was obtained from nine cuneiform tablets written by scholars of Hittitology in | |||
| the course of a study on individualistic characteristics of cuneiform handwriting. | |||
| After automated extraction of individual wedges, the assignment of the wedges to the | |||
| cuneiform signs was determined manually. The graph model is explained in detail in the | |||
| referenced publication. | |||
| === References === | |||
| Nils M. Kriege, Matthias Fey, Denis Fisseler, Petra Mutzel, Frank Weichert | |||
| Recognizing Cuneiform Signs Using Graph Based Methods. 2018. arXiv:1802.05908 | |||
| https://arxiv.org/abs/1802.05908 | |||
| === Description of Labels === | |||
| Node labels were converted to integer values using this map: | |||
| Component 0: | |||
| 0 depthPoint | |||
| 1 tailVertex | |||
| 2 leftVertex | |||
| 3 rightVertex | |||
| Component 1: | |||
| 0 vertical | |||
| 1 Winkelhaken | |||
| 2 horizontal | |||
| Edge labels were converted to integer values using this map: | |||
| Component 0: | |||
| 0 wedge | |||
| 1 arrangement | |||
| Class labels were converted to integer values using this map: | |||
| 0 tu | |||
| 1 ta | |||
| 2 ti | |||
| 3 nu | |||
| 4 na | |||
| 5 ni | |||
| 6 bu | |||
| 7 ba | |||
| 8 bi | |||
| 9 zu | |||
| 10 za | |||
| 11 zi | |||
| 12 su | |||
| 13 sa | |||
| 14 si | |||
| 15 hu | |||
| 16 ha | |||
| 17 hi | |||
| 18 du | |||
| 19 da | |||
| 20 di | |||
| 21 ru | |||
| 22 ra | |||
| 23 ri | |||
| 24 ku | |||
| 25 ka | |||
| 26 ki | |||
| 27 lu | |||
| 28 la | |||
| 29 li | |||
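As an editorial convenience (not part of the dataset files), the three maps above can be written down as Python dictionaries for decoding loaded integer labels:

NODE_COMPONENT_0 = {0: 'depthPoint', 1: 'tailVertex', 2: 'leftVertex', 3: 'rightVertex'}
NODE_COMPONENT_1 = {0: 'vertical', 1: 'Winkelhaken', 2: 'horizontal'}
EDGE_COMPONENT_0 = {0: 'wedge', 1: 'arrangement'}
CLASS_LABEL = {
    0: 'tu', 1: 'ta', 2: 'ti', 3: 'nu', 4: 'na', 5: 'ni', 6: 'bu', 7: 'ba', 8: 'bi',
    9: 'zu', 10: 'za', 11: 'zi', 12: 'su', 13: 'sa', 14: 'si', 15: 'hu', 16: 'ha', 17: 'hi',
    18: 'du', 19: 'da', 20: 'di', 21: 'ru', 22: 'ra', 23: 'ri', 24: 'ku', 25: 'ka', 26: 'ki',
    27: 'lu', 28: 'la', 29: 'li',
}

# e.g. a node label line "0, 2" from Cuneiform_node_labels.txt decodes to
# NODE_COMPONENT_0[0], NODE_COMPONENT_1[2], i.e. ('depthPoint', 'horizontal').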
| @@ -16,7 +16,6 @@ from functools import partial | |||
| import networkx as nx | |||
| import numpy as np | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.utils import direct_product | |||
| from gklearn.utils.graphdataset import get_dataset_attributes | |||
| from gklearn.utils.parallel import parallel_gm | |||
| @@ -28,7 +28,6 @@ from gklearn.utils.kernels import deltakernel | |||
| from gklearn.utils.utils import untotterTransformation | |||
| from gklearn.utils.graphdataset import get_dataset_attributes | |||
| from gklearn.utils.parallel import parallel_gm | |||
| sys.path.insert(0, "../") | |||
| def marginalizedkernel(*args, | |||
| @@ -6,8 +6,6 @@ | |||
| [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | |||
| """ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| import time | |||
| from functools import partial | |||
| from tqdm import tqdm | |||
| @@ -20,7 +20,6 @@ import numpy as np | |||
| from gklearn.utils.utils import getSPGraph | |||
| from gklearn.utils.graphdataset import get_dataset_attributes | |||
| from gklearn.utils.parallel import parallel_gm | |||
| sys.path.insert(0, "../") | |||
| def spkernel(*args, | |||
| node_label='atom', | |||
| @@ -25,8 +25,6 @@ from gklearn.utils.graphdataset import get_dataset_attributes | |||
| from gklearn.utils.parallel import parallel_gm | |||
| from gklearn.utils.trie import Trie | |||
| sys.path.insert(0, "../") | |||
| def structuralspkernel(*args, | |||
| node_label='atom', | |||
| @@ -8,7 +8,6 @@ | |||
| """ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| import time | |||
| from collections import Counter | |||
| from itertools import chain | |||
| @@ -9,7 +9,6 @@ | |||
| """ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| import time | |||
| from collections import Counter | |||
| from itertools import chain | |||
| @@ -10,7 +10,6 @@ | |||
| import sys | |||
| from collections import Counter | |||
| sys.path.insert(0, "../") | |||
| from functools import partial | |||
| import time | |||
| #from multiprocessing import Pool | |||
| @@ -9,10 +9,8 @@ import numpy as np | |||
| import random | |||
| import csv | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs | |||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs | |||
| def find_best_k(): | |||
| ds = {'name': 'monoterpenoides', | |||
| @@ -13,15 +13,14 @@ from multiprocessing import Pool | |||
| from functools import partial | |||
| import time | |||
| import random | |||
| import sys | |||
| from scipy import optimize | |||
| from scipy.optimize import minimize | |||
| import cvxpy as cp | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic | |||
| from preimage.utils import kernel_distance_matrix | |||
| from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic | |||
| from gklearn.preimage.utils import kernel_distance_matrix | |||
| def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, | |||
| params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', | |||
| @@ -128,12 +128,10 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method | |||
| elif lib == 'gedlib-bash': | |||
| import time | |||
| import random | |||
| import sys | |||
| import os | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import saveDataset | |||
| tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/' | |||
| tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' | |||
| if not os.path.exists(tmp_dir): | |||
| os.makedirs(tmp_dir) | |||
| fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | |||
| @@ -144,7 +142,7 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method | |||
| command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n' | |||
| command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | |||
| command += 'export LD_LIBRARY_PATH\n' | |||
| command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||
| command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' | |||
| command += './ged_for_python_bash monoterpenoides ' + fn_collection \ | |||
| + ' \'' + algo_options + '\' ' | |||
| for ec in edit_cost_constant: | |||
| @@ -11,11 +11,9 @@ import random | |||
| import networkx as nx | |||
| from tqdm import tqdm | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphdataset import get_dataset_attributes | |||
| from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels | |||
| from ged import GED, ged_median | |||
| from gklearn.preimage.ged import GED, ged_median | |||
| def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
| @@ -438,7 +436,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
| def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||
| dataset='monoterpenoides', | |||
| graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'): | |||
| graph_dir=''): | |||
| """Compute the iam by c++ implementation (gedlib) through bash. | |||
| """ | |||
| import os | |||
| @@ -462,18 +460,18 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||
| fgroup.write("\n</GraphCollection>") | |||
| fgroup.close() | |||
| tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/' | |||
| tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' | |||
| fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | |||
| createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection) | |||
| # fn_collection = tmp_dir + 'collection_for_debug' | |||
| # graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl' | |||
| # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl' | |||
| # if dataset == 'Letter-high' or dataset == 'Fingerprint': | |||
| # dataset = 'letter' | |||
| command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n' | |||
| command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | |||
| command += 'export LD_LIBRARY_PATH\n' | |||
| command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||
| command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' | |||
| command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \ | |||
| + ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' ' | |||
| if edit_cost_constant is None: | |||
| @@ -489,8 +487,8 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||
| sod_sm = float(output[0].strip()) | |||
| sod_gm = float(output[1].strip()) | |||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
| return sod_sm, sod_gm, fname_sm, fname_gm | |||
| @@ -11,14 +11,12 @@ from tqdm import tqdm | |||
| import random | |||
| #import csv | |||
| from shutil import copyfile | |||
| import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from preimage.iam import iam_bash | |||
| from gklearn.preimage.iam import iam_bash | |||
| from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
| from preimage.ged import GED | |||
| from preimage.utils import get_same_item_indices | |||
| from gklearn.preimage.ged import GED | |||
| from gklearn.preimage.utils import get_same_item_indices | |||
| def test_knn(): | |||
| ds = {'name': 'monoterpenoides', | |||
| @@ -30,7 +28,7 @@ def test_knn(): | |||
| # edge_label = 'bond_type' | |||
| # ds_name = 'mono' | |||
| dir_output = 'results/knn/' | |||
| graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/' | |||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/' | |||
| k_nn = 1 | |||
| percent = 0.1 | |||
| @@ -2,5 +2,5 @@ import sys | |||
| import pathlib | |||
| # insert gedlibpy library. | |||
| sys.path.insert(0, "../../") | |||
| sys.path.insert(0, "../../../") | |||
| from gedlibpy import librariesImport, gedlibpy | |||
| @@ -14,10 +14,7 @@ from tqdm import tqdm | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| sys.path.insert(0, "../") | |||
| from utils import compute_kernel, dis_gstar | |||
| from gklearn.preimage.utils import compute_kernel, dis_gstar | |||
| def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): | |||
| @@ -52,8 +52,6 @@ def convertGraph(G): | |||
| def testNxGrapĥ(): | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
| 'extra_params': {}} # node/edge symb | |||
| @@ -9,12 +9,10 @@ from matplotlib import pyplot as plt | |||
| import numpy as np | |||
| from tqdm import tqdm | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from utils import remove_edges | |||
| from fitDistance import fit_GED_to_kernel_distance | |||
| from utils import normalize_distance_matrix | |||
| from gklearn.preimage.utils import remove_edges | |||
| from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance | |||
| from gklearn.preimage.utils import normalize_distance_matrix | |||
| def test_update_costs(): | |||
| @@ -63,7 +61,7 @@ def median_paper_clcpc_python_best(): | |||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
| repeats = 50 | |||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
| graph_dir = collection_path + 'gxl/' | |||
| fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt' | |||
| @@ -160,7 +158,7 @@ def median_paper_clcpc_python_bash_cpp(): | |||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
| repeats = 50 | |||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
| graph_dir = collection_path + 'gxl/' | |||
| fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt' | |||
| @@ -14,13 +14,11 @@ import sys | |||
| def test_NON_SYMBOLIC_cost(): | |||
| """Test edit cost LETTER2. | |||
| """ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter | |||
| from preimage.test_k_closest_graphs import reform_attributes | |||
| from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter | |||
| from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt' | |||
| dataset = '../../datasets/Letter-high/Letter-high_A.txt' | |||
| Gn, y_all = loadDataset(dataset) | |||
| g1 = Gn[200] | |||
| @@ -53,14 +51,12 @@ def test_NON_SYMBOLIC_cost(): | |||
| def test_LETTER2_cost(): | |||
| """Test edit cost LETTER2. | |||
| """ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from preimage.ged import GED, get_nb_edit_operations_letter | |||
| from preimage.test_k_closest_graphs import reform_attributes | |||
| from gklearn.preimage.ged import GED, get_nb_edit_operations_letter | |||
| from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
| g1 = Gn[200] | |||
| @@ -96,14 +92,12 @@ def test_get_nb_edit_operations_letter(): | |||
| should be the same as the cost computed by number of operations and edit | |||
| cost constants. | |||
| """ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from preimage.ged import GED, get_nb_edit_operations_letter | |||
| from preimage.test_k_closest_graphs import reform_attributes | |||
| from gklearn.preimage.ged import GED, get_nb_edit_operations_letter | |||
| from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
| g1 = Gn[200] | |||
| @@ -136,13 +130,12 @@ def test_get_nb_edit_operations(): | |||
| numbers of edit operations. The distance/cost computed by GED should be the | |||
| same as the cost computed by number of operations and edit cost constants. | |||
| """ | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from preimage.ged import GED, get_nb_edit_operations | |||
| from gklearn.preimage.ged import GED, get_nb_edit_operations | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| import os | |||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
| ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset']) | |||
| g1 = Gn[20] | |||
| @@ -173,11 +166,10 @@ def test_get_nb_edit_operations(): | |||
| def test_ged_python_bash_cpp(): | |||
| """Test ged computation with python invoking the c++ code by bash command (with updated library). | |||
| """ | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from preimage.ged import GED | |||
| from gklearn.preimage.ged import GED | |||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
| # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | |||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
| @@ -233,7 +225,7 @@ def test_ged_best_settings_updated(): | |||
| """Test ged computation with best settings the same as in the C++ code (with updated library). | |||
| """ | |||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
| # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | |||
| @@ -292,7 +284,7 @@ def test_ged_best_settings(): | |||
| """Test ged computation with best settings the same as in the C++ code. | |||
| """ | |||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
| @@ -350,7 +342,7 @@ def test_ged_default(): | |||
| """Test ged computation with default settings. | |||
| """ | |||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
| @@ -404,11 +396,10 @@ def test_ged_default(): | |||
| def test_ged_min(): | |||
| """Test ged computation with the "min" stabilizer. | |||
| """ | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from preimage.ged import GED | |||
| from gklearn.preimage.ged import GED | |||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
| @@ -487,8 +478,6 @@ def convertGraph(G): | |||
| def testNxGrapĥ(): | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
| 'extra_params': {}} # node/edge symb | |||
| @@ -13,14 +13,11 @@ import time | |||
| import random | |||
| #from tqdm import tqdm | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| #from gklearn.utils.logger2file import * | |||
| from iam import iam_upgraded | |||
| from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar | |||
| #from ged import ged_median | |||
| from gklearn.preimage.iam import iam_upgraded | |||
| from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar | |||
| #from gklearn.preimage.ged import ged_median | |||
| def test_iam_monoterpenoides_with_init40(): | |||
| @@ -52,7 +49,7 @@ def test_iam_monoterpenoides_with_init40(): | |||
| 'stabilizer': ged_stabilizer} | |||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
| graph_dir = collection_path + 'gxl/' | |||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
| repeats = 50 | |||
| @@ -17,15 +17,12 @@ import multiprocessing | |||
| from multiprocessing import Pool | |||
| from functools import partial | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
| #from gklearn.utils.logger2file import * | |||
| from iam import iam_upgraded, iam_bash | |||
| from utils import compute_kernel, dis_gstar, kernel_distance_matrix | |||
| from fitDistance import fit_GED_to_kernel_distance | |||
| #from ged import ged_median | |||
| from gklearn.preimage.iam import iam_upgraded, iam_bash | |||
| from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix | |||
| from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance | |||
| #from gklearn.preimage.ged import ged_median | |||
| def fit_edit_cost_constants(fit_method, edit_cost_name, | |||
| @@ -204,6 +201,8 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho | |||
| if Kmatrix is not None: | |||
| Kmatrix_median = np.copy(Kmatrix[group_min,:]) | |||
| Kmatrix_median = Kmatrix_median[:,group_min] | |||
| else: | |||
| Kmatrix_median = None | |||
| # 1. fit edit cost constants. | |||
| @@ -379,7 +378,7 @@ def test_k_closest_graphs_with_cv(): | |||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
| repeats = 50 | |||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
| graph_dir = collection_path + 'gxl/' | |||
| sod_sm_list = [] | |||
| @@ -11,12 +11,10 @@ import matplotlib.pyplot as plt | |||
| import time | |||
| from tqdm import tqdm | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from median import draw_Letter_graph | |||
| from ged import GED, ged_median | |||
| from utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||
| from gklearn.preimage.median import draw_Letter_graph | |||
| from gklearn.preimage.ged import GED, ged_median | |||
| from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||
| dis_gstar, remove_edges | |||
| @@ -13,14 +13,11 @@ import time | |||
| import random | |||
| #from tqdm import tqdm | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from utils import remove_edges, compute_kernel, get_same_item_indices | |||
| from ged import ged_median | |||
| from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices | |||
| from gklearn.preimage.ged import ged_median | |||
| from preimage_iam import preimage_iam | |||
| from gklearn.preimage.preimage_iam import preimage_iam | |||
| ############################################################################### | |||
| @@ -13,13 +13,10 @@ import time | |||
| import random | |||
| #from tqdm import tqdm | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from ged import ged_median | |||
| from utils import compute_kernel, get_same_item_indices, remove_edges | |||
| from preimage_iam import preimage_iam_random_mix | |||
| from gklearn.preimage.ged import ged_median | |||
| from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges | |||
| from gklearn.preimage.preimage_iam import preimage_iam_random_mix | |||
| ############################################################################### | |||
| # tests on different values on grid of median-sets and k. | |||
| @@ -13,14 +13,10 @@ import time | |||
| import random | |||
| #from tqdm import tqdm | |||
| #import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset | |||
| from preimage_random import preimage_random | |||
| from ged import ged_median | |||
| from utils import compute_kernel, get_same_item_indices, remove_edges | |||
| from gklearn.preimage.preimage_random import preimage_random | |||
| from gklearn.preimage.ged import ged_median | |||
| from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges | |||
| ############################################################################### | |||
| @@ -11,8 +11,6 @@ Useful functions. | |||
| import multiprocessing | |||
| import numpy as np | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.kernels.marginalizedKernel import marginalizedkernel | |||
| from gklearn.kernels.untilHPathKernel import untilhpathkernel | |||
| from gklearn.kernels.spKernel import spkernel | |||
| @@ -41,7 +39,7 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||
| return np.sqrt(term1 - term2 + term3) | |||
| def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
| def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'): | |||
| if graph_kernel == 'marginalizedkernel': | |||
| Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
| p_quit=0.03, n_iteration=10, remove_totters=False, | |||
| @@ -49,6 +47,7 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
| elif graph_kernel == 'untilhpathkernel': | |||
| Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
| depth=7, k_func='MinMax', compute_method='trie', | |||
| parallel=parallel, | |||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
| elif graph_kernel == 'spkernel': | |||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
| @@ -66,18 +65,18 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
| Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, | |||
| edge_label=edge_label, node_kernels=sub_kernels, | |||
| edge_kernels=sub_kernels, | |||
| parallel=None, n_jobs=multiprocessing.cpu_count(), | |||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), | |||
| verbose=verbose) | |||
| elif graph_kernel == 'treeletkernel': | |||
| pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | |||
| # pkernel = functools.partial(gaussiankernel, gamma=1e-6) | |||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
| Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
| sub_kernel=pkernel, | |||
| sub_kernel=pkernel, parallel=parallel, | |||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
| elif graph_kernel == 'weisfeilerlehmankernel': | |||
| Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | |||
| height=4, base_kernel='subtree', | |||
| height=4, base_kernel='subtree', parallel=None, | |||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
| # normalization | |||
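(Editorial note on the hunk above, not part of the patch itself.) compute_kernel is widened with a parallel argument that is forwarded to the until-h-path, structural shortest-path and treelet kernels, while the Weisfeiler-Lehman branch is pinned to parallel=None. A hypothetical call using the new signature, with a kernel name and label names taken from elsewhere in this changeset, would look like:

Kmatrix = compute_kernel(Gn, 'untilhpathkernel', 'atom', 'bond_type',
                         verbose=True, parallel='imap_unordered')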
| @@ -11,11 +11,8 @@ import matplotlib.pyplot as plt | |||
| from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset | |||
| from tqdm import tqdm | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
| from utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices | |||
| from gklearn.preimage.utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices | |||
| def visualize_graph_dataset(dis_measure, visual_method, draw_figure, | |||
| @@ -115,11 +112,11 @@ def visualize_distances_in_kernel(): | |||
| # Gn = Gn[0:50] | |||
| fname_medians = 'expert.treelet' | |||
| # add set median. | |||
| fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
| fname_sm = 'results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
| set_median = loadGXL(fname_sm) | |||
| Gn.append(set_median) | |||
| # add generalized median (estimated pre-image.) | |||
| fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
| fname_gm = 'results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
| gen_median = loadGXL(fname_gm) | |||
| Gn.append(gen_median) | |||
| @@ -166,19 +163,19 @@ def visualize_distances_in_kernel(): | |||
| def visualize_distances_in_ged(): | |||
| from fitDistance import compute_geds | |||
| from ged import GED | |||
| from gklearn.preimage.fitDistance import compute_geds | |||
| from gklearn.preimage.ged import GED | |||
| ds = {'name': 'monoterpenoides', | |||
| 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset']) | |||
| # Gn = Gn[0:50] | |||
| # add set median. | |||
| fname_medians = 'expert.treelet' | |||
| fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
| fname_sm = 'preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
| set_median = loadGXL(fname_sm) | |||
| Gn.append(set_median) | |||
| # add generalized median (estimated pre-image.) | |||
| fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
| fname_gm = 'preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
| gen_median = loadGXL(fname_gm) | |||
| Gn.append(gen_median) | |||
| @@ -227,9 +224,10 @@ def visualize_distances_in_ged(): | |||
| def visualize_distances_in_kernel_monoterpenoides(): | |||
| import os | |||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||
| Gn_original, y_all = loadDataset(ds['dataset']) | |||
| # Gn = Gn[0:50] | |||
| @@ -301,11 +299,12 @@ def visualize_distances_in_kernel_monoterpenoides(): | |||
| def visualize_distances_in_ged_monoterpenoides(): | |||
| from fitDistance import compute_geds | |||
| from ged import GED | |||
| from gklearn.preimage.fitDistance import compute_geds | |||
| from gklearn.preimage.ged import GED | |||
| import os | |||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||
| Gn_original, y_all = loadDataset(ds['dataset']) | |||
| # Gn = Gn[0:50] | |||
| @@ -379,8 +378,8 @@ def visualize_distances_in_ged_monoterpenoides(): | |||
| def visualize_distances_in_kernel_letter_h(): | |||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
| # Gn = Gn[0:50] | |||
| @@ -455,8 +454,8 @@ def visualize_distances_in_ged_letter_h(): | |||
| from fitDistance import compute_geds | |||
| from preimage.test_k_closest_graphs import reform_attributes | |||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
| # Gn = Gn[0:50] | |||
| @@ -11,35 +11,37 @@ import csv | |||
| from shutil import copyfile | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import os | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | |||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
| from preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel | |||
| from preimage.find_best_k import getRelations | |||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
| from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel | |||
| from gklearn.preimage.find_best_k import getRelations | |||
| def get_dataset(ds_name): | |||
| if ds_name == 'Letter-high': # node non-symb | |||
| dataset = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml' | |||
| graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/' | |||
| dataset = 'cpp_ext/data/collections/Letter.xml' | |||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' | |||
| Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||
| for G in Gn: | |||
| reform_attributes(G) | |||
| elif ds_name == 'Fingerprint': | |||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/collections/Fingerprint.xml' | |||
| graph_dir = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/datasets/Fingerprint/data/' | |||
| Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||
| for G in Gn: | |||
| reform_attributes(G) | |||
| # dataset = 'cpp_ext/data/collections/Fingerprint.xml' | |||
| # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/' | |||
| # Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||
| # for G in Gn: | |||
| # reform_attributes(G) | |||
| dataset = '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/' | |||
| Gn, y_all = loadDataset(dataset) | |||
| elif ds_name == 'SYNTHETIC': | |||
| pass | |||
| elif ds_name == 'SYNTHETICnew': | |||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
| graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew' | |||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt' | |||
| # graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/' | |||
| dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew' | |||
| # dataset = '../../datasets/Letter-high/Letter-high_A.txt' | |||
| # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' | |||
| Gn, y_all = loadDataset(dataset) | |||
| elif ds_name == 'Synthie': | |||
| pass | |||
| @@ -184,6 +186,8 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti | |||
| if Kmatrix is not None: | |||
| Kmatrix_sub = Kmatrix[values,:] | |||
| Kmatrix_sub = Kmatrix_sub[:,values] | |||
| else: | |||
| Kmatrix_sub = None | |||
| for repeat in range(repeats): | |||
| print('\nrepeat =', repeat) | |||
| @@ -273,11 +277,11 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti | |||
| nb_dis_k_gi2gm[2] += 1 | |||
| # save median graphs. | |||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | |||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | |||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
| @@ -427,63 +431,101 @@ if __name__ == "__main__": | |||
| # initial_solutions=40, | |||
| # Gn_data = [Gn, y_all, graph_dir], | |||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||
| # #### xp 3: Fingerprint, sspkernel, using LETTER2. | |||
| # #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC. | |||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz') | |||
| # Kmatrix = gmfile['Kmatrix'] | |||
| # run_time = gmfile['run_time'] | |||
| # # normalization | |||
| # Kmatrix_diag = Kmatrix.diagonal().copy() | |||
| # for i in range(len(Kmatrix)): | |||
| # for j in range(i, len(Kmatrix)): | |||
| # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||
| ## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||
| ## Kmatrix=Kmatrix, run_time=run_time) | |||
| # # load dataset. | |||
| # print('getting dataset and computing kernel distance matrix first...') | |||
| # ds_name = 'Fingerprint' | |||
| # ds_name = 'SYNTHETICnew' | |||
| # gkernel = 'structuralspkernel' | |||
| # Gn, y_all, graph_dir = get_dataset(ds_name) | |||
| # # remove graphs without nodes and edges. | |||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0 | |||
| # and nx.number_of_edges(G) != 0)] | |||
| # idx = [G[0] for G in Gn] | |||
| # Gn = [G[1] for G in Gn] | |||
| # y_all = [y_all[i] for i in idx] | |||
| ## Gn = Gn[0:50] | |||
| ## y_all = y_all[0:50] | |||
| ## Gn = Gn[0:10] | |||
| ## y_all = y_all[0:10] | |||
| # for G in Gn: | |||
| # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
| # # compute pair distances. | |||
| ## dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
| ## Kmatrix=None, gkernel=gkernel, verbose=True) | |||
| # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
| # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||
| ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
| # # fitting and computing. | |||
| # fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] | |||
| # fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||
| # for fit_method in fit_methods: | |||
| # print('\n-------------------------------------') | |||
| # print('fit method:', fit_method) | |||
| # parameters = {'ds_name': ds_name, | |||
| # 'gkernel': gkernel, | |||
| # 'edit_cost_name': 'LETTER2', | |||
| # 'edit_cost_name': 'NON_SYMBOLIC', | |||
| # 'ged_method': 'mIPFP', | |||
| # 'attr_distance': 'euclidean', | |||
| # 'fit_method': fit_method} | |||
| # xp_fit_method_for_non_symbolic(parameters, save_results=True, | |||
| # initial_solutions=40, | |||
| # initial_solutions=1, | |||
| # Gn_data = [Gn, y_all, graph_dir], | |||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], | |||
| # Kmatrix=Kmatrix) | |||
| # #### xp 4: SYNTHETICnew, sspkernel, using NON_SYMBOLIC. | |||
| # ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC. | |||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz') | |||
| # Kmatrix = gmfile['Kmatrix'] | |||
| # # normalization | |||
| # Kmatrix_diag = Kmatrix.diagonal().copy() | |||
| # for i in range(len(Kmatrix)): | |||
| # for j in range(i, len(Kmatrix)): | |||
| # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||
| # run_time = 21821.35 | |||
| # np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||
| # Kmatrix=Kmatrix, run_time=run_time) | |||
| # | |||
| # # load dataset. | |||
| # print('getting dataset and computing kernel distance matrix first...') | |||
| # ds_name = 'SYNTHETICnew' | |||
| # gkernel = 'structuralspkernel' | |||
| # gkernel = 'spkernel' | |||
| # Gn, y_all, graph_dir = get_dataset(ds_name) | |||
| # # remove graphs without nodes and edges. | |||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||
| # and nx.number_of_edges(G) != 0)] | |||
| # idx = [G[0] for G in Gn] | |||
| # Gn = [G[1] for G in Gn] | |||
| # y_all = [y_all[i] for i in idx] | |||
| # Gn = Gn[0:10] | |||
| # y_all = y_all[0:10] | |||
| ## # remove graphs without nodes and edges. | |||
| ## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0 | |||
| ## and nx.number_of_edges(G) != 0)] | |||
| ## idx = [G[0] for G in Gn] | |||
| ## Gn = [G[1] for G in Gn] | |||
| ## y_all = [y_all[i] for i in idx] | |||
| ## Gn = Gn[0:5] | |||
| ## y_all = y_all[0:5] | |||
| # for G in Gn: | |||
| # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
| # # compute pair distances. | |||
| # | |||
| # # compute/read Gram matrix and pair distances. | |||
| ## Kmatrix = compute_kernel(Gn, gkernel, None, None, True) | |||
| ## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
| ## Kmatrix=Kmatrix) | |||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||
| # Kmatrix = gmfile['Kmatrix'] | |||
| # run_time = gmfile['run_time'] | |||
| ## Kmatrix = Kmatrix[[0,1,2,3,4],:] | |||
| ## Kmatrix = Kmatrix[:,[0,1,2,3,4]] | |||
| # print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
| # Kmatrix=None, gkernel=gkernel, verbose=True) | |||
| # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||
| ## Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
| ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
| # | |||
| # # fitting and computing. | |||
| # fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||
| # for fit_method in fit_methods: | |||
| @@ -496,68 +538,69 @@ if __name__ == "__main__": | |||
| # 'attr_distance': 'euclidean', | |||
| # 'fit_method': fit_method} | |||
| # xp_fit_method_for_non_symbolic(parameters, save_results=True, | |||
| # initial_solutions=40, | |||
| # Gn_data = [Gn, y_all, graph_dir], | |||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||
| ### xp 5: SYNTHETICnew, spkernel, using NON_SYMBOLIC. | |||
| gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz') | |||
| Kmatrix = gmfile['Kmatrix'] | |||
| # normalization | |||
| Kmatrix_diag = Kmatrix.diagonal().copy() | |||
| for i in range(len(Kmatrix)): | |||
| for j in range(i, len(Kmatrix)): | |||
| Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
| Kmatrix[j][i] = Kmatrix[i][j] | |||
| run_time = 21821.35 | |||
| np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||
| Kmatrix=Kmatrix, run_time=run_time) | |||
| # initial_solutions=1, | |||
| # Gn_data=[Gn, y_all, graph_dir], | |||
| # k_dis_data=[dis_mat, dis_max, dis_min, dis_mean], | |||
| # Kmatrix=Kmatrix) | |||
| #### xp 5: Fingerprint, sspkernel, using LETTER2. | |||
| # load dataset. | |||
| print('getting dataset and computing kernel distance matrix first...') | |||
| ds_name = 'SYNTHETICnew' | |||
| gkernel = 'spkernel' | |||
| ds_name = 'Fingerprint' | |||
| gkernel = 'structuralspkernel' | |||
| Gn, y_all, graph_dir = get_dataset(ds_name) | |||
| # # remove graphs without nodes and edges. | |||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||
| # remove graphs without nodes and edges. | |||
| Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0)] | |||
| # and nx.number_of_edges(G) != 0)] | |||
| # idx = [G[0] for G in Gn] | |||
| # Gn = [G[1] for G in Gn] | |||
| # y_all = [y_all[i] for i in idx] | |||
| # Gn = Gn[0:5] | |||
| # y_all = y_all[0:5] | |||
| idx = [G[0] for G in Gn] | |||
| Gn = [G[1] for G in Gn] | |||
| y_all = [y_all[i] for i in idx] | |||
| y_idx = get_same_item_indices(y_all) | |||
| # remove unused labels. | |||
| for G in Gn: | |||
| G.graph['edge_attrs'] = [] | |||
| for edge in G.edges: | |||
| del G.edges[edge]['attributes'] | |||
| del G.edges[edge]['orient'] | |||
| del G.edges[edge]['angle'] | |||
| Gn = Gn[805:815] | |||
| y_all = y_all[805:815] | |||
| for G in Gn: | |||
| G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
| # compute/read Gram matrix and pair distances. | |||
| # Kmatrix = compute_kernel(Gn, gkernel, None, None, True) | |||
| # np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
| # Kmatrix=Kmatrix) | |||
| gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||
| Kmatrix = gmfile['Kmatrix'] | |||
| run_time = gmfile['run_time'] | |||
| Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') | |||
| np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
| Kmatrix=Kmatrix) | |||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||
| # Kmatrix = gmfile['Kmatrix'] | |||
| # run_time = gmfile['run_time'] | |||
| # Kmatrix = Kmatrix[[0,1,2,3,4],:] | |||
| # Kmatrix = Kmatrix[:,[0,1,2,3,4]] | |||
| print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||
| # print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||
| dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
| Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||
| # Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
| # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
| # compute pair distances. | |||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
| # Kmatrix=None, gkernel=gkernel, verbose=True) | |||
| # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
| # fitting and computing. | |||
| fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||
| fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] | |||
| for fit_method in fit_methods: | |||
| print('\n-------------------------------------') | |||
| print('fit method:', fit_method) | |||
| parameters = {'ds_name': ds_name, | |||
| 'gkernel': gkernel, | |||
| 'edit_cost_name': 'NON_SYMBOLIC', | |||
| 'edit_cost_name': 'LETTER2', | |||
| 'ged_method': 'mIPFP', | |||
| 'attr_distance': 'euclidean', | |||
| 'fit_method': fit_method} | |||
| xp_fit_method_for_non_symbolic(parameters, save_results=True, | |||
| initial_solutions=1, | |||
| Gn_data=[Gn, y_all, graph_dir], | |||
| k_dis_data=[dis_mat, dis_max, dis_min, dis_mean], | |||
| initial_solutions=40, | |||
| Gn_data = [Gn, y_all, graph_dir], | |||
| k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], | |||
| Kmatrix=Kmatrix) | |||
| @@ -12,17 +12,15 @@ from shutil import copyfile | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | |||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
| from preimage.utils import get_same_item_indices, kernel_distance_matrix | |||
| from preimage.find_best_k import getRelations | |||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
| from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix | |||
| from gklearn.preimage.find_best_k import getRelations | |||
| def xp_letter_h_LETTER2_cost(): | |||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
| dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel') | |||
| @@ -177,11 +175,11 @@ def xp_letter_h_LETTER2_cost(): | |||
| nb_dis_k_gi2gm[2] += 1 | |||
| # save median graphs. | |||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
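The recurring change in these hunks replaces machine-specific absolute paths with paths resolved relative to the module's own directory. A small illustrative helper (hypothetical, not part of the repository) captures the pattern:

import os

def module_relative_path(*parts):
    # Build a path relative to the directory containing this source file,
    # so the script works regardless of the current working directory or machine.
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), *parts)

# For example, the temporary GED outputs could be located as:
# module_relative_path('cpp_ext', 'output', 'tmp_ged', 'set_median.gxl')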
| @@ -243,8 +241,8 @@ def xp_letter_h_LETTER2_cost(): | |||
| def xp_letter_h(): | |||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
| for G in Gn: | |||
| reform_attributes(G) | |||
| @@ -396,11 +394,11 @@ def xp_letter_h(): | |||
| nb_dis_k_gi2gm[2] += 1 | |||
| # save median graphs. | |||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
| @@ -13,16 +13,16 @@ from shutil import copyfile | |||
| import networkx as nx | |||
| import matplotlib.pyplot as plt | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | |||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
| from preimage.utils import get_same_item_indices | |||
| from preimage.find_best_k import getRelations | |||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
| from gklearn.preimage.utils import get_same_item_indices | |||
| from gklearn.preimage.find_best_k import getRelations | |||
| def xp_monoterpenoides(): | |||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
| import os | |||
| ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds', | |||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||
| Gn, y_all = loadDataset(ds['dataset']) | |||
| # ds = {'name': 'Letter-high', | |||
| # 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb | |||
| @@ -169,11 +169,11 @@ def xp_monoterpenoides(): | |||
| nb_dis_k_gi2gm[2] += 1 | |||
| # save median graphs. | |||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | |||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
| + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | |||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
| @@ -15,10 +15,16 @@ def chooseDataset(ds_name): | |||
| ds_file = 'datasets/Alkane/dataset.ds' | |||
| ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt' | |||
| Gn, y = loadDataset(ds_file, filename_y=ds_y) | |||
| for G in Gn: | |||
| for node in G.nodes: | |||
| del G.nodes[node]['attributes'] | |||
| # node symbolic labels. | |||
| elif ds_name == 'Acyclic': | |||
| ds_file = 'datasets/acyclic/dataset_bps.ds' | |||
| Gn, y = loadDataset(ds_file) | |||
| for G in Gn: | |||
| for node in G.nodes: | |||
| del G.nodes[node]['attributes'] | |||
| # node non-symbolic labels. | |||
| elif ds_name == 'Letter-med': | |||
| ds_file = 'datasets/Letter-med/Letter-med_A.txt' | |||
| @@ -27,14 +33,39 @@ def chooseDataset(ds_name): | |||
| elif ds_name == 'AIDS': | |||
| ds_file = 'datasets/AIDS/AIDS_A.txt' | |||
| Gn, y = loadDataset(ds_file) | |||
| # edge non-symbolic labels (no node labels). | |||
| elif ds_name == 'Fingerprint_edge': | |||
| import networkx as nx | |||
| ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' | |||
| Gn, y = loadDataset(ds_file) | |||
| Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] | |||
| idx = [G[0] for G in Gn] | |||
| Gn = [G[1] for G in Gn] | |||
| y = [y[i] for i in idx] | |||
| for G in Gn: | |||
| G.graph['node_attrs'] = [] | |||
| for node in G.nodes: | |||
| del G.nodes[node]['attributes'] | |||
| del G.nodes[node]['x'] | |||
| del G.nodes[node]['y'] | |||
| # edge non-symbolic labels (and node non-symbolic labels). | |||
| elif ds_name == 'Fingerprint': | |||
| import networkx as nx | |||
| ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' | |||
| Gn, y = loadDataset(ds_file) | |||
| Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] | |||
| idx = [G[0] for G in Gn] | |||
| Gn = [G[1] for G in Gn] | |||
| y = [y[i] for i in idx] | |||
| # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels). | |||
| elif ds_name == 'Cuneiform': | |||
| import networkx as nx | |||
| ds_file = 'datasets/Cuneiform/Cuneiform_A.txt' | |||
| Gn, y = loadDataset(ds_file) | |||
| Gn = Gn[0:10] | |||
| y = y[0:10] | |||
| Gn = Gn[0:3] | |||
| y = y[0:3] | |||
| return Gn, y | |||
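The Fingerprint branches above drop graphs that have no edges while keeping the target list aligned via an index list. An equivalent, slightly more compact formulation of the same filtering (function and variable names are illustrative only):

import networkx as nx

def drop_edgeless_graphs(Gn, y):
    # Keep only graphs with at least one edge, and keep the target list aligned.
    kept = [(G, target) for G, target in zip(Gn, y) if nx.number_of_edges(G) > 0]
    Gn_kept = [G for G, _ in kept]
    y_kept = [target for _, target in kept]
    return Gn_kept, y_kept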
| @@ -152,7 +183,7 @@ def test_spkernel(ds_name, parallel): | |||
| #@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS']) | |||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform']) | |||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
| def test_structuralspkernel(ds_name, parallel): | |||
| """Test structural shortest path kernel. | |||
| @@ -246,4 +277,5 @@ def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel): | |||
| if __name__ == "__main__": | |||
| test_spkernel() | |||
| # test_spkernel('Alkane', 'imap_unordered') | |||
| test_structuralspkernel('Fingerprint_edge', 'imap_unordered') | |||
| @@ -753,15 +753,12 @@ if __name__ == '__main__': | |||
| # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb | |||
| # Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
| # saveDataset(Gn, y, group='xml', filename='temp/temp') | |||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
| Gn, y_all = loadDataset(dataset) | |||
| filename = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew/SYNTHETICnew' | |||
| saveDataset(Gn, y_all, gformat='gxl', group='xml', filename=filename) | |||
| # test - new way to add labels and attributes. | |||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Fingerprint/Fingerprint_A.txt' | |||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-med/Letter-med_A.txt' | |||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/AIDS/AIDS_A.txt' | |||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | |||
| # Gn, y_all = loadDataset(dataset) | |||
| # dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
| # dataset = '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
| # dataset = '../../datasets/Letter-med/Letter-med_A.txt' | |||
| # dataset = '../../datasets/AIDS/AIDS_A.txt' | |||
| # dataset = '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | |||
| # Gn, y_all = loadDataset(dataset) | |||
| pass | |||
| @@ -11,7 +11,6 @@ from sklearn.model_selection import KFold, train_test_split, ParameterGrid | |||
| from multiprocessing import Pool, Array | |||
| from functools import partial | |||
| import sys | |||
| sys.path.insert(0, "../") | |||
| import os | |||
| import time | |||
| import datetime | |||
| @@ -74,8 +73,6 @@ def model_selection_for_precomputed_kernel(datafile, | |||
| Examples | |||
| -------- | |||
| >>> import numpy as np | |||
| >>> import sys | |||
| >>> sys.path.insert(0, "../") | |||
| >>> from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel | |||
| >>> from gklearn.kernels.untilHPathKernel import untilhpathkernel | |||
| >>> | |||
| @@ -46466,7 +46466,7 @@ | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.6.8" | |||
| "version": "3.6.9" | |||
| } | |||
| }, | |||
| "nbformat": 4, | |||
| @@ -7,10 +7,6 @@ Created on Tue Jan 7 15:25:36 2020 | |||
| """ | |||
| # draw all the graphs | |||
| import sys | |||
| import pathlib | |||
| sys.path.insert(0, "../../") | |||
| import matplotlib.pyplot as plt | |||
| import networkx as nx | |||
| @@ -19,6 +15,58 @@ from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
| def main(): | |||
| # MUTAG dataset. | |||
| dataset, y = loadDataset("../../datasets/MUTAG/MUTAG_A.txt") | |||
| for idx in [65]:  # alternative index: [6] | |||
| G = dataset[idx] | |||
| for node in G.nodes: | |||
| if G.nodes[node]['atom'] == '0': | |||
| G.nodes[node]['atom'] = 'C' | |||
| elif G.nodes[node]['atom'] == '1': | |||
| G.nodes[node]['atom'] = 'N' | |||
| elif G.nodes[node]['atom'] == '2': | |||
| G.nodes[node]['atom'] = 'O' | |||
| elif G.nodes[node]['atom'] == '3': | |||
| G.nodes[node]['atom'] = 'F' | |||
| elif G.nodes[node]['atom'] == '4': | |||
| G.nodes[node]['atom'] = 'I' | |||
| elif G.nodes[node]['atom'] == '5': | |||
| G.nodes[node]['atom'] = 'Cl' | |||
| elif G.nodes[node]['atom'] == '6': | |||
| G.nodes[node]['atom'] = 'Br' | |||
| ecolors = [] | |||
| for edge in G.edges: | |||
| if G.edges[edge]['bond_type'] == '0': | |||
| ecolors.append('orange') | |||
| elif G.edges[edge]['bond_type'] == '1': | |||
| ecolors.append('r') | |||
| elif G.edges[edge]['bond_type'] == '2': | |||
| ecolors.append('purple') | |||
| elif G.edges[edge]['bond_type'] == '3': | |||
| ecolors.append('orange') | |||
| print(idx) | |||
| print(nx.get_node_attributes(G, 'atom')) | |||
| edge_labels = nx.get_edge_attributes(G, 'bond_type') | |||
| print(edge_labels) | |||
| pos = nx.spring_layout(G) | |||
| nx.draw(G, | |||
| pos, | |||
| node_size=500, | |||
| labels=nx.get_node_attributes(G, 'atom'), | |||
| node_color='blue', | |||
| font_color='w', | |||
| edge_color=ecolors, | |||
| width=3, | |||
| with_labels=True) | |||
| # edge_labels = nx.draw_networkx_edge_labels(G, pos, | |||
| # edge_labels=edge_labels, | |||
| # font_color='pink') | |||
| plt.savefig('mol1_graph.svg', format='svg', dpi=300) | |||
| plt.show() | |||
| plt.clf() | |||
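The if/elif chains above map numeric label codes to element symbols and edge colours. A dictionary-based version of the same mapping is more compact and easier to extend; the mappings are copied from the chains above, while the function name and the grey fallback colour are illustrative additions:

ATOM_SYMBOLS = {'0': 'C', '1': 'N', '2': 'O', '3': 'F', '4': 'I', '5': 'Cl', '6': 'Br'}
BOND_COLORS = {'0': 'orange', '1': 'r', '2': 'purple', '3': 'orange'}

def relabel_mutag_graph(G):
    # G is a networkx graph whose nodes carry an 'atom' code and whose edges
    # carry a 'bond_type' code, as in the MUTAG dataset loaded above.
    for node in G.nodes:
        # Replace the numeric code with the element symbol; leave unknown codes untouched.
        G.nodes[node]['atom'] = ATOM_SYMBOLS.get(G.nodes[node]['atom'], G.nodes[node]['atom'])
    # One colour per edge, falling back to grey for unexpected bond types.
    return [BOND_COLORS.get(G.edges[edge]['bond_type'], 'gray') for edge in G.edges]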
| # # monoterpenoides dataset. | |||
| # dataset, y = loadDataset("../../datasets/monoterpenoides/dataset_10+.ds") | |||
| # for idx in [12,22,29,74]: | |||
| @@ -67,35 +115,35 @@ def main(): | |||
| # draw_Fingerprint_graph(Gn[idx], file_prefix='') | |||
| # SYNTHETIC dataset. | |||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
| Gn, y_all = loadDataset(dataset) | |||
| idx_no_node = [] | |||
| idx_no_edge = [] | |||
| idx_no_both = [] | |||
| for idx, G in enumerate(Gn): | |||
| if nx.number_of_nodes(G) == 0: | |||
| idx_no_node.append(idx) | |||
| if nx.number_of_edges(G) == 0: | |||
| idx_no_both.append(idx) | |||
| if nx.number_of_edges(G) == 0: | |||
| idx_no_edge.append(idx) | |||
| # file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name'] | |||
| # draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True) | |||
| # draw_SYNTHETIC_graph(Gn[idx]) | |||
| print('nb_no_node: ', len(idx_no_node)) | |||
| print('nb_no_edge: ', len(idx_no_edge)) | |||
| print('nb_no_both: ', len(idx_no_both)) | |||
| print('idx_no_node: ', idx_no_node) | |||
| print('idx_no_edge: ', idx_no_edge) | |||
| print('idx_no_both: ', idx_no_both) | |||
| for idx in [0, 10, 100]: | |||
| print(idx) | |||
| print(Gn[idx].nodes(data=True)) | |||
| print(Gn[idx].edges(data=True)) | |||
| draw_SYNTHETIC_graph(Gn[idx], save=None) | |||
| # # SYNTHETIC dataset. | |||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
| # Gn, y_all = loadDataset(dataset) | |||
| # | |||
| # idx_no_node = [] | |||
| # idx_no_edge = [] | |||
| # idx_no_both = [] | |||
| # for idx, G in enumerate(Gn): | |||
| # if nx.number_of_nodes(G) == 0: | |||
| # idx_no_node.append(idx) | |||
| # if nx.number_of_edges(G) == 0: | |||
| # idx_no_both.append(idx) | |||
| # if nx.number_of_edges(G) == 0: | |||
| # idx_no_edge.append(idx) | |||
| ## file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name'] | |||
| ## draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True) | |||
| ## draw_SYNTHETIC_graph(Gn[idx]) | |||
| # print('nb_no_node: ', len(idx_no_node)) | |||
| # print('nb_no_edge: ', len(idx_no_edge)) | |||
| # print('nb_no_both: ', len(idx_no_both)) | |||
| # print('idx_no_node: ', idx_no_node) | |||
| # print('idx_no_edge: ', idx_no_edge) | |||
| # print('idx_no_both: ', idx_no_both) | |||
| # | |||
| # for idx in [0, 10, 100]: | |||
| # print(idx) | |||
| # print(Gn[idx].nodes(data=True)) | |||
| # print(Gn[idx].edges(data=True)) | |||
| # draw_SYNTHETIC_graph(Gn[idx], save=None) | |||
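For reference, the (now commented-out) dataset sanity check above can be expressed as a small standalone helper. This is only a restatement of the logic, under a hypothetical function name:

import networkx as nx

def empty_graph_indices(Gn):
    # Collect indices of graphs with no nodes, no edges, or neither nodes nor edges.
    idx_no_node = [i for i, G in enumerate(Gn) if nx.number_of_nodes(G) == 0]
    idx_no_edge = [i for i, G in enumerate(Gn) if nx.number_of_edges(G) == 0]
    idx_no_both = [i for i in idx_no_node if nx.number_of_edges(Gn[i]) == 0]
    return idx_no_node, idx_no_edge, idx_no_both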
| def plot_a_graph(graph_filename): | |||