| @@ -0,0 +1,2 @@ | |||||
| [run] | |||||
| omit = gklearn/tests/* | |||||
| @@ -15,6 +15,7 @@ datasets/* | |||||
| !datasets/AIDS/ | !datasets/AIDS/ | ||||
| !datasets/monoterpenoides/ | !datasets/monoterpenoides/ | ||||
| !datasets/Fingerprint/*.txt | !datasets/Fingerprint/*.txt | ||||
| !datasets/Cuneiform/*.txt | |||||
| notebooks/results/* | notebooks/results/* | ||||
| notebooks/check_gm/* | notebooks/check_gm/* | ||||
| notebooks/test_parallel/* | notebooks/test_parallel/* | ||||
| @@ -41,3 +42,4 @@ dist/ | |||||
| build/ | build/ | ||||
| .coverage | .coverage | ||||
| htmlcov | |||||
| @@ -22,7 +22,7 @@ install: | |||||
| script: | script: | ||||
| - python setup.py bdist_wheel | - python setup.py bdist_wheel | ||||
| - pytest -v --cov-report term --cov=gklearn gklearn/tests/ | |||||
| - pytest -v --cov-config=.coveragerc --cov-report term --cov=gklearn gklearn/tests/ | |||||
| after_success: | after_success: | ||||
| - codecov | - codecov | ||||
| @@ -0,0 +1,267 @@ | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| 0 | |||||
| 1 | |||||
| 2 | |||||
| 3 | |||||
| 4 | |||||
| 5 | |||||
| 6 | |||||
| 7 | |||||
| 8 | |||||
| 9 | |||||
| 10 | |||||
| 11 | |||||
| 12 | |||||
| 13 | |||||
| 14 | |||||
| 15 | |||||
| 16 | |||||
| 17 | |||||
| 18 | |||||
| 19 | |||||
| 20 | |||||
| 21 | |||||
| 22 | |||||
| 23 | |||||
| 24 | |||||
| 25 | |||||
| 26 | |||||
| 27 | |||||
| 28 | |||||
| 29 | |||||
| @@ -0,0 +1,119 @@ | |||||
| README for dataset Cuneiform | |||||
| === Usage === | |||||
| This folder contains the following comma-separated text files | |||||
| (replace DS by the name of the dataset): | |||||
| n = total number of nodes | |||||
| m = total number of edges | |||||
| N = number of graphs | |||||
| (1) DS_A.txt (m lines) | |||||
| sparse (block diagonal) adjacency matrix for all graphs, | |||||
| each line corresponds to (row, col) resp. (node_id, node_id) | |||||
| (2) DS_graph_indicator.txt (n lines) | |||||
| column vector of graph identifiers for all nodes of all graphs, | |||||
| the value in the i-th line is the graph_id of the node with node_id i | |||||
| (3) DS_graph_labels.txt (N lines) | |||||
| class labels for all graphs in the dataset, | |||||
| the value in the i-th line is the class label of the graph with graph_id i | |||||
| (4) DS_node_labels.txt (n lines) | |||||
| column vector of node labels, | |||||
| the value in the i-th line corresponds to the node with node_id i | |||||
| There are OPTIONAL files if the respective information is available: | |||||
| (5) DS_edge_labels.txt (m lines; same size as DS_A.txt) | |||||
| labels for the edges in DS_A.txt | |||||
| (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt) | |||||
| attributes for the edges in DS_A.txt | |||||
| (7) DS_node_attributes.txt (n lines) | |||||
| matrix of node attributes, | |||||
| the comma-separated values in the i-th line form the attribute vector of the node with node_id i | |||||
| (8) DS_graph_attributes.txt (N lines) | |||||
| regression values for all graphs in the dataset, | |||||
| the value in the i-th line is the attribute of the graph with graph_id i | |||||
| === Description === | |||||
| The Cuneiform dataset contains graphs representing 30 different Hittite cuneiform signs. | |||||
| The data was obtained from nine cuneiform tablets written by scholars of Hittitology in | |||||
| the course of a study on individual characteristics of cuneiform handwriting. | |||||
| After automated extraction of individual wedges, the affiliation of the wedges with the | |||||
| cuneiform signs was determined manually. The graph model is explained in detail in the | |||||
| referenced publication. | |||||
| === References === | |||||
| Nils M. Kriege, Matthias Fey, Denis Fisseler, Petra Mutzel, Frank Weichert | |||||
| Recognizing Cuneiform Signs Using Graph Based Methods. 2018. arXiv:1802.05908 | |||||
| https://arxiv.org/abs/1802.05908 | |||||
| === Description of Labels === | |||||
| Node labels were converted to integer values using this map: | |||||
| Component 0: | |||||
| 0 depthPoint | |||||
| 1 tailVertex | |||||
| 2 leftVertex | |||||
| 3 rightVertex | |||||
| Component 1: | |||||
| 0 vertical | |||||
| 1 Winkelhaken | |||||
| 2 horizontal | |||||
| Edge labels were converted to integer values using this map: | |||||
| Component 0: | |||||
| 0 wedge | |||||
| 1 arrangement | |||||
| Class labels were converted to integer values using this map: | |||||
| 0 tu | |||||
| 1 ta | |||||
| 2 ti | |||||
| 3 nu | |||||
| 4 na | |||||
| 5 ni | |||||
| 6 bu | |||||
| 7 ba | |||||
| 8 bi | |||||
| 9 zu | |||||
| 10 za | |||||
| 11 zi | |||||
| 12 su | |||||
| 13 sa | |||||
| 14 si | |||||
| 15 hu | |||||
| 16 ha | |||||
| 17 hi | |||||
| 18 du | |||||
| 19 da | |||||
| 20 di | |||||
| 21 ru | |||||
| 22 ra | |||||
| 23 ri | |||||
| 24 ku | |||||
| 25 ka | |||||
| 26 ki | |||||
| 27 lu | |||||
| 28 la | |||||
| 29 li | |||||
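The files described above follow the standard TU-style text format, and the .gitignore hunk at the top of this diff un-ignores datasets/Cuneiform/*.txt. gklearn already ships a loader (loadDataset in gklearn.utils.graphfiles, used throughout the code hunks below), so the following is only a minimal hand-rolled sketch of how the files fit together; the load_tud_dataset helper and the 'datasets/Cuneiform/Cuneiform' prefix are illustrative assumptions, not part of the library.

import numpy as np
import networkx as nx

def load_tud_dataset(prefix):
    # Illustrative loader for the TU-style text format described above
    # (prefix would be e.g. 'datasets/Cuneiform/Cuneiform').
    edges = np.loadtxt(prefix + '_A.txt', delimiter=',', dtype=int)            # m lines: (node_id, node_id)
    graph_ind = np.loadtxt(prefix + '_graph_indicator.txt', dtype=int)         # n lines: graph_id of each node
    graph_labels = np.loadtxt(prefix + '_graph_labels.txt', dtype=int)         # N lines: class of each graph
    node_labels = np.loadtxt(prefix + '_node_labels.txt', delimiter=',', dtype=int)  # n lines: node label components

    graphs = [nx.Graph(label=int(y)) for y in graph_labels]
    for node_id, gid in enumerate(graph_ind, start=1):                         # node_ids are 1-based
        graphs[gid - 1].add_node(node_id, label=tuple(np.atleast_1d(node_labels[node_id - 1])))
    for u, v in edges:
        graphs[graph_ind[u - 1] - 1].add_edge(int(u), int(v))
    return graphs

# Class indices decode to sign names following the map above.
CUNEIFORM_CLASSES = ['tu', 'ta', 'ti', 'nu', 'na', 'ni', 'bu', 'ba', 'bi',
                     'zu', 'za', 'zi', 'su', 'sa', 'si', 'hu', 'ha', 'hi',
                     'du', 'da', 'di', 'ru', 'ra', 'ri', 'ku', 'ka', 'ki',
                     'lu', 'la', 'li']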
| @@ -16,7 +16,6 @@ from functools import partial | |||||
| import networkx as nx | import networkx as nx | ||||
| import numpy as np | import numpy as np | ||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.utils import direct_product | from gklearn.utils.utils import direct_product | ||||
| from gklearn.utils.graphdataset import get_dataset_attributes | from gklearn.utils.graphdataset import get_dataset_attributes | ||||
| from gklearn.utils.parallel import parallel_gm | from gklearn.utils.parallel import parallel_gm | ||||
| @@ -28,7 +28,6 @@ from gklearn.utils.kernels import deltakernel | |||||
| from gklearn.utils.utils import untotterTransformation | from gklearn.utils.utils import untotterTransformation | ||||
| from gklearn.utils.graphdataset import get_dataset_attributes | from gklearn.utils.graphdataset import get_dataset_attributes | ||||
| from gklearn.utils.parallel import parallel_gm | from gklearn.utils.parallel import parallel_gm | ||||
| sys.path.insert(0, "../") | |||||
| def marginalizedkernel(*args, | def marginalizedkernel(*args, | ||||
| @@ -6,8 +6,6 @@ | |||||
| [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | ||||
| """ | """ | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| import time | import time | ||||
| from functools import partial | from functools import partial | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| @@ -20,7 +20,6 @@ import numpy as np | |||||
| from gklearn.utils.utils import getSPGraph | from gklearn.utils.utils import getSPGraph | ||||
| from gklearn.utils.graphdataset import get_dataset_attributes | from gklearn.utils.graphdataset import get_dataset_attributes | ||||
| from gklearn.utils.parallel import parallel_gm | from gklearn.utils.parallel import parallel_gm | ||||
| sys.path.insert(0, "../") | |||||
| def spkernel(*args, | def spkernel(*args, | ||||
| node_label='atom', | node_label='atom', | ||||
| @@ -25,8 +25,6 @@ from gklearn.utils.graphdataset import get_dataset_attributes | |||||
| from gklearn.utils.parallel import parallel_gm | from gklearn.utils.parallel import parallel_gm | ||||
| from gklearn.utils.trie import Trie | from gklearn.utils.trie import Trie | ||||
| sys.path.insert(0, "../") | |||||
| def structuralspkernel(*args, | def structuralspkernel(*args, | ||||
| node_label='atom', | node_label='atom', | ||||
| @@ -8,7 +8,6 @@ | |||||
| """ | """ | ||||
| import sys | import sys | ||||
| sys.path.insert(0, "../") | |||||
| import time | import time | ||||
| from collections import Counter | from collections import Counter | ||||
| from itertools import chain | from itertools import chain | ||||
| @@ -9,7 +9,6 @@ | |||||
| """ | """ | ||||
| import sys | import sys | ||||
| sys.path.insert(0, "../") | |||||
| import time | import time | ||||
| from collections import Counter | from collections import Counter | ||||
| from itertools import chain | from itertools import chain | ||||
| @@ -10,7 +10,6 @@ | |||||
| import sys | import sys | ||||
| from collections import Counter | from collections import Counter | ||||
| sys.path.insert(0, "../") | |||||
| from functools import partial | from functools import partial | ||||
| import time | import time | ||||
| #from multiprocessing import Pool | #from multiprocessing import Pool | ||||
| @@ -9,10 +9,8 @@ import numpy as np | |||||
| import random | import random | ||||
| import csv | import csv | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs | |||||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs | |||||
| def find_best_k(): | def find_best_k(): | ||||
| ds = {'name': 'monoterpenoides', | ds = {'name': 'monoterpenoides', | ||||
| @@ -13,15 +13,14 @@ from multiprocessing import Pool | |||||
| from functools import partial | from functools import partial | ||||
| import time | import time | ||||
| import random | import random | ||||
| import sys | |||||
| from scipy import optimize | from scipy import optimize | ||||
| from scipy.optimize import minimize | from scipy.optimize import minimize | ||||
| import cvxpy as cp | import cvxpy as cp | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic | |||||
| from preimage.utils import kernel_distance_matrix | |||||
| from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic | |||||
| from gklearn.preimage.utils import kernel_distance_matrix | |||||
| def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, | def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, | ||||
| params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', | params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', | ||||
| @@ -128,12 +128,10 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method | |||||
| elif lib == 'gedlib-bash': | elif lib == 'gedlib-bash': | ||||
| import time | import time | ||||
| import random | import random | ||||
| import sys | |||||
| import os | import os | ||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import saveDataset | from gklearn.utils.graphfiles import saveDataset | ||||
| tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/' | |||||
| tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' | |||||
| if not os.path.exists(tmp_dir): | if not os.path.exists(tmp_dir): | ||||
| os.makedirs(tmp_dir) | os.makedirs(tmp_dir) | ||||
| fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | ||||
| @@ -144,7 +142,7 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method | |||||
| command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n' | command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n' | ||||
| command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | ||||
| command += 'export LD_LIBRARY_PATH\n' | command += 'export LD_LIBRARY_PATH\n' | ||||
| command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||||
| command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' | |||||
| command += './ged_for_python_bash monoterpenoides ' + fn_collection \ | command += './ged_for_python_bash monoterpenoides ' + fn_collection \ | ||||
| + ' \'' + algo_options + '\' ' | + ' \'' + algo_options + '\' ' | ||||
| for ec in edit_cost_constant: | for ec in edit_cost_constant: | ||||
| @@ -11,11 +11,9 @@ import random | |||||
| import networkx as nx | import networkx as nx | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphdataset import get_dataset_attributes | from gklearn.utils.graphdataset import get_dataset_attributes | ||||
| from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels | from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels | ||||
| from ged import GED, ged_median | |||||
| from gklearn.preimage.ged import GED, ged_median | |||||
| def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | ||||
| @@ -438,7 +436,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||||
| def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | ||||
| dataset='monoterpenoides', | dataset='monoterpenoides', | ||||
| graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'): | |||||
| graph_dir=''): | |||||
| """Compute the iam by c++ implementation (gedlib) through bash. | """Compute the iam by c++ implementation (gedlib) through bash. | ||||
| """ | """ | ||||
| import os | import os | ||||
| @@ -462,18 +460,18 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||||
| fgroup.write("\n</GraphCollection>") | fgroup.write("\n</GraphCollection>") | ||||
| fgroup.close() | fgroup.close() | ||||
| tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/' | |||||
| tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' | |||||
| fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | ||||
| createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection) | createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection) | ||||
| # fn_collection = tmp_dir + 'collection_for_debug' | # fn_collection = tmp_dir + 'collection_for_debug' | ||||
| # graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl' | |||||
| # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl' | |||||
| # if dataset == 'Letter-high' or dataset == 'Fingerprint': | # if dataset == 'Letter-high' or dataset == 'Fingerprint': | ||||
| # dataset = 'letter' | # dataset = 'letter' | ||||
| command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n' | command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n' | ||||
| command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | ||||
| command += 'export LD_LIBRARY_PATH\n' | command += 'export LD_LIBRARY_PATH\n' | ||||
| command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||||
| command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' | |||||
| command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \ | command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \ | ||||
| + ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' ' | + ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' ' | ||||
| if edit_cost_constant is None: | if edit_cost_constant is None: | ||||
| @@ -489,8 +487,8 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||||
| sod_sm = float(output[0].strip()) | sod_sm = float(output[0].strip()) | ||||
| sod_gm = float(output[1].strip()) | sod_gm = float(output[1].strip()) | ||||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||||
| return sod_sm, sod_gm, fname_sm, fname_gm | return sod_sm, sod_gm, fname_sm, fname_gm | ||||
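The hunks above (and several more below) swap machine-specific absolute paths for paths derived from the module's own location. A minimal sketch of that pattern, assuming it runs inside a module so that __file__ is defined; the cpp_ext/output/tmp_ged directory name is taken from the hunk above.

import os

# Directory of the current source file, independent of the caller's working directory.
module_dir = os.path.dirname(os.path.realpath(__file__))

# Equivalent of the tmp_dir built above by string concatenation (minus the trailing slash).
tmp_dir = os.path.join(module_dir, 'cpp_ext', 'output', 'tmp_ged')
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)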
| @@ -11,14 +11,12 @@ from tqdm import tqdm | |||||
| import random | import random | ||||
| #import csv | #import csv | ||||
| from shutil import copyfile | from shutil import copyfile | ||||
| import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from preimage.iam import iam_bash | |||||
| from gklearn.preimage.iam import iam_bash | |||||
| from gklearn.utils.graphfiles import loadDataset, loadGXL | from gklearn.utils.graphfiles import loadDataset, loadGXL | ||||
| from preimage.ged import GED | |||||
| from preimage.utils import get_same_item_indices | |||||
| from gklearn.preimage.ged import GED | |||||
| from gklearn.preimage.utils import get_same_item_indices | |||||
| def test_knn(): | def test_knn(): | ||||
| ds = {'name': 'monoterpenoides', | ds = {'name': 'monoterpenoides', | ||||
| @@ -30,7 +28,7 @@ def test_knn(): | |||||
| # edge_label = 'bond_type' | # edge_label = 'bond_type' | ||||
| # ds_name = 'mono' | # ds_name = 'mono' | ||||
| dir_output = 'results/knn/' | dir_output = 'results/knn/' | ||||
| graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/' | |||||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/' | |||||
| k_nn = 1 | k_nn = 1 | ||||
| percent = 0.1 | percent = 0.1 | ||||
| @@ -2,5 +2,5 @@ import sys | |||||
| import pathlib | import pathlib | ||||
| # insert gedlibpy library. | # insert gedlibpy library. | ||||
| sys.path.insert(0, "../../") | |||||
| sys.path.insert(0, "../../../") | |||||
| from gedlibpy import librariesImport, gedlibpy | from gedlibpy import librariesImport, gedlibpy | ||||
| @@ -14,10 +14,7 @@ from tqdm import tqdm | |||||
| import networkx as nx | import networkx as nx | ||||
| import matplotlib.pyplot as plt | import matplotlib.pyplot as plt | ||||
| sys.path.insert(0, "../") | |||||
| from utils import compute_kernel, dis_gstar | |||||
| from gklearn.preimage.utils import compute_kernel, dis_gstar | |||||
| def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): | def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): | ||||
| @@ -52,8 +52,6 @@ def convertGraph(G): | |||||
| def testNxGrapĥ(): | def testNxGrapĥ(): | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | ||||
| 'extra_params': {}} # node/edge symb | 'extra_params': {}} # node/edge symb | ||||
| @@ -9,12 +9,10 @@ from matplotlib import pyplot as plt | |||||
| import numpy as np | import numpy as np | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from utils import remove_edges | |||||
| from fitDistance import fit_GED_to_kernel_distance | |||||
| from utils import normalize_distance_matrix | |||||
| from gklearn.preimage.utils import remove_edges | |||||
| from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance | |||||
| from gklearn.preimage.utils import normalize_distance_matrix | |||||
| def test_update_costs(): | def test_update_costs(): | ||||
| @@ -63,7 +61,7 @@ def median_paper_clcpc_python_best(): | |||||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | ||||
| repeats = 50 | repeats = 50 | ||||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||||
| graph_dir = collection_path + 'gxl/' | graph_dir = collection_path + 'gxl/' | ||||
| fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt' | fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt' | ||||
| @@ -160,7 +158,7 @@ def median_paper_clcpc_python_bash_cpp(): | |||||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | ||||
| repeats = 50 | repeats = 50 | ||||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||||
| graph_dir = collection_path + 'gxl/' | graph_dir = collection_path + 'gxl/' | ||||
| fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt' | fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt' | ||||
| @@ -14,13 +14,11 @@ import sys | |||||
| def test_NON_SYMBOLIC_cost(): | def test_NON_SYMBOLIC_cost(): | ||||
| """Test edit cost LETTER2. | """Test edit cost LETTER2. | ||||
| """ | """ | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter | |||||
| from preimage.test_k_closest_graphs import reform_attributes | |||||
| from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter | |||||
| from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt' | |||||
| dataset = '../../datasets/Letter-high/Letter-high_A.txt' | |||||
| Gn, y_all = loadDataset(dataset) | Gn, y_all = loadDataset(dataset) | ||||
| g1 = Gn[200] | g1 = Gn[200] | ||||
| @@ -53,14 +51,12 @@ def test_NON_SYMBOLIC_cost(): | |||||
| def test_LETTER2_cost(): | def test_LETTER2_cost(): | ||||
| """Test edit cost LETTER2. | """Test edit cost LETTER2. | ||||
| """ | """ | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from preimage.ged import GED, get_nb_edit_operations_letter | |||||
| from preimage.test_k_closest_graphs import reform_attributes | |||||
| from gklearn.preimage.ged import GED, get_nb_edit_operations_letter | |||||
| from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | ||||
| g1 = Gn[200] | g1 = Gn[200] | ||||
| @@ -96,14 +92,12 @@ def test_get_nb_edit_operations_letter(): | |||||
| should be the same as the cost computed by number of operations and edit | should be the same as the cost computed by number of operations and edit | ||||
| cost constants. | cost constants. | ||||
| """ | """ | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from preimage.ged import GED, get_nb_edit_operations_letter | |||||
| from preimage.test_k_closest_graphs import reform_attributes | |||||
| from gklearn.preimage.ged import GED, get_nb_edit_operations_letter | |||||
| from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | ||||
| g1 = Gn[200] | g1 = Gn[200] | ||||
| @@ -136,13 +130,12 @@ def test_get_nb_edit_operations(): | |||||
| numbers of edit operations. The distance/cost computed by GED should be the | numbers of edit operations. The distance/cost computed by GED should be the | ||||
| same as the cost computed by number of operations and edit cost constants. | same as the cost computed by number of operations and edit cost constants. | ||||
| """ | """ | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from preimage.ged import GED, get_nb_edit_operations | |||||
| from gklearn.preimage.ged import GED, get_nb_edit_operations | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| import os | |||||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||||
| ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset']) | Gn, y_all = loadDataset(ds['dataset']) | ||||
| g1 = Gn[20] | g1 = Gn[20] | ||||
| @@ -173,11 +166,10 @@ def test_get_nb_edit_operations(): | |||||
| def test_ged_python_bash_cpp(): | def test_ged_python_bash_cpp(): | ||||
| """Test ged computation with python invoking the c++ code by bash command (with updated library). | """Test ged computation with python invoking the c++ code by bash command (with updated library). | ||||
| """ | """ | ||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from preimage.ged import GED | |||||
| from gklearn.preimage.ged import GED | |||||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||||
| # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | ||||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | ||||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | ||||
| @@ -233,7 +225,7 @@ def test_ged_best_settings_updated(): | |||||
| """Test ged computation with best settings the same as in the C++ code (with updated library). | """Test ged computation with best settings the same as in the C++ code (with updated library). | ||||
| """ | """ | ||||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | ||||
| # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | # collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | ||||
| @@ -292,7 +284,7 @@ def test_ged_best_settings(): | |||||
| """Test ged computation with best settings the same as in the C++ code. | """Test ged computation with best settings the same as in the C++ code. | ||||
| """ | """ | ||||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | ||||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | ||||
| @@ -350,7 +342,7 @@ def test_ged_default(): | |||||
| """Test ged computation with default settings. | """Test ged computation with default settings. | ||||
| """ | """ | ||||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | ||||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | ||||
| @@ -404,11 +396,10 @@ def test_ged_default(): | |||||
| def test_ged_min(): | def test_ged_min(): | ||||
| """Test ged computation with the "min" stabilizer. | """Test ged computation with the "min" stabilizer. | ||||
| """ | """ | ||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from preimage.ged import GED | |||||
| from gklearn.preimage.ged import GED | |||||
| data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||||
| data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||||
| collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | ||||
| graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | ||||
| @@ -487,8 +478,6 @@ def convertGraph(G): | |||||
| def testNxGrapĥ(): | def testNxGrapĥ(): | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | ||||
| 'extra_params': {}} # node/edge symb | 'extra_params': {}} # node/edge symb | ||||
| @@ -13,14 +13,11 @@ import time | |||||
| import random | import random | ||||
| #from tqdm import tqdm | #from tqdm import tqdm | ||||
| #import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| #from gklearn.utils.logger2file import * | #from gklearn.utils.logger2file import * | ||||
| from iam import iam_upgraded | |||||
| from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar | |||||
| #from ged import ged_median | |||||
| from gklearn.preimage.iam import iam_upgraded | |||||
| from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar | |||||
| #from gklearn.preimage.ged import ged_median | |||||
| def test_iam_monoterpenoides_with_init40(): | def test_iam_monoterpenoides_with_init40(): | ||||
| @@ -52,7 +49,7 @@ def test_iam_monoterpenoides_with_init40(): | |||||
| 'stabilizer': ged_stabilizer} | 'stabilizer': ged_stabilizer} | ||||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||||
| graph_dir = collection_path + 'gxl/' | graph_dir = collection_path + 'gxl/' | ||||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | ||||
| repeats = 50 | repeats = 50 | ||||
| @@ -17,15 +17,12 @@ import multiprocessing | |||||
| from multiprocessing import Pool | from multiprocessing import Pool | ||||
| from functools import partial | from functools import partial | ||||
| #import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset, loadGXL | from gklearn.utils.graphfiles import loadDataset, loadGXL | ||||
| #from gklearn.utils.logger2file import * | #from gklearn.utils.logger2file import * | ||||
| from iam import iam_upgraded, iam_bash | |||||
| from utils import compute_kernel, dis_gstar, kernel_distance_matrix | |||||
| from fitDistance import fit_GED_to_kernel_distance | |||||
| #from ged import ged_median | |||||
| from gklearn.preimage.iam import iam_upgraded, iam_bash | |||||
| from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix | |||||
| from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance | |||||
| #from gklearn.preimage.ged import ged_median | |||||
| def fit_edit_cost_constants(fit_method, edit_cost_name, | def fit_edit_cost_constants(fit_method, edit_cost_name, | ||||
| @@ -204,6 +201,8 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho | |||||
| if Kmatrix is not None: | if Kmatrix is not None: | ||||
| Kmatrix_median = np.copy(Kmatrix[group_min,:]) | Kmatrix_median = np.copy(Kmatrix[group_min,:]) | ||||
| Kmatrix_median = Kmatrix_median[:,group_min] | Kmatrix_median = Kmatrix_median[:,group_min] | ||||
| else: | |||||
| Kmatrix_median = None | |||||
| # 1. fit edit cost constants. | # 1. fit edit cost constants. | ||||
| @@ -379,7 +378,7 @@ def test_k_closest_graphs_with_cv(): | |||||
| y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | ||||
| repeats = 50 | repeats = 50 | ||||
| collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||||
| collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||||
| graph_dir = collection_path + 'gxl/' | graph_dir = collection_path + 'gxl/' | ||||
| sod_sm_list = [] | sod_sm_list = [] | ||||
| @@ -11,12 +11,10 @@ import matplotlib.pyplot as plt | |||||
| import time | import time | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from median import draw_Letter_graph | |||||
| from ged import GED, ged_median | |||||
| from utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||||
| from gklearn.preimage.median import draw_Letter_graph | |||||
| from gklearn.preimage.ged import GED, ged_median | |||||
| from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||||
| dis_gstar, remove_edges | dis_gstar, remove_edges | ||||
| @@ -13,14 +13,11 @@ import time | |||||
| import random | import random | ||||
| #from tqdm import tqdm | #from tqdm import tqdm | ||||
| #import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from utils import remove_edges, compute_kernel, get_same_item_indices | |||||
| from ged import ged_median | |||||
| from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices | |||||
| from gklearn.preimage.ged import ged_median | |||||
| from preimage_iam import preimage_iam | |||||
| from gklearn.preimage.preimage_iam import preimage_iam | |||||
| ############################################################################### | ############################################################################### | ||||
| @@ -13,13 +13,10 @@ import time | |||||
| import random | import random | ||||
| #from tqdm import tqdm | #from tqdm import tqdm | ||||
| #import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from ged import ged_median | |||||
| from utils import compute_kernel, get_same_item_indices, remove_edges | |||||
| from preimage_iam import preimage_iam_random_mix | |||||
| from gklearn.preimage.ged import ged_median | |||||
| from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges | |||||
| from gklearn.preimage.preimage_iam import preimage_iam_random_mix | |||||
| ############################################################################### | ############################################################################### | ||||
| # tests on different values on grid of median-sets and k. | # tests on different values on grid of median-sets and k. | ||||
| @@ -13,14 +13,10 @@ import time | |||||
| import random | import random | ||||
| #from tqdm import tqdm | #from tqdm import tqdm | ||||
| #import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset | from gklearn.utils.graphfiles import loadDataset | ||||
| from preimage_random import preimage_random | |||||
| from ged import ged_median | |||||
| from utils import compute_kernel, get_same_item_indices, remove_edges | |||||
| from gklearn.preimage.preimage_random import preimage_random | |||||
| from gklearn.preimage.ged import ged_median | |||||
| from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges | |||||
| ############################################################################### | ############################################################################### | ||||
| @@ -11,8 +11,6 @@ Useful functions. | |||||
| import multiprocessing | import multiprocessing | ||||
| import numpy as np | import numpy as np | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.kernels.marginalizedKernel import marginalizedkernel | from gklearn.kernels.marginalizedKernel import marginalizedkernel | ||||
| from gklearn.kernels.untilHPathKernel import untilhpathkernel | from gklearn.kernels.untilHPathKernel import untilhpathkernel | ||||
| from gklearn.kernels.spKernel import spkernel | from gklearn.kernels.spKernel import spkernel | ||||
| @@ -41,7 +39,7 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||||
| return np.sqrt(term1 - term2 + term3) | return np.sqrt(term1 - term2 + term3) | ||||
| def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||||
| def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'): | |||||
| if graph_kernel == 'marginalizedkernel': | if graph_kernel == 'marginalizedkernel': | ||||
| Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label, | Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label, | ||||
| p_quit=0.03, n_iteration=10, remove_totters=False, | p_quit=0.03, n_iteration=10, remove_totters=False, | ||||
| @@ -49,6 +47,7 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||||
| elif graph_kernel == 'untilhpathkernel': | elif graph_kernel == 'untilhpathkernel': | ||||
| Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label, | Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label, | ||||
| depth=7, k_func='MinMax', compute_method='trie', | depth=7, k_func='MinMax', compute_method='trie', | ||||
| parallel=parallel, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| elif graph_kernel == 'spkernel': | elif graph_kernel == 'spkernel': | ||||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | ||||
| @@ -66,18 +65,18 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||||
| Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, | Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, | ||||
| edge_label=edge_label, node_kernels=sub_kernels, | edge_label=edge_label, node_kernels=sub_kernels, | ||||
| edge_kernels=sub_kernels, | edge_kernels=sub_kernels, | ||||
| parallel=None, n_jobs=multiprocessing.cpu_count(), | |||||
| parallel=parallel, n_jobs=multiprocessing.cpu_count(), | |||||
| verbose=verbose) | verbose=verbose) | ||||
| elif graph_kernel == 'treeletkernel': | elif graph_kernel == 'treeletkernel': | ||||
| pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | ||||
| # pkernel = functools.partial(gaussiankernel, gamma=1e-6) | # pkernel = functools.partial(gaussiankernel, gamma=1e-6) | ||||
| mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | ||||
| Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label, | Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label, | ||||
| sub_kernel=pkernel, | |||||
| sub_kernel=pkernel, parallel=parallel, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| elif graph_kernel == 'weisfeilerlehmankernel': | elif graph_kernel == 'weisfeilerlehmankernel': | ||||
| Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | ||||
| height=4, base_kernel='subtree', | |||||
| height=4, base_kernel='subtree', parallel=None, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| # normalization | # normalization | ||||
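The hunk above adds a parallel argument to compute_kernel and forwards it to the until-h-path, structural shortest-path and treelet kernels. A hedged usage sketch follows: the gklearn.preimage.utils module path is inferred from the imports elsewhere in this diff, the MUTAG path is assumed relative to the caller's working directory, and treating the return value as a single (normalized) Gram matrix is an assumption based on the normalization step above.

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.utils import compute_kernel

# Load a small symbolic dataset (path assumed relative to the working directory).
Gn, y_all = loadDataset('../datasets/MUTAG/MUTAG_A.txt')

# Compute the (assumed normalized) Gram matrix, parallelized with imap_unordered.
Kmatrix = compute_kernel(Gn[:20], 'untilhpathkernel',
                         node_label='atom', edge_label='bond_type',
                         verbose=True, parallel='imap_unordered')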
| @@ -11,11 +11,8 @@ import matplotlib.pyplot as plt | |||||
| from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset | from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset | ||||
| from tqdm import tqdm | from tqdm import tqdm | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset, loadGXL | from gklearn.utils.graphfiles import loadDataset, loadGXL | ||||
| from utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices | |||||
| from gklearn.preimage.utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices | |||||
| def visualize_graph_dataset(dis_measure, visual_method, draw_figure, | def visualize_graph_dataset(dis_measure, visual_method, draw_figure, | ||||
| @@ -115,11 +112,11 @@ def visualize_distances_in_kernel(): | |||||
| # Gn = Gn[0:50] | # Gn = Gn[0:50] | ||||
| fname_medians = 'expert.treelet' | fname_medians = 'expert.treelet' | ||||
| # add set median. | # add set median. | ||||
| fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||||
| fname_sm = 'results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||||
| set_median = loadGXL(fname_sm) | set_median = loadGXL(fname_sm) | ||||
| Gn.append(set_median) | Gn.append(set_median) | ||||
| # add generalized median (estimated pre-image.) | # add generalized median (estimated pre-image.) | ||||
| fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||||
| fname_gm = 'results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||||
| gen_median = loadGXL(fname_gm) | gen_median = loadGXL(fname_gm) | ||||
| Gn.append(gen_median) | Gn.append(gen_median) | ||||
| @@ -166,19 +163,19 @@ def visualize_distances_in_kernel(): | |||||
| def visualize_distances_in_ged(): | def visualize_distances_in_ged(): | ||||
| from fitDistance import compute_geds | |||||
| from ged import GED | |||||
| from gklearn.preimage.fitDistance import compute_geds | |||||
| from gklearn.preimage.ged import GED | |||||
| ds = {'name': 'monoterpenoides', | ds = {'name': 'monoterpenoides', | ||||
| 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb | 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb | ||||
| Gn, y_all = loadDataset(ds['dataset']) | Gn, y_all = loadDataset(ds['dataset']) | ||||
| # Gn = Gn[0:50] | # Gn = Gn[0:50] | ||||
| # add set median. | # add set median. | ||||
| fname_medians = 'expert.treelet' | fname_medians = 'expert.treelet' | ||||
| fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||||
| fname_sm = 'preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||||
| set_median = loadGXL(fname_sm) | set_median = loadGXL(fname_sm) | ||||
| Gn.append(set_median) | Gn.append(set_median) | ||||
| # add generalized median (estimated pre-image.) | # add generalized median (estimated pre-image.) | ||||
| fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||||
| fname_gm = 'preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||||
| gen_median = loadGXL(fname_gm) | gen_median = loadGXL(fname_gm) | ||||
| Gn.append(gen_median) | Gn.append(gen_median) | ||||
| @@ -227,9 +224,10 @@ def visualize_distances_in_ged(): | |||||
| def visualize_distances_in_kernel_monoterpenoides(): | def visualize_distances_in_kernel_monoterpenoides(): | ||||
| import os | |||||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | ||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||||
| Gn_original, y_all = loadDataset(ds['dataset']) | Gn_original, y_all = loadDataset(ds['dataset']) | ||||
| # Gn = Gn[0:50] | # Gn = Gn[0:50] | ||||
| @@ -301,11 +299,12 @@ def visualize_distances_in_kernel_monoterpenoides(): | |||||
| def visualize_distances_in_ged_monoterpenoides(): | def visualize_distances_in_ged_monoterpenoides(): | ||||
| from fitDistance import compute_geds | |||||
| from ged import GED | |||||
| from gklearn.preimage.fitDistance import compute_geds | |||||
| from gklearn.preimage.ged import GED | |||||
| import os | |||||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | ||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||||
| Gn_original, y_all = loadDataset(ds['dataset']) | Gn_original, y_all = loadDataset(ds['dataset']) | ||||
| # Gn = Gn[0:50] | # Gn = Gn[0:50] | ||||
| @@ -379,8 +378,8 @@ def visualize_distances_in_ged_monoterpenoides(): | |||||
| def visualize_distances_in_kernel_letter_h(): | def visualize_distances_in_kernel_letter_h(): | ||||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | ||||
| # Gn = Gn[0:50] | # Gn = Gn[0:50] | ||||
| @@ -455,8 +454,8 @@ def visualize_distances_in_ged_letter_h(): | |||||
| from fitDistance import compute_geds | from fitDistance import compute_geds | ||||
| from preimage.test_k_closest_graphs import reform_attributes | from preimage.test_k_closest_graphs import reform_attributes | ||||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | ||||
| # Gn = Gn[0:50] | # Gn = Gn[0:50] | ||||
| @@ -11,35 +11,37 @@ import csv | |||||
| from shutil import copyfile | from shutil import copyfile | ||||
| import networkx as nx | import networkx as nx | ||||
| import matplotlib.pyplot as plt | import matplotlib.pyplot as plt | ||||
| import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | ||||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||||
| from preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel | |||||
| from preimage.find_best_k import getRelations | |||||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||||
| from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel | |||||
| from gklearn.preimage.find_best_k import getRelations | |||||
| def get_dataset(ds_name): | def get_dataset(ds_name): | ||||
| if ds_name == 'Letter-high': # node non-symb | if ds_name == 'Letter-high': # node non-symb | ||||
| dataset = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml' | |||||
| graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/' | |||||
| dataset = 'cpp_ext/data/collections/Letter.xml' | |||||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' | |||||
| Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | ||||
| for G in Gn: | for G in Gn: | ||||
| reform_attributes(G) | reform_attributes(G) | ||||
| elif ds_name == 'Fingerprint': | elif ds_name == 'Fingerprint': | ||||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/collections/Fingerprint.xml' | |||||
| graph_dir = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/datasets/Fingerprint/data/' | |||||
| Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||||
| for G in Gn: | |||||
| reform_attributes(G) | |||||
| # dataset = 'cpp_ext/data/collections/Fingerprint.xml' | |||||
| # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/' | |||||
| # Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||||
| # for G in Gn: | |||||
| # reform_attributes(G) | |||||
| dataset = '../../datasets/Fingerprint/Fingerprint_A.txt' | |||||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/' | |||||
| Gn, y_all = loadDataset(dataset) | |||||
| elif ds_name == 'SYNTHETIC': | elif ds_name == 'SYNTHETIC': | ||||
| pass | pass | ||||
| elif ds_name == 'SYNTHETICnew': | elif ds_name == 'SYNTHETICnew': | ||||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||||
| graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew' | |||||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt' | |||||
| # graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/' | |||||
| dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||||
| graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew' | |||||
| # dataset = '../../datasets/Letter-high/Letter-high_A.txt' | |||||
| # graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' | |||||
| Gn, y_all = loadDataset(dataset) | Gn, y_all = loadDataset(dataset) | ||||
| elif ds_name == 'Synthie': | elif ds_name == 'Synthie': | ||||
| pass | pass | ||||
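The hunk above swaps machine-specific absolute paths for paths resolved relative to the module itself, so the script no longer depends on one particular checkout location. A minimal sketch of that idiom (the subdirectory names mirror the ones used above):

    import os

    # Directory containing this module, independent of the current working directory.
    module_dir = os.path.dirname(os.path.realpath(__file__))

    # Data location built relative to the module rather than hard-coded.
    graph_dir = os.path.join(module_dir, 'cpp_ext', 'data', 'datasets', 'Letter', 'HIGH')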
| @@ -184,6 +186,8 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti | |||||
| if Kmatrix is not None: | if Kmatrix is not None: | ||||
| Kmatrix_sub = Kmatrix[values,:] | Kmatrix_sub = Kmatrix[values,:] | ||||
| Kmatrix_sub = Kmatrix_sub[:,values] | Kmatrix_sub = Kmatrix_sub[:,values] | ||||
| else: | |||||
| Kmatrix_sub = None | |||||
| for repeat in range(repeats): | for repeat in range(repeats): | ||||
| print('\nrepeat =', repeat) | print('\nrepeat =', repeat) | ||||
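The added else branch guarantees that Kmatrix_sub is defined even when no precomputed Gram matrix is passed in. For reference, the two-step row/column selection above is equivalent to a single np.ix_ call; a self-contained check with made-up values:

    import numpy as np

    Kmatrix = np.arange(16, dtype=float).reshape(4, 4)  # toy Gram matrix
    values = [0, 2]                                      # indices of one class

    Kmatrix_sub = Kmatrix[values, :]      # keep the selected rows
    Kmatrix_sub = Kmatrix_sub[:, values]  # then the selected columns

    assert np.array_equal(Kmatrix_sub, Kmatrix[np.ix_(values, values)])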
| @@ -273,11 +277,11 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti | |||||
| nb_dis_k_gi2gm[2] += 1 | nb_dis_k_gi2gm[2] += 1 | ||||
| # save median graphs. | # save median graphs. | ||||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | ||||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | copyfile(fname_sm, fn_pre_sm_new + '.gxl') | ||||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | ||||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | copyfile(fname_gm, fn_pre_gm_new + '.gxl') | ||||
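The temporary set/gen median files written by the GED backend are now located relative to the module and copied to result files whose names encode the experiment settings. A sketch of that naming scheme (the fit_method, k, y and repeat values below are placeholders, not taken from a real run):

    import os
    from shutil import copyfile

    fit_method, k, y, repeat = 'expert', 5, 0, 0   # placeholder settings
    dir_output = 'results/xp_fit_method/'          # assumed output root

    module_dir = os.path.dirname(os.path.realpath(__file__))
    fname_sm = os.path.join(module_dir, 'cpp_ext', 'output', 'tmp_ged', 'set_median.gxl')
    fn_new = os.path.join(dir_output, 'medians',
                          f'set_median.{fit_method}.k{int(k)}.y{y}.repeat{repeat}.gxl')
    # copyfile(fname_sm, fn_new)  # enable once the temporary median actually exists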
| @@ -427,63 +431,101 @@ if __name__ == "__main__": | |||||
| # initial_solutions=40, | # initial_solutions=40, | ||||
| # Gn_data = [Gn, y_all, graph_dir], | # Gn_data = [Gn, y_all, graph_dir], | ||||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | ||||
| # #### xp 3: Fingerprint, sspkernel, using LETTER2. | |||||
| # #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC. | |||||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz') | |||||
| # Kmatrix = gmfile['Kmatrix'] | |||||
| # run_time = gmfile['run_time'] | |||||
| # # normalization | |||||
| # Kmatrix_diag = Kmatrix.diagonal().copy() | |||||
| # for i in range(len(Kmatrix)): | |||||
| # for j in range(i, len(Kmatrix)): | |||||
| # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||||
| ## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||||
| ## Kmatrix=Kmatrix, run_time=run_time) | |||||
| # # load dataset. | # # load dataset. | ||||
| # print('getting dataset and computing kernel distance matrix first...') | # print('getting dataset and computing kernel distance matrix first...') | ||||
| # ds_name = 'Fingerprint' | |||||
| # ds_name = 'SYNTHETICnew' | |||||
| # gkernel = 'structuralspkernel' | # gkernel = 'structuralspkernel' | ||||
| # Gn, y_all, graph_dir = get_dataset(ds_name) | # Gn, y_all, graph_dir = get_dataset(ds_name) | ||||
| # # remove graphs without nodes and edges. | # # remove graphs without nodes and edges. | ||||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0 | |||||
| # and nx.number_of_edges(G) != 0)] | # and nx.number_of_edges(G) != 0)] | ||||
| # idx = [G[0] for G in Gn] | # idx = [G[0] for G in Gn] | ||||
| # Gn = [G[1] for G in Gn] | # Gn = [G[1] for G in Gn] | ||||
| # y_all = [y_all[i] for i in idx] | # y_all = [y_all[i] for i in idx] | ||||
| ## Gn = Gn[0:50] | |||||
| ## y_all = y_all[0:50] | |||||
| ## Gn = Gn[0:10] | |||||
| ## y_all = y_all[0:10] | |||||
| # for G in Gn: | |||||
| # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||||
| # # compute pair distances. | # # compute pair distances. | ||||
| ## dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||||
| ## Kmatrix=None, gkernel=gkernel, verbose=True) | |||||
| # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||||
| # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||||
| ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||||
| # # fitting and computing. | # # fitting and computing. | ||||
| # fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] | |||||
| # fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||||
| # for fit_method in fit_methods: | # for fit_method in fit_methods: | ||||
| # print('\n-------------------------------------') | # print('\n-------------------------------------') | ||||
| # print('fit method:', fit_method) | # print('fit method:', fit_method) | ||||
| # parameters = {'ds_name': ds_name, | # parameters = {'ds_name': ds_name, | ||||
| # 'gkernel': gkernel, | # 'gkernel': gkernel, | ||||
| # 'edit_cost_name': 'LETTER2', | |||||
| # 'edit_cost_name': 'NON_SYMBOLIC', | |||||
| # 'ged_method': 'mIPFP', | # 'ged_method': 'mIPFP', | ||||
| # 'attr_distance': 'euclidean', | # 'attr_distance': 'euclidean', | ||||
| # 'fit_method': fit_method} | # 'fit_method': fit_method} | ||||
| # xp_fit_method_for_non_symbolic(parameters, save_results=True, | # xp_fit_method_for_non_symbolic(parameters, save_results=True, | ||||
| # initial_solutions=40, | |||||
| # initial_solutions=1, | |||||
| # Gn_data = [Gn, y_all, graph_dir], | # Gn_data = [Gn, y_all, graph_dir], | ||||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], | |||||
| # Kmatrix=Kmatrix) | |||||
| # #### xp 4: SYNTHETICnew, sspkernel, using NON_SYMBOLIC. | |||||
| # ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC. | |||||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz') | |||||
| # Kmatrix = gmfile['Kmatrix'] | |||||
| # # normalization | |||||
| # Kmatrix_diag = Kmatrix.diagonal().copy() | |||||
| # for i in range(len(Kmatrix)): | |||||
| # for j in range(i, len(Kmatrix)): | |||||
| # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||||
| # run_time = 21821.35 | |||||
| # np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||||
| # Kmatrix=Kmatrix, run_time=run_time) | |||||
| # | |||||
| # # load dataset. | # # load dataset. | ||||
| # print('getting dataset and computing kernel distance matrix first...') | # print('getting dataset and computing kernel distance matrix first...') | ||||
| # ds_name = 'SYNTHETICnew' | # ds_name = 'SYNTHETICnew' | ||||
| # gkernel = 'structuralspkernel' | |||||
| # gkernel = 'spkernel' | |||||
| # Gn, y_all, graph_dir = get_dataset(ds_name) | # Gn, y_all, graph_dir = get_dataset(ds_name) | ||||
| # # remove graphs without nodes and edges. | |||||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||||
| # and nx.number_of_edges(G) != 0)] | |||||
| # idx = [G[0] for G in Gn] | |||||
| # Gn = [G[1] for G in Gn] | |||||
| # y_all = [y_all[i] for i in idx] | |||||
| # Gn = Gn[0:10] | |||||
| # y_all = y_all[0:10] | |||||
| ## # remove graphs without nodes and edges. | |||||
| ## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0 | |||||
| ## and nx.number_of_edges(G) != 0)] | |||||
| ## idx = [G[0] for G in Gn] | |||||
| ## Gn = [G[1] for G in Gn] | |||||
| ## y_all = [y_all[i] for i in idx] | |||||
| ## Gn = Gn[0:5] | |||||
| ## y_all = y_all[0:5] | |||||
| # for G in Gn: | # for G in Gn: | ||||
| # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | # G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | ||||
| # # compute pair distances. | |||||
| # | |||||
| # # compute/read Gram matrix and pair distances. | |||||
| ## Kmatrix = compute_kernel(Gn, gkernel, None, None, True) | |||||
| ## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||||
| ## Kmatrix=Kmatrix) | |||||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||||
| # Kmatrix = gmfile['Kmatrix'] | |||||
| # run_time = gmfile['run_time'] | |||||
| ## Kmatrix = Kmatrix[[0,1,2,3,4],:] | |||||
| ## Kmatrix = Kmatrix[:,[0,1,2,3,4]] | |||||
| # print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | ||||
| # Kmatrix=None, gkernel=gkernel, verbose=True) | |||||
| # Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||||
| ## Kmatrix = np.zeros((len(Gn), len(Gn))) | |||||
| ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | ## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | ||||
| # | |||||
| # # fitting and computing. | # # fitting and computing. | ||||
| # fit_methods = ['k-graphs', 'random', 'random', 'random'] | # fit_methods = ['k-graphs', 'random', 'random', 'random'] | ||||
| # for fit_method in fit_methods: | # for fit_method in fit_methods: | ||||
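The commented-out xp 3 and xp 4 blocks above normalize the Gram matrix entry-wise as Kmatrix[i][j] / sqrt(Kmatrix[i][i] * Kmatrix[j][j]). A vectorized sketch of the same normalization (assumes a strictly positive diagonal; not part of the patch itself):

    import numpy as np

    def normalize_gram_matrix(K):
        """Cosine-normalize a Gram matrix: K[i, j] / sqrt(K[i, i] * K[j, j])."""
        diag = np.sqrt(np.diag(K))
        return K / np.outer(diag, diag)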
| @@ -496,68 +538,69 @@ if __name__ == "__main__": | |||||
| # 'attr_distance': 'euclidean', | # 'attr_distance': 'euclidean', | ||||
| # 'fit_method': fit_method} | # 'fit_method': fit_method} | ||||
| # xp_fit_method_for_non_symbolic(parameters, save_results=True, | # xp_fit_method_for_non_symbolic(parameters, save_results=True, | ||||
| # initial_solutions=40, | |||||
| # Gn_data = [Gn, y_all, graph_dir], | |||||
| # k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||||
| ### xp 5: SYNTHETICnew, spkernel, using NON_SYMBOLIC. | |||||
| gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz') | |||||
| Kmatrix = gmfile['Kmatrix'] | |||||
| # normalization | |||||
| Kmatrix_diag = Kmatrix.diagonal().copy() | |||||
| for i in range(len(Kmatrix)): | |||||
| for j in range(i, len(Kmatrix)): | |||||
| Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| run_time = 21821.35 | |||||
| np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||||
| Kmatrix=Kmatrix, run_time=run_time) | |||||
| # initial_solutions=1, | |||||
| # Gn_data=[Gn, y_all, graph_dir], | |||||
| # k_dis_data=[dis_mat, dis_max, dis_min, dis_mean], | |||||
| # Kmatrix=Kmatrix) | |||||
| #### xp 5: Fingerprint, sspkernel, using LETTER2. | |||||
| # load dataset. | # load dataset. | ||||
| print('getting dataset and computing kernel distance matrix first...') | print('getting dataset and computing kernel distance matrix first...') | ||||
| ds_name = 'SYNTHETICnew' | |||||
| gkernel = 'spkernel' | |||||
| ds_name = 'Fingerprint' | |||||
| gkernel = 'structuralspkernel' | |||||
| Gn, y_all, graph_dir = get_dataset(ds_name) | Gn, y_all, graph_dir = get_dataset(ds_name) | ||||
| # # remove graphs without nodes and edges. | |||||
| # Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||||
| # remove graphs without nodes and edges. | |||||
| Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0)] | |||||
| # and nx.number_of_edges(G) != 0)] | # and nx.number_of_edges(G) != 0)] | ||||
| # idx = [G[0] for G in Gn] | |||||
| # Gn = [G[1] for G in Gn] | |||||
| # y_all = [y_all[i] for i in idx] | |||||
| # Gn = Gn[0:5] | |||||
| # y_all = y_all[0:5] | |||||
| idx = [G[0] for G in Gn] | |||||
| Gn = [G[1] for G in Gn] | |||||
| y_all = [y_all[i] for i in idx] | |||||
| y_idx = get_same_item_indices(y_all) | |||||
| # remove unused labels. | |||||
| for G in Gn: | |||||
| G.graph['edge_attrs'] = [] | |||||
| for edge in G.edges: | |||||
| del G.edges[edge]['attributes'] | |||||
| del G.edges[edge]['orient'] | |||||
| del G.edges[edge]['angle'] | |||||
| Gn = Gn[805:815] | |||||
| y_all = y_all[805:815] | |||||
| for G in Gn: | for G in Gn: | ||||
| G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | ||||
| # compute/read Gram matrix and pair distances. | # compute/read Gram matrix and pair distances. | ||||
| # Kmatrix = compute_kernel(Gn, gkernel, None, None, True) | |||||
| # np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||||
| # Kmatrix=Kmatrix) | |||||
| gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||||
| Kmatrix = gmfile['Kmatrix'] | |||||
| run_time = gmfile['run_time'] | |||||
| Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') | |||||
| np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||||
| Kmatrix=Kmatrix) | |||||
| # gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||||
| # Kmatrix = gmfile['Kmatrix'] | |||||
| # run_time = gmfile['run_time'] | |||||
| # Kmatrix = Kmatrix[[0,1,2,3,4],:] | # Kmatrix = Kmatrix[[0,1,2,3,4],:] | ||||
| # Kmatrix = Kmatrix[:,[0,1,2,3,4]] | # Kmatrix = Kmatrix[:,[0,1,2,3,4]] | ||||
| print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||||
| # print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||||
| dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | ||||
| Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | ||||
| # Kmatrix = np.zeros((len(Gn), len(Gn))) | # Kmatrix = np.zeros((len(Gn), len(Gn))) | ||||
| # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | ||||
| # compute pair distances. | |||||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||||
| # Kmatrix=None, gkernel=gkernel, verbose=True) | |||||
| # dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||||
| # fitting and computing. | # fitting and computing. | ||||
| fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||||
| fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] | |||||
| for fit_method in fit_methods: | for fit_method in fit_methods: | ||||
| print('\n-------------------------------------') | print('\n-------------------------------------') | ||||
| print('fit method:', fit_method) | print('fit method:', fit_method) | ||||
| parameters = {'ds_name': ds_name, | parameters = {'ds_name': ds_name, | ||||
| 'gkernel': gkernel, | 'gkernel': gkernel, | ||||
| 'edit_cost_name': 'NON_SYMBOLIC', | |||||
| 'edit_cost_name': 'LETTER2', | |||||
| 'ged_method': 'mIPFP', | 'ged_method': 'mIPFP', | ||||
| 'attr_distance': 'euclidean', | 'attr_distance': 'euclidean', | ||||
| 'fit_method': fit_method} | 'fit_method': fit_method} | ||||
| xp_fit_method_for_non_symbolic(parameters, save_results=True, | xp_fit_method_for_non_symbolic(parameters, save_results=True, | ||||
| initial_solutions=1, | |||||
| Gn_data=[Gn, y_all, graph_dir], | |||||
| k_dis_data=[dis_mat, dis_max, dis_min, dis_mean], | |||||
| initial_solutions=40, | |||||
| Gn_data = [Gn, y_all, graph_dir], | |||||
| k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], | |||||
| Kmatrix=Kmatrix) | Kmatrix=Kmatrix) | ||||
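kernel_distance_matrix from gklearn.preimage.utils is called here with a precomputed Gram matrix. Assuming it implements the usual kernel-induced metric d(i, j) = sqrt(K[i, i] + K[j, j] - 2 * K[i, j]) — the actual helper may differ in details such as how the minimum is taken — a minimal sketch:

    import numpy as np

    def distances_from_gram(K):
        """Pairwise kernel-induced distances from a Gram matrix K (sketch only)."""
        diag = np.diag(K)
        d2 = diag[:, None] + diag[None, :] - 2.0 * K
        dis_mat = np.sqrt(np.maximum(d2, 0.0))  # clip tiny negatives from rounding
        # dis_mat.min() includes the zero diagonal; the real helper may exclude it.
        return dis_mat, dis_mat.max(), dis_mat.min(), dis_mat.mean()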
| @@ -12,17 +12,15 @@ from shutil import copyfile | |||||
| import networkx as nx | import networkx as nx | ||||
| import matplotlib.pyplot as plt | import matplotlib.pyplot as plt | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | ||||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||||
| from preimage.utils import get_same_item_indices, kernel_distance_matrix | |||||
| from preimage.find_best_k import getRelations | |||||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||||
| from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix | |||||
| from gklearn.preimage.find_best_k import getRelations | |||||
| def xp_letter_h_LETTER2_cost(): | def xp_letter_h_LETTER2_cost(): | ||||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | ||||
| dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel') | dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel') | ||||
| @@ -177,11 +175,11 @@ def xp_letter_h_LETTER2_cost(): | |||||
| nb_dis_k_gi2gm[2] += 1 | nb_dis_k_gi2gm[2] += 1 | ||||
| # save median graphs. | # save median graphs. | ||||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | ||||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | copyfile(fname_sm, fn_pre_sm_new + '.gxl') | ||||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | ||||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | copyfile(fname_gm, fn_pre_gm_new + '.gxl') | ||||
| @@ -243,8 +241,8 @@ def xp_letter_h_LETTER2_cost(): | |||||
| def xp_letter_h(): | def xp_letter_h(): | ||||
| ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | ||||
| for G in Gn: | for G in Gn: | ||||
| reform_attributes(G) | reform_attributes(G) | ||||
| @@ -396,11 +394,11 @@ def xp_letter_h(): | |||||
| nb_dis_k_gi2gm[2] += 1 | nb_dis_k_gi2gm[2] += 1 | ||||
| # save median graphs. | # save median graphs. | ||||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | ||||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | copyfile(fname_sm, fn_pre_sm_new + '.gxl') | ||||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | ||||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | copyfile(fname_gm, fn_pre_gm_new + '.gxl') | ||||
| @@ -13,16 +13,16 @@ from shutil import copyfile | |||||
| import networkx as nx | import networkx as nx | ||||
| import matplotlib.pyplot as plt | import matplotlib.pyplot as plt | ||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | ||||
| from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||||
| from preimage.utils import get_same_item_indices | |||||
| from preimage.find_best_k import getRelations | |||||
| from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||||
| from gklearn.preimage.utils import get_same_item_indices | |||||
| from gklearn.preimage.find_best_k import getRelations | |||||
| def xp_monoterpenoides(): | def xp_monoterpenoides(): | ||||
| ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||||
| 'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||||
| import os | |||||
| ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds', | |||||
| 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset']) | Gn, y_all = loadDataset(ds['dataset']) | ||||
| # ds = {'name': 'Letter-high', | # ds = {'name': 'Letter-high', | ||||
| # 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb | # 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb | ||||
| @@ -169,11 +169,11 @@ def xp_monoterpenoides(): | |||||
| nb_dis_k_gi2gm[2] += 1 | nb_dis_k_gi2gm[2] += 1 | ||||
| # save median graphs. | # save median graphs. | ||||
| fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||||
| fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||||
| fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | ||||
| copyfile(fname_sm, fn_pre_sm_new + '.gxl') | copyfile(fname_sm, fn_pre_sm_new + '.gxl') | ||||
| fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||||
| fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||||
| fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | ||||
| + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | ||||
| copyfile(fname_gm, fn_pre_gm_new + '.gxl') | copyfile(fname_gm, fn_pre_gm_new + '.gxl') | ||||
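Concatenating os.path.dirname(...) with a relative fragment needs an explicit separator, which is why the graph_dir string above starts with '/'; os.path.join avoids the pitfall entirely. A sketch using the same directories:

    import os

    module_dir = os.path.dirname(os.path.realpath(__file__))

    # Plain concatenation: the leading '/' is required, otherwise the two parts fuse.
    graph_dir = module_dir + '/../../datasets/monoterpenoides/'

    # Equivalent and cleaner:
    graph_dir = os.path.normpath(os.path.join(module_dir, '..', '..',
                                              'datasets', 'monoterpenoides'))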
| @@ -15,10 +15,16 @@ def chooseDataset(ds_name): | |||||
| ds_file = 'datasets/Alkane/dataset.ds' | ds_file = 'datasets/Alkane/dataset.ds' | ||||
| ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt' | ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt' | ||||
| Gn, y = loadDataset(ds_file, filename_y=ds_y) | Gn, y = loadDataset(ds_file, filename_y=ds_y) | ||||
| for G in Gn: | |||||
| for node in G.nodes: | |||||
| del G.nodes[node]['attributes'] | |||||
| # node symbolic labels. | # node symbolic labels. | ||||
| elif ds_name == 'Acyclic': | elif ds_name == 'Acyclic': | ||||
| ds_file = 'datasets/acyclic/dataset_bps.ds' | ds_file = 'datasets/acyclic/dataset_bps.ds' | ||||
| Gn, y = loadDataset(ds_file) | Gn, y = loadDataset(ds_file) | ||||
| for G in Gn: | |||||
| for node in G.nodes: | |||||
| del G.nodes[node]['attributes'] | |||||
| # node non-symbolic labels. | # node non-symbolic labels. | ||||
| elif ds_name == 'Letter-med': | elif ds_name == 'Letter-med': | ||||
| ds_file = 'datasets/Letter-med/Letter-med_A.txt' | ds_file = 'datasets/Letter-med/Letter-med_A.txt' | ||||
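The loops added for Alkane and Acyclic strip the unused 'attributes' entry from every node before the kernels run. A compact equivalent that also tolerates nodes missing the key (sketch, same intended behaviour):

    # Remove one label key from every node of every graph; pop() is KeyError-safe.
    for G in Gn:
        for node in G.nodes:
            G.nodes[node].pop('attributes', None)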
| @@ -27,14 +33,39 @@ def chooseDataset(ds_name): | |||||
| elif ds_name == 'AIDS': | elif ds_name == 'AIDS': | ||||
| ds_file = 'datasets/AIDS/AIDS_A.txt' | ds_file = 'datasets/AIDS/AIDS_A.txt' | ||||
| Gn, y = loadDataset(ds_file) | Gn, y = loadDataset(ds_file) | ||||
| # edge non-symbolic labels (no node labels). | |||||
| elif ds_name == 'Fingerprint_edge': | |||||
| import networkx as nx | |||||
| ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' | |||||
| Gn, y = loadDataset(ds_file) | |||||
| Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] | |||||
| idx = [G[0] for G in Gn] | |||||
| Gn = [G[1] for G in Gn] | |||||
| y = [y[i] for i in idx] | |||||
| for G in Gn: | |||||
| G.graph['node_attrs'] = [] | |||||
| for node in G.nodes: | |||||
| del G.nodes[node]['attributes'] | |||||
| del G.nodes[node]['x'] | |||||
| del G.nodes[node]['y'] | |||||
| # edge non-symbolic labels (and node non-symbolic labels). | # edge non-symbolic labels (and node non-symbolic labels). | ||||
| elif ds_name == 'Fingerprint': | elif ds_name == 'Fingerprint': | ||||
| import networkx as nx | |||||
| ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' | ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' | ||||
| Gn, y = loadDataset(ds_file) | Gn, y = loadDataset(ds_file) | ||||
| Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] | |||||
| idx = [G[0] for G in Gn] | |||||
| Gn = [G[1] for G in Gn] | |||||
| y = [y[i] for i in idx] | |||||
| # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels). | |||||
| elif ds_name == 'Cuneiform': | |||||
| import networkx as nx | |||||
| ds_file = 'datasets/Cuneiform/Cuneiform_A.txt' | |||||
| Gn, y = loadDataset(ds_file) | |||||
| Gn = Gn[0:10] | |||||
| y = y[0:10] | |||||
| Gn = Gn[0:3] | |||||
| y = y[0:3] | |||||
| return Gn, y | return Gn, y | ||||
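Both new Fingerprint branches drop graphs that have no edges while keeping the target list y aligned with Gn. The enumerate/re-index pattern used above can be condensed with zip; a self-contained sketch with toy data standing in for loadDataset's output:

    import networkx as nx

    # Toy data standing in for the output of loadDataset.
    Gn = [nx.path_graph(3), nx.empty_graph(2), nx.path_graph(2)]
    y = [0, 1, 0]

    # Keep only graphs with at least one edge, together with their targets.
    pairs = [(G, target) for G, target in zip(Gn, y) if nx.number_of_edges(G) != 0]
    Gn = [G for G, _ in pairs]
    y = [target for _, target in pairs]
    assert len(Gn) == len(y) == 2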
| @@ -152,7 +183,7 @@ def test_spkernel(ds_name, parallel): | |||||
| #@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) | #@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) | ||||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS']) | |||||
| @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform']) | |||||
| @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | ||||
| def test_structuralspkernel(ds_name, parallel): | def test_structuralspkernel(ds_name, parallel): | ||||
| """Test structural shortest path kernel. | """Test structural shortest path kernel. | ||||
| @@ -246,4 +277,5 @@ def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel): | |||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||
| test_spkernel() | |||||
| # test_spkernel('Alkane', 'imap_unordered') | |||||
| test_structuralspkernel('Fingerprint_edge', 'imap_unordered') | |||||
| @@ -753,15 +753,12 @@ if __name__ == '__main__': | |||||
| # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb | # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb | ||||
| # Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | # Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | ||||
| # saveDataset(Gn, y, group='xml', filename='temp/temp') | # saveDataset(Gn, y, group='xml', filename='temp/temp') | ||||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||||
| Gn, y_all = loadDataset(dataset) | |||||
| filename = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew/SYNTHETICnew' | |||||
| saveDataset(Gn, y_all, gformat='gxl', group='xml', filename=filename) | |||||
| # test - new way to add labels and attributes. | # test - new way to add labels and attributes. | ||||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Fingerprint/Fingerprint_A.txt' | |||||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-med/Letter-med_A.txt' | |||||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/AIDS/AIDS_A.txt' | |||||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | |||||
| # Gn, y_all = loadDataset(dataset) | |||||
| # dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||||
| # dataset = '../../datasets/Fingerprint/Fingerprint_A.txt' | |||||
| # dataset = '../../datasets/Letter-med/Letter-med_A.txt' | |||||
| # dataset = '../../datasets/AIDS/AIDS_A.txt' | |||||
| # dataset = '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | |||||
| # Gn, y_all = loadDataset(dataset) | |||||
| pass | |||||
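The removed lines converted SYNTHETICnew to GXL with saveDataset. The call pattern, kept here as a hedged reminder (paths are placeholders, and the import assumes saveDataset lives next to loadDataset in gklearn.utils.graphfiles, as the surrounding file suggests):

    from gklearn.utils.graphfiles import loadDataset, saveDataset

    dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'   # placeholder path
    Gn, y_all = loadDataset(dataset)
    saveDataset(Gn, y_all, gformat='gxl', group='xml', filename='temp/SYNTHETICnew')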
| @@ -11,7 +11,6 @@ from sklearn.model_selection import KFold, train_test_split, ParameterGrid | |||||
| from multiprocessing import Pool, Array | from multiprocessing import Pool, Array | ||||
| from functools import partial | from functools import partial | ||||
| import sys | import sys | ||||
| sys.path.insert(0, "../") | |||||
| import os | import os | ||||
| import time | import time | ||||
| import datetime | import datetime | ||||
| @@ -74,8 +73,6 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| Examples | Examples | ||||
| -------- | -------- | ||||
| >>> import numpy as np | >>> import numpy as np | ||||
| >>> import sys | |||||
| >>> sys.path.insert(0, "../") | |||||
| >>> from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel | >>> from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel | ||||
| >>> from gklearn.kernels.untilHPathKernel import untilhpathkernel | >>> from gklearn.kernels.untilHPathKernel import untilhpathkernel | ||||
| >>> | >>> | ||||
| @@ -46466,7 +46466,7 @@ | |||||
| "name": "python", | "name": "python", | ||||
| "nbconvert_exporter": "python", | "nbconvert_exporter": "python", | ||||
| "pygments_lexer": "ipython3", | "pygments_lexer": "ipython3", | ||||
| "version": "3.6.8" | |||||
| "version": "3.6.9" | |||||
| } | } | ||||
| }, | }, | ||||
| "nbformat": 4, | "nbformat": 4, | ||||
| @@ -7,10 +7,6 @@ Created on Tue Jan 7 15:25:36 2020 | |||||
| """ | """ | ||||
| # draw all the graphs | # draw all the graphs | ||||
| import sys | |||||
| import pathlib | |||||
| sys.path.insert(0, "../../") | |||||
| import matplotlib.pyplot as plt | import matplotlib.pyplot as plt | ||||
| import networkx as nx | import networkx as nx | ||||
| @@ -19,6 +15,58 @@ from gklearn.utils.graphfiles import loadDataset, loadGXL | |||||
| def main(): | def main(): | ||||
| # MUTAG dataset. | |||||
| dataset, y = loadDataset("../../datasets/MUTAG/MUTAG_A.txt") | |||||
| for idx in [65]:#[6]: | |||||
| G = dataset[idx] | |||||
| for node in G.nodes: | |||||
| if G.nodes[node]['atom'] == '0': | |||||
| G.nodes[node]['atom'] = 'C' | |||||
| elif G.nodes[node]['atom'] == '1': | |||||
| G.nodes[node]['atom'] = 'N' | |||||
| elif G.nodes[node]['atom'] == '2': | |||||
| G.nodes[node]['atom'] = 'O' | |||||
| elif G.nodes[node]['atom'] == '3': | |||||
| G.nodes[node]['atom'] = 'F' | |||||
| elif G.nodes[node]['atom'] == '4': | |||||
| G.nodes[node]['atom'] = 'I' | |||||
| elif G.nodes[node]['atom'] == '5': | |||||
| G.nodes[node]['atom'] = 'Cl' | |||||
| elif G.nodes[node]['atom'] == '6': | |||||
| G.nodes[node]['atom'] = 'Br' | |||||
| ecolors = [] | |||||
| for edge in G.edges: | |||||
| if G.edges[edge]['bond_type'] == '0': | |||||
| ecolors.append('orange') | |||||
| elif G.edges[edge]['bond_type'] == '1': | |||||
| ecolors.append('r') | |||||
| elif G.edges[edge]['bond_type'] == '2': | |||||
| ecolors.append('purple') | |||||
| elif G.edges[edge]['bond_type'] == '3': | |||||
| ecolors.append('orange') | |||||
| print(idx) | |||||
| print(nx.get_node_attributes(G, 'atom')) | |||||
| edge_labels = nx.get_edge_attributes(G, 'bond_type') | |||||
| print(edge_labels) | |||||
| pos=nx.spring_layout(G) | |||||
| nx.draw(G, | |||||
| pos, | |||||
| node_size=500, | |||||
| labels=nx.get_node_attributes(G, 'atom'), | |||||
| node_color='blue', | |||||
| font_color='w', | |||||
| edge_color=ecolors, | |||||
| width=3, | |||||
| with_labels=True) | |||||
| # edge_labels = nx.draw_networkx_edge_labels(G, pos, | |||||
| # edge_labels=edge_labels, | |||||
| # font_color='pink') | |||||
| plt.savefig('mol1_graph.svg', format='svg', dpi=300) | |||||
| plt.show() | |||||
| plt.clf() | |||||
| # # monoterpenoides dataset. | # # monoterpenoides dataset. | ||||
| # dataset, y = loadDataset("../../datasets/monoterpenoides/dataset_10+.ds") | # dataset, y = loadDataset("../../datasets/monoterpenoides/dataset_10+.ds") | ||||
| # for idx in [12,22,29,74]: | # for idx in [12,22,29,74]: | ||||
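The long if/elif chains in the MUTAG block above map integer label codes to element symbols and bond colours. A dictionary lookup expresses the same mapping more compactly (sketch only; the default 'gray' for unknown bond types is an assumption):

    import networkx as nx

    ATOM_SYMBOLS = {'0': 'C', '1': 'N', '2': 'O', '3': 'F', '4': 'I', '5': 'Cl', '6': 'Br'}
    BOND_COLORS = {'0': 'orange', '1': 'r', '2': 'purple', '3': 'orange'}

    def relabel_mutag_graph(G: nx.Graph):
        """Rewrite 'atom' codes in place and return per-edge colours for drawing."""
        for node in G.nodes:
            code = G.nodes[node]['atom']
            G.nodes[node]['atom'] = ATOM_SYMBOLS.get(code, code)
        return [BOND_COLORS.get(G.edges[edge]['bond_type'], 'gray') for edge in G.edges]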
| @@ -67,35 +115,35 @@ def main(): | |||||
| # draw_Fingerprint_graph(Gn[idx], file_prefix='') | # draw_Fingerprint_graph(Gn[idx], file_prefix='') | ||||
| # SYNTHETIC dataset. | |||||
| dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||||
| Gn, y_all = loadDataset(dataset) | |||||
| idx_no_node = [] | |||||
| idx_no_edge = [] | |||||
| idx_no_both = [] | |||||
| for idx, G in enumerate(Gn): | |||||
| if nx.number_of_nodes(G) == 0: | |||||
| idx_no_node.append(idx) | |||||
| if nx.number_of_edges(G) == 0: | |||||
| idx_no_both.append(idx) | |||||
| if nx.number_of_edges(G) == 0: | |||||
| idx_no_edge.append(idx) | |||||
| # file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name'] | |||||
| # draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True) | |||||
| # draw_SYNTHETIC_graph(Gn[idx]) | |||||
| print('nb_no_node: ', len(idx_no_node)) | |||||
| print('nb_no_edge: ', len(idx_no_edge)) | |||||
| print('nb_no_both: ', len(idx_no_both)) | |||||
| print('idx_no_node: ', idx_no_node) | |||||
| print('idx_no_edge: ', idx_no_edge) | |||||
| print('idx_no_both: ', idx_no_both) | |||||
| for idx in [0, 10, 100]: | |||||
| print(idx) | |||||
| print(Gn[idx].nodes(data=True)) | |||||
| print(Gn[idx].edges(data=True)) | |||||
| draw_SYNTHETIC_graph(Gn[idx], save=None) | |||||
| # # SYNTHETIC dataset. | |||||
| # dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||||
| # Gn, y_all = loadDataset(dataset) | |||||
| # | |||||
| # idx_no_node = [] | |||||
| # idx_no_edge = [] | |||||
| # idx_no_both = [] | |||||
| # for idx, G in enumerate(Gn): | |||||
| # if nx.number_of_nodes(G) == 0: | |||||
| # idx_no_node.append(idx) | |||||
| # if nx.number_of_edges(G) == 0: | |||||
| # idx_no_both.append(idx) | |||||
| # if nx.number_of_edges(G) == 0: | |||||
| # idx_no_edge.append(idx) | |||||
| ## file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name'] | |||||
| ## draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True) | |||||
| ## draw_SYNTHETIC_graph(Gn[idx]) | |||||
| # print('nb_no_node: ', len(idx_no_node)) | |||||
| # print('nb_no_edge: ', len(idx_no_edge)) | |||||
| # print('nb_no_both: ', len(idx_no_both)) | |||||
| # print('idx_no_node: ', idx_no_node) | |||||
| # print('idx_no_edge: ', idx_no_edge) | |||||
| # print('idx_no_both: ', idx_no_both) | |||||
| # | |||||
| # for idx in [0, 10, 100]: | |||||
| # print(idx) | |||||
| # print(Gn[idx].nodes(data=True)) | |||||
| # print(Gn[idx].edges(data=True)) | |||||
| # draw_SYNTHETIC_graph(Gn[idx], save=None) | |||||
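The commented-out SYNTHETIC block above collects indices of graphs without nodes or edges; note that its idx_no_both list is filled on the edge check alone, so "both" there effectively means "no edges". A sketch that keeps the three lists distinct:

    import networkx as nx

    def count_empty_graphs(Gn):
        """Indices of graphs with no nodes, no edges, or neither (sketch only)."""
        idx_no_node = [i for i, G in enumerate(Gn) if nx.number_of_nodes(G) == 0]
        idx_no_edge = [i for i, G in enumerate(Gn) if nx.number_of_edges(G) == 0]
        idx_no_both = sorted(set(idx_no_node) & set(idx_no_edge))
        return idx_no_node, idx_no_edge, idx_no_both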
| def plot_a_graph(graph_filename): | def plot_a_graph(graph_filename): | ||||