@@ -10,6 +10,7 @@ This script compares the results with and without FCSP.
from gklearn.dataset import Dataset
from gklearn.utils import get_graph_kernel_by_name
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.experiments import DATASET_ROOT
import functools
import os
import pickle
@@ -17,50 +18,77 @@ import sys
import logging
def run_all(fcsp):
	save_dir = 'outputs/' + ('fcsp' if fcsp else 'naive') + '/'
	os.makedirs(save_dir, exist_ok=True)
# def run_all(fcsp):
# 	from sklearn.model_selection import ParameterGrid

# 	Dataset_List = [
# 		'Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
# 		'PAH_unlabeled', 'PAH', 'MUTAG', 'Monoterpens',
# 		'Letter-high', 'Letter-med', 'Letter-low',
# 		'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
# 		'BZR', 'COX2', 'DHFR', 'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR',
# 		'Cuneiform', 'KKI', 'OHSU', 'Peking_1', 'SYNTHETICnew',
# 		'Synthie', 'SYNTHETIC', 'Fingerprint', 'IMDB-BINARY',
# 		'IMDB-MULTI', 'COIL-DEL', 'PROTEINS', 'PROTEINS_full',
# 		'Mutagenicity', 'REDDIT-BINARY']

# 	Kernel_List = ['ShortestPath', 'StructuralSP']

# 	task_grid = ParameterGrid({'kernel': Kernel_List[:], 'dataset': Dataset_List[:]})

# 	for task in list(task_grid):
	from sklearn.model_selection import ParameterGrid
# 		save_file_suffix = '.' + task['kernel'] + '.' + task['dataset']
# 		file_name = os.path.join(save_dir, 'run_time' + save_file_suffix + '.pkl')
# 		if not os.path.isfile(file_name):
# 			print()
# 			print((task['kernel'], task['dataset']))
	Dataset_List = [
		'Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
		'PAH_unlabeled', 'PAH', 'MUTAG', 'Letter-high', 'Letter-med', 'Letter-low',
		'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
		'BZR', 'COX2', 'DHFR', 'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR',
		'Cuneiform', 'KKI', 'OHSU', 'Peking_1', 'SYNTHETICnew',
		'Synthie', 'SYNTHETIC', 'Fingerprint', 'IMDB-BINARY',
		'IMDB-MULTI', 'COIL-DEL', 'PROTEINS', 'PROTEINS_full',
		'Mutagenicity', 'REDDIT-BINARY']
# 			try:
# 				gram_matrix, run_time = compute(task['kernel'], task['dataset'], fcsp)
	Kernel_List = ['ShortestPath', 'StructuralSP']
# 			except Exception as exp:
# 				print('An exception occurred when running this experiment:')
# 				LOG_FILENAME = save_dir + 'error.txt'
# 				logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
# 				logging.exception('\n--------------' + save_file_suffix + '------------------')
# 				print(repr(exp))
# 			else:
# 				save_file_suffix = '.' + task['kernel'] + task['dataset']
	work_grid = ParameterGrid({'kernel': Kernel_List[:], 'dataset': Dataset_List[:]})
# 				with open(file_name, 'wb') as f:
# 					pickle.dump(run_time, f)

	for work in list(work_grid):
		save_file_suffix = '.' + work['kernel'] + '.' + work['dataset']
		file_name = os.path.join(save_dir, 'run_time' + save_file_suffix + '.pkl')
		if not os.path.isfile(file_name):
			print()
			print((work['kernel'], work['dataset']))
			try:
				gram_matrix, run_time = run_work(work['kernel'], work['dataset'], fcsp)
			except Exception as exp:
				print('An exception occurred when running this experiment:')
				LOG_FILENAME = save_dir + 'error.txt'
				logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
				logging.exception(save_file_suffix)
				print(repr(exp))
def run_task(kernel_name, ds_name, fcsp):
	save_file_suffix = '.' + kernel_name + '.' + ds_name + '.' + str(fcsp)
	file_name = os.path.join(save_dir, 'run_time' + save_file_suffix + '.pkl')
	if not os.path.isfile(file_name):
		print()
		print((kernel_name, ds_name, str(fcsp)))
		try:
			gram_matrix, run_time = compute(kernel_name, ds_name, fcsp)
		except Exception as exp:
			print('An exception occurred when running this experiment:')
			LOG_FILENAME = os.path.join(save_dir, 'error' + save_file_suffix + '.txt')
			logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
			logging.exception('\n--------------' + save_file_suffix + '------------------')
			print(repr(exp))
		else:
			with open(file_name, 'wb') as f:
				pickle.dump(run_time, f)


def run_work(kernel_name, ds_name, fcsp):
	dataset = Dataset(ds_name, verbose=True)


def compute(kernel_name, ds_name, fcsp):
	dataset = Dataset(ds_name, root=DATASET_ROOT, verbose=True)

	if kernel_name == 'ShortestPath':
		dataset.trim_dataset(edge_required=True)

	mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
	node_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
@@ -87,8 +115,15 @@ def run_work(kernel_name, ds_name, fcsp):
if __name__ == '__main__':
	if len(sys.argv) > 1:
		fcsp = (sys.argv[1] == 'True')
		kernel_name = sys.argv[1]
		ds_name = sys.argv[2]
		fcsp = (sys.argv[3] == 'True')
	else:
		kernel_name = 'ShortestPath'
		ds_name = 'Acyclic'
		fcsp = True
	run_all(fcsp)

	save_dir = 'outputs/'
	os.makedirs(save_dir, exist_ok=True)

	run_task(kernel_name, ds_name, fcsp)
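
# --- Illustrative sketch (editor's addition): how the 'mix' kernel used in
# compute() above composes deltakernel and gaussiankernel. The toy functions
# below are stand-ins with assumed behaviour, not gklearn's implementations.
import math

def _toy_delta(x, y):
	# 1 if the symbolic labels match, 0 otherwise.
	return 1.0 if x == y else 0.0

def _toy_gaussian(x, y, gamma=1.0):
	# RBF kernel on numeric attribute vectors.
	return math.exp(-gamma * sum((a - b) ** 2 for a, b in zip(x, y)))

def _toy_kernelproduct(k_symb, k_nsymb, l1, l2, a1, a2):
	# Product of a kernel on symbolic labels and a kernel on attributes.
	return k_symb(l1, l2) * k_nsymb(a1, a2)

# e.g. _toy_kernelproduct(_toy_delta, _toy_gaussian, 'C', 'C', [0.1], [0.3])
# gives 1.0 * exp(-0.04) ~= 0.96, while differing labels give 0.0.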

@@ -0,0 +1,98 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 2 17:41:54 2020

@author: ljia

This script compares the results with and without FCSP.
"""
from gklearn.dataset import Dataset
from shortest_path import SPSpace
from structural_sp import SSPSpace
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.experiments import DATASET_ROOT
import functools
import os
import pickle
import sys
import logging


def run_task(kernel_name, ds_name, fcsp):
	save_file_suffix = '.' + kernel_name + '.' + ds_name + '.' + str(fcsp)
	file_name = os.path.join(save_dir, 'space' + save_file_suffix + '.pkl')

	# Return if the task is already completed.
	if os.path.isfile(file_name):
		with open(file_name, 'rb') as f:
			data = pickle.load(f)
			if data['completed']:
				return

	print()
	print((kernel_name, ds_name, str(fcsp)))

	try:
		gram_matrix, run_time = compute(kernel_name, ds_name, fcsp, file_name)
	except Exception as exp:
		print('An exception occurred when running this experiment:')
		LOG_FILENAME = os.path.join(save_dir, 'error.space' + save_file_suffix + '.txt')
		logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
		logging.exception('\n--------------' + save_file_suffix + '------------------')
		print(repr(exp))
# 	else:
# 		with open(file_name, 'wb') as f:
# 			pickle.dump(run_time, f)
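
# --- Illustrative sketch (editor's addition): the resume pattern used above,
# reduced to its core. A pickle file carries partial results plus a
# 'completed' flag; the loop checkpoints periodically, and a finished run is
# never redone. Names here are hypothetical, not part of the original script.
def _resumable_sum(file_name, values, save_every=600):
	import os, pickle, time
	if os.path.isfile(file_name):
		with open(file_name, 'rb') as f:
			state = pickle.load(f)
		if state['completed']:
			return state['total']
	else:
		state = {'total': 0, 'i': -1, 'completed': False}
	t0 = time.time()
	for i, v in enumerate(values):
		if i > state['i']:  # skip work finished in a previous run
			state['total'] += v
			state['i'] = i
			if time.time() - t0 > save_every:
				with open(file_name, 'wb') as f:
					pickle.dump(state, f)
				t0 = time.time()
	state['completed'] = True
	with open(file_name, 'wb') as f:
		pickle.dump(state, f)
	return state['total']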

def compute(kernel_name, ds_name, fcsp, file_name):
	dataset = Dataset(ds_name, root=DATASET_ROOT, verbose=True)

	if kernel_name == 'ShortestPath':
		dataset.trim_dataset(edge_required=True)
# 		dataset.cut_graphs(range(0, 10))
		kernel_class = SPSpace
	else:
# 		dataset.cut_graphs(range(0, 10))
		kernel_class = SSPSpace

	mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
	node_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
	edge_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

	graph_kernel = kernel_class(
		name=kernel_name,
		node_labels=dataset.node_labels,
		edge_labels=dataset.edge_labels,
		node_attrs=dataset.node_attrs,
		edge_attrs=dataset.edge_attrs,
		ds_infos=dataset.get_dataset_infos(keys=['directed']),
		fcsp=fcsp,
		compute_method='naive',
		node_kernels=node_kernels,
		edge_kernels=edge_kernels,
		file_name=file_name)

	gram_matrix, run_time = graph_kernel.compute(
		dataset.graphs,
		parallel=None,
		normalize=False,
		verbose=2)

	return gram_matrix, run_time


if __name__ == '__main__':
	if len(sys.argv) > 1:
		kernel_name = sys.argv[1]
		ds_name = sys.argv[2]
		fcsp = (sys.argv[3] == 'True')
	else:
		kernel_name = 'StructuralSP'
		ds_name = 'Fingerprint'
		fcsp = True

	save_dir = 'outputs/'
	os.makedirs(save_dir, exist_ok=True)

	run_task(kernel_name, ds_name, fcsp)

@@ -10,27 +10,60 @@ import os
import re


def get_job_script(param):
OUT_TIME_LIST = [
	('ShortestPath', 'ENZYMES', 'False'),
	('StructuralSP', 'ENZYMES', 'True'),
	('StructuralSP', 'ENZYMES', 'False'),
	('StructuralSP', 'AIDS', 'False'),
	('ShortestPath', 'NCI1', 'False'),
	('StructuralSP', 'NCI1', 'True'),
	('StructuralSP', 'NCI1', 'False'),
	('ShortestPath', 'NCI109', 'False'),
	('StructuralSP', 'NCI109', 'True'),
	('StructuralSP', 'NCI109', 'False'),
	('ShortestPath', 'DD', 'True'),
	('ShortestPath', 'DD', 'False'),
	('StructuralSP', 'BZR', 'False'),
	('ShortestPath', 'COX2', 'False'),
	('StructuralSP', 'COX2', 'False'),
	('ShortestPath', 'DHFR', 'False'),
]

OUT_MEM_LIST = [
	('StructuralSP', 'PROTEINS', 'True'),
	('StructuralSP', 'PROTEINS', 'False'),
	('StructuralSP', 'PROTEINS_full', 'True'),
	('StructuralSP', 'PROTEINS_full', 'False'),
	('ShortestPath', 'REDDIT-BINARY', 'True'),
]

MISS_LABEL_LIST = [
	('StructuralSP', 'GREC', 'True'),
	('StructuralSP', 'GREC', 'False'),
	('StructuralSP', 'Web', 'True'),
	('StructuralSP', 'Web', 'False'),
]

def get_job_script(kernel, dataset, fcsp):
	script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="fcsp.""" + param + r""""
#SBATCH --partition=long
#SBATCH --job-name="fcsp.""" + kernel + r"." + dataset + r"." + fcsp + r""""
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_fcsp.""" + param + r""".txt"
#SBATCH --error="errors/error_fcsp.""" + param + r""".txt"
#SBATCH --output="outputs/output_fcsp.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#SBATCH --error="errors/error_fcsp.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=100:00:00
#SBATCH --mem-per-cpu=4000
#SBATCH --time=300:00:00
##SBATCH --mem-per-cpu=4000
#SBATCH --mem=40000

srun hostname
cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/thesis/graph_kernels/fcsp
srun python3 compare_fcsp.py """ + kernel + r" " + dataset + r" " + fcsp

	script = script.strip()
	script = re.sub('\n\t+', '\n', script)
	script = re.sub('\n +', '\n', script)
@@ -38,15 +71,75 @@ srun python3 compare_fcsp.py """ + param

	return script
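
# --- Illustrative sketch (editor's addition): an alternative to the
# os.system('sbatch <<EOF ...') heredoc used below; feeding the generated
# script to sbatch on stdin avoids shell quoting issues. Assumes sbatch is on
# PATH; the helper name is hypothetical.
def _submit(script):
	import subprocess
	proc = subprocess.run(['sbatch'], input=script, text=True,
						  capture_output=True)
	# On success sbatch prints e.g. "Submitted batch job 123456".
	return proc.stdout.strip()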

def check_task_status(save_dir, *params):
	str_task_id = '.' + '.'.join(params)

	# Check if the task is in the out-of-memory, out-of-time, or missing-label lists.
	if params in OUT_MEM_LIST or params in OUT_TIME_LIST or params in MISS_LABEL_LIST:
		return True

	# Check if the task is running or queued in Slurm.
	command = 'squeue --user ljia02 --name "fcsp' + str_task_id + '" --format "%.2t" --noheader'
	stream = os.popen(command)
	output = stream.readlines()
	if len(output) > 0:
		return True

	# Check if the results are already computed.
	file_name = os.path.join(save_dir, 'run_time' + str_task_id + '.pkl')
	if os.path.isfile(file_name):
		return True

	return False

if __name__ == '__main__':
	save_dir = 'outputs/'
	os.makedirs(save_dir, exist_ok=True)
	os.makedirs('outputs/', exist_ok=True)
	os.makedirs('errors/', exist_ok=True)

	param_list = ['True', 'False']
	for param in param_list[:]:
		job_script = get_job_script(param)
		command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# 		print(command)
		os.system(command)

	from sklearn.model_selection import ParameterGrid

	Dataset_List = [
		'Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
		'PAH_unlabeled', 'PAH', 'MUTAG', 'Monoterpens',
		'Letter-high', 'Letter-med', 'Letter-low',
		'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
		# new: not so large.
		'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR', 'Chiral', 'Vitamin_D',
		'ACE', 'Steroid', 'KKI', 'Fingerprint', 'IMDB-BINARY',
		'IMDB-MULTI', 'Peking_1', 'Cuneiform', 'OHSU', 'BZR', 'COX2',
		'DHFR', 'SYNTHETICnew', 'Synthie', 'SYNTHETIC',
		# new: large.
		'TWITTER-Real-Graph-Partial', 'GREC', 'Web', 'MCF-7',
		'MCF-7H', 'MOLT-4', 'MOLT-4H', 'NCI-H23', 'NCI-H23H',
		'OVCAR-8', 'OVCAR-8H', 'P388', 'P388H', 'PC-3', 'PC-3H',
		'SF-295', 'SF-295H', 'SN12C', 'SN12CH', 'SW-620', 'SW-620H',
		'TRIANGLES', 'UACC257', 'UACC257H', 'Yeast', 'YeastH',
		'COLORS-3', 'DBLP_v1', 'MSRC_9', 'MSRC_21', 'MSRC_21C',
		'COLLAB', 'COIL-DEL',
		'COIL-RAG', 'PROTEINS', 'PROTEINS_full', 'Mutagenicity',
		'REDDIT-BINARY', 'FRANKENSTEIN', 'REDDIT-MULTI-5K',
		'REDDIT-MULTI-12K']
	Kernel_List = ['ShortestPath', 'StructuralSP']
	fcsp_list = ['True', 'False']
	task_grid = ParameterGrid({'kernel': Kernel_List[:],
							   'dataset': Dataset_List[:],
							   'fcsp': fcsp_list[:]})

	from tqdm import tqdm
	for task in tqdm(list(task_grid), desc='submitting tasks/jobs'):
		if not check_task_status(save_dir, task['kernel'], task['dataset'], task['fcsp']):
			job_script = get_job_script(task['kernel'], task['dataset'], task['fcsp'])
			command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# 			print(command)
			os.system(command)
# 			os.popen(command)
# 			output = stream.readlines()
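
# --- Illustrative note (editor's addition): ParameterGrid expands the dict
# above into the full Cartesian product, so the loop submits at most
# len(Kernel_List) * len(Dataset_List) * len(fcsp_list) jobs. A tiny example:
# 	list(ParameterGrid({'kernel': ['ShortestPath'], 'fcsp': ['True', 'False']}))
# 	-> [{'fcsp': 'True', 'kernel': 'ShortestPath'},
# 	    {'fcsp': 'False', 'kernel': 'ShortestPath'}]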

@@ -0,0 +1,225 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 14 11:49:43 2020

@author: ljia
"""
import os
import re
import pickle


OUT_TIME_LIST = []
OUT_MEM_LIST = [
	('ShortestPath', 'REDDIT-BINARY', 'True'),
	('ShortestPath', 'REDDIT-BINARY', 'False'),
	('ShortestPath', 'DD', 'True'),
	('ShortestPath', 'DD', 'False'),
	('ShortestPath', 'MCF-7', 'True'),
	('ShortestPath', 'MCF-7', 'False'),
	('StructuralSP', 'MCF-7', 'True'),
	('StructuralSP', 'MCF-7', 'False'),
	('ShortestPath', 'MCF-7H', 'True'),
	('ShortestPath', 'MCF-7H', 'False'),
	('StructuralSP', 'MCF-7H', 'True'),
	('StructuralSP', 'MCF-7H', 'False'),
	('ShortestPath', 'MOLT-4', 'True'),
	('ShortestPath', 'MOLT-4', 'False'),
	('StructuralSP', 'MOLT-4', 'True'),
	('StructuralSP', 'MOLT-4', 'False'),
	('ShortestPath', 'MOLT-4H', 'True'),
	('ShortestPath', 'MOLT-4H', 'False'),
	('StructuralSP', 'MOLT-4H', 'True'),
	('StructuralSP', 'MOLT-4H', 'False'),
	('ShortestPath', 'P388', 'True'),
	('ShortestPath', 'P388H', 'True'),
	('ShortestPath', 'NCI-H23', 'True'),
	('ShortestPath', 'NCI-H23', 'False'),
	('StructuralSP', 'NCI-H23', 'True'),
	('StructuralSP', 'NCI-H23', 'False'),
	('ShortestPath', 'NCI-H23H', 'True'),
	('ShortestPath', 'NCI-H23H', 'False'),
	('StructuralSP', 'NCI-H23H', 'True'),
	('StructuralSP', 'NCI-H23H', 'False'),
	('ShortestPath', 'OVCAR-8', 'True'),
	('ShortestPath', 'OVCAR-8', 'False'),
	('StructuralSP', 'OVCAR-8', 'True'),
	('StructuralSP', 'OVCAR-8', 'False'),
	('ShortestPath', 'OVCAR-8H', 'False'),
	('StructuralSP', 'OVCAR-8H', 'False'),
	('ShortestPath', 'SN12C', 'True'),
	('ShortestPath', 'SN12C', 'False'),
	('StructuralSP', 'SN12C', 'True'),
	('StructuralSP', 'SN12C', 'False'),
	('ShortestPath', 'SN12CH', 'True'),
	('ShortestPath', 'SN12CH', 'False'),
	('ShortestPath', 'SF-295', 'True'),
	('ShortestPath', 'SF-295', 'False'),
	('StructuralSP', 'SF-295', 'True'),
	('StructuralSP', 'SF-295', 'False'),
	('ShortestPath', 'SF-295H', 'False'),
	('StructuralSP', 'SF-295H', 'False'),
	('ShortestPath', 'SW-620', 'True'),
	('ShortestPath', 'SW-620', 'False'),
	('StructuralSP', 'SW-620', 'True'),
	('StructuralSP', 'SW-620', 'False'),
	('ShortestPath', 'SW-620H', 'False'),
	('StructuralSP', 'SW-620H', 'False'),
	('ShortestPath', 'TRIANGLES', 'False'),
	('StructuralSP', 'TRIANGLES', 'False'),
	('ShortestPath', 'Yeast', 'True'),
	('ShortestPath', 'Yeast', 'False'),
	('StructuralSP', 'Yeast', 'True'),
	('StructuralSP', 'Yeast', 'False'),
	('ShortestPath', 'YeastH', 'True'),
	('ShortestPath', 'FRANKENSTEIN', 'True'),
	('ShortestPath', 'FRANKENSTEIN', 'False'),
	('StructuralSP', 'FRANKENSTEIN', 'True'),
	('StructuralSP', 'FRANKENSTEIN', 'False'),
	('StructuralSP', 'SN12CH', 'True'),
	('StructuralSP', 'SN12CH', 'False'),
	('ShortestPath', 'UACC257', 'True'),
	('ShortestPath', 'UACC257', 'False'),
	('StructuralSP', 'UACC257', 'True'),
	('StructuralSP', 'UACC257', 'False'),
	('ShortestPath', 'UACC257H', 'True'),
	('ShortestPath', 'UACC257H', 'False'),
	('StructuralSP', 'UACC257H', 'True'),
	('StructuralSP', 'UACC257H', 'False'),
	('ShortestPath', 'PC-3', 'True'),
	('ShortestPath', 'PC-3', 'False'),
	('StructuralSP', 'PC-3', 'True'),
	('StructuralSP', 'PC-3', 'False'),
	('ShortestPath', 'PC-3H', 'True'),
	('ShortestPath', 'PC-3H', 'False'),
	('StructuralSP', 'PC-3H', 'True'),
	('StructuralSP', 'PC-3H', 'False'),
	('ShortestPath', 'DBLP_v1', 'False'),
	('StructuralSP', 'DBLP_v1', 'True'),
	('ShortestPath', 'REDDIT-MULTI-12K', 'False'),
	('StructuralSP', 'REDDIT-MULTI-12K', 'False'),
	('ShortestPath', 'TWITTER-Real-Graph-Partial', 'True'),
	('ShortestPath', 'TWITTER-Real-Graph-Partial', 'False'),
	('StructuralSP', 'TWITTER-Real-Graph-Partial', 'True'),
	('StructuralSP', 'TWITTER-Real-Graph-Partial', 'False'),
]

MISS_LABEL_LIST = [
	('StructuralSP', 'GREC', 'True'),
	('StructuralSP', 'GREC', 'False'),
	('StructuralSP', 'Web', 'True'),
	('StructuralSP', 'Web', 'False'),
]

def get_job_script(kernel, dataset, fcsp):
# 	if (kernel, dataset, fcsp) in OUT_MEM_LIST:
# 		mem = '2560000'
# 	else:
	mem = '4000'

	script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="fcsp.space.""" + kernel + r"." + dataset + r"." + fcsp + r""""
#SBATCH --partition=court
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_fcsp.space.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#SBATCH --error="errors/error_fcsp.space.""" + kernel + r"." + dataset + r"." + fcsp + r""".txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=48:00:00
##SBATCH --mem-per-cpu=""" + mem + r"""
#SBATCH --mem=4000

srun hostname
cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/thesis/graph_kernels/fcsp
srun python3 compare_fcsp_space.py """ + kernel + r" " + dataset + r" " + fcsp

	script = script.strip()
	script = re.sub('\n\t+', '\n', script)
	script = re.sub('\n +', '\n', script)

	return script

def check_task_status(save_dir, *params):
	str_task_id = '.' + '.'.join(params)

	# Check if the task is in the out-of-memory, out-of-time, or missing-label lists.
	if params in OUT_MEM_LIST or params in OUT_TIME_LIST or params in MISS_LABEL_LIST:
		return True

	# Check if the task is running or queued in Slurm.
	command = 'squeue --user ljia02 --name "fcsp.space' + str_task_id + '" --format "%.2t" --noheader'
	stream = os.popen(command)
	output = stream.readlines()
	if len(output) > 0:
		return True

	# Check if the task is already computed.
	file_name = os.path.join(save_dir, 'space' + str_task_id + '.pkl')
	if os.path.isfile(file_name):
		with open(file_name, 'rb') as f:
			data = pickle.load(f)
			if data['completed']:
				return True

	return False

if __name__ == '__main__':
	save_dir = 'outputs/'
	os.makedirs(save_dir, exist_ok=True)
	os.makedirs('outputs/', exist_ok=True)
	os.makedirs('errors/', exist_ok=True)

	from sklearn.model_selection import ParameterGrid

	Dataset_List = [
		'Alkane_unlabeled', 'Alkane', 'Acyclic', 'MAO_lite', 'MAO',
		'PAH_unlabeled', 'PAH', 'MUTAG', 'Monoterpens',
		'Letter-high', 'Letter-med', 'Letter-low',
		'ENZYMES', 'AIDS', 'NCI1', 'NCI109', 'DD',
		# new: not so large.
		'PTC_FM', 'PTC_FR', 'PTC_MM', 'PTC_MR', 'Chiral', 'Vitamin_D',
		'ACE', 'Steroid', 'KKI', 'Fingerprint', 'IMDB-BINARY',
		'IMDB-MULTI', 'Peking_1', 'Cuneiform', 'OHSU', 'BZR', 'COX2',
		'DHFR', 'SYNTHETICnew', 'Synthie', 'SYNTHETIC',
		# new: large.
		'TWITTER-Real-Graph-Partial', 'GREC', 'Web', 'MCF-7',
		'MCF-7H', 'MOLT-4', 'MOLT-4H', 'NCI-H23', 'NCI-H23H',
		'OVCAR-8', 'OVCAR-8H', 'P388', 'P388H', 'PC-3', 'PC-3H',
		'SF-295', 'SF-295H', 'SN12C', 'SN12CH', 'SW-620', 'SW-620H',
		'TRIANGLES', 'UACC257', 'UACC257H', 'Yeast', 'YeastH',
		'COLORS-3', 'DBLP_v1', 'MSRC_9', 'MSRC_21', 'MSRC_21C',
		'COLLAB', 'COIL-DEL',
		'COIL-RAG', 'PROTEINS', 'PROTEINS_full', 'Mutagenicity',
		'REDDIT-BINARY', 'FRANKENSTEIN', 'REDDIT-MULTI-5K',
		'REDDIT-MULTI-12K']
	Kernel_List = ['ShortestPath', 'StructuralSP']
	fcsp_list = ['True', 'False']
	task_grid = ParameterGrid({'kernel': Kernel_List[:],
							   'dataset': Dataset_List[:],
							   'fcsp': fcsp_list[:]})

	from tqdm import tqdm
	for task in tqdm(list(task_grid), desc='submitting tasks/jobs'):
		if not check_task_status(save_dir, task['kernel'], task['dataset'], task['fcsp']):
			job_script = get_job_script(task['kernel'], task['dataset'], task['fcsp'])
			command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# 			print(command)
			os.system(command)
# 			os.popen(command)
# 			output = stream.readlines()

@@ -0,0 +1,253 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 7 15:24:58 2020

@author: ljia

@references:

	[1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
	Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
"""
import sys
from itertools import product
# from functools import partial
from gklearn.utils import get_iters
import numpy as np
from gklearn.utils.utils import getSPGraph
from gklearn.kernels import ShortestPath
import os
import pickle
from pympler import asizeof
import time
import networkx as nx

def load_results(file_name, fcsp):
	if os.path.isfile(file_name):
		with open(file_name, 'rb') as f:
			return pickle.load(f)
	else:
		results = {'nb_comparison': [], 'i': -1, 'j': -1, 'completed': False}
		if fcsp:
			results['vk_dict_mem'] = []
		return results


def save_results(file_name, results):
	with open(file_name, 'wb') as f:
		pickle.dump(results, f)

def estimate_vk_memory(obj, nb_nodes1, nb_nodes2):
# 	asizeof.asized(obj, detail=1).format()
# 	return asizeof.asizeof(obj)
	key, val = next(iter(obj.items()))
# 	key = dict.iterkeys().next()
# 	key_mem = asizeof.asizeof(key)
	dict_flat = sys.getsizeof(obj)
	key_mem = 64

	if isinstance(val, float):
		val_mem = 24
		mem = (key_mem + val_mem) * len(obj) + dict_flat + 28 * (nb_nodes1 + nb_nodes2)
	else:  # value is True or False
		mem = (key_mem) * len(obj) + dict_flat + 52 + 28 * (nb_nodes1 + nb_nodes2)

# 	print(mem, asizeof.asizeof(obj), '\n', asizeof.asized(obj, detail=3).format(), '\n')

	return mem
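
# --- Worked example (editor's addition): the constants above are rough
# 64-bit CPython sizes: ~64 bytes per 2-tuple key, ~24 per float value,
# ~28 per small-int node id, plus sys.getsizeof(dict) for the hash table
# itself; they approximate asizeof.asizeof(obj) without its runtime cost.
def _vk_memory_demo():
	# A dict of 6 float-valued entries between graphs with 2 and 3 nodes:
	d = {(i, j): 0.5 for i in range(2) for j in range(3)}
	# estimate = (64 + 24) * 6 + sys.getsizeof(d) + 28 * (2 + 3)
	return estimate_vk_memory(d, 2, 3)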

def compute_stats(file_name, results):
	del results['i']
	del results['j']
	results['nb_comparison'] = np.mean(results['nb_comparison'])
	results['completed'] = True
	if 'vk_dict_mem' in results and len(results['vk_dict_mem']) > 0:
		results['vk_dict_mem'] = np.mean(results['vk_dict_mem'])
	save_results(file_name, results)

class SPSpace(ShortestPath):

	def __init__(self, **kwargs):
		super().__init__(**kwargs)
		self._file_name = kwargs.get('file_name')


# 	@profile
	def _compute_gm_series(self):
		self._all_graphs_have_edges(self._graphs)
		# get shortest path graph of each graph.
		iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

		results = load_results(self._file_name, self._fcsp)

		# compute Gram matrix. It is left as zeros: this class only records
		# comparison counts and memory estimates, not kernel values.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		from itertools import combinations_with_replacement
		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
		iterator = get_iters(itr, desc='Computing kernels',
							 length=len_itr, file=sys.stdout, verbose=(self._verbose >= 2))

		time0 = time.time()
		for i, j in iterator:
			# Skip pairs already processed in a previous (interrupted) run.
			if i > results['i'] or (i == results['i'] and j > results['j']):
				data = self._sp_do_space(self._graphs[i], self._graphs[j])
				if self._fcsp:
					results['nb_comparison'].append(data[0])
					if data[1] != {}:
						results['vk_dict_mem'].append(estimate_vk_memory(data[1],
							nx.number_of_nodes(self._graphs[i]),
							nx.number_of_nodes(self._graphs[j])))
				else:
					results['nb_comparison'].append(data)

				results['i'] = i
				results['j'] = j

				# Save intermediate results every 10 minutes.
				time1 = time.time()
				if time1 - time0 > 600:
					save_results(self._file_name, results)
					time0 = time1

		compute_stats(self._file_name, results)

		return gram_matrix

	def _sp_do_space(self, g1, g2):
		if self._fcsp:  # @todo: it may be put outside the _sp_do().
			return self._sp_do_fcsp(g1, g2)
		else:
			return self._sp_do_naive(g1, g2)


	def _sp_do_fcsp(self, g1, g2):
		nb_comparison = 0

		# compute shortest path matrices first, method borrowed from FCSP.
		vk_dict = {}  # shortest path matrices dict
		if len(self._node_labels) > 0:  # @todo: it may be put outside the _sp_do().
			# node symb and non-symb labeled
			if len(self._node_attrs) > 0:
				kn = self._node_kernels['mix']
				for n1, n2 in product(
						g1.nodes(data=True), g2.nodes(data=True)):
					n1_labels = [n1[1][nl] for nl in self._node_labels]
					n2_labels = [n2[1][nl] for nl in self._node_labels]
					n1_attrs = [n1[1][na] for na in self._node_attrs]
					n2_attrs = [n2[1][na] for na in self._node_attrs]
					vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
					nb_comparison += 1
			# node symb labeled
			else:
				kn = self._node_kernels['symb']
				for n1 in g1.nodes(data=True):
					for n2 in g2.nodes(data=True):
						n1_labels = [n1[1][nl] for nl in self._node_labels]
						n2_labels = [n2[1][nl] for nl in self._node_labels]
						vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
						nb_comparison += 1
		else:
			# node non-symb labeled
			if len(self._node_attrs) > 0:
				kn = self._node_kernels['nsymb']
				for n1 in g1.nodes(data=True):
					for n2 in g2.nodes(data=True):
						n1_attrs = [n1[1][na] for na in self._node_attrs]
						n2_attrs = [n2[1][na] for na in self._node_attrs]
						vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
						nb_comparison += 1
			# node unlabeled
			else:
				for e1, e2 in product(
						g1.edges(data=True), g2.edges(data=True)):
					pass
# 					if e1[2]['cost'] == e2[2]['cost']:
# 						kernel += 1
# 					nb_comparison += 1

		return nb_comparison, vk_dict

# 		# compute graph kernels
# 		if self._ds_infos['directed']:
# 			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# 				if e1[2]['cost'] == e2[2]['cost']:
# 					nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
# 					kn1 = nk11 * nk22
# 					kernel += kn1
# 		else:
# 			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# 				if e1[2]['cost'] == e2[2]['cost']:
# 					# each edge walk is counted twice, starting from both its extreme nodes.
# 					nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
# 						e1[0], e2[1])], vk_dict[(e1[1], e2[0])], vk_dict[(e1[1], e2[1])]
# 					kn1 = nk11 * nk22
# 					kn2 = nk12 * nk21
# 					kernel += kn1 + kn2
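
	# --- Illustrative sketch (editor's addition): the trade-off FCSP makes.
	# For labelled graphs, _sp_do_fcsp above performs |V1|*|V2| vertex-kernel
	# evaluations once, while the naive scheme (_sp_do_naive below)
	# re-evaluates up to 4 vertex kernels for every pair of matching edges.
	# A hypothetical counting helper, assuming undirected shortest-path
	# graphs whose edges carry the 'cost' attribute set by getSPGraph:
	@staticmethod
	def _fcsp_vs_naive_cost_demo(g1, g2):
		fcsp_cost = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
		naive_cost = sum(4 for e1 in g1.edges(data=True)
						 for e2 in g2.edges(data=True)
						 if e1[2]['cost'] == e2[2]['cost'])
		return fcsp_cost, naive_cost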

	def _sp_do_naive(self, g1, g2):
		nb_comparison = 0

		# Define the function to compute kernels between vertices in each condition.
		if len(self._node_labels) > 0:
			# node symb and non-symb labeled
			if len(self._node_attrs) > 0:
				def compute_vk(n1, n2):
					kn = self._node_kernels['mix']
					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
					return kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
			# node symb labeled
			else:
				def compute_vk(n1, n2):
					kn = self._node_kernels['symb']
					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
					return kn(n1_labels, n2_labels)
		else:
			# node non-symb labeled
			if len(self._node_attrs) > 0:
				def compute_vk(n1, n2):
					kn = self._node_kernels['nsymb']
					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
					return kn(n1_attrs, n2_attrs)
			# node unlabeled
			else:
# 				for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# 					if e1[2]['cost'] == e2[2]['cost']:
# 						kernel += 1
				return 0

		# compute graph kernels
		if self._ds_infos['directed']:
			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
				if e1[2]['cost'] == e2[2]['cost']:
# 					nk11, nk22 = compute_vk(e1[0], e2[0]), compute_vk(e1[1], e2[1])
# 					kn1 = nk11 * nk22
# 					kernel += kn1
					# two vertex-kernel evaluations per matching edge pair.
					nb_comparison += 2
		else:
			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
				if e1[2]['cost'] == e2[2]['cost']:
					# each edge walk is counted twice, starting from both its extreme nodes.
# 					nk11, nk12, nk21, nk22 = compute_vk(e1[0], e2[0]), compute_vk(
# 						e1[0], e2[1]), compute_vk(e1[1], e2[0]), compute_vk(e1[1], e2[1])
# 					kn1 = nk11 * nk22
# 					kn2 = nk12 * nk21
# 					kernel += kn1 + kn2
					# four vertex-kernel evaluations per matching edge pair.
					nb_comparison += 4

		return nb_comparison

@@ -0,0 +1,439 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 30 11:59:57 2020

@author: ljia

@references:

	[1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For
	Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).
"""
import sys
from itertools import product
from gklearn.utils import get_iters
import numpy as np
import time
import os, errno
import pickle
from pympler import asizeof
import networkx as nx
from gklearn.utils.utils import get_shortest_paths
from gklearn.kernels import StructuralSP


def load_splist(file_name):
	if os.path.isfile(file_name):
		with open(file_name, 'rb') as f:
			return pickle.load(f)
	else:
		results_path = {'splist': [], 'i': -1, 'completed': False}
		return results_path

def load_results(file_name, fcsp):
	if os.path.isfile(file_name):
		with open(file_name, 'rb') as f:
			return pickle.load(f)
	else:
		results = {'nb_v_comparison': [], 'nb_e_comparison': [], 'i': -1, 'j': -1, 'completed': False}
		if fcsp:
			results['vk_dict_mem'] = []
			results['ek_dict_mem'] = []
		return results


def save_results(file_name, results):
	with open(file_name, 'wb') as f:
		pickle.dump(results, f)


def estimate_vk_memory(obj, nb_nodes1, nb_nodes2):
# 	asizeof.asized(obj, detail=1).format()
# 	return asizeof.asizeof(obj)
	key, val = next(iter(obj.items()))
# 	key = dict.iterkeys().next()
# 	key_mem = asizeof.asizeof(key)
	dict_flat = sys.getsizeof(obj)
	key_mem = 64

	if isinstance(val, float):
		val_mem = 24
		mem = (key_mem + val_mem) * len(obj) + dict_flat + 28 * (nb_nodes1 + nb_nodes2)
	else:  # value is True or False
		mem = (key_mem) * len(obj) + dict_flat + 52 + 28 * (nb_nodes1 + nb_nodes2)

# 	print(mem, asizeof.asizeof(obj), '\n', asizeof.asized(obj, detail=3).format(), '\n')

	return mem

def estimate_ek_memory(obj, nb_nodes1, nb_nodes2):
# 	asizeof.asized(obj, detail=1).format()
# 	return asizeof.asizeof(obj)
	key, val = next(iter(obj.items()))
# 	key = dict.iterkeys().next()
# 	key_mem = asizeof.asizeof(key)
	dict_flat = sys.getsizeof(obj)
	key_mem = 192

	if isinstance(val, float):
		val_mem = 24
		mem = (key_mem + val_mem) * len(obj) + dict_flat + 28 * (nb_nodes1 + nb_nodes2)
	else:  # value is True or False
		mem = (key_mem) * len(obj) + dict_flat + 52 + 28 * (nb_nodes1 + nb_nodes2)

# 	print(mem, asizeof.asizeof(obj), '\n', asizeof.asized(obj, detail=3).format(), '\n')

	return mem

def compute_stats(file_name, results, splist):
	del results['i']
	del results['j']
	results['nb_v_comparison'] = np.mean(results['nb_v_comparison'])
# 	if len(results['nb_e_comparison']) > 0:
	results['nb_e_comparison'] = np.mean(results['nb_e_comparison'])
	results['completed'] = True
	if 'vk_dict_mem' in results and len(results['vk_dict_mem']) > 0:
		results['vk_dict_mem'] = np.mean(results['vk_dict_mem'])
	if 'ek_dict_mem' in results and len(results['ek_dict_mem']) > 0:
		results['ek_dict_mem'] = np.mean(results['ek_dict_mem'])
	results['nb_sp_ave'] = np.mean([len(ps) for ps in splist])
	results['sp_len_ave'] = np.mean([np.mean([len(p) for p in ps]) for ps in splist])
	results['sp_mem_all'] = asizeof.asizeof(splist)
	save_results(file_name, results)

class SSPSpace(StructuralSP):

	def __init__(self, **kwargs):
		super().__init__(**kwargs)
		self._file_name = kwargs.get('file_name')


# 	@profile
	def _compute_gm_series(self):
		# get shortest paths of each graph in the graphs.
		fn_paths = os.path.splitext(self._file_name)[0] + '.paths.pkl'
		results_path = load_splist(fn_paths)

		if not results_path['completed']:
			iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
			if self._compute_method == 'trie':
				for g in iterator:
					results_path['splist'].append(self._get_sps_as_trie(g))
			else:
				time0 = time.time()
				for i, g in enumerate(iterator):
					# Skip graphs whose shortest paths were computed in a previous run.
					if i > results_path['i']:
						results_path['splist'].append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
						results_path['i'] = i

						# Save intermediate results every 10 minutes.
						time1 = time.time()
						if time1 - time0 > 600:
							save_results(fn_paths, results_path)
							time0 = time1

			del results_path['i']
			results_path['completed'] = True
			save_results(fn_paths, results_path)

		splist = results_path['splist']

		results = load_results(self._file_name, self._fcsp)

		# compute Gram matrix. It is left as zeros: this class only records
		# comparison counts and memory estimates, not kernel values.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		from itertools import combinations_with_replacement
		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
		iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
							 length=len_itr, verbose=(self._verbose >= 2))

		if self._compute_method == 'trie':
			for i, j in iterator:
				kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
				gram_matrix[i][j] = kernel
				gram_matrix[j][i] = kernel
		else:
			time0 = time.time()
			for i, j in iterator:
				if i > results['i'] or (i == results['i'] and j > results['j']):
					data = self._ssp_do_naive_space(self._graphs[i], self._graphs[j], splist[i], splist[j])
					results['nb_v_comparison'].append(data[0])
					results['nb_e_comparison'].append(data[1])
					if self._fcsp:
						if data[2] != {}:
							results['vk_dict_mem'].append(estimate_vk_memory(data[2],
								nx.number_of_nodes(self._graphs[i]),
								nx.number_of_nodes(self._graphs[j])))
						if data[3] != {}:
							results['ek_dict_mem'].append(estimate_ek_memory(data[3],
								nx.number_of_nodes(self._graphs[i]),
								nx.number_of_nodes(self._graphs[j])))

					results['i'] = i
					results['j'] = j

					# Save intermediate results every 10 minutes.
					time1 = time.time()
					if time1 - time0 > 600:
						save_results(self._file_name, results)
						time0 = time1

		compute_stats(self._file_name, results, splist)

		# @todo: the path file may not be removed if the program stops exactly here.
		try:
			os.remove(fn_paths)
		except OSError as e:
			if e.errno != errno.ENOENT:
				raise

		return gram_matrix

	def _ssp_do_naive_space(self, g1, g2, spl1, spl2):
		if self._fcsp:  # @todo: it may be put outside the _sp_do().
			return self._sp_do_naive_fcsp(g1, g2, spl1, spl2)
		else:
			return self._sp_do_naive_naive(g1, g2, spl1, spl2)


	def _sp_do_naive_fcsp(self, g1, g2, spl1, spl2):
		# First, compute shortest path matrices, method borrowed from FCSP.
		vk_dict, nb_v_comparison = self._get_all_node_kernels(g1, g2)

		# Then, compute kernels between all pairs of edges, an extension of the
		# FCSP idea. It suits sparse graphs, which covers most of the cases we
		# went through; for dense graphs it would be slow.
		ek_dict, nb_e_comparison = self._get_all_edge_kernels(g1, g2)

		return nb_v_comparison, nb_e_comparison, vk_dict, ek_dict

	def _sp_do_naive_naive(self, g1, g2, spl1, spl2):
		nb_v_comparison = 0
		nb_e_comparison = 0

		# Define the function to compute kernels between vertices in each condition.
		if len(self._node_labels) > 0:
			# node symb and non-symb labeled
			if len(self._node_attrs) > 0:
				def compute_vk(n1, n2):
					kn = self._node_kernels['mix']
					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
					return kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
			# node symb labeled
			else:
				def compute_vk(n1, n2):
					kn = self._node_kernels['symb']
					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
					return kn(n1_labels, n2_labels)
		else:
			# node non-symb labeled
			if len(self._node_attrs) > 0:
				def compute_vk(n1, n2):
					kn = self._node_kernels['nsymb']
					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
					return kn(n1_attrs, n2_attrs)
# 			# node unlabeled
# 			else:
# 				for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
# 					if e1[2]['cost'] == e2[2]['cost']:
# 						kernel += 1
# 				return kernel

		# Define the function to compute kernels between edges in each condition.
		if len(self._edge_labels) > 0:
			# edge symb and non-symb labeled
			if len(self._edge_attrs) > 0:
				def compute_ek(e1, e2):
					ke = self._edge_kernels['mix']
					e1_labels = [g1.edges[e1][el] for el in self._edge_labels]
					e2_labels = [g2.edges[e2][el] for el in self._edge_labels]
					e1_attrs = [g1.edges[e1][ea] for ea in self._edge_attrs]
					e2_attrs = [g2.edges[e2][ea] for ea in self._edge_attrs]
					return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
			# edge symb labeled
			else:
				def compute_ek(e1, e2):
					ke = self._edge_kernels['symb']
					e1_labels = [g1.edges[e1][el] for el in self._edge_labels]
					e2_labels = [g2.edges[e2][el] for el in self._edge_labels]
					return ke(e1_labels, e2_labels)
		else:
			# edge non-symb labeled
			if len(self._edge_attrs) > 0:
				def compute_ek(e1, e2):
					ke = self._edge_kernels['nsymb']
					e1_attrs = [g1.edges[e1][ea] for ea in self._edge_attrs]
					e2_attrs = [g2.edges[e2][ea] for ea in self._edge_attrs]
					return ke(e1_attrs, e2_attrs)

		# compute graph kernels
		if len(self._node_labels) > 0 or len(self._node_attrs) > 0:
			if len(self._edge_labels) > 0 or len(self._edge_attrs) > 0:
				for p1, p2 in product(spl1, spl2):
					if len(p1) == len(p2):
# 						nb_v_comparison = len(p1)
# 						nb_e_comparison = len(p1) - 1
						kpath = compute_vk(p1[0], p2[0])
						nb_v_comparison += 1
						if kpath:
							for idx in range(1, len(p1)):
								kpath *= compute_vk(p1[idx], p2[idx]) * \
									compute_ek((p1[idx - 1], p1[idx]),
											   (p2[idx - 1], p2[idx]))
								nb_v_comparison += 1
								nb_e_comparison += 1
								if not kpath:
									break
# 							kernel += kpath  # add up kernels of all paths
			else:
				for p1, p2 in product(spl1, spl2):
					if len(p1) == len(p2):
						kpath = compute_vk(p1[0], p2[0])
						nb_v_comparison += 1
						if kpath:
							for idx in range(1, len(p1)):
								kpath *= compute_vk(p1[idx], p2[idx])
								nb_v_comparison += 1
								if not kpath:
									break
# 							kernel += kpath  # add up kernels of all paths
		else:
			if len(self._edge_labels) > 0 or len(self._edge_attrs) > 0:
				for p1, p2 in product(spl1, spl2):
					if len(p1) == len(p2):
						if len(p1) == 0:
							pass
						else:
							kpath = 1
							for idx in range(0, len(p1) - 1):
								kpath *= compute_ek((p1[idx], p1[idx + 1]),
													(p2[idx], p2[idx + 1]))
								nb_e_comparison += 1
								if not kpath:
									break
			else:
				pass
# 				for p1, p2 in product(spl1, spl2):
# 					if len(p1) == len(p2):
# 						kernel += 1

# 		try:
# 			kernel = kernel / (len(spl1) * len(spl2))  # Compute mean average
# 		except ZeroDivisionError:
# 			print(spl1, spl2)
# 			print(g1.nodes(data=True))
# 			print(g1.edges(data=True))
# 			raise Exception

		return nb_v_comparison, nb_e_comparison
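
	# --- Illustrative sketch (editor's addition): what one path-pair term
	# above computes. For equal-length paths the kernel is the product of
	# vertex kernels interleaved with edge kernels, short-circuiting once a
	# factor is zero. vk/ek are hypothetical callables standing in for
	# compute_vk/compute_ek.
	@staticmethod
	def _path_pair_kernel_demo(p1, p2, vk, ek):
		if len(p1) != len(p2):
			return 0
		kpath = vk(p1[0], p2[0])
		for idx in range(1, len(p1)):
			if not kpath:
				break  # a zero factor makes the whole product zero
			kpath *= vk(p1[idx], p2[idx]) * ek((p1[idx - 1], p1[idx]),
											   (p2[idx - 1], p2[idx]))
		return kpath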

	def _get_all_node_kernels(self, g1, g2):
		nb_comparison = 0

		vk_dict = {}  # shortest path matrices dict
		if len(self._node_labels) > 0:
			# node symb and non-symb labeled
			if len(self._node_attrs) > 0:
				kn = self._node_kernels['mix']
				for n1 in g1.nodes(data=True):
					for n2 in g2.nodes(data=True):
						n1_labels = [n1[1][nl] for nl in self._node_labels]
						n2_labels = [n2[1][nl] for nl in self._node_labels]
						n1_attrs = [n1[1][na] for na in self._node_attrs]
						n2_attrs = [n2[1][na] for na in self._node_attrs]
						vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
						nb_comparison += 1
			# node symb labeled
			else:
				kn = self._node_kernels['symb']
				for n1 in g1.nodes(data=True):
					for n2 in g2.nodes(data=True):
						n1_labels = [n1[1][nl] for nl in self._node_labels]
						n2_labels = [n2[1][nl] for nl in self._node_labels]
						vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
						nb_comparison += 1
		else:
			# node non-symb labeled
			if len(self._node_attrs) > 0:
				kn = self._node_kernels['nsymb']
				for n1 in g1.nodes(data=True):
					for n2 in g2.nodes(data=True):
						n1_attrs = [n1[1][na] for na in self._node_attrs]
						n2_attrs = [n2[1][na] for na in self._node_attrs]
						vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
						nb_comparison += 1
			# node unlabeled
			else:
				pass  # @todo: add edge weights.
# 				for e1 in g1.edges(data=True):
# 					for e2 in g2.edges(data=True):
# 						if e1[2]['cost'] == e2[2]['cost']:
# 							kernel += 1
# 				return kernel

		return vk_dict, nb_comparison

	def _get_all_edge_kernels(self, g1, g2):
		nb_comparison = 0

		# compute kernels between all pairs of edges, an extension of the FCSP
		# idea. It suits sparse graphs, which covers most of the cases we went
		# through; for dense graphs it would be slow.
		ek_dict = {}  # dict of edge kernels
		if len(self._edge_labels) > 0:
			# edge symb and non-symb labeled
			if len(self._edge_attrs) > 0:
				ke = self._edge_kernels['mix']
				for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
					e1_labels = [e1[2][el] for el in self._edge_labels]
					e2_labels = [e2[2][el] for el in self._edge_labels]
					e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
					e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
					ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
					ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
					ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
					ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
					ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
					nb_comparison += 1
			# edge symb labeled
			else:
				ke = self._edge_kernels['symb']
				for e1 in g1.edges(data=True):
					for e2 in g2.edges(data=True):
						e1_labels = [e1[2][el] for el in self._edge_labels]
						e2_labels = [e2[2][el] for el in self._edge_labels]
						ek_temp = ke(e1_labels, e2_labels)
						ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
						ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
						ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
						ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
						nb_comparison += 1
		else:
			# edge non-symb labeled
			if len(self._edge_attrs) > 0:
				ke = self._edge_kernels['nsymb']
				for e1 in g1.edges(data=True):
					for e2 in g2.edges(data=True):
						e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
						e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
						ek_temp = ke(e1_attrs, e2_attrs)
						ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
						ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
						ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
						ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
						nb_comparison += 1
			# edge unlabeled
			else:
				pass

		return ek_dict, nb_comparison
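
	# --- Editor's note with a minimal sketch: for undirected graphs an edge
	# (u, v) may be traversed in either direction along a path, so each kernel
	# value is stored above under all four key orientations and the path loop
	# never has to normalise its lookup key. A hypothetical alternative that
	# stores one orientation and normalises at lookup time instead:
	@staticmethod
	def _lookup_ek_demo(ek_dict, e1, e2):
		for key in ((e1, e2),
					((e1[1], e1[0]), e2),
					(e1, (e2[1], e2[0])),
					((e1[1], e1[0]), (e2[1], e2[0]))):
			if key in ek_dict:
				return ek_dict[key]
		raise KeyError((e1, e2))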