Add the chunksize argument to the function version of graph kernels.

5 years ago · c898c62eb8
--- a/gklearn/kernels/commonWalkKernel.py
+++ b/gklearn/kernels/commonWalkKernel.py
@@ -3,9 +3,9 @@

@references: 

    [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: 
    Hardness results and efficient alternatives. Learning Theory and Kernel
    Machines, pages 129–143, 2003.
 	[1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: 
 	Hardness results and efficient alternatives. Learning Theory and Kernel
 	Machines, pages 129–143, 2003.
 """

 import sys
@@ -22,428 +22,429 @@ from gklearn.utils.parallel import parallel_gm


 def commonwalkkernel(*args,
                     node_label='atom',
                     edge_label='bond_type',
 #                     n=None,
                     weight=1,
                     compute_method=None,
                     n_jobs=None,
                     verbose=True):
    """Calculate common walk graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    
    G1, G2 : NetworkX graphs
        Two graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as symbolic label. The default node label is 'atom'.
    edge_label : string
        Edge attribute used as symbolic label. The default edge label is 'bond_type'.
    weight: integer
        Weight coefficient of different lengths of walks, which represents beta
        in 'exp' method and gamma in 'geo'.
    compute_method : string
        Method used to compute walk kernel. The Following choices are 
        available:

        'exp': method based on exponential serials applied on the direct 
        product graph, as shown in reference [1]. The time complexity is O(n^6) 
        for graphs with n vertices.

        'geo': method based on geometric serials applied on the direct product 
        graph, as shown in reference [1]. The time complexity is O(n^6) for 
        graphs with n vertices.

    n_jobs : int
        Number of jobs for parallelization. 

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is a common walk kernel between 2 
        graphs.
    """
 #    n : integer
 #        Longest length of walks. Only useful when applying the 'brute' method.
 #        'brute': brute force, simply search for all walks and compare them.
    compute_method = compute_method.lower()
    # arrange all graphs in a list
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    
    # remove graphs with only 1 node, as they do not have adjacency matrices 
    len_gn = len(Gn)
    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 1]
    idx = [G[0] for G in Gn]
    Gn = [G[1] for G in Gn]
    if len(Gn) != len_gn:
        if verbose:
            print('\n %d graphs are removed as they have only 1 node.\n' %
                  (len_gn - len(Gn)))
        
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
        node_label=node_label, edge_label=edge_label)
    if not ds_attrs['node_labeled']:
        for G in Gn:
            nx.set_node_attributes(G, '0', 'atom')
    if not ds_attrs['edge_labeled']:
        for G in Gn:
            nx.set_edge_attributes(G, '0', 'bond_type')
    if not ds_attrs['is_directed']:  #  convert
        Gn = [G.to_directed() for G in Gn]

    start_time = time.time()
    
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # ---- use pool.imap_unordered to parallel and track progress. ----
    def init_worker(gn_toshare):
        global G_gn
        G_gn = gn_toshare
    # direct product graph method - exponential
    if compute_method == 'exp':
        do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
    # direct product graph method - geometric
    elif compute_method == 'geo':
        do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)  
    parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
                glbv=(Gn,), n_jobs=n_jobs, verbose=verbose)  
    
    
 #    pool = Pool(n_jobs)
 #    itr = zip(combinations_with_replacement(Gn, 2),
 #              combinations_with_replacement(range(0, len(Gn)), 2))
 #    len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
 #    if len_itr < 1000 * n_jobs:
 #        chunksize = int(len_itr / n_jobs) + 1
 #    else:
 #        chunksize = 1000
 					 node_label='atom',
 					 edge_label='bond_type',
 #					 n=None,
 					 weight=1,
 					 compute_method=None,
 					 n_jobs=None,
 					 chunksize=None,
 					 verbose=True):
 	"""Calculate common walk graph kernels between graphs.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
 		List of graphs between which the kernels are calculated.
 	
 	G1, G2 : NetworkX graphs
 		Two graphs between which the kernel is calculated.
 	node_label : string
 		Node attribute used as symbolic label. The default node label is 'atom'.
 	edge_label : string
 		Edge attribute used as symbolic label. The default edge label is 'bond_type'.
 	weight: integer
 		Weight coefficient of different lengths of walks, which represents beta
 		in 'exp' method and gamma in 'geo'.
 	compute_method : string
 		Method used to compute walk kernel. The Following choices are 
 		available:

 		'exp': method based on exponential serials applied on the direct 
 		product graph, as shown in reference [1]. The time complexity is O(n^6) 
 		for graphs with n vertices.

 		'geo': method based on geometric serials applied on the direct product 
 		graph, as shown in reference [1]. The time complexity is O(n^6) for 
 		graphs with n vertices.

 	n_jobs : int
 		Number of jobs for parallelization. 

 	Return
 	------
 	Kmatrix : Numpy matrix
 		Kernel matrix, each element of which is a common walk kernel between 2 
 		graphs.
 	"""
 #	n : integer
 #		Longest length of walks. Only useful when applying the 'brute' method.
 #		'brute': brute force, simply search for all walks and compare them.
 	compute_method = compute_method.lower()
 	# arrange all graphs in a list
 	Gn = args[0] if len(args) == 1 else [args[0], args[1]]
 	
 	# remove graphs with only 1 node, as they do not have adjacency matrices 
 	len_gn = len(Gn)
 	Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 1]
 	idx = [G[0] for G in Gn]
 	Gn = [G[1] for G in Gn]
 	if len(Gn) != len_gn:
 		if verbose:
 			print('\n %d graphs are removed as they have only 1 node.\n' %
 				  (len_gn - len(Gn)))
 		
 	ds_attrs = get_dataset_attributes(
 		Gn,
 		attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
 		node_label=node_label, edge_label=edge_label)
 	if not ds_attrs['node_labeled']:
 		for G in Gn:
 			nx.set_node_attributes(G, '0', 'atom')
 	if not ds_attrs['edge_labeled']:
 		for G in Gn:
 			nx.set_edge_attributes(G, '0', 'bond_type')
 	if not ds_attrs['is_directed']:  #  convert
 		Gn = [G.to_directed() for G in Gn]

 	start_time = time.time()
 	
 	Kmatrix = np.zeros((len(Gn), len(Gn)))

 	# ---- use pool.imap_unordered to parallel and track progress. ----
 	def init_worker(gn_toshare):
 		global G_gn
 		G_gn = gn_toshare
 	# direct product graph method - exponential
 	if compute_method == 'exp':
 		do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
 	# direct product graph method - geometric
 	elif compute_method == 'geo':
 		do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)  
 	parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
 				glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)  
 	
 	
 #	pool = Pool(n_jobs)
 #	itr = zip(combinations_with_replacement(Gn, 2),
 #			  combinations_with_replacement(range(0, len(Gn)), 2))
 #	len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
 #	if len_itr < 1000 * n_jobs:
 #		chunksize = int(len_itr / n_jobs) + 1
 #	else:
 #		chunksize = 1000
 #
 #    # direct product graph method - exponential
 #    if compute_method == 'exp':
 #        do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
 #    # direct product graph method - geometric
 #    elif compute_method == 'geo':
 #        do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)
 #	# direct product graph method - exponential
 #	if compute_method == 'exp':
 #		do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
 #	# direct product graph method - geometric
 #	elif compute_method == 'geo':
 #		do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)
 #
 #    for i, j, kernel in tqdm(
 #            pool.imap_unordered(do_partial, itr, chunksize),
 #            desc='calculating kernels',
 #            file=sys.stdout):
 #        Kmatrix[i][j] = kernel
 #        Kmatrix[j][i] = kernel
 #    pool.close()
 #    pool.join()


 #    # ---- direct running, normally use single CPU core. ----
 #    # direct product graph method - exponential
 #    itr = combinations_with_replacement(range(0, len(Gn)), 2)
 #    if compute_method == 'exp':
 #        for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
 #            Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
 #                                                      edge_label, weight)
 #            Kmatrix[j][i] = Kmatrix[i][j]
 #	for i, j, kernel in tqdm(
 #			pool.imap_unordered(do_partial, itr, chunksize),
 #			desc='calculating kernels',
 #			file=sys.stdout):
 #		Kmatrix[i][j] = kernel
 #		Kmatrix[j][i] = kernel
 #	pool.close()
 #	pool.join()


 #	# ---- direct running, normally use single CPU core. ----
 #	# direct product graph method - exponential
 #	itr = combinations_with_replacement(range(0, len(Gn)), 2)
 #	if compute_method == 'exp':
 #		for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
 #			Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
 #													  edge_label, weight)
 #			Kmatrix[j][i] = Kmatrix[i][j]
 #
 #    # direct product graph method - geometric
 #    elif compute_method == 'geo':
 #        for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
 #            Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
 #                                                      edge_label, weight)
 #            Kmatrix[j][i] = Kmatrix[i][j]


 #    # search all paths use brute force.
 #    elif compute_method == 'brute':
 #        n = int(n)
 #        # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
 #        all_walks = [
 #            find_all_walks_until_length(Gn[i], n, node_label, edge_label)
 #                for i in range(0, len(Gn))
 #        ]
 #	# direct product graph method - geometric
 #	elif compute_method == 'geo':
 #		for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
 #			Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
 #													  edge_label, weight)
 #			Kmatrix[j][i] = Kmatrix[i][j]


 #	# search all paths use brute force.
 #	elif compute_method == 'brute':
 #		n = int(n)
 #		# get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
 #		all_walks = [
 #			find_all_walks_until_length(Gn[i], n, node_label, edge_label)
 #				for i in range(0, len(Gn))
 #		]
 #
 #        for i in range(0, len(Gn)):
 #            for j in range(i, len(Gn)):
 #                Kmatrix[i][j] = _commonwalkkernel_brute(
 #                    all_walks[i],
 #                    all_walks[j],
 #                    node_label=node_label,
 #                    edge_label=edge_label)
 #                Kmatrix[j][i] = Kmatrix[i][j]
 #		for i in range(0, len(Gn)):
 #			for j in range(i, len(Gn)):
 #				Kmatrix[i][j] = _commonwalkkernel_brute(
 #					all_walks[i],
 #					all_walks[j],
 #					node_label=node_label,
 #					edge_label=edge_label)
 #				Kmatrix[j][i] = Kmatrix[i][j]

    run_time = time.time() - start_time
    if verbose:
        print("\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---"
              % (len(Gn), run_time))
 	run_time = time.time() - start_time
 	if verbose:
 		print("\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---"
 			  % (len(Gn), run_time))

    return Kmatrix, run_time, idx
 	return Kmatrix, run_time, idx


 def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):
    """Calculate walk graph kernels up to n between 2 graphs using exponential 
    series.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.
    beta : integer
        Weight.
    ij : tuple of integer
        Index of graphs between which the kernel is computed.

    Return
    ------
    kernel : float
        The common walk Kernel between 2 graphs.
    """

    # get tensor product / direct product
    gp = direct_product(g1, g2, node_label, edge_label)
    # return 0 if the direct product graph have no more than 1 node.
    if nx.number_of_nodes(gp) < 2:
        return 0
    A = nx.adjacency_matrix(gp).todense()
    # print(A)

    # from matplotlib import pyplot as plt
    # nx.draw_networkx(G1)
    # plt.show()
    # nx.draw_networkx(G2)
    # plt.show()
    # nx.draw_networkx(gp)
    # plt.show()
    # print(G1.nodes(data=True))
    # print(G2.nodes(data=True))
    # print(gp.nodes(data=True))
    # print(gp.edges(data=True))

    ew, ev = np.linalg.eig(A)
    # print('ew: ', ew)
    # print(ev)
    # T = np.matrix(ev)
    # print('T: ', T)
    # T = ev.I
    D = np.zeros((len(ew), len(ew)))
    for i in range(len(ew)):
        D[i][i] = np.exp(beta * ew[i])
        # print('D: ', D)
    # print('hshs: ', T.I * D * T)

    # print(np.exp(-2))
    # print(D)
    # print(np.exp(weight * D))
    # print(ev)
    # print(np.linalg.inv(ev))
    exp_D = ev * D * ev.T
    # print(exp_D)
    # print(np.exp(weight * A))
    # print('-------')

    return exp_D.sum()
 	"""Calculate walk graph kernels up to n between 2 graphs using exponential 
 	series.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
 		List of graphs between which the kernels are calculated.
 	node_label : string
 		Node attribute used as label.
 	edge_label : string
 		Edge attribute used as label.
 	beta : integer
 		Weight.
 	ij : tuple of integer
 		Index of graphs between which the kernel is computed.

 	Return
 	------
 	kernel : float
 		The common walk Kernel between 2 graphs.
 	"""

 	# get tensor product / direct product
 	gp = direct_product(g1, g2, node_label, edge_label)
 	# return 0 if the direct product graph have no more than 1 node.
 	if nx.number_of_nodes(gp) < 2:
 		return 0
 	A = nx.adjacency_matrix(gp).todense()
 	# print(A)

 	# from matplotlib import pyplot as plt
 	# nx.draw_networkx(G1)
 	# plt.show()
 	# nx.draw_networkx(G2)
 	# plt.show()
 	# nx.draw_networkx(gp)
 	# plt.show()
 	# print(G1.nodes(data=True))
 	# print(G2.nodes(data=True))
 	# print(gp.nodes(data=True))
 	# print(gp.edges(data=True))

 	ew, ev = np.linalg.eig(A)
 	# print('ew: ', ew)
 	# print(ev)
 	# T = np.matrix(ev)
 	# print('T: ', T)
 	# T = ev.I
 	D = np.zeros((len(ew), len(ew)))
 	for i in range(len(ew)):
 		D[i][i] = np.exp(beta * ew[i])
 		# print('D: ', D)
 	# print('hshs: ', T.I * D * T)

 	# print(np.exp(-2))
 	# print(D)
 	# print(np.exp(weight * D))
 	# print(ev)
 	# print(np.linalg.inv(ev))
 	exp_D = ev * D * ev.T
 	# print(exp_D)
 	# print(np.exp(weight * A))
 	# print('-------')

 	return exp_D.sum()


 def wrapper_cw_exp(node_label, edge_label, beta, itr):
    i = itr[0]
    j = itr[1]
    return i, j, _commonwalkkernel_exp(G_gn[i], G_gn[j], node_label, edge_label, beta)
 	i = itr[0]
 	j = itr[1]
 	return i, j, _commonwalkkernel_exp(G_gn[i], G_gn[j], node_label, edge_label, beta)


 def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma):
    """Calculate common walk graph kernels up to n between 2 graphs using 
    geometric series.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.
    gamma: integer
        Weight.
    ij : tuple of integer
        Index of graphs between which the kernel is computed.

    Return
    ------
    kernel : float
        The common walk Kernel between 2 graphs.
    """
    # get tensor product / direct product
    gp = direct_product(g1, g2, node_label, edge_label)
    # return 0 if the direct product graph have no more than 1 node.
    if nx.number_of_nodes(gp) < 2:
        return 0
    A = nx.adjacency_matrix(gp).todense()
    mat = np.identity(len(A)) - gamma * A
 #    try:
    return mat.I.sum()
 #    except np.linalg.LinAlgError:
 #        return np.nan
    
    
 	"""Calculate common walk graph kernels up to n between 2 graphs using 
 	geometric series.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
 		List of graphs between which the kernels are calculated.
 	node_label : string
 		Node attribute used as label.
 	edge_label : string
 		Edge attribute used as label.
 	gamma: integer
 		Weight.
 	ij : tuple of integer
 		Index of graphs between which the kernel is computed.

 	Return
 	------
 	kernel : float
 		The common walk Kernel between 2 graphs.
 	"""
 	# get tensor product / direct product
 	gp = direct_product(g1, g2, node_label, edge_label)
 	# return 0 if the direct product graph have no more than 1 node.
 	if nx.number_of_nodes(gp) < 2:
 		return 0
 	A = nx.adjacency_matrix(gp).todense()
 	mat = np.identity(len(A)) - gamma * A
 #	try:
 	return mat.I.sum()
 #	except np.linalg.LinAlgError:
 #		return np.nan
 	
 	
 def wrapper_cw_geo(node_label, edge_label, gama, itr):
    i = itr[0]
    j = itr[1]
    return i, j, _commonwalkkernel_geo(G_gn[i], G_gn[j], node_label, edge_label, gama)
 	i = itr[0]
 	j = itr[1]
 	return i, j, _commonwalkkernel_geo(G_gn[i], G_gn[j], node_label, edge_label, gama)


 def _commonwalkkernel_brute(walks1,
                            walks2,
                            node_label='atom',
                            edge_label='bond_type',
                            labeled=True):
    """Calculate walk graph kernels up to n between 2 graphs.

    Parameters
    ----------
    walks1, walks2 : list
        List of walks in 2 graphs, where for unlabeled graphs, each walk is 
        represented by a list of nodes; while for labeled graphs, each walk is 
        represented by a string consists of labels of nodes and edges on that 
        walk.
    node_label : string
        node attribute used as label. The default node label is atom.
    edge_label : string
        edge attribute used as label. The default edge label is bond_type.
    labeled : boolean
        Whether the graphs are labeled. The default is True.

    Return
    ------
    kernel : float
        Treelet Kernel between 2 graphs.
    """
    counts_walks1 = dict(Counter(walks1))
    counts_walks2 = dict(Counter(walks2))
    all_walks = list(set(walks1 + walks2))

    vector1 = [(counts_walks1[walk] if walk in walks1 else 0)
               for walk in all_walks]
    vector2 = [(counts_walks2[walk] if walk in walks2 else 0)
               for walk in all_walks]
    kernel = np.dot(vector1, vector2)

    return kernel
 							walks2,
 							node_label='atom',
 							edge_label='bond_type',
 							labeled=True):
 	"""Calculate walk graph kernels up to n between 2 graphs.

 	Parameters
 	----------
 	walks1, walks2 : list
 		List of walks in 2 graphs, where for unlabeled graphs, each walk is 
 		represented by a list of nodes; while for labeled graphs, each walk is 
 		represented by a string consists of labels of nodes and edges on that 
 		walk.
 	node_label : string
 		node attribute used as label. The default node label is atom.
 	edge_label : string
 		edge attribute used as label. The default edge label is bond_type.
 	labeled : boolean
 		Whether the graphs are labeled. The default is True.

 	Return
 	------
 	kernel : float
 		Treelet Kernel between 2 graphs.
 	"""
 	counts_walks1 = dict(Counter(walks1))
 	counts_walks2 = dict(Counter(walks2))
 	all_walks = list(set(walks1 + walks2))

 	vector1 = [(counts_walks1[walk] if walk in walks1 else 0)
 			   for walk in all_walks]
 	vector2 = [(counts_walks2[walk] if walk in walks2 else 0)
 			   for walk in all_walks]
 	kernel = np.dot(vector1, vector2)

 	return kernel


 # this method find walks repetively, it could be faster.
 def find_all_walks_until_length(G,
                                length,
                                node_label='atom',
                                edge_label='bond_type',
                                labeled=True):
    """Find all walks with a certain maximum length in a graph. 
    A recursive depth first search is applied.

    Parameters
    ----------
    G : NetworkX graphs
        The graph in which walks are searched.
    length : integer
        The maximum length of walks.
    node_label : string
        node attribute used as label. The default node label is atom.
    edge_label : string
        edge attribute used as label. The default edge label is bond_type.
    labeled : boolean
        Whether the graphs are labeled. The default is True.

    Return
    ------
    walk : list
        List of walks retrieved, where for unlabeled graphs, each walk is 
        represented by a list of nodes; while for labeled graphs, each walk 
        is represented by a string consists of labels of nodes and edges on 
        that walk.
    """
    all_walks = []
    # @todo: in this way, the time complexity is close to N(d^n+d^(n+1)+...+1), which could be optimized to O(Nd^n)
    for i in range(0, length + 1):
        new_walks = find_all_walks(G, i)
        if new_walks == []:
            break
        all_walks.extend(new_walks)

    if labeled == True:  # convert paths to strings
        walk_strs = []
        for walk in all_walks:
            strlist = [
                G.node[node][node_label] +
                G[node][walk[walk.index(node) + 1]][edge_label]
                for node in walk[:-1]
            ]
            walk_strs.append(''.join(strlist) + G.node[walk[-1]][node_label])

        return walk_strs

    return all_walks
 								length,
 								node_label='atom',
 								edge_label='bond_type',
 								labeled=True):
 	"""Find all walks with a certain maximum length in a graph. 
 	A recursive depth first search is applied.

 	Parameters
 	----------
 	G : NetworkX graphs
 		The graph in which walks are searched.
 	length : integer
 		The maximum length of walks.
 	node_label : string
 		node attribute used as label. The default node label is atom.
 	edge_label : string
 		edge attribute used as label. The default edge label is bond_type.
 	labeled : boolean
 		Whether the graphs are labeled. The default is True.

 	Return
 	------
 	walk : list
 		List of walks retrieved, where for unlabeled graphs, each walk is 
 		represented by a list of nodes; while for labeled graphs, each walk 
 		is represented by a string consists of labels of nodes and edges on 
 		that walk.
 	"""
 	all_walks = []
 	# @todo: in this way, the time complexity is close to N(d^n+d^(n+1)+...+1), which could be optimized to O(Nd^n)
 	for i in range(0, length + 1):
 		new_walks = find_all_walks(G, i)
 		if new_walks == []:
 			break
 		all_walks.extend(new_walks)

 	if labeled == True:  # convert paths to strings
 		walk_strs = []
 		for walk in all_walks:
 			strlist = [
 				G.node[node][node_label] +
 				G[node][walk[walk.index(node) + 1]][edge_label]
 				for node in walk[:-1]
 			]
 			walk_strs.append(''.join(strlist) + G.node[walk[-1]][node_label])

 		return walk_strs

 	return all_walks


 def find_walks(G, source_node, length):
    """Find all walks with a certain length those start from a source node. A 
    recursive depth first search is applied.

    Parameters
    ----------
    G : NetworkX graphs
        The graph in which walks are searched.
    source_node : integer
        The number of the node from where all walks start.
    length : integer
        The length of walks.

    Return
    ------
    walk : list of list
        List of walks retrieved, where each walk is represented by a list of 
        nodes.
    """
    return [[source_node]] if length == 0 else \
        [[source_node] + walk for neighbor in G[source_node]
         for walk in find_walks(G, neighbor, length - 1)]
 	"""Find all walks with a certain length those start from a source node. A 
 	recursive depth first search is applied.

 	Parameters
 	----------
 	G : NetworkX graphs
 		The graph in which walks are searched.
 	source_node : integer
 		The number of the node from where all walks start.
 	length : integer
 		The length of walks.

 	Return
 	------
 	walk : list of list
 		List of walks retrieved, where each walk is represented by a list of 
 		nodes.
 	"""
 	return [[source_node]] if length == 0 else \
 		[[source_node] + walk for neighbor in G[source_node]
 		 for walk in find_walks(G, neighbor, length - 1)]


 def find_all_walks(G, length):
    """Find all walks with a certain length in a graph. A recursive depth first
    search is applied.

    Parameters
    ----------
    G : NetworkX graphs
        The graph in which walks are searched.
    length : integer
        The length of walks.

    Return
    ------
    walk : list of list
        List of walks retrieved, where each walk is represented by a list of 
        nodes.
    """
    all_walks = []
    for node in G:
        all_walks.extend(find_walks(G, node, length))

    # The following process is not carried out according to the original article
    # all_paths_r = [ path[::-1] for path in all_paths ]

    # # For each path, two presentation are retrieved from its two extremities. Remove one of them.
    # for idx, path in enumerate(all_paths[:-1]):
    #     for path2 in all_paths_r[idx+1::]:
    #         if path == path2:
    #             all_paths[idx] = []
    #             break

    # return list(filter(lambda a: a != [], all_paths))
    return all_walks
 	"""Find all walks with a certain length in a graph. A recursive depth first
 	search is applied.

 	Parameters
 	----------
 	G : NetworkX graphs
 		The graph in which walks are searched.
 	length : integer
 		The length of walks.

 	Return
 	------
 	walk : list of list
 		List of walks retrieved, where each walk is represented by a list of 
 		nodes.
 	"""
 	all_walks = []
 	for node in G:
 		all_walks.extend(find_walks(G, node, length))

 	# The following process is not carried out according to the original article
 	# all_paths_r = [ path[::-1] for path in all_paths ]

 	# # For each path, two presentation are retrieved from its two extremities. Remove one of them.
 	# for idx, path in enumerate(all_paths[:-1]):
 	#	 for path2 in all_paths_r[idx+1::]:
 	#		 if path == path2:
 	#			 all_paths[idx] = []
 	#			 break

 	# return list(filter(lambda a: a != [], all_paths))
 	return all_walks
--- a/gklearn/kernels/marginalizedKernel.py
+++ b/gklearn/kernels/marginalizedKernel.py
@@ -3,14 +3,14 @@

@references:

    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between 
    labeled graphs. In Proceedings of the 20th International Conference on 
    Machine Learning, Washington, DC, United States, 2003.

    [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and 
    Jean-Philippe Vert. Extensions of marginalized graph kernels. In 
    Proceedings of the twenty-first international conference on Machine 
    learning, page 70. ACM, 2004.
 	[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between 
 	labeled graphs. In Proceedings of the 20th International Conference on 
 	Machine Learning, Washington, DC, United States, 2003.

 	[2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and 
 	Jean-Philippe Vert. Extensions of marginalized graph kernels. In 
 	Proceedings of the twenty-first international conference on Machine 
 	learning, page 70. ACM, 2004.
 """

 import sys
@@ -31,275 +31,277 @@ from gklearn.utils.parallel import parallel_gm


 def marginalizedkernel(*args,
                       node_label='atom',
                       edge_label='bond_type',
                       p_quit=0.5,
                       n_iteration=20,
                       remove_totters=False,
                       n_jobs=None,
                       verbose=True):
    """Calculate marginalized graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    
    G1, G2 : NetworkX graphs
        Two graphs between which the kernel is calculated.

    node_label : string
        Node attribute used as symbolic label. The default node label is 'atom'.

    edge_label : string
        Edge attribute used as symbolic label. The default edge label is 'bond_type'.

    p_quit : integer
        The termination probability in the random walks generating step.

    n_iteration : integer
        Time of iterations to calculate R_inf.

    remove_totters : boolean
        Whether to remove totterings by method introduced in [2]. The default 
        value is False.

    n_jobs : int
        Number of jobs for parallelization.   

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the marginalized kernel between
        2 praphs.
    """
    # pre-process
    n_iteration = int(n_iteration)
    Gn = args[0][:] if len(args) == 1 else [args[0].copy(), args[1].copy()]
    Gn = [g.copy() for g in Gn]
    
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
        node_label=node_label, edge_label=edge_label)
    if not ds_attrs['node_labeled'] or node_label == None:
        node_label = 'atom'
        for G in Gn:
            nx.set_node_attributes(G, '0', 'atom')
    if not ds_attrs['edge_labeled'] or edge_label == None:
        edge_label = 'bond_type'
        for G in Gn:
            nx.set_edge_attributes(G, '0', 'bond_type')

    start_time = time.time()
    
    if remove_totters:
        # ---- use pool.imap_unordered to parallel and track progress. ----
        pool = Pool(n_jobs)
        untotter_partial = partial(wrapper_untotter, Gn, node_label, edge_label)
        if len(Gn) < 100 * n_jobs:
            chunksize = int(len(Gn) / n_jobs) + 1
        else:
            chunksize = 100
        for i, g in tqdm(
                pool.imap_unordered(
                    untotter_partial, range(0, len(Gn)), chunksize),
                desc='removing tottering',
                file=sys.stdout):
            Gn[i] = g
        pool.close()
        pool.join()

 #        # ---- direct running, normally use single CPU core. ----
 #        Gn = [
 #            untotterTransformation(G, node_label, edge_label)
 #            for G in tqdm(Gn, desc='removing tottering', file=sys.stdout)
 #        ]

    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # ---- use pool.imap_unordered to parallel and track progress. ----
    def init_worker(gn_toshare):
                global G_gn
                G_gn = gn_toshare
    do_partial = partial(wrapper_marg_do, node_label, edge_label,
                         p_quit, n_iteration)   
    parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
                glbv=(Gn,), n_jobs=n_jobs, verbose=verbose)


 #    # ---- direct running, normally use single CPU core. ----
 ##    pbar = tqdm(
 ##        total=(1 + len(Gn)) * len(Gn) / 2,
 ##        desc='calculating kernels',
 ##        file=sys.stdout)
 #    for i in range(0, len(Gn)):
 #        for j in range(i, len(Gn)):
 ##            print(i, j)
 #            Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label,
 #                                                   edge_label, p_quit, n_iteration)
 #            Kmatrix[j][i] = Kmatrix[i][j]
 ##            pbar.update(1)

    run_time = time.time() - start_time
    if verbose:
        print("\n --- marginalized kernel matrix of size %d built in %s seconds ---"
              % (len(Gn), run_time))

    return Kmatrix, run_time
 					   node_label='atom',
 					   edge_label='bond_type',
 					   p_quit=0.5,
 					   n_iteration=20,
 					   remove_totters=False,
 					   n_jobs=None,
 					   chunksize=None,
 					   verbose=True):
 	"""Calculate marginalized graph kernels between graphs.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
 		List of graphs between which the kernels are calculated.
 	
 	G1, G2 : NetworkX graphs
 		Two graphs between which the kernel is calculated.

 	node_label : string
 		Node attribute used as symbolic label. The default node label is 'atom'.

 	edge_label : string
 		Edge attribute used as symbolic label. The default edge label is 'bond_type'.

 	p_quit : integer
 		The termination probability in the random walks generating step.

 	n_iteration : integer
 		Time of iterations to calculate R_inf.

 	remove_totters : boolean
 		Whether to remove totterings by method introduced in [2]. The default 
 		value is False.

 	n_jobs : int
 		Number of jobs for parallelization.   

 	Return
 	------
 	Kmatrix : Numpy matrix
 		Kernel matrix, each element of which is the marginalized kernel between
 		2 praphs.
 	"""
 	# pre-process
 	n_iteration = int(n_iteration)
 	Gn = args[0][:] if len(args) == 1 else [args[0].copy(), args[1].copy()]
 	Gn = [g.copy() for g in Gn]
 	
 	ds_attrs = get_dataset_attributes(
 		Gn,
 		attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
 		node_label=node_label, edge_label=edge_label)
 	if not ds_attrs['node_labeled'] or node_label == None:
 		node_label = 'atom'
 		for G in Gn:
 			nx.set_node_attributes(G, '0', 'atom')
 	if not ds_attrs['edge_labeled'] or edge_label == None:
 		edge_label = 'bond_type'
 		for G in Gn:
 			nx.set_edge_attributes(G, '0', 'bond_type')

 	start_time = time.time()
 	
 	if remove_totters:
 		# ---- use pool.imap_unordered to parallel and track progress. ----
 		pool = Pool(n_jobs)
 		untotter_partial = partial(wrapper_untotter, Gn, node_label, edge_label)
 		if chunksize is None:
 			if len(Gn) < 100 * n_jobs:
 				chunksize = int(len(Gn) / n_jobs) + 1
 			else:
 				chunksize = 100
 		for i, g in tqdm(
 				pool.imap_unordered(
 					untotter_partial, range(0, len(Gn)), chunksize),
 				desc='removing tottering',
 				file=sys.stdout):
 			Gn[i] = g
 		pool.close()
 		pool.join()

 #		# ---- direct running, normally use single CPU core. ----
 #		Gn = [
 #			untotterTransformation(G, node_label, edge_label)
 #			for G in tqdm(Gn, desc='removing tottering', file=sys.stdout)
 #		]

 	Kmatrix = np.zeros((len(Gn), len(Gn)))

 	# ---- use pool.imap_unordered to parallel and track progress. ----
 	def init_worker(gn_toshare):
 				global G_gn
 				G_gn = gn_toshare
 	do_partial = partial(wrapper_marg_do, node_label, edge_label,
 						 p_quit, n_iteration)   
 	parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
 				glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)


 #	# ---- direct running, normally use single CPU core. ----
 ##	pbar = tqdm(
 ##		total=(1 + len(Gn)) * len(Gn) / 2,
 ##		desc='calculating kernels',
 ##		file=sys.stdout)
 #	for i in range(0, len(Gn)):
 #		for j in range(i, len(Gn)):
 ##			print(i, j)
 #			Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label,
 #												   edge_label, p_quit, n_iteration)
 #			Kmatrix[j][i] = Kmatrix[i][j]
 ##			pbar.update(1)

 	run_time = time.time() - start_time
 	if verbose:
 		print("\n --- marginalized kernel matrix of size %d built in %s seconds ---"
 			  % (len(Gn), run_time))

 	return Kmatrix, run_time


 def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
    """Calculate marginalized graph kernel between 2 graphs.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label.
    edge_label : string
        edge attribute used as label.
    p_quit : integer
        the termination probability in the random walks generating step.
    n_iteration : integer
        time of iterations to calculate R_inf.

    Return
    ------
    kernel : float
        Marginalized Kernel between 2 graphs.
    """
    # init parameters
    kernel = 0
    num_nodes_G1 = nx.number_of_nodes(g1)
    num_nodes_G2 = nx.number_of_nodes(g2)
    # the initial probability distribution in the random walks generating step
    # (uniform distribution over |G|)
    p_init_G1 = 1 / num_nodes_G1
    p_init_G2 = 1 / num_nodes_G2

    q = p_quit * p_quit
    r1 = q

 #    # initial R_inf
 #    # matrix to save all the R_inf for all pairs of nodes
 #    R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
 	"""Calculate marginalized graph kernel between 2 graphs.

 	Parameters
 	----------
 	G1, G2 : NetworkX graphs
 		2 graphs between which the kernel is calculated.
 	node_label : string
 		node attribute used as label.
 	edge_label : string
 		edge attribute used as label.
 	p_quit : integer
 		the termination probability in the random walks generating step.
 	n_iteration : integer
 		time of iterations to calculate R_inf.

 	Return
 	------
 	kernel : float
 		Marginalized Kernel between 2 graphs.
 	"""
 	# init parameters
 	kernel = 0
 	num_nodes_G1 = nx.number_of_nodes(g1)
 	num_nodes_G2 = nx.number_of_nodes(g2)
 	# the initial probability distribution in the random walks generating step
 	# (uniform distribution over |G|)
 	p_init_G1 = 1 / num_nodes_G1
 	p_init_G2 = 1 / num_nodes_G2

 	q = p_quit * p_quit
 	r1 = q

 #	# initial R_inf
 #	# matrix to save all the R_inf for all pairs of nodes
 #	R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
 #
 #    # calculate R_inf with a simple interative method
 #    for i in range(1, n_iteration):
 #        R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
 #        R_inf_new.fill(r1)
 #	# calculate R_inf with a simple interative method
 #	for i in range(1, n_iteration):
 #		R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
 #		R_inf_new.fill(r1)
 #
 #        # calculate R_inf for each pair of nodes
 #        for node1 in g1.nodes(data=True):
 #            neighbor_n1 = g1[node1[0]]
 #            # the transition probability distribution in the random walks
 #            # generating step (uniform distribution over the vertices adjacent
 #            # to the current vertex)
 #            if len(neighbor_n1) > 0:
 #                p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
 #                for node2 in g2.nodes(data=True):
 #                    neighbor_n2 = g2[node2[0]]
 #                    if len(neighbor_n2) > 0:
 #                        p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
 #        
 #                        for neighbor1 in neighbor_n1:
 #                            for neighbor2 in neighbor_n2:
 #                                t = p_trans_n1 * p_trans_n2 * \
 #                                    deltakernel(g1.node[neighbor1][node_label],
 #                                                g2.node[neighbor2][node_label]) * \
 #                                    deltakernel(
 #                                        neighbor_n1[neighbor1][edge_label],
 #                                        neighbor_n2[neighbor2][edge_label])
 #        
 #                                R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
 #                                    neighbor2]  # ref [1] equation (8)
 #        R_inf[:] = R_inf_new
 #		# calculate R_inf for each pair of nodes
 #		for node1 in g1.nodes(data=True):
 #			neighbor_n1 = g1[node1[0]]
 #			# the transition probability distribution in the random walks
 #			# generating step (uniform distribution over the vertices adjacent
 #			# to the current vertex)
 #			if len(neighbor_n1) > 0:
 #				p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
 #				for node2 in g2.nodes(data=True):
 #					neighbor_n2 = g2[node2[0]]
 #					if len(neighbor_n2) > 0:
 #						p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
 #		
 #						for neighbor1 in neighbor_n1:
 #							for neighbor2 in neighbor_n2:
 #								t = p_trans_n1 * p_trans_n2 * \
 #									deltakernel(g1.node[neighbor1][node_label],
 #												g2.node[neighbor2][node_label]) * \
 #									deltakernel(
 #										neighbor_n1[neighbor1][edge_label],
 #										neighbor_n2[neighbor2][edge_label])
 #		
 #								R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
 #									neighbor2]  # ref [1] equation (8)
 #		R_inf[:] = R_inf_new
 #
 #    # add elements of R_inf up and calculate kernel
 #    for node1 in g1.nodes(data=True):
 #        for node2 in g2.nodes(data=True):
 #            s = p_init_G1 * p_init_G2 * deltakernel(
 #                node1[1][node_label], node2[1][node_label])
 #            kernel += s * R_inf[node1[0]][node2[0]]  # ref [1] equation (6)
    
    
    R_inf = {} # dict to save all the R_inf for all pairs of nodes
    # initial R_inf, the 1st iteration.
    for node1 in g1.nodes():
        for node2 in g2.nodes():
 #            R_inf[(node1[0], node2[0])] = r1
            if len(g1[node1]) > 0:
                if len(g2[node2]) > 0:
                    R_inf[(node1, node2)] = r1
                else:
                    R_inf[(node1, node2)] = p_quit
            else:
                if len(g2[node2]) > 0:
                    R_inf[(node1, node2)] = p_quit
                else:
                    R_inf[(node1, node2)] = 1
            
    # compute all transition probability first.
    t_dict = {}
    if n_iteration > 1:
        for node1 in g1.nodes():
            neighbor_n1 = g1[node1]
            # the transition probability distribution in the random walks
            # generating step (uniform distribution over the vertices adjacent
            # to the current vertex)
            if len(neighbor_n1) > 0:
                p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
                for node2 in g2.nodes():
                    neighbor_n2 = g2[node2]
                    if len(neighbor_n2) > 0:
                        p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
                        for neighbor1 in neighbor_n1:
                            for neighbor2 in neighbor_n2:
                                t_dict[(node1, node2, neighbor1, neighbor2)] = \
                                    p_trans_n1 * p_trans_n2 * \
                                    deltakernel(g1.nodes[neighbor1][node_label],
                                                g2.nodes[neighbor2][node_label]) * \
                                    deltakernel(
                                        neighbor_n1[neighbor1][edge_label],
                                        neighbor_n2[neighbor2][edge_label])

    # calculate R_inf with a simple interative method
    for i in range(2, n_iteration + 1):
        R_inf_old = R_inf.copy()

        # calculate R_inf for each pair of nodes
        for node1 in g1.nodes():
            neighbor_n1 = g1[node1]
            # the transition probability distribution in the random walks
            # generating step (uniform distribution over the vertices adjacent
            # to the current vertex)
            if len(neighbor_n1) > 0:
                for node2 in g2.nodes():
                    neighbor_n2 = g2[node2]
                    if len(neighbor_n2) > 0:   
                        R_inf[(node1, node2)] = r1
                        for neighbor1 in neighbor_n1:
                            for neighbor2 in neighbor_n2:
                                R_inf[(node1, node2)] += \
                                    (t_dict[(node1, node2, neighbor1, neighbor2)] * \
                                    R_inf_old[(neighbor1, neighbor2)])  # ref [1] equation (8)

    # add elements of R_inf up and calculate kernel
    for (n1, n2), value in R_inf.items():
        s = p_init_G1 * p_init_G2 * deltakernel(
                g1.nodes[n1][node_label], g2.nodes[n2][node_label])
        kernel += s * value  # ref [1] equation (6)

    return kernel
        
        
 #	# add elements of R_inf up and calculate kernel
 #	for node1 in g1.nodes(data=True):
 #		for node2 in g2.nodes(data=True):
 #			s = p_init_G1 * p_init_G2 * deltakernel(
 #				node1[1][node_label], node2[1][node_label])
 #			kernel += s * R_inf[node1[0]][node2[0]]  # ref [1] equation (6)
 	
 	
 	R_inf = {} # dict to save all the R_inf for all pairs of nodes
 	# initial R_inf, the 1st iteration.
 	for node1 in g1.nodes():
 		for node2 in g2.nodes():
 #			R_inf[(node1[0], node2[0])] = r1
 			if len(g1[node1]) > 0:
 				if len(g2[node2]) > 0:
 					R_inf[(node1, node2)] = r1
 				else:
 					R_inf[(node1, node2)] = p_quit
 			else:
 				if len(g2[node2]) > 0:
 					R_inf[(node1, node2)] = p_quit
 				else:
 					R_inf[(node1, node2)] = 1
 			
 	# compute all transition probability first.
 	t_dict = {}
 	if n_iteration > 1:
 		for node1 in g1.nodes():
 			neighbor_n1 = g1[node1]
 			# the transition probability distribution in the random walks
 			# generating step (uniform distribution over the vertices adjacent
 			# to the current vertex)
 			if len(neighbor_n1) > 0:
 				p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
 				for node2 in g2.nodes():
 					neighbor_n2 = g2[node2]
 					if len(neighbor_n2) > 0:
 						p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
 						for neighbor1 in neighbor_n1:
 							for neighbor2 in neighbor_n2:
 								t_dict[(node1, node2, neighbor1, neighbor2)] = \
 									p_trans_n1 * p_trans_n2 * \
 									deltakernel(g1.nodes[neighbor1][node_label],
 												g2.nodes[neighbor2][node_label]) * \
 									deltakernel(
 										neighbor_n1[neighbor1][edge_label],
 										neighbor_n2[neighbor2][edge_label])

 	# calculate R_inf with a simple interative method
 	for i in range(2, n_iteration + 1):
 		R_inf_old = R_inf.copy()

 		# calculate R_inf for each pair of nodes
 		for node1 in g1.nodes():
 			neighbor_n1 = g1[node1]
 			# the transition probability distribution in the random walks
 			# generating step (uniform distribution over the vertices adjacent
 			# to the current vertex)
 			if len(neighbor_n1) > 0:
 				for node2 in g2.nodes():
 					neighbor_n2 = g2[node2]
 					if len(neighbor_n2) > 0:   
 						R_inf[(node1, node2)] = r1
 						for neighbor1 in neighbor_n1:
 							for neighbor2 in neighbor_n2:
 								R_inf[(node1, node2)] += \
 									(t_dict[(node1, node2, neighbor1, neighbor2)] * \
 									R_inf_old[(neighbor1, neighbor2)])  # ref [1] equation (8)

 	# add elements of R_inf up and calculate kernel
 	for (n1, n2), value in R_inf.items():
 		s = p_init_G1 * p_init_G2 * deltakernel(
 				g1.nodes[n1][node_label], g2.nodes[n2][node_label])
 		kernel += s * value  # ref [1] equation (6)

 	return kernel
 		
 		
 def wrapper_marg_do(node_label, edge_label, p_quit, n_iteration, itr):
    i= itr[0]
    j = itr[1]
    return i, j, _marginalizedkernel_do(G_gn[i], G_gn[j], node_label, edge_label, p_quit, n_iteration)
    
 	i= itr[0]
 	j = itr[1]
 	return i, j, _marginalizedkernel_do(G_gn[i], G_gn[j], node_label, edge_label, p_quit, n_iteration)
 	

 def wrapper_untotter(Gn, node_label, edge_label, i):
    return i, untotterTransformation(Gn[i], node_label, edge_label)
 	return i, untotterTransformation(Gn[i], node_label, edge_label)
--- a/gklearn/kernels/path_up_to_h.py
+++ b/gklearn/kernels/path_up_to_h.py
@@ -373,8 +373,18 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
 					   for key in all_paths]
 			kernel = np.sum(np.minimum(vector1, vector2)) / \
 				np.sum(np.maximum(vector1, vector2))
 				
 		elif self.__k_func is None: # no sub-kernel used; compare paths directly.
 			path_count1 = Counter(paths1)
 			path_count2 = Counter(paths2)
 			vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
 					   for key in all_paths]
 			vector2 = [(path_count2[key] if (key in path_count2.keys()) else 0)
 					   for key in all_paths]
 			kernel = np.dot(vector1, vector2)
 			
 		else:
 			raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto", "MinMax".')
 			raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto", "MinMax" and None.')
 	
 		return kernel
 	
--- a/gklearn/kernels/randomWalkKernel.py
+++ b/gklearn/kernels/randomWalkKernel.py
--- a/gklearn/kernels/spKernel.py
+++ b/gklearn/kernels/spKernel.py
@@ -2,9 +2,9 @@
@author: linlin

@references: 
    
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData 
    Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
 	
 	[1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData 
 	Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
 """

 import sys
@@ -22,303 +22,305 @@ from gklearn.utils.graphdataset import get_dataset_attributes
 from gklearn.utils.parallel import parallel_gm

 def spkernel(*args,
             node_label='atom',
             edge_weight=None,
             node_kernels=None,
             parallel='imap_unordered',
             n_jobs=None,
             verbose=True):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    
    G1, G2 : NetworkX graphs
        Two graphs between which the kernel is calculated.

    node_label : string
        Node attribute used as label. The default node label is atom.

    edge_weight : string
        Edge attribute name corresponding to the edge weight.

    node_kernels : dict
        A dictionary of kernel functions for nodes, including 3 items: 'symb' 
        for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' 
        for both labels. The first 2 functions take two node labels as 
        parameters, and the 'mix' function takes 4 parameters, a symbolic and a
        non-symbolic label for each the two nodes. Each label is in form of 2-D
        dimension array (n_samples, n_features). Each function returns an 
        number as the kernel value. Ignored when nodes are unlabeled.

    n_jobs : int
        Number of jobs for parallelization.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the sp kernel between 2 praphs.
    """
    # pre-process
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    Gn = [g.copy() for g in Gn]
    weight = None
    if edge_weight is None:
        if verbose:
            print('\n None edge weight specified. Set all weight to 1.\n')
    else:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
            if isinstance(some_weight, (float, int)):
                weight = edge_weight
            else:
                if verbose:
                    print(
                            '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n'
                            % edge_weight)
        except:
            if verbose:
                print(
                        '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
                        % edge_weight)
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'node_attr_dim', 'is_directed'],
        node_label=node_label)

    # remove graphs with no edges, as no sp can be found in their structures, 
    # so the kernel between such a graph and itself will be zero.
    len_gn = len(Gn)
    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
    idx = [G[0] for G in Gn]
    Gn = [G[1] for G in Gn]
    if len(Gn) != len_gn:
        if verbose:
            print('\n %d graphs are removed as they don\'t contain edges.\n' %
                  (len_gn - len(Gn)))

    start_time = time.time()

    if parallel == 'imap_unordered':
        pool = Pool(n_jobs)
        # get shortest path graphs of Gn
        getsp_partial = partial(wrapper_getSPGraph, weight)
        itr = zip(Gn, range(0, len(Gn)))
        if len(Gn) < 100 * n_jobs:
    #        # use default chunksize as pool.map when iterable is less than 100
    #        chunksize, extra = divmod(len(Gn), n_jobs * 4)
    #        if extra:
    #            chunksize += 1
            chunksize = int(len(Gn) / n_jobs) + 1
        else:
            chunksize = 100
        if verbose:
            iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize),
                            desc='getting sp graphs', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(getsp_partial, itr, chunksize)
        for i, g in iterator:
            Gn[i] = g
        pool.close()
        pool.join()
        
    elif parallel is None:
        pass
 #    # ---- direct running, normally use single CPU core. ----
 #    for i in tqdm(range(len(Gn)), desc='getting sp graphs', file=sys.stdout):
 #        i, Gn[i] = wrapper_getSPGraph(weight, (Gn[i], i))

    # # ---- use pool.map to parallel ----
    # result_sp = pool.map(getsp_partial, range(0, len(Gn)))
    # for i in result_sp:
    #     Gn[i[0]] = i[1]
    # or
    # getsp_partial = partial(wrap_getSPGraph, Gn, weight)
    # for i, g in tqdm(
    #         pool.map(getsp_partial, range(0, len(Gn))),
    #         desc='getting sp graphs',
    #         file=sys.stdout):
    #     Gn[i] = g

    # # ---- only for the Fast Computation of Shortest Path Kernel (FCSP)
    # sp_ml = [0] * len(Gn)  # shortest path matrices
    # for i in result_sp:
    #     sp_ml[i[0]] = i[1]
    # edge_x_g = [[] for i in range(len(sp_ml))]
    # edge_y_g = [[] for i in range(len(sp_ml))]
    # edge_w_g = [[] for i in range(len(sp_ml))]
    # for idx, item in enumerate(sp_ml):
    #     for i1 in range(len(item)):
    #         for i2 in range(i1 + 1, len(item)):
    #             if item[i1, i2] != np.inf:
    #                 edge_x_g[idx].append(i1)
    #                 edge_y_g[idx].append(i2)
    #                 edge_w_g[idx].append(item[i1, i2])
    # print(len(edge_x_g[0]))
    # print(len(edge_y_g[0]))
    # print(len(edge_w_g[0]))

    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # ---- use pool.imap_unordered to parallel and track progress. ----
    def init_worker(gn_toshare):
        global G_gn
        G_gn = gn_toshare
    do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels)   
    parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
                glbv=(Gn,), n_jobs=n_jobs, verbose=verbose)


    # # ---- use pool.map to parallel. ----
    # # result_perf = pool.map(do_partial, itr)
    # do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels)
    # itr = combinations_with_replacement(range(0, len(Gn)), 2)
    # for i, j, kernel in tqdm(
    #         pool.map(do_partial, itr), desc='calculating kernels',
    #         file=sys.stdout):
    #     Kmatrix[i][j] = kernel
    #     Kmatrix[j][i] = kernel
    # pool.close()
    # pool.join()

    # # ---- use joblib.Parallel to parallel and track progress. ----
    # result_perf = Parallel(
    #     n_jobs=n_jobs, verbose=10)(
    #         delayed(do_partial)(ij)
    #         for ij in combinations_with_replacement(range(0, len(Gn)), 2))
    # result_perf = [
    #     do_partial(ij)
    #     for ij in combinations_with_replacement(range(0, len(Gn)), 2)
    # ]
    # for i in result_perf:
    #     Kmatrix[i[0]][i[1]] = i[2]
    #     Kmatrix[i[1]][i[0]] = i[2]

 #    # ---- direct running, normally use single CPU core. ----
 #    from itertools import combinations_with_replacement
 #    itr = combinations_with_replacement(range(0, len(Gn)), 2)
 #    for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
 #        kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels)
 #        Kmatrix[i][j] = kernel
 #        Kmatrix[j][i] = kernel

    run_time = time.time() - start_time
    if verbose:
        print(
                "\n --- shortest path kernel matrix of size %d built in %s seconds ---"
                % (len(Gn), run_time))

    return Kmatrix, run_time, idx
 			 node_label='atom',
 			 edge_weight=None,
 			 node_kernels=None,
 			 parallel='imap_unordered',
 			 n_jobs=None,
 			 chunksize=None,
 			 verbose=True):
 	"""Calculate shortest-path kernels between graphs.

 	Parameters
 	----------
 	Gn : List of NetworkX graph
 		List of graphs between which the kernels are calculated.
 	
 	G1, G2 : NetworkX graphs
 		Two graphs between which the kernel is calculated.

 	node_label : string
 		Node attribute used as label. The default node label is atom.

 	edge_weight : string
 		Edge attribute name corresponding to the edge weight.

 	node_kernels : dict
 		A dictionary of kernel functions for nodes, including 3 items: 'symb' 
 		for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' 
 		for both labels. The first 2 functions take two node labels as 
 		parameters, and the 'mix' function takes 4 parameters, a symbolic and a
 		non-symbolic label for each the two nodes. Each label is in form of 2-D
 		dimension array (n_samples, n_features). Each function returns an 
 		number as the kernel value. Ignored when nodes are unlabeled.

 	n_jobs : int
 		Number of jobs for parallelization.

 	Return
 	------
 	Kmatrix : Numpy matrix
 		Kernel matrix, each element of which is the sp kernel between 2 praphs.
 	"""
 	# pre-process
 	Gn = args[0] if len(args) == 1 else [args[0], args[1]]
 	Gn = [g.copy() for g in Gn]
 	weight = None
 	if edge_weight is None:
 		if verbose:
 			print('\n None edge weight specified. Set all weight to 1.\n')
 	else:
 		try:
 			some_weight = list(
 				nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
 			if isinstance(some_weight, (float, int)):
 				weight = edge_weight
 			else:
 				if verbose:
 					print(
 							'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n'
 							% edge_weight)
 		except:
 			if verbose:
 				print(
 						'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
 						% edge_weight)
 	ds_attrs = get_dataset_attributes(
 		Gn,
 		attr_names=['node_labeled', 'node_attr_dim', 'is_directed'],
 		node_label=node_label)

 	# remove graphs with no edges, as no sp can be found in their structures, 
 	# so the kernel between such a graph and itself will be zero.
 	len_gn = len(Gn)
 	Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
 	idx = [G[0] for G in Gn]
 	Gn = [G[1] for G in Gn]
 	if len(Gn) != len_gn:
 		if verbose:
 			print('\n %d graphs are removed as they don\'t contain edges.\n' %
 				  (len_gn - len(Gn)))

 	start_time = time.time()

 	if parallel == 'imap_unordered':
 		pool = Pool(n_jobs)
 		# get shortest path graphs of Gn
 		getsp_partial = partial(wrapper_getSPGraph, weight)
 		itr = zip(Gn, range(0, len(Gn)))
 		if chunksize is None:
 			if len(Gn) < 100 * n_jobs:
 		#		# use default chunksize as pool.map when iterable is less than 100
 		#		chunksize, extra = divmod(len(Gn), n_jobs * 4)
 		#		if extra:
 		#			chunksize += 1
 				chunksize = int(len(Gn) / n_jobs) + 1
 			else:
 				chunksize = 100
 		if verbose:
 			iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize),
 							desc='getting sp graphs', file=sys.stdout)
 		else:
 			iterator = pool.imap_unordered(getsp_partial, itr, chunksize)
 		for i, g in iterator:
 			Gn[i] = g
 		pool.close()
 		pool.join()
 		
 	elif parallel is None:
 		pass
 #	# ---- direct running, normally use single CPU core. ----
 #	for i in tqdm(range(len(Gn)), desc='getting sp graphs', file=sys.stdout):
 #		i, Gn[i] = wrapper_getSPGraph(weight, (Gn[i], i))

 	# # ---- use pool.map to parallel ----
 	# result_sp = pool.map(getsp_partial, range(0, len(Gn)))
 	# for i in result_sp:
 	#	 Gn[i[0]] = i[1]
 	# or
 	# getsp_partial = partial(wrap_getSPGraph, Gn, weight)
 	# for i, g in tqdm(
 	#		 pool.map(getsp_partial, range(0, len(Gn))),
 	#		 desc='getting sp graphs',
 	#		 file=sys.stdout):
 	#	 Gn[i] = g

 	# # ---- only for the Fast Computation of Shortest Path Kernel (FCSP)
 	# sp_ml = [0] * len(Gn)  # shortest path matrices
 	# for i in result_sp:
 	#	 sp_ml[i[0]] = i[1]
 	# edge_x_g = [[] for i in range(len(sp_ml))]
 	# edge_y_g = [[] for i in range(len(sp_ml))]
 	# edge_w_g = [[] for i in range(len(sp_ml))]
 	# for idx, item in enumerate(sp_ml):
 	#	 for i1 in range(len(item)):
 	#		 for i2 in range(i1 + 1, len(item)):
 	#			 if item[i1, i2] != np.inf:
 	#				 edge_x_g[idx].append(i1)
 	#				 edge_y_g[idx].append(i2)
 	#				 edge_w_g[idx].append(item[i1, i2])
 	# print(len(edge_x_g[0]))
 	# print(len(edge_y_g[0]))
 	# print(len(edge_w_g[0]))

 	Kmatrix = np.zeros((len(Gn), len(Gn)))

 	# ---- use pool.imap_unordered to parallel and track progress. ----
 	def init_worker(gn_toshare):
 		global G_gn
 		G_gn = gn_toshare
 	do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels)   
 	parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
 				glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)


 	# # ---- use pool.map to parallel. ----
 	# # result_perf = pool.map(do_partial, itr)
 	# do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels)
 	# itr = combinations_with_replacement(range(0, len(Gn)), 2)
 	# for i, j, kernel in tqdm(
 	#		 pool.map(do_partial, itr), desc='calculating kernels',
 	#		 file=sys.stdout):
 	#	 Kmatrix[i][j] = kernel
 	#	 Kmatrix[j][i] = kernel
 	# pool.close()
 	# pool.join()

 	# # ---- use joblib.Parallel to parallel and track progress. ----
 	# result_perf = Parallel(
 	#	 n_jobs=n_jobs, verbose=10)(
 	#		 delayed(do_partial)(ij)
 	#		 for ij in combinations_with_replacement(range(0, len(Gn)), 2))
 	# result_perf = [
 	#	 do_partial(ij)
 	#	 for ij in combinations_with_replacement(range(0, len(Gn)), 2)
 	# ]
 	# for i in result_perf:
 	#	 Kmatrix[i[0]][i[1]] = i[2]
 	#	 Kmatrix[i[1]][i[0]] = i[2]

 #	# ---- direct running, normally use single CPU core. ----
 #	from itertools import combinations_with_replacement
 #	itr = combinations_with_replacement(range(0, len(Gn)), 2)
 #	for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
 #		kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels)
 #		Kmatrix[i][j] = kernel
 #		Kmatrix[j][i] = kernel

 	run_time = time.time() - start_time
 	if verbose:
 		print(
 				"\n --- shortest path kernel matrix of size %d built in %s seconds ---"
 				% (len(Gn), run_time))

 	return Kmatrix, run_time, idx


 def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):
    
    kernel = 0

    # compute shortest path matrices first, method borrowed from FCSP.
    vk_dict = {}  # shortest path matrices dict
    if ds_attrs['node_labeled']:
        # node symb and non-synb labeled
        if ds_attrs['node_attr_dim'] > 0:
            kn = node_kernels['mix']
            for n1, n2 in product(
                    g1.nodes(data=True), g2.nodes(data=True)):
                vk_dict[(n1[0], n2[0])] = kn(
                    n1[1][node_label], n2[1][node_label],
                    n1[1]['attributes'], n2[1]['attributes'])
        # node symb labeled
        else:
            kn = node_kernels['symb']
            for n1 in g1.nodes(data=True):
                for n2 in g2.nodes(data=True):
                    vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
                                                 n2[1][node_label])
    else:
        # node non-synb labeled
        if ds_attrs['node_attr_dim'] > 0:
            kn = node_kernels['nsymb']
            for n1 in g1.nodes(data=True):
                for n2 in g2.nodes(data=True):
                    vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
                                                 n2[1]['attributes'])
        # node unlabeled
        else:
            for e1, e2 in product(
                    g1.edges(data=True), g2.edges(data=True)):
                if e1[2]['cost'] == e2[2]['cost']:
                    kernel += 1
            return kernel

    # compute graph kernels
    if ds_attrs['is_directed']:
        for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
            if e1[2]['cost'] == e2[2]['cost']:
                nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1],
                                                               e2[1])]
                kn1 = nk11 * nk22
                kernel += kn1
    else:
        for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
            if e1[2]['cost'] == e2[2]['cost']:
                # each edge walk is counted twice, starting from both its extreme nodes.
                nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
                    e1[0], e2[1])], vk_dict[(e1[1],
                                             e2[0])], vk_dict[(e1[1],
                                                               e2[1])]
                kn1 = nk11 * nk22
                kn2 = nk12 * nk21
                kernel += kn1 + kn2

        # # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
        # # compute vertex kernels
        # try:
        #     vk_mat = np.zeros((nx.number_of_nodes(g1),
        #                        nx.number_of_nodes(g2)))
        #     g1nl = enumerate(g1.nodes(data=True))
        #     g2nl = enumerate(g2.nodes(data=True))
        #     for i1, n1 in g1nl:
        #         for i2, n2 in g2nl:
        #             vk_mat[i1][i2] = kn(
        #                 n1[1][node_label], n2[1][node_label],
        #                 [n1[1]['attributes']], [n2[1]['attributes']])

        #     range1 = range(0, len(edge_w_g[i]))
        #     range2 = range(0, len(edge_w_g[j]))
        #     for i1 in range1:
        #         x1 = edge_x_g[i][i1]
        #         y1 = edge_y_g[i][i1]
        #         w1 = edge_w_g[i][i1]
        #         for i2 in range2:
        #             x2 = edge_x_g[j][i2]
        #             y2 = edge_y_g[j][i2]
        #             w2 = edge_w_g[j][i2]
        #             ke = (w1 == w2)
        #             if ke > 0:
        #                 kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
        #                 kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
        #                 kernel += kn1 + kn2

    return kernel
 	
 	kernel = 0

 	# compute shortest path matrices first, method borrowed from FCSP.
 	vk_dict = {}  # shortest path matrices dict
 	if ds_attrs['node_labeled']:
 		# node symb and non-synb labeled
 		if ds_attrs['node_attr_dim'] > 0:
 			kn = node_kernels['mix']
 			for n1, n2 in product(
 					g1.nodes(data=True), g2.nodes(data=True)):
 				vk_dict[(n1[0], n2[0])] = kn(
 					n1[1][node_label], n2[1][node_label],
 					n1[1]['attributes'], n2[1]['attributes'])
 		# node symb labeled
 		else:
 			kn = node_kernels['symb']
 			for n1 in g1.nodes(data=True):
 				for n2 in g2.nodes(data=True):
 					vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
 												 n2[1][node_label])
 	else:
 		# node non-synb labeled
 		if ds_attrs['node_attr_dim'] > 0:
 			kn = node_kernels['nsymb']
 			for n1 in g1.nodes(data=True):
 				for n2 in g2.nodes(data=True):
 					vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
 												 n2[1]['attributes'])
 		# node unlabeled
 		else:
 			for e1, e2 in product(
 					g1.edges(data=True), g2.edges(data=True)):
 				if e1[2]['cost'] == e2[2]['cost']:
 					kernel += 1
 			return kernel

 	# compute graph kernels
 	if ds_attrs['is_directed']:
 		for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
 			if e1[2]['cost'] == e2[2]['cost']:
 				nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1],
 															   e2[1])]
 				kn1 = nk11 * nk22
 				kernel += kn1
 	else:
 		for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
 			if e1[2]['cost'] == e2[2]['cost']:
 				# each edge walk is counted twice, starting from both its extreme nodes.
 				nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
 					e1[0], e2[1])], vk_dict[(e1[1],
 											 e2[0])], vk_dict[(e1[1],
 															   e2[1])]
 				kn1 = nk11 * nk22
 				kn2 = nk12 * nk21
 				kernel += kn1 + kn2

 		# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
 		# # compute vertex kernels
 		# try:
 		#	 vk_mat = np.zeros((nx.number_of_nodes(g1),
 		#						nx.number_of_nodes(g2)))
 		#	 g1nl = enumerate(g1.nodes(data=True))
 		#	 g2nl = enumerate(g2.nodes(data=True))
 		#	 for i1, n1 in g1nl:
 		#		 for i2, n2 in g2nl:
 		#			 vk_mat[i1][i2] = kn(
 		#				 n1[1][node_label], n2[1][node_label],
 		#				 [n1[1]['attributes']], [n2[1]['attributes']])

 		#	 range1 = range(0, len(edge_w_g[i]))
 		#	 range2 = range(0, len(edge_w_g[j]))
 		#	 for i1 in range1:
 		#		 x1 = edge_x_g[i][i1]
 		#		 y1 = edge_y_g[i][i1]
 		#		 w1 = edge_w_g[i][i1]
 		#		 for i2 in range2:
 		#			 x2 = edge_x_g[j][i2]
 		#			 y2 = edge_y_g[j][i2]
 		#			 w2 = edge_w_g[j][i2]
 		#			 ke = (w1 == w2)
 		#			 if ke > 0:
 		#				 kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
 		#				 kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
 		#				 kernel += kn1 + kn2

 	return kernel


 def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):
    i = itr[0]
    j = itr[1]
    return i, j, spkernel_do(G_gn[i], G_gn[j], ds_attrs, node_label, node_kernels)
 	i = itr[0]
 	j = itr[1]
 	return i, j, spkernel_do(G_gn[i], G_gn[j], ds_attrs, node_label, node_kernels)

 #def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr_item):
 #    g1 = itr_item[0][0]
 #    g2 = itr_item[0][1]
 #    i = itr_item[1][0]
 #    j = itr_item[1][1]
 #    return i, j, spkernel_do(g1, g2, ds_attrs, node_label, node_kernels)
 #	g1 = itr_item[0][0]
 #	g2 = itr_item[0][1]
 #	i = itr_item[1][0]
 #	j = itr_item[1][1]
 #	return i, j, spkernel_do(g1, g2, ds_attrs, node_label, node_kernels)


 def wrapper_getSPGraph(weight, itr_item):
    g = itr_item[0]
    i = itr_item[1]
    return i, getSPGraph(g, edge_weight=weight)
    # return i, nx.floyd_warshall_numpy(g, weight=weight)
 	g = itr_item[0]
 	i = itr_item[1]
 	return i, getSPGraph(g, edge_weight=weight)
 	# return i, nx.floyd_warshall_numpy(g, weight=weight)
--- a/gklearn/kernels/structuralspKernel.py
+++ b/gklearn/kernels/structuralspKernel.py
--- a/gklearn/kernels/treeletKernel.py
+++ b/gklearn/kernels/treeletKernel.py
@@ -27,6 +27,7 @@ def treeletkernel(*args,
 				  edge_label='bond_type', 
 				  parallel='imap_unordered',
 				  n_jobs=None, 
 				  chunksize=None,
 				  verbose=True):
 	"""Calculate treelet graph kernels between graphs.

@@ -92,10 +93,11 @@ def treeletkernel(*args,
 		# time, but this may cost a lot of memory for large dataset.
 		pool = Pool(n_jobs)
 		itr = zip(Gn, range(0, len(Gn)))
 		if len(Gn) < 100 * n_jobs:
 			chunksize = int(len(Gn) / n_jobs) + 1
 		else:
 			chunksize = 100
 		if chunksize is None:
 			if len(Gn) < 100 * n_jobs:
 				chunksize = int(len(Gn) / n_jobs) + 1
 			else:
 				chunksize = 100
 		canonkeys = [[] for _ in range(len(Gn))]
 		get_partial = partial(wrapper_get_canonkeys, node_label, edge_label, 
 								labeled, ds_attrs['is_directed'])
@@ -115,7 +117,7 @@ def treeletkernel(*args,
 			G_canonkeys = canonkeys_toshare
 		do_partial = partial(wrapper_treeletkernel_do, sub_kernel)
 		parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
 					glbv=(canonkeys,), n_jobs=n_jobs, verbose=verbose)
 					glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
 		
 	# ---- do not use parallelization. ----
 	elif parallel == None:
--- a/gklearn/kernels/untilHPathKernel.py
+++ b/gklearn/kernels/untilHPathKernel.py
--- a/gklearn/kernels/weisfeilerLehmanKernel.py
+++ b/gklearn/kernels/weisfeilerLehmanKernel.py
@@ -30,6 +30,7 @@ def weisfeilerlehmankernel(*args,
 						   base_kernel='subtree',
 						   parallel=None,
 						   n_jobs=None, 
 						   chunksize=None,
 						   verbose=True):
 	"""Calculate Weisfeiler-Lehman kernels between graphs.
 	
@@ -91,7 +92,7 @@ def weisfeilerlehmankernel(*args,

 	# for WL subtree kernel
 	if base_kernel == 'subtree':		   
 		Kmatrix = _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose)
 		Kmatrix = _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose)

 	# for WL shortest path kernel
 	elif base_kernel == 'sp':
@@ -113,7 +114,7 @@ def weisfeilerlehmankernel(*args,
 	return Kmatrix, run_time


 def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose):
 def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose):
 	"""Calculate Weisfeiler-Lehman kernels between graphs.

 	Parameters
@@ -146,7 +147,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose)
 		all_num_of_each_label.append(dict(Counter(labels_ori)))

 	# calculate subtree kernel with the 0th iteration and add it to the final kernel
 	compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, False)
 	compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False)

 	# iterate each height
 	for h in range(1, height + 1):
@@ -304,7 +305,7 @@ def wrapper_wl_iteration(node_label, itr_item):
 	return i, all_multisets


 def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, verbose):
 def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, verbose):
 	"""Compute kernel matrix using the base kernel.
 	"""
 	if parallel == 'imap_unordered':
@@ -314,7 +315,7 @@ def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs,
 			G_alllabels = alllabels_toshare
 		do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix)
 		parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
 					glbv=(all_num_of_each_label,), n_jobs=n_jobs, verbose=verbose)
 					glbv=(all_num_of_each_label,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
 	elif parallel == None:
 		for i in range(len(Kmatrix)):
 			for j in range(i, len(Kmatrix)):
--- a/gklearn/utils/parallel.py
+++ b/gklearn/utils/parallel.py
@@ -24,7 +24,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
                n_jobs = multiprocessing.cpu_count()
            with Pool(processes=n_jobs, initializer=init_worker, 
                      initargs=glbv) as pool:                
                if chunksize == None:
                if chunksize is None:
                    if len_itr < 100 * n_jobs:
                        chunksize = int(len_itr / n_jobs) + 1
                    else:
@@ -39,7 +39,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
            if n_jobs == None:
                n_jobs = multiprocessing.cpu_count()
            with Pool(processes=n_jobs) as pool:
                if chunksize == None:
                if chunksize is None:
                    if len_itr < 100 * n_jobs:
                        chunksize = int(len_itr / n_jobs) + 1
                    else: