New translations untilHPathKernel.py (Chinese Simplified)

5 years ago · f435b840d1
--- a/lang/zh/gklearn/kernels/untilHPathKernel.py
+++ b/lang/zh/gklearn/kernels/untilHPathKernel.py
@@ -0,0 +1,726 @@
 """
@author: linlin
@references: 
 	[1] Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre 
 	Baldi. Graph kernels for chemical informatics. Neural networks, 
 	18(8):1093–1110, 2005.
 """
 import sys
 import time
 from collections import Counter
 from itertools import chain
 from functools import partial
 from multiprocessing import Pool
 from tqdm import tqdm
 import networkx as nx
 import numpy as np
 from gklearn.utils.graphdataset import get_dataset_attributes
 from gklearn.utils.parallel import parallel_gm
 from gklearn.utils.trie import Trie
 def untilhpathkernel(*args,
 					 node_label='atom',
 					 edge_label='bond_type',
 					 depth=10,
 					 k_func='MinMax',
 					 compute_method='trie',
 					 parallel='imap_unordered',
 					 n_jobs=None,
 					 chunksize=None,
 					 verbose=True):
 	"""Calculate path graph kernels up to depth/height h between graphs.
 	Parameters
 	----------
 	Gn : List of NetworkX graph
 		List of graphs between which the kernels are calculated. Given as the 
 		single positional argument.
 	G1, G2 : NetworkX graphs
 		Two graphs between which the kernel is calculated. Given as two 
 		positional arguments instead of Gn.
 	node_label : string
 		Node attribute used as label. The default node label is atom.
 	edge_label : string
 		Edge attribute used as label. The default edge label is bond_type.
 	depth : integer
 		Depth of search. Longest length of paths.
 	k_func : string or None
 		A kernel function applied using different notions of fingerprint 
 		similarity, defining the type of feature map and normalization method 
 		applied for the graph kernel. The following choices are available:
 		'MinMax': use the MinMax kernel and counting feature map.
 		'tanimoto': use the Tanimoto kernel and binary feature map.
 		None: not supported when parallel='imap_unordered' (raises 
 		NotImplementedError). With parallel=None the pairwise functions treat 
 		every value other than 'tanimoto' as MinMax.
 	compute_method : string
 		Computation method to store paths and compute the graph kernel. The 
 		following choices are available:
 		'trie': store paths as tries.
 		any other value (e.g. 'naive'): store paths to lists.
 	parallel : string or None
 		'imap_unordered': extract paths and compute kernels with a process 
 		pool. None: run everything in the current process. For any other 
 		value no kernel is computed and the matrix is returned filled with 
 		zeros.
 	n_jobs : int or None
 		Number of jobs for parallelization. None means 
 		multiprocessing.cpu_count().
 	chunksize : int or None
 		Chunk size handed to the pool. None lets the function pick one from 
 		the number of graphs and n_jobs.
 	verbose : bool
 		Whether progress bars and the final timing message are printed.
 	Return
 	------
 	Kmatrix : Numpy matrix
 		Kernel matrix, each element of which is the path kernel up to h between
 		2 graphs.
 	run_time : float
 		Wall-clock seconds spent extracting paths and computing the kernels.
 	Raises
 	------
 	NotImplementedError
 		If k_func is None and parallel is 'imap_unordered'.
 	"""
 	# pre-process
 	depth = int(depth)
 	Gn = args[0] if len(args) == 1 else [args[0], args[1]]
 	# work on copies so that adding default labels never touches the 
 	# caller's graphs.
 	Gn = [g.copy() for g in Gn]
 	Kmatrix = np.zeros((len(Gn), len(Gn)))
 	ds_attrs = get_dataset_attributes(
 		Gn,
 		attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', 
 					'edge_attr_dim', 'is_directed'],
 		node_label=node_label, edge_label=edge_label)
 	if k_func is not None:
 		if not ds_attrs['node_labeled']:
 			for G in Gn:
 				nx.set_node_attributes(G, '0', 'atom')
 		if not ds_attrs['edge_labeled']:
 			for G in Gn:
 				nx.set_edge_attributes(G, '0', 'bond_type')
 	start_time = time.time()
 	if parallel == 'imap_unordered':
 		# The kernel-less parallel branch used to reference an undefined name 
 		# and call its wrapper with the wrong number of arguments, so it could 
 		# only end in an error after all paths had been extracted. Fail 
 		# early and explicitly instead.
 		if k_func is None:
 			raise NotImplementedError(
 				"k_func=None is not supported with parallel='imap_unordered'.")
 		# n_jobs is used in arithmetic below, so resolve the None default.
 		if n_jobs is None:
 			import multiprocessing
 			n_jobs = multiprocessing.cpu_count()
 		if chunksize is None:
 			if len(Gn) < 100 * n_jobs:
 				chunksize = int(len(Gn) / n_jobs) + 1
 			else:
 				chunksize = 100
 		use_trie = compute_method == 'trie'
 		if use_trie:
 			getps_partial = partial(wrapper_find_all_path_as_trie, depth, 
 									ds_attrs, node_label, edge_label)
 		else:
 			getps_partial = partial(wrapper_find_all_paths_until_length, depth, 
 									ds_attrs, node_label, edge_label, True)
 		# ---- use pool.imap_unordered to parallel and track progress. ----
 		# get all paths of all graphs before calculating kernels to save time,
 		# but this may cost a lot of memory for large datasets.
 		all_paths = [[] for _ in range(len(Gn))]
 		pool = Pool(n_jobs)
 		try:
 			iterator = pool.imap_unordered(
 				getps_partial, zip(Gn, range(0, len(Gn))), chunksize)
 			if verbose:
 				iterator = tqdm(iterator, desc='getting paths', 
 								file=sys.stdout)
 			# results arrive in arbitrary order; the index restores it.
 			for i, ps in iterator:
 				all_paths[i] = ps
 		finally:
 			pool.close()
 			pool.join()
 		# init_worker publishes the shared path structures as a module-level 
 		# global inside each worker; the wrappers read them from there.
 		if use_trie:
 			def init_worker(trie_toshare):
 				global G_trie
 				G_trie = trie_toshare
 			do_partial = partial(wrapper_uhpath_do_trie, k_func)
 		else:
 			def init_worker(plist_toshare):
 				global G_plist
 				G_plist = plist_toshare
 			do_partial = partial(wrapper_uhpath_do_naive, k_func)
 		parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
 					glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, 
 					verbose=verbose)
 	elif parallel is None:
 		# ---- direct running, normally use single CPU core. ----
 		if compute_method == 'trie':
 			get_paths = find_all_path_as_trie
 			pair_kernel = _untilhpathkernel_do_trie
 		else:
 			get_paths = find_all_paths_until_length
 			pair_kernel = _untilhpathkernel_do_naive
 		all_paths = [
 			get_paths(g, depth, ds_attrs, node_label=node_label, 
 					  edge_label=edge_label)
 			for g in tqdm(Gn, desc='getting paths', file=sys.stdout, 
 						  disable=not verbose)
 		]
 		pbar = tqdm(
 			total=((len(Gn) + 1) * len(Gn) / 2),
 			desc='calculating kernels',
 			file=sys.stdout,
 			disable=not verbose)
 		# the kernel is symmetric: compute the upper triangle and mirror it.
 		for i in range(0, len(Gn)):
 			for j in range(i, len(Gn)):
 				Kmatrix[i][j] = pair_kernel(all_paths[i], all_paths[j], k_func)
 				Kmatrix[j][i] = Kmatrix[i][j]
 				pbar.update(1)
 		pbar.close()
 	run_time = time.time() - start_time
 	if verbose:
 		print("\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---"
 			  % (depth, len(Gn), run_time))
 	return Kmatrix, run_time
 def _untilhpathkernel_do_trie(trie1, trie2, k_func):
 	"""Calculate path graph kernels up to depth d between 2 graphs using trie.
 	Parameters
 	----------
 	trie1, trie2 : list
 		Tries that contains all paths in 2 graphs.
 	k_func : function
 		A kernel function applied using different notions of fingerprint 
 		similarity.
 	Return
 	------
 	kernel : float
 		Path kernel up to h between 2 graphs.
 	"""
 	if k_func == 'tanimoto':	  
 		# traverse all paths in graph1 and search them in graph2. Deep-first 
 		# search is applied.
 		def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
 			for key, node in root['children'].items():
 				pcurrent.append(key)
 				if node['isEndOfWord']:					
 					setlist[1] += 1
 					count2 = trie2.searchWord(pcurrent)
 					if count2 != 0:
 						setlist[0] += 1
 				if node['children'] != {}:
 					traverseTrie1t(node, trie2, setlist, pcurrent)
 				else:
 					del pcurrent[-1]
 			if pcurrent != []:
 				del pcurrent[-1]
 		# traverse all paths in graph2 and find out those that are not in 
 		# graph1. Deep-first search is applied. 
 		def traverseTrie2t(root, trie1, setlist, pcurrent=[]):
 			for key, node in root['children'].items():
 				pcurrent.append(key)
 				if node['isEndOfWord']:
 		#					print(node['count'])
 					count1 = trie1.searchWord(pcurrent)
 					if count1 == 0:	
 						setlist[1] += 1
 				if node['children'] != {}:
 					traverseTrie2t(node, trie1, setlist, pcurrent)
 				else:
 					del pcurrent[-1]
 			if pcurrent != []:
 				del pcurrent[-1]
 		setlist = [0, 0] # intersection and union of path sets of g1, g2.
 #		print(trie1.root)
 #		print(trie2.root)
 		traverseTrie1t(trie1.root, trie2, setlist)
 #		print(setlist)
 		traverseTrie2t(trie2.root, trie1, setlist)
 #		print(setlist)
 		kernel = setlist[0] / setlist[1]
 	else: # MinMax kernel		  
 		# traverse all paths in graph1 and search them in graph2. Deep-first 
 		# search is applied.
 		def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
 			for key, node in root['children'].items():
 				pcurrent.append(key)
 				if node['isEndOfWord']:
 		#					print(node['count'])
 					count1 = node['count']
 					count2 = trie2.searchWord(pcurrent)
 					sumlist[0] += min(count1, count2)
 					sumlist[1] += max(count1, count2)
 				if node['children'] != {}:
 					traverseTrie1m(node, trie2, sumlist, pcurrent)
 				else:
 					del pcurrent[-1]
 			if pcurrent != []:
 				del pcurrent[-1]
 		# traverse all paths in graph2 and find out those that are not in 
 		# graph1. Deep-first search is applied.				
 		def traverseTrie2m(root, trie1, sumlist, pcurrent=[]):
 			for key, node in root['children'].items():
 				pcurrent.append(key)
 				if node['isEndOfWord']:				   
 		#					print(node['count'])
 					count1 = trie1.searchWord(pcurrent)
 					if count1 == 0:	
 						sumlist[1] += node['count']
 				if node['children'] != {}:
 					traverseTrie2m(node, trie1, sumlist, pcurrent)
 				else:
 					del pcurrent[-1]
 			if pcurrent != []:
 				del pcurrent[-1]
 		sumlist = [0, 0] # sum of mins and sum of maxs
 #		print(trie1.root)
 #		print(trie2.root)
 		traverseTrie1m(trie1.root, trie2, sumlist)
 #		print(sumlist)
 		traverseTrie2m(trie2.root, trie1, sumlist)
 #		print(sumlist)
 		kernel = sumlist[0] / sumlist[1]
 	return kernel
 def wrapper_uhpath_do_trie(k_func, itr):
 	"""Compute the trie-based kernel for one pair of graph indices.
 	`itr` holds the two indices (i, j). The tries are read from the 
 	module-level global `G_trie`, which must have been set beforehand. 
 	Returns the tuple (i, j, kernel).
 	"""
 	first, second = itr[0], itr[1]
 	kernel = _untilhpathkernel_do_trie(G_trie[first], G_trie[second], k_func)
 	return first, second, kernel
 def _untilhpathkernel_do_naive(paths1, paths2, k_func):
 	"""Calculate path graph kernels up to depth d between 2 graphs naively.
 	Parameters
 	----------
 	paths_list : list of list
 		List of list of paths in all graphs, where for unlabeled graphs, each 
 		path is represented by a list of nodes; while for labeled graphs, each 
 		path is represented by a string consists of labels of nodes and/or 
 		edges on that path.
 	k_func : function
 		A kernel function applied using different notions of fingerprint 
 		similarity.
 	Return
 	------
 	kernel : float
 		Path kernel up to h between 2 graphs.
 	"""
 	all_paths = list(set(paths1 + paths2))
 	if k_func == 'tanimoto':
 		length_union = len(set(paths1 + paths2))
 		kernel = (len(set(paths1)) + len(set(paths2)) -
 				  length_union) / length_union
 #		vector1 = [(1 if path in paths1 else 0) for path in all_paths]
 #		vector2 = [(1 if path in paths2 else 0) for path in all_paths]
 #		kernel_uv = np.dot(vector1, vector2)
 #		kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)
 	else:  # MinMax kernel
 		path_count1 = Counter(paths1)
 		path_count2 = Counter(paths2)
 		vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
 				   for key in all_paths]
 		vector2 = [(path_count2[key] if (key in path_count2.keys()) else 0)
 				   for key in all_paths]
 		kernel = np.sum(np.minimum(vector1, vector2)) / \
 			np.sum(np.maximum(vector1, vector2))
 	return kernel
 def wrapper_uhpath_do_naive(k_func, itr):
 	"""Compute the list-based kernel for one pair of graph indices.
 	`itr` holds the two indices (i, j). The path lists are read from the 
 	module-level global `G_plist`, which must have been set beforehand. 
 	Returns the tuple (i, j, kernel).
 	"""
 	first, second = itr[0], itr[1]
 	kernel = _untilhpathkernel_do_naive(G_plist[first], G_plist[second], k_func)
 	return first, second, kernel
 def _untilhpathkernel_do_kernelless(paths1, paths2, k_func):
 	"""Calculate path graph kernels up to depth d between 2 graphs naively.
 	Parameters
 	----------
 	paths_list : list of list
 		List of list of paths in all graphs, where for unlabeled graphs, each 
 		path is represented by a list of nodes; while for labeled graphs, each 
 		path is represented by a string consists of labels of nodes and/or 
 		edges on that path.
 	k_func : function
 		A kernel function applied using different notions of fingerprint 
 		similarity.
 	Return
 	------
 	kernel : float
 		Path kernel up to h between 2 graphs.
 	"""
 	all_paths = list(set(paths1 + paths2))
 	if k_func == 'tanimoto':
 		length_union = len(set(paths1 + paths2))
 		kernel = (len(set(paths1)) + len(set(paths2)) -
 				  length_union) / length_union
 #		vector1 = [(1 if path in paths1 else 0) for path in all_paths]
 #		vector2 = [(1 if path in paths2 else 0) for path in all_paths]
 #		kernel_uv = np.dot(vector1, vector2)
 #		kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)
 	else:  # MinMax kernel
 		path_count1 = Counter(paths1)
 		path_count2 = Counter(paths2)
 		vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
 				   for key in all_paths]
 		vector2 = [(path_count2[key] if (key in path_count2.keys()) else 0)
 				   for key in all_paths]
 		kernel = np.sum(np.minimum(vector1, vector2)) / \
 			np.sum(np.maximum(vector1, vector2))
 	return kernel
 def wrapper_uhpath_do_kernelless(k_func, itr):
 	"""Compute the kernel-less path kernel for one pair of graph indices.
 	`itr` holds the two indices (i, j). The path lists are read from the 
 	module-level global `G_plist`, which must have been set beforehand. 
 	Returns the tuple (i, j, kernel).
 	"""
 	first, second = itr[0], itr[1]
 	kernel = _untilhpathkernel_do_kernelless(G_plist[first], G_plist[second], 
 											 k_func)
 	return first, second, kernel
 # @todo: (may be removable) this method finds paths repeatedly; it could be faster.
 def find_all_paths_until_length(G,
 								length,
 								ds_attrs,
 								node_label='atom',
 								edge_label='bond_type',
 								tolabelseqs=True):
 	"""Find all paths no longer than a certain maximum length in a graph. 
 	Paths are grown level by level: every path with l edges is extended by 
 	each neighbor of its last node that is not yet on the path.
 	Parameters
 	----------
 	G : NetworkX graphs
 		The graph in which paths are searched.
 	length : integer
 		The maximum length (number of edges) of paths.
 	ds_attrs: dict
 		Dataset attributes. Only used when tolabelseqs is True.
 	node_label : string
 		Node attribute used as label. The default node label is atom.
 	edge_label : string
 		Edge attribute used as label. The default edge label is bond_type.
 	tolabelseqs : bool
 		If True, paths are converted to label sequences with 
 		paths2labelseqs; if False, the raw node paths are returned.
 	Return
 	------
 	path : list
 		List of paths retrieved. With tolabelseqs=False each path is a list 
 		of nodes; otherwise it is whatever paths2labelseqs returns for it. 
 		A path and its reverse are both included.
 	"""
 	# paths with 0 edges: one per node.
 	path_l = [[n] for n in G.nodes]
 	all_paths = list(path_l)
 	for _ in range(length):
 		# `path + [neighbor]` always builds a new list, so existing paths are 
 		# never mutated and need no defensive copies.
 		path_l = [
 			path + [neighbor]
 			for path in path_l
 			for neighbor in G[path[-1]]
 			if neighbor not in path
 		]
 		if not path_l:
 			# nothing can be extended any further.
 			break
 		all_paths.extend(path_l)
 	# consider labels
 	if tolabelseqs:
 		return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)
 	return all_paths
 def wrapper_find_all_paths_until_length(length, ds_attrs, node_label, 
 									 edge_label, tolabelseqs, itr_item):
 	"""Run find_all_paths_until_length on one (graph, index) item.
 	`itr_item` is a pair whose first element is the graph and whose second is 
 	its index; the index is returned together with the paths so the caller 
 	can put results that arrive out of order back in place.
 	"""
 	graph, index = itr_item[0], itr_item[1]
 	paths = find_all_paths_until_length(
 		graph, length, ds_attrs,
 		node_label=node_label,
 		edge_label=edge_label,
 		tolabelseqs=tolabelseqs)
 	return index, paths
 def find_all_path_as_trie(G,
 						 length,
 						 ds_attrs,
 						 node_label='atom',
 						 edge_label='bond_type'):
 	"""Store the label sequences of all paths up to `length` edges in a trie.
 	Parameters
 	----------
 	G : NetworkX graphs
 		The graph in which paths are searched.
 	length : integer
 		The maximum length (number of edges) of paths.
 	ds_attrs: dict
 		Dataset attributes, forwarded to paths2labelseqs.
 	node_label : string
 		Node attribute used as label. The default node label is atom.
 	edge_label : string
 		Edge attribute used as label. The default edge label is bond_type.
 	Return
 	------
 	ptrie : Trie
 		Trie into which the label sequence of every path was inserted. The 
 		search starts from every node, so a path and its reverse are both 
 		inserted.
 	"""
 	ptrie = Trie()
 	def grow(prefix):
 		# Depth-first extension of the node path `prefix` (shared list, 
 		# modified in place and restored before returning).
 		if len(prefix) >= length + 1:
 			return
 		for neighbor in G[prefix[-1]]:
 			if neighbor in prefix:
 				continue
 			prefix.append(neighbor)
 			word = paths2labelseqs([prefix], G, ds_attrs, node_label, 
 								   edge_label)[0]
 			ptrie.insertWord(word)
 			grow(prefix)
 			prefix.pop()
 	# paths consisting of a single node.
 	single_node_paths = [[n] for n in G.nodes]
 	for word in paths2labelseqs(single_node_paths, G, ds_attrs, node_label, 
 								edge_label):
 		ptrie.insertWord(word)
 	# longer paths, starting from each node in turn.
 	for n in G.nodes:
 		grow([n])
 	return ptrie
 def wrapper_find_all_path_as_trie(length, ds_attrs, node_label, 
 									 edge_label, itr_item):
 	"""Run find_all_path_as_trie on one (graph, index) item.
 	`itr_item` is a pair whose first element is the graph and whose second is 
 	its index; the index is returned together with the trie so the caller can 
 	put results that arrive out of order back in place.
 	"""
 	graph, index = itr_item[0], itr_item[1]
 	trie = find_all_path_as_trie(
 		graph, length, ds_attrs,
 		node_label=node_label,
 		edge_label=edge_label)
 	return index, trie
 def paths2labelseqs(plist, G, ds_attrs, node_label, edge_label):
 	"""Convert node paths into tuples of labels.
 	Parameters
 	----------
 	plist : list of list
 		Paths, each given as a list of nodes of G.
 	G : NetworkX graphs
 		The graph the paths belong to.
 	ds_attrs: dict
 		Its entries 'node_labeled' and 'edge_labeled' select which labels 
 		are used.
 	node_label : string
 		Node attribute used as label.
 	edge_label : string
 		Edge attribute used as label.
 	Return
 	------
 	path_strs : list of tuple
 		One tuple per path: alternating node and edge labels when both are 
 		labeled; node labels only or edge labels only when just one kind is 
 		labeled; otherwise one '0' per node of the path.
 	"""
 	with_nodes = ds_attrs['node_labeled']
 	with_edges = ds_attrs['edge_labeled']
 	if with_nodes and with_edges:
 		sequences = []
 		for path in plist:
 			labels = []
 			# node label followed by the label of the edge to the next node.
 			for current, following in zip(path, path[1:]):
 				labels.append(G.nodes[current][node_label])
 				labels.append(G[current][following][edge_label])
 			labels.append(G.nodes[path[-1]][node_label])
 			sequences.append(tuple(labels))
 		return sequences
 	if with_nodes:
 		return [
 			tuple(G.nodes[node][node_label] for node in path)
 			for path in plist
 		]
 	if with_edges:
 		# a single-node path has no edges and yields an empty tuple.
 		return [
 			tuple(G[current][following][edge_label]
 				  for current, following in zip(path, path[1:]))
 			for path in plist
 		]
 	# fully unlabeled: only the number of nodes distinguishes the paths.
 	return [tuple('0' for _ in path) for path in plist]
 #
 #def paths2GSuffixTree(paths):
 #	return Tree(paths, builder=ukkonen.Builder)
 # def find_paths(G, source_node, length):
 #	 """Find all paths no longer than a certain length those start from a source node. A recursive depth first search is applied.
 #	 Parameters
 #	 ----------
 #	 G : NetworkX graphs
 #		 The graph in which paths are searched.
 #	 source_node : integer
 #		 The number of the node from where all paths start.
 #	 length : integer
 #		 The length of paths.
 #	 Return
 #	 ------
 #	 path : list of list
 #		 List of paths retrieved, where each path is represented by a list of nodes.
 #	 """
 #	 return [[source_node]] if length == 0 else \
 #		 [[source_node] + path for neighbor in G[source_node]
 #		  for path in find_paths(G, neighbor, length - 1) if source_node not in path]
 # def find_all_paths(G, length):
 #	 """Find all paths with a certain length in a graph. A recursive depth first search is applied.
 #	 Parameters
 #	 ----------
 #	 G : NetworkX graphs
 #		 The graph in which paths are searched.
 #	 length : integer
 #		 The length of paths.
 #	 Return
 #	 ------
 #	 path : list of list
 #		 List of paths retrieved, where each path is represented by a list of nodes.
 #	 """
 #	 all_paths = []
 #	 for node in G:
 #		 all_paths.extend(find_paths(G, node, length))
 #	 # The following process is not carried out according to the original article
 #	 # all_paths_r = [ path[::-1] for path in all_paths ]
 #	 # # For each path, two presentation are retrieved from its two extremities. Remove one of them.
 #	 # for idx, path in enumerate(all_paths[:-1]):
 #	 #	 for path2 in all_paths_r[idx+1::]:
 #	 #		 if path == path2:
 #	 #			 all_paths[idx] = []
 #	 #			 break
 #	 # return list(filter(lambda a: a != [], all_paths))
 #	 return all_paths