New translations path_up_to_h.py (French)

5 years ago · 940a268b9f
--- a/lang/fr/gklearn/kernels/path_up_to_h.py
+++ b/lang/fr/gklearn/kernels/path_up_to_h.py
@@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	
 	def __init__(self, **kwargs):
 		"""Initialise the kernel from keyword options.

 		Each option is read from ``kwargs`` with a fallback default:
 		``node_labels`` (``[]``), ``edge_labels`` (``[]``), ``depth``
 		(``10``, coerced with ``int``), ``k_func`` (``'MinMax'``),
 		``compute_method`` (``'trie'``) and ``ds_infos`` (``{}``).
 		"""
 		GraphKernel.__init__(self)
 		# NOTE(review): every option below is stored twice, once under a
 		# double-underscore name (subject to Python's private-name mangling
 		# inside the class) and once under a single-underscore name. This looks
 		# like the before/after lines of a rename kept side by side; confirm
 		# whether the double-underscore copies are still needed.
 		self.__node_labels = kwargs.get('node_labels', [])
 		self.__edge_labels = kwargs.get('edge_labels', [])
 		self.__depth = int(kwargs.get('depth', 10))
 		self.__k_func = kwargs.get('k_func', 'MinMax')
 		self.__compute_method = kwargs.get('compute_method', 'trie')
 		self.__ds_infos = kwargs.get('ds_infos', {})
 		# Single-underscore copies of the same options.
 		self._node_labels = kwargs.get('node_labels', [])
 		self._edge_labels = kwargs.get('edge_labels', [])
 		self._depth = int(kwargs.get('depth', 10))
 		self._k_func = kwargs.get('k_func', 'MinMax')
 		self._compute_method = kwargs.get('compute_method', 'trie')
 		self._ds_infos = kwargs.get('ds_infos', {})


 	def _compute_gm_series(self):
 		self.__add_dummy_labels(self._graphs)
 		self._add_dummy_labels(self._graphs)
 		
 		from itertools import combinations_with_replacement
 		itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)	
@@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

 		if self.__compute_method == 'trie':
 			all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
 		if self._compute_method == 'trie':
 			all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
 			for i, j in iterator_kernel:
 				kernel = self.__kernel_do_trie(all_paths[i], all_paths[j])
 				kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
 		else:
 			all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
 			all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
 			for i, j in iterator_kernel:
 				kernel = self.__kernel_do_naive(all_paths[i], all_paths[j])
 				kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
 				
@@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			
 			
 	def _compute_gm_imap_unordered(self):
 		self.__add_dummy_labels(self._graphs)
 		self._add_dummy_labels(self._graphs)
 		
 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
@@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		else:
 			chunksize = 100
 		all_paths = [[] for _ in range(len(self._graphs))]
 		if self.__compute_method == 'trie' and self.__k_func is not None:
 		if self._compute_method == 'trie' and self._k_func is not None:
 			get_ps_fun = self._wrapper_find_all_path_as_trie
 		elif self.__compute_method != 'trie' and self.__k_func is not None:  
 		elif self._compute_method != 'trie' and self._k_func is not None:  
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)  
 		else: 
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
@@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
 	 
 		if self.__compute_method == 'trie' and self.__k_func is not None:
 		if self._compute_method == 'trie' and self._k_func is not None:
 			def init_worker(trie_toshare):
 				global G_trie
 				G_trie = trie_toshare
 			do_fun = self._wrapper_kernel_do_trie
 		elif self.__compute_method != 'trie' and self.__k_func is not None:
 		elif self._compute_method != 'trie' and self._k_func is not None:
 			def init_worker(plist_toshare):
 				global G_plist
 				G_plist = plist_toshare
@@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			def init_worker(plist_toshare):
 				global G_plist
 				G_plist = plist_toshare
 			do_fun = self.__wrapper_kernel_do_kernelless # @todo: what is this?  
 			do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this?  
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, 
 					glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose) 	
 			
@@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	
 	
 	def _compute_kernel_list_series(self, g1, g_list):
 		self.__add_dummy_labels(g_list + [g1])
 		self._add_dummy_labels(g_list + [g1])
 		
 		if self._verbose >= 2:
 			iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
@@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			
 		kernel_list = [None] * len(g_list)

 		if self.__compute_method == 'trie':
 			paths_g1 = self.__find_all_path_as_trie(g1)
 			paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps]
 		if self._compute_method == 'trie':
 			paths_g1 = self._find_all_path_as_trie(g1)
 			paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps]
 			for i in iterator_kernel:
 				kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i])
 				kernel = self._kernel_do_trie(paths_g1, paths_g_list[i])
 				kernel_list[i] = kernel
 		else:
 			paths_g1 = self.__find_all_paths_until_length(g1)
 			paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps]
 			paths_g1 = self._find_all_paths_until_length(g1)
 			paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps]
 			for i in iterator_kernel:
 				kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i])
 				kernel = self._kernel_do_naive(paths_g1, paths_g_list[i])
 				kernel_list[i] = kernel
 				
 		return kernel_list
 	
 	
 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
 		self.__add_dummy_labels(g_list + [g1])
 		self._add_dummy_labels(g_list + [g1])
 		
 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
@@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		else:
 			chunksize = 100
 		paths_g_list = [[] for _ in range(len(g_list))]
 		if self.__compute_method == 'trie' and self.__k_func is not None:
 			paths_g1 = self.__find_all_path_as_trie(g1)
 		if self._compute_method == 'trie' and self._k_func is not None:
 			paths_g1 = self._find_all_path_as_trie(g1)
 			get_ps_fun = self._wrapper_find_all_path_as_trie
 		elif self.__compute_method != 'trie' and self.__k_func is not None:
 			paths_g1 = self.__find_all_paths_until_length(g1) 
 		elif self._compute_method != 'trie' and self._k_func is not None:
 			paths_g1 = self._find_all_paths_until_length(g1) 
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)  
 		else:
 			paths_g1 = self.__find_all_paths_until_length(g1)  
 			paths_g1 = self._find_all_paths_until_length(g1)  
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
 		if self._verbose >= 2:
 			iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
@@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	
 	
 	def _wrapper_kernel_list_do(self, itr):
 		"""Compute one kernel value between the reference paths and entry ``itr``.

 		The paths are read from the module-level globals ``G_p1`` and
 		``G_plist``, which are not assigned in this method (presumably set by a
 		worker initialiser -- confirm against the caller).

 		Parameters
 		----------
 		itr : index
 			Position in ``G_plist`` of the paths to compare against ``G_p1``.

 		Returns
 		-------
 		tuple
 			``(itr, kernel)`` so the caller can place the value by index.
 		"""
 		# Only the single-underscore helpers are used; the stale name-mangled
 		# calls and the duplicated return statements have been dropped.
 		if self._compute_method == 'trie' and self._k_func is not None:
 			return itr, self._kernel_do_trie(G_p1, G_plist[itr])
 		elif self._compute_method != 'trie' and self._k_func is not None:
 			return itr, self._kernel_do_naive(G_p1, G_plist[itr])
 		else:
 			# Reached only when no sub-kernel (`_k_func`) is configured.
 			# NOTE(review): `_kernel_do_kernelless` is not defined in the
 			# visible part of the file -- confirm it exists.
 			return itr, self._kernel_do_kernelless(G_p1, G_plist[itr])
 	
 	
 	def _compute_single_kernel_series(self, g1, g2):
 		"""Compute the path kernel between two graphs in the current process.

 		Parameters
 		----------
 		g1, g2 : graph
 			The two graphs to compare. They are passed to
 			``_add_dummy_labels`` and then to the selected path finder.

 		Returns
 		-------
 		kernel
 			The value returned by ``_kernel_do_trie`` when
 			``self._compute_method == 'trie'``, otherwise by
 			``_kernel_do_naive``.
 		"""
 		# Each step runs exactly once through the single-underscore helpers;
 		# the stale name-mangled duplicates of every statement are removed.
 		self._add_dummy_labels([g1] + [g2])
 		if self._compute_method == 'trie':
 			find_paths = self._find_all_path_as_trie
 			do_kernel = self._kernel_do_trie
 		else:
 			find_paths = self._find_all_paths_until_length
 			do_kernel = self._kernel_do_naive
 		paths_g1 = find_paths(g1)
 		paths_g2 = find_paths(g2)
 		return do_kernel(paths_g1, paths_g2)

 	
 	def __kernel_do_trie(self, trie1, trie2):
 	def _kernel_do_trie(self, trie1, trie2):
 		"""Compute path graph kernels up to depth d between 2 graphs using trie.
 	
 		Parameters
@@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		kernel : float
 			Path kernel up to h between 2 graphs.
 		"""
 		if self.__k_func == 'tanimoto':	  
 		if self._k_func == 'tanimoto':	  
 			# traverse all paths in graph1 and search them in graph2. Deep-first 
 			# search is applied.
 			def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
@@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	#		print(setlist)
 			kernel = setlist[0] / setlist[1]
 			
 		elif self.__k_func == 'MinMax': # MinMax kernel		  
 		elif self._k_func == 'MinMax': # MinMax kernel		  
 			# traverse all paths in graph1 and search them in graph2. Deep-first 
 			# search is applied.
 			def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
@@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_kernel_do_trie(self, itr):
 		"""Compute the trie-based kernel for one index pair.

 		The tries are read from the module-level global ``G_trie``, which is
 		not assigned in this method (presumably set by a worker initialiser --
 		confirm against the caller).

 		Parameters
 		----------
 		itr : sequence
 			``itr[0]`` and ``itr[1]`` are the two indices into ``G_trie``.

 		Returns
 		-------
 		tuple
 			``(i, j, kernel)``.
 		"""
 		i = itr[0]
 		j = itr[1]
 		# Single return through the renamed helper; the earlier return via the
 		# stale name-mangled `__kernel_do_trie` made this one unreachable.
 		return i, j, self._kernel_do_trie(G_trie[i], G_trie[j])
 	
 	
 	def __kernel_do_naive(self, paths1, paths2):
 	def _kernel_do_naive(self, paths1, paths2):
 		"""Compute path graph kernels up to depth d between 2 graphs naively.
 	
 		Parameters
@@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		"""
 		all_paths = list(set(paths1 + paths2))
 	
 		if self.__k_func == 'tanimoto':
 		if self._k_func == 'tanimoto':
 			length_union = len(set(paths1 + paths2))
 			kernel = (len(set(paths1)) + len(set(paths2)) -
 					  length_union) / length_union
@@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	#		kernel_uv = np.dot(vector1, vector2)
 	#		kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)
 	
 		elif self.__k_func == 'MinMax':  # MinMax kernel
 		elif self._k_func == 'MinMax':  # MinMax kernel
 			path_count1 = Counter(paths1)
 			path_count2 = Counter(paths2)
 			vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			kernel = np.sum(np.minimum(vector1, vector2)) / \
 				np.sum(np.maximum(vector1, vector2))
 				
 		elif self.__k_func is None: # no sub-kernel used; compare paths directly.
 		elif self._k_func is None: # no sub-kernel used; compare paths directly.
 			path_count1 = Counter(paths1)
 			path_count2 = Counter(paths2)
 			vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_kernel_do_naive(self, itr):
 		"""Compute the naive (path-list based) kernel for one index pair.

 		The path lists are read from the module-level global ``G_plist``, which
 		is not assigned in this method (presumably set by a worker initialiser
 		-- confirm against the caller).

 		Parameters
 		----------
 		itr : sequence
 			``itr[0]`` and ``itr[1]`` are the two indices into ``G_plist``.

 		Returns
 		-------
 		tuple
 			``(i, j, kernel)``.
 		"""
 		i = itr[0]
 		j = itr[1]
 		# Single return through the renamed helper; the earlier return via the
 		# stale name-mangled `__kernel_do_naive` made this one unreachable.
 		return i, j, self._kernel_do_naive(G_plist[i], G_plist[j])
 	
 	
 	def __find_all_path_as_trie(self, G):
 	def _find_all_path_as_trie(self, G):
 	#	all_path = find_all_paths_until_length(G, length, ds_attrs, 
 	#										   node_label=node_label,
 	#										   edge_label=edge_label)
@@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# them. Deep-first search is applied. Notice the reverse of each path is 
 		# also stored to the trie.			   
 		def traverseGraph(root, ptrie, G, pcurrent=[]):
 			if len(pcurrent) < self.__depth + 1:
 			if len(pcurrent) < self._depth + 1:
 				for neighbor in G[root]:
 					if neighbor not in pcurrent:
 						pcurrent.append(neighbor)
 						plstr = self.__paths2labelseqs([pcurrent], G)
 						plstr = self._paths2labelseqs([pcurrent], G)
 						ptrie.insertWord(plstr[0])
 						traverseGraph(neighbor, ptrie, G, pcurrent)
 			del pcurrent[-1]
@@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	
 		ptrie = Trie()
 		path_l = [[n] for n in G.nodes]  # paths of length l
 		path_l_str = self.__paths2labelseqs(path_l, G)
 		path_l_str = self._paths2labelseqs(path_l, G)
 		for p in path_l_str:
 			ptrie.insertWord(p)
 		for n in G.nodes:
@@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_find_all_path_as_trie(self, itr_item):
 		"""Build the path trie of one graph and return it with its index.

 		Parameters
 		----------
 		itr_item : sequence
 			``itr_item[0]`` is the graph and ``itr_item[1]`` its index.

 		Returns
 		-------
 		tuple
 			``(index, trie)`` where ``trie`` is the result of
 			``_find_all_path_as_trie`` for the graph.
 		"""
 		g = itr_item[0]
 		i = itr_item[1]
 		# Single return through the renamed helper; the earlier return via the
 		# stale name-mangled `__find_all_path_as_trie` shadowed this one.
 		return i, self._find_all_path_as_trie(g)
 	
 	
 	# @todo: (can maybe be removed) this method finds paths repetitively; it could be faster.
 	def __find_all_paths_until_length(self, G, tolabelseqs=True):
 	def _find_all_paths_until_length(self, G, tolabelseqs=True):
 		"""Find all paths no longer than a certain maximum length in a graph. A 
 		recursive depth first search is applied.
 	
@@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		"""
 		# path_l = [tuple([n]) for n in G.nodes]  # paths of length l
 		# all_paths = path_l[:]
 		# for l in range(1, self.__depth + 1):
 		# for l in range(1, self._depth + 1):
 		#	 path_l_new = []
 		#	 for path in path_l:
 		#		 for neighbor in G[path[-1]]:
@@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	
 		path_l = [[n] for n in G.nodes]  # paths of length l
 		all_paths = [p.copy() for p in path_l]
 		for l in range(1, self.__depth + 1):
 		for l in range(1, self._depth + 1):
 			path_lplus1 = []
 			for path in path_l:
 				for neighbor in G[path[-1]]:
@@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			all_paths += path_lplus1
 			path_l = [p.copy() for p in path_lplus1]
 	
 		# for i in range(0, self.__depth + 1):
 		# for i in range(0, self._depth + 1):
 		#	 new_paths = find_all_paths(G, i)
 		#	 if new_paths == []:
 		#		 break
@@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# consider labels
 	#	print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
 	#	print()
 		return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
 		return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
 			
 			
 	def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item):
 		"""Enumerate the paths of one graph and return them with its index.

 		Parameters
 		----------
 		tolabelseqs : bool
 			Forwarded to ``_find_all_paths_until_length``; when true that
 			method returns label sequences instead of raw node paths.
 		itr_item : sequence
 			``itr_item[0]`` is the graph and ``itr_item[1]`` its index.

 		Returns
 		-------
 		tuple
 			``(index, paths)``.
 		"""
 		g = itr_item[0]
 		i = itr_item[1]
 		# Single return through the renamed helper; the earlier return via the
 		# stale name-mangled `__find_all_paths_until_length` shadowed this one.
 		return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs)
 	
 	
 	def __paths2labelseqs(self, plist, G):
 		if len(self.__node_labels) > 0:
 			if len(self.__edge_labels) > 0:
 	def _paths2labelseqs(self, plist, G):
 		if len(self._node_labels) > 0:
 			if len(self._edge_labels) > 0:
 				path_strs = []
 				for path in plist:
 					pths_tmp = []
 					for idx, node in enumerate(path[:-1]):
 						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
 						pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
 					pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels))
 						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
 						pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
 					pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels))
 					path_strs.append(tuple(pths_tmp))
 			else:
 				path_strs = []
 				for path in plist:
 					pths_tmp = []
 					for node in path:
 						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
 						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
 					path_strs.append(tuple(pths_tmp))
 			return path_strs
 		else:
 			if len(self.__edge_labels) > 0:
 			if len(self._edge_labels) > 0:
 				path_strs = []
 				for path in plist:
 					if len(path) == 1:
@@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 					else:
 						pths_tmp = []
 						for idx, node in enumerate(path[:-1]):
 							pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
 							pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
 						path_strs.append(tuple(pths_tmp))
 				return path_strs
 			else:
@@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	#			return [tuple([len(path)]) for path in all_paths]
 	
 	
 	def _add_dummy_labels(self, Gn):
 		"""Attach dummy labels to the graphs when no real labels are configured.

 		Nothing happens when ``self._k_func`` is ``None``. Otherwise, for node
 		labels and edge labels independently: if the configured label list is
 		empty, or holds only ``SpecialLabel.DUMMY``, every graph in ``Gn`` gets
 		the attribute ``SpecialLabel.DUMMY`` set to ``'0'`` on all of its
 		nodes (respectively edges), and the configured list becomes
 		``[SpecialLabel.DUMMY]``.

 		Parameters
 		----------
 		Gn : list
 			Graphs to label; they are modified in place.
 		"""
 		if self._k_func is None:
 			return

 		def only_dummy(labels):
 			# True when there are no labels, or the dummy label is the only one.
 			return len(labels) == 0 or (len(labels) == 1 and labels[0] == SpecialLabel.DUMMY)

 		# Only the single-underscore attributes are updated. In the interleaved
 		# original, `_node_labels` was assigned under the stale edge-label
 		# condition and the name-mangled copies were written as well.
 		if only_dummy(self._node_labels):
 			for graph in Gn:
 				nx.set_node_attributes(graph, '0', SpecialLabel.DUMMY)
 			self._node_labels = [SpecialLabel.DUMMY]
 		if only_dummy(self._edge_labels):
 			for graph in Gn:
 				nx.set_edge_attributes(graph, '0', SpecialLabel.DUMMY)
 			self._edge_labels = [SpecialLabel.DUMMY]