| @@ -32,6 +32,8 @@ def structuralspkernel(*args, | |||||
| node_kernels=None, | node_kernels=None, | ||||
| edge_kernels=None, | edge_kernels=None, | ||||
| compute_method='naive', | compute_method='naive', | ||||
| # parallel='imap_unordered', | |||||
| parallel=None, | |||||
| n_jobs=None, | n_jobs=None, | ||||
| verbose=True): | verbose=True): | ||||
| """Calculate mean average structural shortest path kernels between graphs. | """Calculate mean average structural shortest path kernels between graphs. | ||||
| @@ -112,29 +114,42 @@ def structuralspkernel(*args, | |||||
| start_time = time.time() | start_time = time.time() | ||||
| # get shortest paths of each graph in Gn | # get shortest paths of each graph in Gn | ||||
| splist = [None] * len(Gn) | |||||
| pool = Pool(n_jobs) | |||||
| itr = zip(Gn, range(0, len(Gn))) | |||||
| if len(Gn) < 100 * n_jobs: | |||||
| chunksize = int(len(Gn) / n_jobs) + 1 | |||||
| else: | |||||
| chunksize = 100 | |||||
| # get shortest path graphs of Gn | |||||
| if compute_method == 'trie': | |||||
| getsp_partial = partial(wrapper_getSP_trie, weight, ds_attrs['is_directed']) | |||||
| else: | |||||
| getsp_partial = partial(wrapper_getSP_naive, weight, ds_attrs['is_directed']) | |||||
| if verbose: | |||||
| iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize), | |||||
| desc='getting shortest paths', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(getsp_partial, itr, chunksize) | |||||
| for i, sp in iterator: | |||||
| splist[i] = sp | |||||
| # time.sleep(10) | |||||
| pool.close() | |||||
| pool.join() | |||||
| if parallel == 'imap_unordered': | |||||
| splist = [None] * len(Gn) | |||||
| pool = Pool(n_jobs) | |||||
| itr = zip(Gn, range(0, len(Gn))) | |||||
| if len(Gn) < 100 * n_jobs: | |||||
| chunksize = int(len(Gn) / n_jobs) + 1 | |||||
| else: | |||||
| chunksize = 100 | |||||
| # get shortest path graphs of Gn | |||||
| if compute_method == 'trie': | |||||
| getsp_partial = partial(wrapper_getSP_trie, weight, ds_attrs['is_directed']) | |||||
| else: | |||||
| getsp_partial = partial(wrapper_getSP_naive, weight, ds_attrs['is_directed']) | |||||
| if verbose: | |||||
| iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize), | |||||
| desc='getting shortest paths', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(getsp_partial, itr, chunksize) | |||||
| for i, sp in iterator: | |||||
| splist[i] = sp | |||||
| # time.sleep(10) | |||||
| pool.close() | |||||
| pool.join() | |||||
| # ---- direct running, normally use single CPU core. ---- | |||||
| elif parallel == None: | |||||
| splist = [] | |||||
| if verbose: | |||||
| iterator = tqdm(Gn, desc='getting sp graphs', file=sys.stdout) | |||||
| else: | |||||
| iterator = Gn | |||||
| if compute_method == 'trie': | |||||
| for g in iterator: | |||||
| splist.append(get_sps_as_trie(g, weight, ds_attrs['is_directed'])) | |||||
| else: | |||||
| for g in iterator: | |||||
| splist.append(get_shortest_paths(g, weight, ds_attrs['is_directed'])) | |||||
| # ss = 0 | # ss = 0 | ||||
| # ss += sys.getsizeof(splist) | # ss += sys.getsizeof(splist) | ||||
| @@ -146,14 +161,7 @@ def structuralspkernel(*args, | |||||
| # time.sleep(20) | # time.sleep(20) | ||||
| # # ---- direct running, normally use single CPU core. ---- | |||||
| # splist = [] | |||||
| # if compute_method == 'trie': | |||||
| # for g in tqdm(Gn, desc='getting sp graphs', file=sys.stdout): | |||||
| # splist.append(get_sps_as_trie(g, weight, ds_attrs['is_directed'])) | |||||
| # else: | |||||
| # for g in tqdm(Gn, desc='getting sp graphs', file=sys.stdout): | |||||
| # splist.append(get_shortest_paths(g, weight, ds_attrs['is_directed'])) | |||||
| # # ---- only for the Fast Computation of Shortest Path Kernel (FCSP) | # # ---- only for the Fast Computation of Shortest Path Kernel (FCSP) | ||||
| # sp_ml = [0] * len(Gn) # shortest path matrices | # sp_ml = [0] * len(Gn) # shortest path matrices | ||||
| @@ -174,22 +182,45 @@ def structuralspkernel(*args, | |||||
| # print(len(edge_w_g[0])) | # print(len(edge_w_g[0])) | ||||
| Kmatrix = np.zeros((len(Gn), len(Gn))) | Kmatrix = np.zeros((len(Gn), len(Gn))) | ||||
| # ---- use pool.imap_unordered to parallel and track progress. ---- | |||||
| def init_worker(spl_toshare, gs_toshare): | |||||
| global G_spl, G_gs | |||||
| G_spl = spl_toshare | |||||
| G_gs = gs_toshare | |||||
| if compute_method == 'trie': | |||||
| do_partial = partial(wrapper_ssp_do_trie, ds_attrs, node_label, edge_label, | |||||
| node_kernels, edge_kernels) | |||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||||
| glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) | |||||
| else: | |||||
| do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||||
| node_kernels, edge_kernels) | |||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||||
| glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) | |||||
| # ---- use pool.imap_unordered to parallel and track progress. ---- | |||||
| if parallel == 'imap_unordered': | |||||
| def init_worker(spl_toshare, gs_toshare): | |||||
| global G_spl, G_gs | |||||
| G_spl = spl_toshare | |||||
| G_gs = gs_toshare | |||||
| if compute_method == 'trie': | |||||
| do_partial = partial(wrapper_ssp_do_trie, ds_attrs, node_label, edge_label, | |||||
| node_kernels, edge_kernels) | |||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||||
| glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) | |||||
| else: | |||||
| do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||||
| node_kernels, edge_kernels) | |||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||||
| glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) | |||||
| # ---- direct running, normally use single CPU core. ---- | |||||
| elif parallel == None: | |||||
| from itertools import combinations_with_replacement | |||||
| itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||||
| if verbose: | |||||
| iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = itr | |||||
| if compute_method == 'trie': | |||||
| for i, j in iterator: | |||||
| kernel = ssp_do_trie(Gn[i], Gn[j], splist[i], splist[j], | |||||
| ds_attrs, node_label, edge_label, node_kernels, edge_kernels) | |||||
| Kmatrix[i][j] = kernel | |||||
| Kmatrix[j][i] = kernel | |||||
| else: | |||||
| for i, j in iterator: | |||||
| kernel = structuralspkernel_do(Gn[i], Gn[j], splist[i], splist[j], | |||||
| ds_attrs, node_label, edge_label, node_kernels, edge_kernels) | |||||
| # if(kernel > 1): | |||||
| # print("error here ") | |||||
| Kmatrix[i][j] = kernel | |||||
| Kmatrix[j][i] = kernel | |||||
| # # ---- use pool.map to parallel. ---- | # # ---- use pool.map to parallel. ---- | ||||
| # pool = Pool(n_jobs) | # pool = Pool(n_jobs) | ||||
| @@ -229,23 +260,6 @@ def structuralspkernel(*args, | |||||
| # pool.join() | # pool.join() | ||||
| # # ---- direct running, normally use single CPU core. ---- | |||||
| # from itertools import combinations_with_replacement | |||||
| # itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||||
| # if compute_method == 'trie': | |||||
| # for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||||
| # kernel = ssp_do_trie(Gn[i], Gn[j], splist[i], splist[j], | |||||
| # ds_attrs, node_label, edge_label, node_kernels, edge_kernels) | |||||
| # Kmatrix[i][j] = kernel | |||||
| # Kmatrix[j][i] = kernel | |||||
| # else: | |||||
| # for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||||
| # kernel = structuralspkernel_do(Gn[i], Gn[j], splist[i], splist[j], | |||||
| # ds_attrs, node_label, edge_label, node_kernels, edge_kernels) | |||||
| # # if(kernel > 1): | |||||
| # # print("error here ") | |||||
| # Kmatrix[i][j] = kernel | |||||
| # Kmatrix[j][i] = kernel | |||||
| run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
| if verbose: | if verbose: | ||||
| @@ -309,8 +323,13 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label, | |||||
| for p1, p2 in product(spl1, spl2): | for p1, p2 in product(spl1, spl2): | ||||
| if len(p1) == len(p2): | if len(p1) == len(p2): | ||||
| kernel += 1 | kernel += 1 | ||||
| kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||||
| try: | |||||
| kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||||
| except ZeroDivisionError: | |||||
| print(spl1, spl2) | |||||
| print(g1.nodes(data=True)) | |||||
| print(g1.edges(data=True)) | |||||
| raise Exception | |||||
| # # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation | # # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation | ||||
| # # compute vertex kernel matrix | # # compute vertex kernel matrix | ||||