|
- # Copyright (c) OpenMMLab. All rights reserved.
- # Copyright (c) Alibaba, Inc. and its affiliates.
- import os
- import pickle
- import shutil
- import tempfile
- import time
-
- import torch
- from torch import distributed as dist
- from tqdm import tqdm
-
- from modelscope.utils.torch_utils import get_dist_info
-
-
def single_gpu_test(model,
                    data_loader,
                    data_collate_fn=None,
                    metric_classes=None):
    """Test model with a single gpu.

    The model is put into eval mode and every batch is forwarded under
    ``torch.no_grad()``. The output of each batch is handed to the metric
    objects together with the (optionally collated) batch; predictions are
    not accumulated here.

    Args:
        model (nn.Module): Model to be tested. It is invoked as
            ``model(**data)``, so each batch must be a mapping of keyword
            arguments.
        data_loader (nn.Dataloader): Pytorch data loader. The length of its
            ``dataset`` is used as the progress bar total.
        data_collate_fn: An optional callable applied to each batch before
            it is fed into the model.
        metric_classes (List): Optional list of metric objects. For every
            batch ``add(result, data)`` is called on each of them.

    Returns:
        None. Results are only delivered through ``metric_classes``.
    """
    model.eval()
    dataset = data_loader.dataset
    with tqdm(total=len(dataset), desc='test samples') as pbar:
        for data in data_loader:
            if data_collate_fn is not None:
                data = data_collate_fn(data)
            with torch.no_grad():
                result = model(**data)
            if metric_classes is not None:
                for metric_cls in metric_classes:
                    metric_cls.add(result, data)

            # NOTE(review): len(result) is taken as the batch size; this
            # presumably expects a sequence of per-sample outputs - confirm
            # for models that return a dict.
            pbar.update(len(result))
-
-
def multi_gpu_test(model,
                   data_loader,
                   tmpdir=None,
                   gpu_collect=False,
                   data_collate_fn=None,
                   metric_classes=None):
    """Test model with multiple gpus.

    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting
    ``gpu_collect=True``, it encodes results to gpu tensors and use gpu
    communication for results collection. On cpu mode it saves the results on
    different gpus to ``tmpdir`` and collects them by the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested. It is invoked as
            ``model(**data)``.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.
        data_collate_fn: An optional callable applied to each batch before
            it is fed into the model.
        metric_classes (List): Optional list of metric objects. After the
            results are collected, ``add(results, ground_truths)`` is called
            once on each of them, where ``ground_truths`` is the list of all
            dataset items.

    Returns:
        None. Results are only delivered through ``metric_classes``.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    total = len(dataset)
    # Rank and world size do not change while iterating; query them once.
    rank, world_size = get_dist_info()

    time.sleep(2)  # This line can prevent deadlock problem in some cases.

    shown = 0
    with tqdm(total=total, desc='test samples with multi gpus') as pbar:
        for data in data_loader:
            if data_collate_fn is not None:
                data = data_collate_fn(data)
            with torch.no_grad():
                result = model(**data)
            results.extend(result)

            if rank == 0:
                # Rank 0 advances the bar on behalf of every rank, assuming
                # all ranks processed a batch of the same size. The step is
                # clamped so the bar never runs past the dataset length
                # (the dataloader may pad some samples).
                step = min(len(result) * world_size, total - shown)
                if step > 0:
                    pbar.update(step)
                    shown += step

    # collect results from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, total)
    else:
        results = collect_results_cpu(results, total, tmpdir)

    if metric_classes is not None:
        # Only materialize the whole dataset when a metric actually needs it.
        ground_truths = [dataset[i] for i in range(total)]
        # NOTE(review): both collectors return the merged list on rank 0 only,
        # so ``results`` is None on every other rank here - confirm that the
        # metric objects tolerate that.
        for metric_cls in metric_classes:
            metric_cls.add(results, ground_truths)
-
-
def collect_results_cpu(result_part, size, tmpdir=None):
    """Collect results under cpu mode.

    On cpu mode, this function will save the results on different gpus to
    ``tmpdir`` and collect them by the rank 0 worker. Every rank must be able
    to reach ``tmpdir`` through the file system.

    Args:
        result_part (list): Result list containing result parts
            to be collected.
        size (int): Size of the results, commonly equal to length of
            the results. The merged list is truncated to this length.
        tmpdir (str | None): temporal directory for collected results to
            store. If set to None, rank 0 creates a fresh private temporary
            directory and shares its path with the other ranks. The
            directory is removed by rank 0 once the parts are merged.

    Returns:
        list | None: The collected results on rank 0, ``None`` on every
        other rank.
    """
    rank, world_size = get_dist_info()
    if tmpdir is None:
        # Never fall back to the system temp dir itself: it is deleted below.
        # Rank 0 creates a unique directory and broadcasts its path so that
        # all ranks write into the same place.
        shared = [tempfile.mkdtemp() if rank == 0 else None]
        if world_size > 1:
            dist.broadcast_object_list(shared, src=0)
        tmpdir = shared[0]
    else:
        # exist_ok avoids a race when several ranks create it concurrently.
        os.makedirs(tmpdir, exist_ok=True)

    # dump the part result to the dir
    with open(os.path.join(tmpdir, f'part_{rank}.pkl'), 'wb') as f:
        pickle.dump(result_part, f)
    # Wait until every rank has finished writing its part.
    if world_size > 1:
        dist.barrier()

    # collect all parts
    if rank != 0:
        return None

    # load results of all parts from tmp dir
    # NOTE: pickle is only safe here because the files were written by the
    # peer ranks of this job; never point tmpdir at untrusted content.
    part_list = []
    for i in range(world_size):
        part_file = os.path.join(tmpdir, f'part_{i}.pkl')
        with open(part_file, 'rb') as f:
            part_result = pickle.load(f)
        # When data is severely insufficient, an empty part_result
        # on a certain gpu could makes the overall outputs empty.
        if part_result:
            part_list.append(part_result)

    # sort the results: interleave the parts sample by sample
    ordered_results = [item for group in zip(*part_list) for item in group]
    # the dataloader may pad some samples
    ordered_results = ordered_results[:size]
    # remove tmp dir
    shutil.rmtree(tmpdir)
    return ordered_results
-
-
def collect_results_gpu(result_part, size):
    """Collect results under gpu mode.

    Each rank pickles its partial results into a uint8 tensor on the current
    cuda device, the tensors are exchanged with ``dist.all_gather`` and rank 0
    decodes and merges them.

    Args:
        result_part (list): Result list containing result parts
            to be collected.
        size (int): Size of the results, commonly equal to length of
            the results. The merged list is truncated to this length.

    Returns:
        list | None: The collected results on rank 0, ``None`` on every
        other rank.
    """
    rank, world_size = get_dist_info()

    # Serialize the local part into a flat byte tensor.
    payload = pickle.dumps(result_part)
    encoded = torch.tensor(
        bytearray(payload), dtype=torch.uint8, device='cuda')

    # Let every rank learn the payload length of every other rank.
    length = torch.tensor(encoded.shape, device='cuda')
    all_lengths = [length.clone() for _ in range(world_size)]
    dist.all_gather(all_lengths, length)

    # all_gather needs equally sized tensors, so pad to the longest payload.
    max_length = torch.tensor(all_lengths).max()
    padded = torch.zeros(max_length, dtype=torch.uint8, device='cuda')
    padded[:length[0]] = encoded
    gathered = [encoded.new_zeros(max_length) for _ in range(world_size)]

    # Exchange the padded payloads.
    dist.all_gather(gathered, padded)

    if rank != 0:
        return None

    parts = []
    for buffer, part_length in zip(gathered, all_lengths):
        # Strip the padding before unpickling.
        raw = buffer[:part_length[0]].cpu().numpy().tobytes()
        decoded = pickle.loads(raw)
        # When data is severely insufficient, an empty part on a certain
        # gpu could make the overall outputs empty, so skip it.
        if decoded:
            parts.append(decoded)

    # Interleave the parts sample by sample, then drop the samples the
    # dataloader may have padded.
    merged = [sample for group in zip(*parts) for sample in group]
    return merged[:size]
|