@@ -402,6 +402,7 @@ class Metrics(object):
     # accuracy
     accuracy = 'accuracy'
+    multi_average_precision = 'mAP'
     audio_noise_metric = 'audio-noise-metric'
     # text gen
@@ -24,6 +24,7 @@ class MetricKeys(object):
    ROUGE_1 = 'rouge-1'
    ROUGE_L = 'rouge-l'
    NED = 'ned'  # ocr metric
+    mAP = 'mAP'
    BatchAcc = 'inbatch_t2i_recall_at_1'
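With the identifier registered in `Metrics` and the result key in `MetricKeys`, a configuration can request the new metric by its registered name. A minimal sketch, assuming the usual `evaluation.metrics` field of a ModelScope configuration (the exact layout is model-specific):

```python
from modelscope.metainfo import Metrics

def cfg_modify_fn(cfg):
    # Select the new metric by its registered name ('mAP'); the METRICS
    # registry resolves it to the AveragePrecisionMetric defined below.
    cfg.evaluation.metrics = [Metrics.multi_average_precision]
    return cfg
```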
@@ -0,0 +1,67 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import Dict
+
+import numpy as np
+
+from modelscope.metainfo import Metrics
+from modelscope.outputs import OutputKeys
+from modelscope.utils.registry import default_group
+from .base import Metric
+from .builder import METRICS, MetricKeys
+
+
+@METRICS.register_module(
+    group_key=default_group, module_name=Metrics.multi_average_precision)
+class AveragePrecisionMetric(Metric):
+    """The metric computation class for multi average precision.
+
+    This metric class calculates the multi average precision over all
+    input batches.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.preds = []
+        self.labels = []
+        self.thresh = kwargs.get('threshold', 0.5)
+
+    def add(self, outputs: Dict, inputs: Dict):
+        label_name = OutputKeys.LABEL if OutputKeys.LABEL in inputs else OutputKeys.LABELS
+        ground_truths = inputs[label_name]
+        # Prefer the first recognized prediction key present in the outputs;
+        # .get() avoids a KeyError when the outputs carry no label key.
+        eval_results = outputs.get(label_name)
+        for key in [
+                OutputKeys.CAPTION, OutputKeys.TEXT, OutputKeys.BOXES,
+                OutputKeys.LABELS, OutputKeys.SCORES
+        ]:
+            if key in outputs and outputs[key] is not None:
+                eval_results = outputs[key]
+                break
+        assert type(ground_truths) == type(eval_results)
+        for truth in ground_truths:
+            self.labels.append(truth)
+        for result in eval_results:
+            # Normalize string predictions; keep box predictions as-is.
+            if isinstance(result, str):
+                self.preds.append(result.strip().replace(' ', ''))
+            else:
+                self.preds.append(result)
+
+    def evaluate(self):
+        assert len(self.preds) == len(self.labels)
+        scores = self._calculate_ap_score(self.preds, self.labels, self.thresh)
+        return {MetricKeys.mAP: scores.mean().item()}
+
+    def _calculate_ap_score(self, preds, labels, thresh=0.5):
+        hyps = np.array(preds)
+        refs = np.array(labels)
+        # Intersection box of each (x0, y0, x1, y1) pair: element-wise max of
+        # the top-left corners, element-wise min of the bottom-right corners.
+        a = np.where(hyps[:, :2] < refs[:, :2], refs[:, :2], hyps[:, :2])
+        b = np.where(hyps[:, 2:] < refs[:, 2:], hyps[:, 2:], refs[:, 2:])
+        interacts = np.concatenate([a, b], axis=1)
+        area_predictions = (hyps[:, 2] - hyps[:, 0]) * (
+            hyps[:, 3] - hyps[:, 1])
+        area_targets = (refs[:, 2] - refs[:, 0]) * (refs[:, 3] - refs[:, 1])
+        interacts_w = interacts[:, 2] - interacts[:, 0]
+        interacts_h = interacts[:, 3] - interacts[:, 1]
+        area_interacts = interacts_w * interacts_h
+        # IoU with an epsilon against division by zero; a prediction counts
+        # as a hit when the boxes overlap and the IoU reaches the threshold.
+        ious = area_interacts / (
+            area_predictions + area_targets - area_interacts + 1e-6)
+        return (ious >= thresh) & (interacts_w > 0) & (interacts_h > 0)
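As a quick sanity check of the scoring path (not part of the diff; it assumes `AveragePrecisionMetric` is in scope, since the new file's module path is not shown): the first prediction matches its reference exactly (IoU = 1), the second does not overlap at all, so the mean comes out at 0.5.

```python
from modelscope.outputs import OutputKeys

metric = AveragePrecisionMetric(threshold=0.5)
metric.add(
    outputs={OutputKeys.BOXES: [[0.0, 0.0, 10.0, 10.0], [0.0, 0.0, 4.0, 4.0]]},
    inputs={OutputKeys.LABELS: [[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 9.0, 9.0]]})
print(metric.evaluate())  # {'mAP': 0.5}
```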
@@ -9,6 +9,7 @@ from torchvision import transforms
 from modelscope.preprocessors.image import load_image
 from modelscope.utils.constant import ModeKeys
 from .base import OfaBasePreprocessor
+from .utils import transforms as T


 class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):
@@ -29,13 +30,14 @@ class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):
         super(OfaVisualGroundingPreprocessor,
               self).__init__(cfg, model_dir, mode, *args, **kwargs)
+        self.num_bins = self.cfg.model.get('num_bins', 1000)
         if self.mode == ModeKeys.TRAIN:
             # for positioning
-            self.positioning_transform = transforms.Compose([
-                transforms.RandomResize([self.patch_image_size],
-                                        max_size=self.patch_image_size),
-                transforms.ToTensor(),
-                transforms.Normalize(
+            self.positioning_transform = T.Compose([
+                T.RandomResize([self.patch_image_size],
+                               max_size=self.patch_image_size),
+                T.ToTensor(),
+                T.Normalize(
                     mean=self.mean,
                     std=self.std,
                     max_image_size=self.max_image_size)
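The swap away from `torchvision.transforms` matters because these OFA-style transforms process the target boxes together with the image (note `Normalize(..., max_image_size=...)`, a parameter torchvision's `Normalize` does not accept). A toy illustration of the invariant such paired transforms maintain, independent of either library:

```python
import torch

# Resizing a 640x480 image to 512x384 must scale (x0, y0, x1, y1) boxes
# by the same per-axis ratios, or the labels drift away from the pixels.
boxes = torch.tensor([[10.0, 10.0, 100.0, 100.0]])
w_ratio, h_ratio = 512 / 640, 384 / 480
scaled = boxes * torch.tensor([w_ratio, h_ratio, w_ratio, h_ratio])
print(scaled)  # tensor([[ 8.,  8., 80., 80.]])
```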
@@ -130,4 +132,10 @@ class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):
             'w_resize_ratio': w_resize_ratio,
             'h_resize_ratio': h_resize_ratio,
         }
+        if 'region_coord' in self.column_map and self.column_map[
+                'region_coord'] in data:
+            x0, y0, x1, y1 = data[
+                self.column_map['region_coord']].strip().split(',')
+            sample['label'] = [float(x0), float(y0), float(x1), float(y1)]
         return sample
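The new branch expects the region column to hold a comma-separated `x0,y0,x1,y1` string. A standalone re-run of that parsing, with a hypothetical column mapping and row:

```python
# Hypothetical dataset row: the box is stored as a CSV string.
column_map = {'region_coord': 'region_coord'}
data = {'region_coord': '10.5,20.0,110.5,220.0'}

x0, y0, x1, y1 = data[column_map['region_coord']].strip().split(',')
label = [float(x0), float(y0), float(x1), float(y1)]
assert label == [10.5, 20.0, 110.5, 220.0]
```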
@@ -34,6 +34,7 @@ class OFATrainer(EpochBasedTrainer):
             self,
             model: Optional[Union[TorchModel, nn.Module, str]] = None,
             cfg_file: Optional[str] = None,
+            cfg_modify_fn: Optional[Callable] = None,
             arg_parse_fn: Optional[Callable] = None,
             data_collator: Optional[Union[Callable, Dict[str,
                                                          Callable]]] = None,
@@ -49,7 +50,8 @@ class OFATrainer(EpochBasedTrainer):
             **kwargs):
         model = Model.from_pretrained(model, revision=model_revision)
         model_dir = model.model_dir
-        cfg = Config.from_file(cfg_file)
+        self.cfg_modify_fn = cfg_modify_fn
+        cfg = self.rebuild_config(Config.from_file(cfg_file))
         if 'work_dir' not in kwargs or len(kwargs['work_dir']) == 0:
             work_dir = cfg.train.work_dir
         else:
@@ -57,10 +59,12 @@ class OFATrainer(EpochBasedTrainer):
         tokenizer_files = {
             'zh': [
                 'tokenizer.json', 'tokenizer_config.json', 'vocab.txt',
-                'config.json'
+                'config.json', 'ans2label.json'
+            ],
+            'en': [
+                'tokenizer.json', 'vocab.json', 'merges.txt', 'config.json',
+                'ans2label.json'
             ],
-            'en':
-            ['tokenizer.json', 'vocab.json', 'merges.txt', 'config.json'],
         }
         for filename in tokenizer_files[cfg.model.get('language', 'en')]:
             finetune_file = os.path.join(work_dir, filename)
@@ -127,6 +131,11 @@ class OFATrainer(EpochBasedTrainer):
             **kwargs,
         )

+    def rebuild_config(self, cfg: Config):
+        if self.cfg_modify_fn is not None:
+            cfg = self.cfg_modify_fn(cfg)
+        return cfg
+
     def train_step(self, model, inputs):
         model.train()
         loss, sample_size, logging_output = self.criterion(model, inputs)
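Together with the new constructor argument, `rebuild_config` lets callers patch the configuration before the trainer reads it. A minimal sketch, assuming a config that carries the usual `train.max_epochs` key (paths and model id mirror the test below and are illustrative):

```python
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer

def cfg_modify_fn(cfg):
    cfg.train.max_epochs = 1  # assumed key; present in typical train configs
    return cfg

trainer = build_trainer(
    name=Trainers.ofa,
    default_args=dict(
        model='damo/ofa_ocr-recognition_scene_base_zh',
        work_dir='./workspace/ckpts/recognition',
        cfg_file='./workspace/ckpts/recognition/configuration.json',
        cfg_modify_fn=cfg_modify_fn))
```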
@@ -5,10 +5,10 @@ import unittest
 import json

+from modelscope.metainfo import Trainers
 from modelscope.msdatasets import MsDataset
 from modelscope.trainers import build_trainer
 from modelscope.utils.constant import DownloadMode, ModelFile
-from modelscope.utils.hub import read_config
 from modelscope.utils.test_utils import test_level
@@ -73,11 +73,12 @@ class TestOfaTrainer(unittest.TestCase):
     def test_trainer_std(self):
         WORKSPACE = './workspace/ckpts/recognition'
         os.makedirs(WORKSPACE, exist_ok=True)
+        config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION)
+        with open(config_file, 'w') as writer:
+            json.dump(self.finetune_cfg, writer)

         pretrained_model = 'damo/ofa_ocr-recognition_scene_base_zh'
-        cfg = read_config(pretrained_model)
-        config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION)
-        cfg.dump(config_file)
         args = dict(
             model=pretrained_model,
             work_dir=WORKSPACE,
@@ -94,7 +95,7 @@ class TestOfaTrainer(unittest.TestCase):
                 split='test[:20]',
                 download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS),
             cfg_file=config_file)
-        trainer = build_trainer(name='ofa', default_args=args)
+        trainer = build_trainer(name=Trainers.ofa, default_args=args)
         trainer.train()
         self.assertIn(