
[to #42322933] format outputs for movie scene segmentation demo

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10328357
Branch: master
shuying.shu, yingda.chen · 3 years ago
Commit: 38a399cf38
4 changed files with 48 additions and 21 deletions
1. modelscope/models/cv/movie_scene_segmentation/model.py (+12, -9)
2. modelscope/models/cv/movie_scene_segmentation/utils/save_op.py (+10, -2)
3. modelscope/outputs.py (+20, -7)
4. modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py (+6, -3)

modelscope/models/cv/movie_scene_segmentation/model.py (+12, -9)

@@ -67,7 +67,6 @@ class MovieSceneSegmentationModel(TorchModel):
                 mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
         ])
 
-        self.infer_result = {'vid': [], 'sid': [], 'pred': []}
         sampling_method = self.cfg.dataset.sampling_method.name
         self.neighbor_size = self.cfg.dataset.sampling_method.params[
             sampling_method].neighbor_size
@@ -104,6 +103,8 @@ class MovieSceneSegmentationModel(TorchModel):
         shot_num = len(sids)
         cnt = shot_num // bs + 1
 
+        infer_sid, infer_pred = [], []
+        infer_result = {}
         for i in range(cnt):
             start = i * bs
             end = (i + 1) * bs if (i + 1) * bs < shot_num else shot_num
@@ -112,13 +113,14 @@ class MovieSceneSegmentationModel(TorchModel):
             input_ = torch.stack(input_)
             outputs = self.shared_step(input_)  # shape [b,2]
             prob = F.softmax(outputs, dim=1)
-            self.infer_result['sid'].extend(sid_.cpu().detach().numpy())
-            self.infer_result['pred'].extend(prob[:, 1].cpu().detach().numpy())
-        self.infer_result['pred'] = np.stack(self.infer_result['pred'])
+            infer_sid.extend(sid_.cpu().detach().numpy())
+            infer_pred.extend(prob[:, 1].cpu().detach().numpy())
+        infer_result.update({'pred': np.stack(infer_pred)})
+        infer_result.update({'sid': infer_sid})
 
-        assert len(self.infer_result['sid']) == len(sids)
-        assert len(self.infer_result['pred']) == len(inputs)
-        return self.infer_result
+        assert len(infer_result['sid']) == len(sids)
+        assert len(infer_result['pred']) == len(inputs)
+        return infer_result
 
     def shared_step(self, inputs):
         with torch.no_grad():
@@ -162,11 +164,12 @@ class MovieSceneSegmentationModel(TorchModel):
         thres = self.cfg.pipeline.save_threshold
 
         anno_dict = get_pred_boundary(pred_dict, thres)
-        scene_dict_lst, scene_list = pred2scene(self.shot2keyf, anno_dict)
+        scene_dict_lst, scene_list, shot_num, shot_dict_lst = pred2scene(
+            self.shot2keyf, anno_dict)
         if self.cfg.pipeline.save_split_scene:
             re_dir = scene2video(inputs['input_video_pth'], scene_list, thres)
             print(f'Split scene video saved to {re_dir}')
-        return len(scene_list), scene_dict_lst
+        return len(scene_list), scene_dict_lst, shot_num, shot_dict_lst
 
     def preprocess(self, inputs):
         logger.info('Begin shot detect......')
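
The key change in model.py is that inference no longer accumulates into the instance-level self.infer_result dict; each call builds its results in locals and returns a fresh dict, so repeated calls on the same model cannot append onto stale predictions. A minimal sketch of the resulting batched-inference pattern, assuming a model that exposes the shared_step shown above and a list of per-shot input tensors (the function name and the bs/sids arguments are illustrative, not part of this change):

import numpy as np
import torch
import torch.nn.functional as F

def infer_shot_boundaries(model, inputs, sids, bs=32):
    # Fresh accumulators on every call; nothing is stored on the model.
    infer_sid, infer_pred = [], []
    shot_num = len(sids)
    cnt = shot_num // bs + 1
    for i in range(cnt):
        start = i * bs
        end = min((i + 1) * bs, shot_num)
        if start >= end:
            continue  # trailing chunk is empty when shot_num % bs == 0
        batch = torch.stack(inputs[start:end])
        logits = model.shared_step(batch)  # shape [b, 2]
        prob = F.softmax(logits, dim=1)
        infer_sid.extend(sids[start:end])
        infer_pred.extend(prob[:, 1].cpu().detach().numpy())
    return {'sid': infer_sid, 'pred': np.stack(infer_pred)}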


modelscope/models/cv/movie_scene_segmentation/utils/save_op.py (+10, -2)

@@ -22,15 +22,23 @@ def pred2scene(shot2keyf, anno_dict):
     scene_list, pair_list = get_demo_scene_list(shot2keyf, anno_dict)
 
     scene_dict_lst = []
+    shot_num = len(shot2keyf)
+    shot_dict_lst = []
+    for item in shot2keyf:
+        tmp = item.split(' ')
+        shot_dict_lst.append({
+            'frame': [tmp[0], tmp[1]],
+            'timestamps': [tmp[-2], tmp[-1]]
+        })
     assert len(scene_list) == len(pair_list)
     for scene_ind, scene_item in enumerate(scene_list):
         scene_dict_lst.append({
             'shot': pair_list[scene_ind],
             'frame': scene_item[0],
-            'timestamp': scene_item[1]
+            'timestamps': scene_item[1]
         })
 
-    return scene_dict_lst, scene_list
+    return scene_dict_lst, scene_list, shot_num, shot_dict_lst
 
 
 def scene2video(source_movie_fn, scene_list, thres):
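
In save_op.py, pred2scene now also returns per-shot metadata: each shot2keyf record is split on spaces, with the first two fields read as the shot's start/end frame and the last two as its start/end timestamps. A small self-contained sketch of that parsing, using an invented record purely for illustration:

def build_shot_meta(shot2keyf):
    # assumed record layout: '<start_frame> <end_frame> ... <start_ts> <end_ts>'
    shot_dict_lst = []
    for item in shot2keyf:
        tmp = item.split(' ')
        shot_dict_lst.append({
            'frame': [tmp[0], tmp[1]],
            'timestamps': [tmp[-2], tmp[-1]]
        })
    return len(shot2keyf), shot_dict_lst

# hypothetical record, for illustration only
shot_num, shot_meta = build_shot_meta(
    ['0 119 shot_0000.jpg 00:00:00.000 00:00:04.963'])
# shot_meta[0] == {'frame': ['0', '119'],
#                  'timestamps': ['00:00:00.000', '00:00:04.963']}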


modelscope/outputs.py (+20, -7)

@@ -38,8 +38,10 @@ class OutputKeys(object):
     KWS_LIST = 'kws_list'
     HISTORY = 'history'
     TIMESTAMPS = 'timestamps'
-    SPLIT_VIDEO_NUM = 'split_video_num'
-    SPLIT_META_LIST = 'split_meta_list'
+    SHOT_NUM = 'shot_num'
+    SCENE_NUM = 'scene_num'
+    SCENE_META_LIST = 'scene_meta_list'
+    SHOT_META_LIST = 'shot_meta_list'
 
 
 TASK_OUTPUTS = {
@@ -309,19 +311,30 @@ TASK_OUTPUTS = {
     Tasks.shop_segmentation: [OutputKeys.MASKS],
     # movie scene segmentation result for a single video
     # {
-    #   "split_video_num":3,
-    #   "split_meta_list":
+    #   "shot_num":15,
+    #   "shot_meta_list":
+    #   [
+    #       {
+    #           "frame": [start_frame, end_frame],
+    #           "timestamps": [start_timestamp, end_timestamp]  # ['00:00:01.133', '00:00:02.245']
+    #
+    #       }
+    #   ]
+    #   "scene_num":3,
+    #   "scene_meta_list":
     #   [
     #       {
     #           "shot": [0,1,2],
     #           "frame": [start_frame, end_frame],
-    #           "timestamp": [start_timestamp, end_timestamp]  # ['00:00:01.133', '00:00:02.245']
+    #           "timestamps": [start_timestamp, end_timestamp]  # ['00:00:01.133', '00:00:02.245']
     #       }
     #   ]
     #
     # }
-    Tasks.movie_scene_segmentation:
-    [OutputKeys.SPLIT_VIDEO_NUM, OutputKeys.SPLIT_META_LIST],
+    Tasks.movie_scene_segmentation: [
+        OutputKeys.SHOT_NUM, OutputKeys.SHOT_META_LIST, OutputKeys.SCENE_NUM,
+        OutputKeys.SCENE_META_LIST
+    ],
 
     # ============ nlp tasks ===================


modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py (+6, -3)

@@ -60,9 +60,12 @@ class MovieSceneSegmentationPipeline(Pipeline):
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         data = {'input_video_pth': self.input_video_pth, 'feat': inputs}
-        video_num, meta_lst = self.model.postprocess(data)
+        scene_num, scene_meta_lst, shot_num, shot_meta_lst = self.model.postprocess(
+            data)
         result = {
-            OutputKeys.SPLIT_VIDEO_NUM: video_num,
-            OutputKeys.SPLIT_META_LIST: meta_lst
+            OutputKeys.SHOT_NUM: shot_num,
+            OutputKeys.SHOT_META_LIST: shot_meta_lst,
+            OutputKeys.SCENE_NUM: scene_num,
+            OutputKeys.SCENE_META_LIST: scene_meta_lst
         }
         return result
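
End to end, the pipeline's postprocess now surfaces the four new output keys; a rough usage sketch, assuming the standard modelscope pipeline entry point (the model id and video path below are placeholders, not taken from this change):

from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# placeholder model id and input video path
segmenter = pipeline(Tasks.movie_scene_segmentation, model='<model-id>')
result = segmenter('path/to/movie.mp4')

print(result[OutputKeys.SHOT_NUM], result[OutputKeys.SCENE_NUM])
for scene in result[OutputKeys.SCENE_META_LIST]:
    print(scene['shot'], scene['timestamps'])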
