Browse Source

[to #42322933]兼容新增clip huge模型

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10585552

    * compatible with ViT-H (huge), and set CLIP base as the default multi-modal-embedding pipeline
master
yichang.zyc yingda.chen 2 years ago
parent
commit
9187103e3a
2 changed files with 6 additions and 5 deletions
  1. +4
    -2
      modelscope/models/multi_modal/clip/model.py
  2. +2
    -3
      modelscope/pipelines/builder.py

+ 4
- 2
modelscope/models/multi_modal/clip/model.py View File

@@ -349,11 +349,13 @@ class CLIP(nn.Module):
text_num_hidden_layers: int,
text_type_vocab_size: int,
tokenizer: FullTokenizer,
# vision_head_width, added this param for ViT-H
vision_head_width: int = 64,
):
super().__init__()

if isinstance(vision_layers, (tuple, list)):
vision_heads = vision_width * 32 // 64
vision_heads = vision_width * 32 // vision_head_width
self.visual = ModifiedResNet(
layers=vision_layers,
output_dim=embed_dim,
@@ -361,7 +363,7 @@ class CLIP(nn.Module):
input_resolution=image_resolution,
width=vision_width)
else:
vision_heads = vision_width // 64
vision_heads = vision_width // vision_head_width
self.visual = VisualTransformer(
input_resolution=image_resolution,
patch_size=vision_patch_size,


+ 2
- 3
modelscope/pipelines/builder.py View File

@@ -93,9 +93,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
'damo/cv_resnet50_live-category'),
Tasks.video_category: (Pipelines.video_category,
'damo/cv_resnet50_video-category'),
Tasks.multi_modal_embedding:
(Pipelines.multi_modal_embedding,
'damo/multi-modal_clip-vit-large-patch14_zh'),
Tasks.multi_modal_embedding: (Pipelines.multi_modal_embedding,
'damo/multi-modal_clip-vit-base-patch16_zh'),
Tasks.generative_multi_modal_embedding:
(Pipelines.generative_multi_modal_embedding,
'damo/multi-modal_gemm-vit-large-patch14_generative-multi-modal-embedding'


Loading…
Cancel
Save