Browse Source

fix bug

Signed-off-by: jagger <cossjie@foxmail.com>

Former-commit-id: adc07bd843
pull/194/head
jagger 1 year ago
parent
commit
61f7363302
4 changed files with 68 additions and 9 deletions
  1. +12
    -0
      api/internal/scheduler/schedulers/aiScheduler.go
  2. +1
    -1
      api/internal/scheduler/service/aiService.go
  3. +54
    -7
      api/internal/storeLink/modelarts.go
  4. +1
    -1
      api/internal/storeLink/storeLink.go

+ 12
- 0
api/internal/scheduler/schedulers/aiScheduler.go View File

@@ -31,6 +31,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
"sync"
)
@@ -340,6 +341,17 @@ func convertType(in interface{}) (*AiResult, error) {
result.Msg = resp.Error.Message
}

return &result, nil
case *modelartsservice.CreateTrainingJobResp:
resp := (in).(*modelartsservice.CreateTrainingJobResp)

if resp.ErrorMsg != "" {
result.Msg = resp.ErrorMsg
} else {

result.JobId = resp.Metadata.Id
}

return &result, nil
default:
return nil, errors.New("ai task response failed")


+ 1
- 1
api/internal/scheduler/service/aiService.go View File

@@ -64,7 +64,7 @@ func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[st
id, _ := strconv.ParseInt(c.Id, 10, 64)
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf))
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf))
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id)
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Name, id, c.Nickname)
collectorMap[c.Id] = modelarts
executorMap[c.Id] = modelarts
case SHUGUANGAI:


+ 54
- 7
api/internal/storeLink/modelarts.go View File

@@ -27,6 +27,10 @@ import (
"strings"
)

const (
// Ascend is the compute card name that GetComputeCards reports for ModelArts.
Ascend = "Ascend"
)

type ModelArtsLink struct {
modelArtsRpc modelartsservice.ModelArtsService
modelArtsImgRpc imagesservice.ImagesService
@@ -36,8 +40,8 @@ type ModelArtsLink struct {
pageSize int32
}

func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64) *ModelArtsLink {
return &ModelArtsLink{modelArtsRpc: modelArtsRpc, modelArtsImgRpc: modelArtsImgRpc, platform: name, participantId: id, pageIndex: 1, pageSize: 100}
func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64, nickname string) *ModelArtsLink {
return &ModelArtsLink{modelArtsRpc: modelArtsRpc, modelArtsImgRpc: modelArtsImgRpc, platform: nickname, participantId: id, pageIndex: 0, pageSize: 50}
}

func (m *ModelArtsLink) UploadImage(ctx context.Context, path string) (interface{}, error) {
@@ -87,6 +91,7 @@ func (m *ModelArtsLink) SubmitTask(ctx context.Context, imageId string, cmd stri
WorkspaceId: "0",
},
Algorithm: &modelarts.Algorithms{
Id: algorithmId,
Engine: &modelarts.EngineCreateTraining{
ImageUrl: imageId,
},
@@ -184,7 +189,9 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
}

func (m *ModelArtsLink) GetComputeCards(ctx context.Context) ([]string, error) {
return nil, nil
var cards []string
cards = append(cards, Ascend)
return cards, nil
}

func (m *ModelArtsLink) GetUserBalance(ctx context.Context) (float64, error) {
@@ -224,6 +231,10 @@ func (m *ModelArtsLink) GenerateSubmitParams(ctx context.Context, option *option
if err != nil {
return err
}
err = m.generateAlgorithmId(ctx, option)
if err != nil {
return err
}
err = m.generateImageId(option)
if err != nil {
return err
@@ -244,10 +255,7 @@ func (m *ModelArtsLink) GenerateSubmitParams(ctx context.Context, option *option
}

func (m *ModelArtsLink) generateResourceId(ctx context.Context, option *option.AiOption) error {
_, err := m.QuerySpecs(ctx)
if err != nil {
return err
}
option.ResourceId = "modelarts.kat1.xlarge"
return nil
}

@@ -270,3 +278,42 @@ func (m *ModelArtsLink) generateParams(option *option.AiOption) error {

return nil
}

// generateAlgorithmId looks up the ModelArts algorithm that matches the
// requested task and stores its id in option.AlgorithmId.
//
// It lists algorithms for m.platform, using m.pageIndex and m.pageSize as the
// offset and limit, and selects the first item whose engine version contains
// option.TaskType and whose name, split on DASH, begins with the parts
// <TaskType>, <DatasetsName>, <AlgorithmName>.
//
// The RPC error is returned unchanged. A non-empty resp.ErrorMsg or the absence
// of a matching algorithm is reported as an error.
func (m *ModelArtsLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
	req := &modelarts.ListAlgorithmsReq{
		Platform: m.platform,
		Offset:   m.pageIndex,
		Limit:    m.pageSize,
	}
	resp, err := m.modelArtsRpc.ListAlgorithms(ctx, req)
	if err != nil {
		return err
	}
	if resp.ErrorMsg != "" {
		return errors.New("failed to get algorithmId")
	}

	// NOTE(review): assumes JobConfig, Engine and Metadata are populated on
	// every returned item — confirm against the pcm-modelarts response types.
	for _, algorithm := range resp.Items {
		if !strings.Contains(algorithm.JobConfig.Engine.EngineVersion, option.TaskType) {
			continue
		}

		ns := strings.Split(algorithm.Metadata.Name, DASH)
		// A name with fewer than three parts cannot match the
		// <task>-<dataset>-<algorithm> convention; skip it rather than
		// panic on an out-of-range index.
		if len(ns) < 3 {
			continue
		}
		if ns[0] != option.TaskType || ns[1] != option.DatasetsName || ns[2] != option.AlgorithmName {
			continue
		}

		option.AlgorithmId = algorithm.Metadata.Id
		return nil
	}

	if option.AlgorithmId == "" {
		return errors.New("Algorithm does not exist")
	}

	// Reached only when the caller pre-populated option.AlgorithmId and no
	// listed algorithm matched.
	return errors.New("failed to get AlgorithmId")
}

+ 1
- 1
api/internal/storeLink/storeLink.go View File

@@ -99,7 +99,7 @@ func NewStoreLink(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservic
linkStruct := NewOctopusLink(octopusRpc, participant.Name, participant.Id)
return &StoreLink{ILinkage: linkStruct}
case TYPE_MODELARTS:
linkStruct := NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, participant.Name, participant.Id)
linkStruct := NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, participant.Name, participant.Id, "")
return &StoreLink{ILinkage: linkStruct}
case TYPE_SHUGUANGAI:
linkStruct := NewShuguangAi(aCRpc, participant.Name, participant.Id)


Loading…
Cancel
Save