@@ -81,6 +81,14 @@ var (
MLU: CAMBRICONMLU290,
GCU: EnflameT20,
}
// CardModelNameCmdMap holds the start-up shell command used for an inference
// deployment, keyed first by compute card and then by model name.
// generateCmdForInferDeployInstance reads it with option.ComputeCard and
// option.ModelName when option.Cmd is empty.
//
// Entries whose value is the empty string are still present in the map, so a
// lookup for them succeeds and yields an empty command.
CardModelNameCmdMap = map[string]map[string]string{
BIV100: {"blip-image-captioning-base": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code; python infer_biv100.py",
"imagenet_resnet50": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_biv100.py",
"chatGLM_6B": "su root; pip install transformers==4.33.2; pip install fastapi uvicorn[standard]; cd /code; python infer_biv100.py"},
MLU: {"blip-image-captioning-base": "",
"imagenet_resnet50": "su root; . /torch/venv3/pytorch/bin/activate; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_mlu.py",
"chatGLM_6B": ""},
}
)
func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink {
@@ -561,7 +569,7 @@ func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (int
}
func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error {
err := o.generateResourceId(ctx, option)
err := o.generateResourceId(ctx, option, nil )
if err != nil {
return err
}
@@ -569,15 +577,15 @@ func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.A
if err != nil {
return err
}
err = o.generateImageId(ctx, option)
err = o.generateImageId(ctx, option, nil )
if err != nil {
return err
}
err = o.generateAlgorithmId(ctx, option)
err = o.generateAlgorithmId(ctx, option, nil )
if err != nil {
return err
}
err = o.generateCmd(option)
err = o.generateCmd(option, nil )
if err != nil {
return err
}
@@ -592,10 +600,7 @@ func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.A
return nil
}
func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption) error {
if option.ResourceType == "" {
return errors.New("ResourceType not set")
}
func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
req := &octopus.GetResourceSpecsReq{
Platform: o.platform,
ResourcePool: RESOURCE_POOL,
@@ -608,6 +613,30 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
return errors.New(specResp.Error.Message)
}
if option != nil {
err = generateResourceIdForTraining(option, specResp)
if err != nil {
return err
}
return nil
}
if ifoption != nil {
err = generateResourceIdForInferDeployInstance(ifoption, specResp)
if err != nil {
return err
}
return nil
}
return errors.New("failed to set ResourceId")
}
func generateResourceIdForTraining(option *option.AiOption, specResp *octopus.GetResourceSpecsResp) error {
if option.ResourceType == "" {
return errors.New("ResourceType not set")
}
if option.ResourceType == CPU {
for _, spec := range specResp.TrainResourceSpecs {
if spec.Price == 0 {
@@ -621,14 +650,44 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
if option.ComputeCard == "" {
option.ComputeCard = GCU
}
err = setResourceIdByCard(option, specResp, option.ComputeCard)
err : = setResourceIdByCard(option, specResp, option.ComputeCard)
if err != nil {
return err
}
return nil
}
return errors.New("failed to get ResourceId")
return errors.New("ResourceType not set")
}
// generateResourceIdForInferDeployInstance selects a resource spec for an
// inference deployment and stores its ID in option.ResourceId.
//
// The compute card is currently fixed to BIV100; option.ComputeCard is set to
// that value once the card's name has been found in cardCnMap.
//
// A spec is accepted when its Name splits on COMMA into exactly four fields,
// the first field splits on STAR into exactly two parts, and those parts are
// "1" and the card name from cardCnMap. The first accepted spec wins.
//
// It returns an error when BIV100 has no entry in cardCnMap or when no spec in
// specResp.TrainResourceSpecs is accepted.
func generateResourceIdForInferDeployInstance(option *option.InferOption, specResp *octopus.GetResourceSpecsResp) error {
	// temporarily use bi-v100
	cardName, ok := cardCnMap[BIV100]
	if !ok {
		// The error must be returned: without the return the lookup failure
		// is dropped and matching continues against an empty card name.
		return errors.New("computeCard not set")
	}

	// set computeCard
	option.ComputeCard = BIV100

	for _, spec := range specResp.TrainResourceSpecs {
		names := strings.Split(spec.Name, COMMA)
		if len(names) != 4 {
			continue
		}

		ns := strings.Split(names[0], STAR)
		if len(ns) != 2 {
			continue
		}

		if ns[0] == "1" && ns[1] == cardName {
			option.ResourceId = spec.Id
			return nil
		}
	}

	return errors.New("failed to set ResourceId")
}
func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiOption) error {
@@ -656,46 +715,66 @@ func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiO
return errors.New("failed to get DatasetsId")
}
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption) error {
if option.TaskType == "" {
return errors.New("TaskType not set")
}
req := &octopus.GetUserImageListReq{
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
preImgReq := &octopus.GetPresetImageListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
PageSize: o.pageSize,
}
resp, err := o.octopusRpc.GetUserImageList(ctx, r eq)
preImgResp, err := o.octopusRpc.GetPresetImageList(ctx, preImgR eq)
if err != nil {
return err
}
if !resp.Success {
return errors.New("failed to get imageId ")
if !p reImgR esp.Success {
return errors.New("failed to get PresetImages ")
}
if option.ResourceType == CPU {
for _, img := range resp.Payload.Images {
if img.Image.ImageName == "test-image" {
option.ImageId = img.Image.Id
return nil
if option != nil {
if option.TaskType == "" {
return errors.New("TaskType not set")
}
req := &octopus.GetUserImageListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
PageSize: o.pageSize,
}
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
if err != nil {
return err
}
if !resp.Success {
return errors.New("failed to get imageId")
}
if option.ResourceType == CPU {
for _, img := range resp.Payload.Images {
if img.Image.ImageName == "test-image" {
option.ImageId = img.Image.Id
return nil
}
}
}
}
preImgReq := &octopus.GetPresetImageListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
PageSize: o.pageSize,
}
preImgResp, err := o.octopusRpc.GetPresetImageList(ctx, preImgReq)
if err != nil {
return err
err = generateImageIdForTraining(option, preImgResp)
if err != nil {
return err
}
return nil
}
if !preImgResp.Success {
return errors.New("failed to get PresetImages")
if ifoption != nil {
err = generateImageIdForInferDeployInstance(ifoption, preImgResp)
if err != nil {
return err
}
return nil
}
return errors.New("failed to get ImageId")
}
func generateImageIdForTraining(option *option.AiOption, preImgResp *octopus.GetPresetImageListResp) error {
if option.ResourceType == CARD {
for _, image := range preImgResp.Payload.Images {
if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(option.ComputeCard)]) {
@@ -717,11 +796,35 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
}
}
}
return errors.New("failed to set ImageId")
}
return errors.New("failed to get ImageId")
// generateImageIdForInferDeployInstance scans the preset images and stores the
// ID of the first suitable one in option.ImageId.
//
// The card is currently fixed to BIV100. An image is considered only when its
// ImageName contains the alias that cardAliasMap holds for the upper-cased
// card; the switch on that upper-cased card then decides which ImageVersion
// prefix, if any, is additionally required. It returns an error when no image
// qualifies.
func generateImageIdForInferDeployInstance(option *option.InferOption, preImgResp *octopus.GetPresetImageListResp) error {
	// temporarily use bi-v100
	card := strings.ToUpper(BIV100)
	alias := cardAliasMap[card]

	for _, image := range preImgResp.Payload.Images {
		if !strings.Contains(image.ImageName, alias) {
			continue
		}

		usable := false
		switch card {
		case GCU:
			usable = strings.HasPrefix(image.ImageVersion, "t20_")
		case BIV100:
			usable = strings.HasPrefix(image.ImageVersion, "bi_")
		case MLU:
			usable = true
		}

		if usable {
			option.ImageId = image.Id
			return nil
		}
	}

	return errors.New("failed to set ImageId")
}
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption ) error {
req := &octopus.GetMyAlgorithmListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
@@ -735,6 +838,26 @@ func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.Ai
return errors.New("failed to get algorithmId")
}
if option != nil {
err = generateAlgorithmIdForTraining(option, resp)
if err != nil {
return err
}
return nil
}
if ifoption != nil {
err = generateAlgorithmIdForInferDeployInstance(ifoption, resp)
if err != nil {
return err
}
return nil
}
return errors.New("failed to set AlgorithmId")
}
func generateAlgorithmIdForTraining(option *option.AiOption, resp *octopus.GetMyAlgorithmListResp) error {
for _, algorithm := range resp.Payload.Algorithms {
if algorithm.FrameworkName == strings.Title(option.TaskType) {
ns := strings.Split(algorithm.AlgorithmName, UNDERSCORE)
@@ -760,14 +883,48 @@ func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.Ai
}
}
if option.AlgorithmId == "" {
return errors.New("Algorithm does not exist")
return errors.New("Algorithm does not exist")
}
// generateAlgorithmIdForInferDeployInstance stores in option.AlgorithmId the
// ID of the first algorithm in resp whose AlgorithmName contains
// option.ModelName.
//
// option.ModelType is only required to be non-empty; it takes no part in the
// match. An error is returned when ModelType or ModelName is empty, or when no
// algorithm name contains ModelName.
func generateAlgorithmIdForInferDeployInstance(option *option.InferOption, resp *octopus.GetMyAlgorithmListResp) error {
	if option.ModelType == "" {
		return errors.New("ModelType not set")
	}

	if option.ModelName == "" {
		return errors.New("ModelName not set")
	}

	for _, algorithm := range resp.Payload.Algorithms {
		if strings.Contains(algorithm.AlgorithmName, option.ModelName) {
			option.AlgorithmId = algorithm.AlgorithmId
			return nil
		}
	}

	return errors.New("ModelName does not exist")
}
func (o *OctopusLink) generateCmd(option *option.AiOption) error {
func (o *OctopusLink) generateCmd(option *option.AiOption, ifoption *option.InferOption) error {
if option != nil {
err := generateCmdForTraining(option)
if err != nil {
return err
}
return nil
}
if ifoption != nil {
err := generateCmdForInferDeployInstance(ifoption)
if err != nil {
return err
}
return nil
}
return errors.New("failed to set cmd")
}
func generateCmdForTraining(option *option.AiOption) error {
if option.Cmd == "" {
switch option.ComputeCard {
case GCU:
@@ -782,6 +939,23 @@ func (o *OctopusLink) generateCmd(option *option.AiOption) error {
return nil
}
// generateCmdForInferDeployInstance supplies a default start command for an
// inference deployment. A command already present in option.Cmd is left
// untouched; otherwise the command is looked up in CardModelNameCmdMap by
// option.ComputeCard and then option.ModelName. A missing card or model entry
// is reported as an error.
func generateCmdForInferDeployInstance(option *option.InferOption) error {
	if option.Cmd != "" {
		// Caller already chose a command.
		return nil
	}

	byModel, found := CardModelNameCmdMap[option.ComputeCard]
	if !found {
		return errors.New("failed to set cmd, ComputeCard not exist")
	}

	command, found := byModel[option.ModelName]
	if !found {
		return errors.New("failed to set cmd, ModelName not exist")
	}

	option.Cmd = command
	return nil
}
func (o *OctopusLink) generateEnv(option *option.AiOption) error {
return nil
@@ -1020,3 +1194,55 @@ func (o *OctopusLink) GetInferResult(ctx context.Context, url string, file multi
return recv.Result, nil
}
// CreateInferDeployInstance prepares option for an inference deployment and
// creates a notebook for it through the Octopus RPC client.
//
// The resource spec, algorithm, image and command are resolved in that order,
// each step writing its result onto option; the first failing step aborts the
// call. On success the ID from the CreateNotebook response payload is
// returned. When the RPC fails, or the response reports !Success, an empty ID
// and an error are returned.
func (o *OctopusLink) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
	if err := o.generateResourceId(ctx, nil, option); err != nil {
		return "", err
	}
	if err := o.generateAlgorithmId(ctx, nil, option); err != nil {
		return "", err
	}
	if err := o.generateImageId(ctx, nil, option); err != nil {
		return "", err
	}
	if err := o.generateCmd(nil, option); err != nil {
		return "", err
	}

	// Description has the form <ModelType>/<ModelName>/<lower-cased card>.
	description := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + strings.ToLower(BIV100)

	createReq := &octopus.CreateNotebookReq{
		Platform: o.platform,
		Params: &octopus.CreateNotebookParam{
			Name:             option.TaskName,
			ResourcePool:     RESOURCE_POOL,
			ResourceSpecId:   option.ResourceId,
			AlgorithmId:      option.AlgorithmId,
			AlgorithmVersion: VERSION,
			ImageId:          option.ImageId,
			DatasetId:        "",
			DatasetVersion:   "",
			Command:          option.Cmd,
			Desc:             description,
			TaskNumber:       1,
		},
	}

	createResp, err := o.octopusRpc.CreateNotebook(ctx, createReq)
	if err != nil {
		return "", err
	}
	if !createResp.Success {
		return "", errors.New(createResp.Error.Message)
	}

	return createResp.Payload.Id, nil
}