Browse Source

提交代码

tags/v1.22.11.2^2
ychao_1983 3 years ago
parent
commit
f453610611
6 changed files with 175 additions and 13 deletions
  1. +6
    -0
      models/base_message.go
  2. +1
    -0
      options/locale/locale_en-US.ini
  3. +1
    -0
      options/locale/locale_zh-CN.ini
  4. +44
    -2
      routers/api/v1/repo/cloudbrain.go
  5. +11
    -11
      services/cloudbrain/cloudbrainTask/count.go
  6. +112
    -0
      services/cloudbrain/cloudbrainTask/train.go

+ 6
- 0
models/base_message.go View File

@@ -29,3 +29,9 @@ func BaseErrorMessageApi(message string) BaseMessageApi {
1, message, 1, message,
} }
} }

type BaseMessageWithDataApi struct {
Code int `json:"code"`
Message string `json:"message"`
Data interface{} `json:"data"`
}

+ 1
- 0
options/locale/locale_en-US.ini View File

@@ -616,6 +616,7 @@ organization = Organizations
uid = Uid uid = Uid
u2f = Security Keys u2f = Security Keys
bind_wechat = Bind WeChat bind_wechat = Bind WeChat
no_wechat_bind = Can not do the operation, please bind WeChat first.
wechat_bind = WeChat Binding wechat_bind = WeChat Binding
bind_account_information = Bind account information bind_account_information = Bind account information
bind_time = Bind Time bind_time = Bind Time


+ 1
- 0
options/locale/locale_zh-CN.ini View File

@@ -621,6 +621,7 @@ organization=组织
uid=用户 ID uid=用户 ID
u2f=安全密钥 u2f=安全密钥
wechat_bind = 微信绑定 wechat_bind = 微信绑定
no_wechat_bind = 不能创建任务,请先绑定微信。
bind_wechat = 绑定微信 bind_wechat = 绑定微信
bind_account_information = 绑定账号信息 bind_account_information = 绑定账号信息
bind_time = 绑定时间 bind_time = 绑定时间


+ 44
- 2
routers/api/v1/repo/cloudbrain.go View File

@@ -33,6 +33,48 @@ import (
routerRepo "code.gitea.io/gitea/routers/repo" routerRepo "code.gitea.io/gitea/routers/repo"
) )


func CloudBrainShow(ctx *context.APIContext) {

task, err := models.GetCloudbrainByJobID(ctx.Params(":jobid"))

if err != nil {
log.Info("error:" + err.Error())
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("repo.cloudbrain_query_fail"))
return
}
cloudbrainTask.PrepareSpec4Show(task)
task.ContainerIp = ""
if cloudbrainTask.IsTaskNotStop(task) {
cloudbrainTask.SyncTaskStatus(task)
}

if task.TrainJobDuration == "" {
if task.Duration == 0 {
var duration int64
if task.Status == string(models.JobWaiting) {
duration = 0
} else if task.Status == string(models.JobRunning) {
duration = time.Now().Unix() - int64(task.CreatedUnix)
} else {
duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix)
}
task.Duration = duration
}
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
}
//to unify image output
if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter {
task.ImageID = strconv.FormatInt(task.EngineID, 10)
task.Image = task.EngineName

} else if task.Type == models.TypeC2Net {
task.Image = task.EngineName
}

ctx.JSON(http.StatusOK, models.BaseMessageWithDataApi{Code: 0, Message: "", Data: task})

}

func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) { func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) {


if option.Type == 2 { if option.Type == 2 {
@@ -47,10 +89,10 @@ func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption)
func CreateCloudBrainInferenceTask(ctx *context.APIContext, option api.CreateTrainJobOption) { func CreateCloudBrainInferenceTask(ctx *context.APIContext, option api.CreateTrainJobOption) {


if option.Type == 0 { if option.Type == 0 {
cloudbrainTask.GrampusTrainJobGpuCreate(ctx.Context, option)
cloudbrainTask.CloudBrainInferenceJobCreate(ctx.Context, option)
} }
if option.Type == 1 { if option.Type == 1 {
cloudbrainTask.GrampusTrainJobNpuCreate(ctx.Context, option)
cloudbrainTask.ModelArtsInferenceJobCreate(ctx.Context, option)
} }


} }


+ 11
- 11
services/cloudbrain/cloudbrainTask/count.go View File

@@ -14,28 +14,28 @@ type StatusInfo struct {
ComputeResource string ComputeResource string
} }


var cloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)}
var cloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)}
var grampusTwoNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning}
var CloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)}
var CloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)}
var GrampusNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning}
var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): {
CloudBrainTypes: []int{models.TypeCloudBrainOne}, CloudBrainTypes: []int{models.TypeCloudBrainOne},
JobType: []models.JobType{models.JobTypeDebug}, JobType: []models.JobType{models.JobTypeDebug},
NotFinalStatuses: cloudbrainOneNotFinalStatuses,
NotFinalStatuses: CloudbrainOneNotFinalStatuses,
ComputeResource: models.GPUResource, ComputeResource: models.GPUResource,
}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainOne): {
CloudBrainTypes: []int{models.TypeCloudBrainOne}, CloudBrainTypes: []int{models.TypeCloudBrainOne},
JobType: []models.JobType{models.JobTypeTrain}, JobType: []models.JobType{models.JobTypeTrain},
NotFinalStatuses: cloudbrainOneNotFinalStatuses,
NotFinalStatuses: CloudbrainOneNotFinalStatuses,
ComputeResource: models.GPUResource, ComputeResource: models.GPUResource,
}, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainOne): {
CloudBrainTypes: []int{models.TypeCloudBrainOne}, CloudBrainTypes: []int{models.TypeCloudBrainOne},
JobType: []models.JobType{models.JobTypeInference}, JobType: []models.JobType{models.JobTypeInference},
NotFinalStatuses: cloudbrainOneNotFinalStatuses,
NotFinalStatuses: CloudbrainOneNotFinalStatuses,
ComputeResource: models.GPUResource, ComputeResource: models.GPUResource,
}, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { }, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): {
CloudBrainTypes: []int{models.TypeCloudBrainOne}, CloudBrainTypes: []int{models.TypeCloudBrainOne},
JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeBrainScore, models.JobTypeSnn4imagenet}, JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeBrainScore, models.JobTypeSnn4imagenet},
NotFinalStatuses: cloudbrainOneNotFinalStatuses,
NotFinalStatuses: CloudbrainOneNotFinalStatuses,
ComputeResource: models.GPUResource, ComputeResource: models.GPUResource,
}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { }, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): {
CloudBrainTypes: []int{models.TypeCloudBrainTwo, models.TypeCDCenter}, CloudBrainTypes: []int{models.TypeCloudBrainTwo, models.TypeCDCenter},
@@ -45,22 +45,22 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s
}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): {
CloudBrainTypes: []int{models.TypeCloudBrainTwo}, CloudBrainTypes: []int{models.TypeCloudBrainTwo},
JobType: []models.JobType{models.JobTypeTrain}, JobType: []models.JobType{models.JobTypeTrain},
NotFinalStatuses: cloudbrainTwoNotFinalStatuses,
NotFinalStatuses: CloudbrainTwoNotFinalStatuses,
ComputeResource: models.NPUResource, ComputeResource: models.NPUResource,
}, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): {
CloudBrainTypes: []int{models.TypeCloudBrainTwo}, CloudBrainTypes: []int{models.TypeCloudBrainTwo},
JobType: []models.JobType{models.JobTypeInference}, JobType: []models.JobType{models.JobTypeInference},
NotFinalStatuses: cloudbrainTwoNotFinalStatuses,
NotFinalStatuses: CloudbrainTwoNotFinalStatuses,
ComputeResource: models.NPUResource, ComputeResource: models.NPUResource,
}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: { }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: {
CloudBrainTypes: []int{models.TypeC2Net}, CloudBrainTypes: []int{models.TypeC2Net},
JobType: []models.JobType{models.JobTypeTrain}, JobType: []models.JobType{models.JobTypeTrain},
NotFinalStatuses: grampusTwoNotFinalStatuses,
NotFinalStatuses: GrampusNotFinalStatuses,
ComputeResource: models.GPUResource, ComputeResource: models.GPUResource,
}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: { }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: {
CloudBrainTypes: []int{models.TypeC2Net}, CloudBrainTypes: []int{models.TypeC2Net},
JobType: []models.JobType{models.JobTypeTrain}, JobType: []models.JobType{models.JobTypeTrain},
NotFinalStatuses: grampusTwoNotFinalStatuses,
NotFinalStatuses: GrampusNotFinalStatuses,
ComputeResource: models.NPUResource, ComputeResource: models.NPUResource,
}} }}




+ 112
- 0
services/cloudbrain/cloudbrainTask/train.go View File

@@ -12,6 +12,10 @@ import (
"regexp" "regexp"
"strings" "strings"


"code.gitea.io/gitea/modules/timeutil"

"code.gitea.io/gitea/modules/notification"

"code.gitea.io/gitea/modules/obs" "code.gitea.io/gitea/modules/obs"


"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
@@ -680,3 +684,111 @@ func getPoolId() string {


return resourcePools.Info[0].ID return resourcePools.Info[0].ID
} }

func PrepareSpec4Show(task *models.Cloudbrain) {
s, err := resource.GetCloudbrainSpec(task.ID)
if err != nil {
log.Info("error:" + err.Error())
return
}
task.Spec = s
}

func IsTaskNotStop(task *models.Cloudbrain) bool {
statuses := CloudbrainOneNotFinalStatuses
if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter {
statuses = CloudbrainTwoNotFinalStatuses
} else {
statuses = GrampusNotFinalStatuses
}

for _, status := range statuses {
if task.Status == status {
return true
}
}
return false

}

func SyncTaskStatus(task *models.Cloudbrain) error {
if task.Type == models.TypeCloudBrainOne {
result, err := cloudbrain.GetJob(task.JobID)
if err != nil {
log.Info("error:" + err.Error())
return fmt.Errorf("repo.cloudbrain_query_fail")
}

if result != nil {
jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
taskRoles := jobRes.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))

oldStatus := task.Status
task.Status = taskRes.TaskStatuses[0].State

task.ContainerID = taskRes.TaskStatuses[0].ContainerID
models.ParseAndSetDurationFromCloudBrainOne(jobRes, task)

if task.DeletedAt.IsZero() { //normal record
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
err = models.UpdateJob(task)
if err != nil {
return fmt.Errorf("repo.cloudbrain_query_fail")

}
}

} else {
log.Info("error:" + err.Error())
return fmt.Errorf("repo.cloudbrain_query_fail")
}
} else if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter {
err := modelarts.HandleTrainJobInfo(task)
if err != nil {
return fmt.Errorf("repo.cloudbrain_query_fail")
}

} else if task.Type == models.TypeC2Net {
result, err := grampus.GetJob(task.JobID)
if err != nil {
log.Error("GetJob failed:" + err.Error())
return fmt.Errorf("repo.cloudbrain_query_fail")
}

if result != nil {
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 {
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
}
oldStatus := task.Status
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
if task.Status != result.JobInfo.Status || result.JobInfo.Status == models.GrampusStatusRunning {
task.Duration = result.JobInfo.RunSec
if task.Duration < 0 {
task.Duration = 0
}
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)

if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
}
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
task.EndTime = task.StartTime.Add(task.Duration)
}
task.CorrectCreateUnix()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob failed:" + err.Error())
return fmt.Errorf("repo.cloudbrain_query_fail")
}
}
}
}
return nil

}

Loading…
Cancel
Save