| @@ -29,3 +29,9 @@ func BaseErrorMessageApi(message string) BaseMessageApi { | |||||
| 1, message, | 1, message, | ||||
| } | } | ||||
| } | } | ||||
// BaseMessageWithDataApi is a JSON response envelope that carries an
// arbitrary payload next to a result code and a message.
type BaseMessageWithDataApi struct {
	// Code is the numeric result code, serialised as "code".
	Code int `json:"code"`
	// Message is the accompanying text, serialised as "message".
	Message string `json:"message"`
	// Data is the payload, serialised as "data"; any JSON-encodable value.
	Data interface{} `json:"data"`
}
| @@ -616,6 +616,7 @@ organization = Organizations | |||||
| uid = Uid | uid = Uid | ||||
| u2f = Security Keys | u2f = Security Keys | ||||
| bind_wechat = Bind WeChat | bind_wechat = Bind WeChat | ||||
| no_wechat_bind = Cannot perform this operation. Please bind WeChat first. | |||||
| wechat_bind = WeChat Binding | wechat_bind = WeChat Binding | ||||
| bind_account_information = Bind account information | bind_account_information = Bind account information | ||||
| bind_time = Bind Time | bind_time = Bind Time | ||||
| @@ -621,6 +621,7 @@ organization=组织 | |||||
| uid=用户 ID | uid=用户 ID | ||||
| u2f=安全密钥 | u2f=安全密钥 | ||||
| wechat_bind = 微信绑定 | wechat_bind = 微信绑定 | ||||
| no_wechat_bind = 不能创建任务,请先绑定微信。 | |||||
| bind_wechat = 绑定微信 | bind_wechat = 绑定微信 | ||||
| bind_account_information = 绑定账号信息 | bind_account_information = 绑定账号信息 | ||||
| bind_time = 绑定时间 | bind_time = 绑定时间 | ||||
| @@ -33,6 +33,48 @@ import ( | |||||
| routerRepo "code.gitea.io/gitea/routers/repo" | routerRepo "code.gitea.io/gitea/routers/repo" | ||||
| ) | ) | ||||
| func CloudBrainShow(ctx *context.APIContext) { | |||||
| task, err := models.GetCloudbrainByJobID(ctx.Params(":jobid")) | |||||
| if err != nil { | |||||
| log.Info("error:" + err.Error()) | |||||
| ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("repo.cloudbrain_query_fail")) | |||||
| return | |||||
| } | |||||
| cloudbrainTask.PrepareSpec4Show(task) | |||||
| task.ContainerIp = "" | |||||
| if cloudbrainTask.IsTaskNotStop(task) { | |||||
| cloudbrainTask.SyncTaskStatus(task) | |||||
| } | |||||
| if task.TrainJobDuration == "" { | |||||
| if task.Duration == 0 { | |||||
| var duration int64 | |||||
| if task.Status == string(models.JobWaiting) { | |||||
| duration = 0 | |||||
| } else if task.Status == string(models.JobRunning) { | |||||
| duration = time.Now().Unix() - int64(task.CreatedUnix) | |||||
| } else { | |||||
| duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) | |||||
| } | |||||
| task.Duration = duration | |||||
| } | |||||
| task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||||
| } | |||||
| //to unify image output | |||||
| if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter { | |||||
| task.ImageID = strconv.FormatInt(task.EngineID, 10) | |||||
| task.Image = task.EngineName | |||||
| } else if task.Type == models.TypeC2Net { | |||||
| task.Image = task.EngineName | |||||
| } | |||||
| ctx.JSON(http.StatusOK, models.BaseMessageWithDataApi{Code: 0, Message: "", Data: task}) | |||||
| } | |||||
| func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) { | func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) { | ||||
| if option.Type == 2 { | if option.Type == 2 { | ||||
| @@ -47,10 +89,10 @@ func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) | |||||
| func CreateCloudBrainInferenceTask(ctx *context.APIContext, option api.CreateTrainJobOption) { | func CreateCloudBrainInferenceTask(ctx *context.APIContext, option api.CreateTrainJobOption) { | ||||
| if option.Type == 0 { | if option.Type == 0 { | ||||
| cloudbrainTask.GrampusTrainJobGpuCreate(ctx.Context, option) | |||||
| cloudbrainTask.CloudBrainInferenceJobCreate(ctx.Context, option) | |||||
| } | } | ||||
| if option.Type == 1 { | if option.Type == 1 { | ||||
| cloudbrainTask.GrampusTrainJobNpuCreate(ctx.Context, option) | |||||
| cloudbrainTask.ModelArtsInferenceJobCreate(ctx.Context, option) | |||||
| } | } | ||||
| } | } | ||||
| @@ -14,28 +14,28 @@ type StatusInfo struct { | |||||
| ComputeResource string | ComputeResource string | ||||
| } | } | ||||
| var cloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} | |||||
| var cloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} | |||||
| var grampusTwoNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} | |||||
| var CloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} | |||||
| var CloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} | |||||
| var GrampusNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} | |||||
| var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | ||||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | CloudBrainTypes: []int{models.TypeCloudBrainOne}, | ||||
| JobType: []models.JobType{models.JobTypeDebug}, | JobType: []models.JobType{models.JobTypeDebug}, | ||||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||||
| NotFinalStatuses: CloudbrainOneNotFinalStatuses, | |||||
| ComputeResource: models.GPUResource, | ComputeResource: models.GPUResource, | ||||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | ||||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | CloudBrainTypes: []int{models.TypeCloudBrainOne}, | ||||
| JobType: []models.JobType{models.JobTypeTrain}, | JobType: []models.JobType{models.JobTypeTrain}, | ||||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||||
| NotFinalStatuses: CloudbrainOneNotFinalStatuses, | |||||
| ComputeResource: models.GPUResource, | ComputeResource: models.GPUResource, | ||||
| }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | ||||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | CloudBrainTypes: []int{models.TypeCloudBrainOne}, | ||||
| JobType: []models.JobType{models.JobTypeInference}, | JobType: []models.JobType{models.JobTypeInference}, | ||||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||||
| NotFinalStatuses: CloudbrainOneNotFinalStatuses, | |||||
| ComputeResource: models.GPUResource, | ComputeResource: models.GPUResource, | ||||
| }, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | }, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | ||||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | CloudBrainTypes: []int{models.TypeCloudBrainOne}, | ||||
| JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeBrainScore, models.JobTypeSnn4imagenet}, | JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeBrainScore, models.JobTypeSnn4imagenet}, | ||||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||||
| NotFinalStatuses: CloudbrainOneNotFinalStatuses, | |||||
| ComputeResource: models.GPUResource, | ComputeResource: models.GPUResource, | ||||
| }, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | }, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | ||||
| CloudBrainTypes: []int{models.TypeCloudBrainTwo, models.TypeCDCenter}, | CloudBrainTypes: []int{models.TypeCloudBrainTwo, models.TypeCDCenter}, | ||||
| @@ -45,22 +45,22 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s | |||||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | ||||
| CloudBrainTypes: []int{models.TypeCloudBrainTwo}, | CloudBrainTypes: []int{models.TypeCloudBrainTwo}, | ||||
| JobType: []models.JobType{models.JobTypeTrain}, | JobType: []models.JobType{models.JobTypeTrain}, | ||||
| NotFinalStatuses: cloudbrainTwoNotFinalStatuses, | |||||
| NotFinalStatuses: CloudbrainTwoNotFinalStatuses, | |||||
| ComputeResource: models.NPUResource, | ComputeResource: models.NPUResource, | ||||
| }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | ||||
| CloudBrainTypes: []int{models.TypeCloudBrainTwo}, | CloudBrainTypes: []int{models.TypeCloudBrainTwo}, | ||||
| JobType: []models.JobType{models.JobTypeInference}, | JobType: []models.JobType{models.JobTypeInference}, | ||||
| NotFinalStatuses: cloudbrainTwoNotFinalStatuses, | |||||
| NotFinalStatuses: CloudbrainTwoNotFinalStatuses, | |||||
| ComputeResource: models.NPUResource, | ComputeResource: models.NPUResource, | ||||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: { | }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: { | ||||
| CloudBrainTypes: []int{models.TypeC2Net}, | CloudBrainTypes: []int{models.TypeC2Net}, | ||||
| JobType: []models.JobType{models.JobTypeTrain}, | JobType: []models.JobType{models.JobTypeTrain}, | ||||
| NotFinalStatuses: grampusTwoNotFinalStatuses, | |||||
| NotFinalStatuses: GrampusNotFinalStatuses, | |||||
| ComputeResource: models.GPUResource, | ComputeResource: models.GPUResource, | ||||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: { | }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: { | ||||
| CloudBrainTypes: []int{models.TypeC2Net}, | CloudBrainTypes: []int{models.TypeC2Net}, | ||||
| JobType: []models.JobType{models.JobTypeTrain}, | JobType: []models.JobType{models.JobTypeTrain}, | ||||
| NotFinalStatuses: grampusTwoNotFinalStatuses, | |||||
| NotFinalStatuses: GrampusNotFinalStatuses, | |||||
| ComputeResource: models.NPUResource, | ComputeResource: models.NPUResource, | ||||
| }} | }} | ||||
| @@ -12,6 +12,10 @@ import ( | |||||
| "regexp" | "regexp" | ||||
| "strings" | "strings" | ||||
| "code.gitea.io/gitea/modules/timeutil" | |||||
| "code.gitea.io/gitea/modules/notification" | |||||
| "code.gitea.io/gitea/modules/obs" | "code.gitea.io/gitea/modules/obs" | ||||
| "code.gitea.io/gitea/modules/git" | "code.gitea.io/gitea/modules/git" | ||||
| @@ -680,3 +684,111 @@ func getPoolId() string { | |||||
| return resourcePools.Info[0].ID | return resourcePools.Info[0].ID | ||||
| } | } | ||||
| func PrepareSpec4Show(task *models.Cloudbrain) { | |||||
| s, err := resource.GetCloudbrainSpec(task.ID) | |||||
| if err != nil { | |||||
| log.Info("error:" + err.Error()) | |||||
| return | |||||
| } | |||||
| task.Spec = s | |||||
| } | |||||
| func IsTaskNotStop(task *models.Cloudbrain) bool { | |||||
| statuses := CloudbrainOneNotFinalStatuses | |||||
| if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter { | |||||
| statuses = CloudbrainTwoNotFinalStatuses | |||||
| } else { | |||||
| statuses = GrampusNotFinalStatuses | |||||
| } | |||||
| for _, status := range statuses { | |||||
| if task.Status == status { | |||||
| return true | |||||
| } | |||||
| } | |||||
| return false | |||||
| } | |||||
| func SyncTaskStatus(task *models.Cloudbrain) error { | |||||
| if task.Type == models.TypeCloudBrainOne { | |||||
| result, err := cloudbrain.GetJob(task.JobID) | |||||
| if err != nil { | |||||
| log.Info("error:" + err.Error()) | |||||
| return fmt.Errorf("repo.cloudbrain_query_fail") | |||||
| } | |||||
| if result != nil { | |||||
| jobRes, _ := models.ConvertToJobResultPayload(result.Payload) | |||||
| taskRoles := jobRes.TaskRoles | |||||
| taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||||
| oldStatus := task.Status | |||||
| task.Status = taskRes.TaskStatuses[0].State | |||||
| task.ContainerID = taskRes.TaskStatuses[0].ContainerID | |||||
| models.ParseAndSetDurationFromCloudBrainOne(jobRes, task) | |||||
| if task.DeletedAt.IsZero() { //normal record | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| return fmt.Errorf("repo.cloudbrain_query_fail") | |||||
| } | |||||
| } | |||||
| } else { | |||||
| log.Info("error:" + err.Error()) | |||||
| return fmt.Errorf("repo.cloudbrain_query_fail") | |||||
| } | |||||
| } else if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter { | |||||
| err := modelarts.HandleTrainJobInfo(task) | |||||
| if err != nil { | |||||
| return fmt.Errorf("repo.cloudbrain_query_fail") | |||||
| } | |||||
| } else if task.Type == models.TypeC2Net { | |||||
| result, err := grampus.GetJob(task.JobID) | |||||
| if err != nil { | |||||
| log.Error("GetJob failed:" + err.Error()) | |||||
| return fmt.Errorf("repo.cloudbrain_query_fail") | |||||
| } | |||||
| if result != nil { | |||||
| if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { | |||||
| task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] | |||||
| } | |||||
| oldStatus := task.Status | |||||
| task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||||
| if task.Status != result.JobInfo.Status || result.JobInfo.Status == models.GrampusStatusRunning { | |||||
| task.Duration = result.JobInfo.RunSec | |||||
| if task.Duration < 0 { | |||||
| task.Duration = 0 | |||||
| } | |||||
| task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||||
| if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) | |||||
| } | |||||
| if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||||
| task.EndTime = task.StartTime.Add(task.Duration) | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:" + err.Error()) | |||||
| return fmt.Errorf("repo.cloudbrain_query_fail") | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||