From 394bfdd8a70dee03091d45fda7fbff1b6d3527ca Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Wed, 30 Nov 2022 18:06:18 +0800 Subject: [PATCH 1/6] #3169 update --- models/resource_specification.go | 93 +++++++++++++++++++ modules/grampus/grampus.go | 5 +- .../resource/resource_specification.go | 48 +--------- 3 files changed, 96 insertions(+), 50 deletions(-) diff --git a/models/resource_specification.go b/models/resource_specification.go index 2f815818b..809a3496a 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -3,6 +3,7 @@ package models import ( "code.gitea.io/gitea/modules/timeutil" "fmt" + "strings" "xorm.io/builder" ) @@ -197,12 +198,104 @@ type Specification struct { AiCenterName string IsExclusive bool ExclusiveOrg string + //specs that have the same sourceSpecId, computeResource and cluster as current spec + RelatedSpecs []*Specification } func (Specification) TableName() string { return "resource_specification" } +func (s *Specification) loadRelatedSpecs() { + if s.RelatedSpecs != nil { + return + } + defaultSpecs := make([]*Specification, 0) + if s.SourceSpecId == "" { + s.RelatedSpecs = defaultSpecs + return + } + r, err := FindSpecs(FindSpecsOptions{ + ComputeResource: s.ComputeResource, + Cluster: s.Cluster, + SourceSpecId: s.SourceSpecId, + RequestAll: true, + SpecStatus: SpecOnShelf, + }) + if err != nil { + s.RelatedSpecs = defaultSpecs + return + } + s.RelatedSpecs = r +} +func (s *Specification) GetAvailableCenterIds(userIds ...int64) []string { + s.loadRelatedSpecs() + + if len(s.RelatedSpecs) == 0 { + return make([]string, 0) + } + + var uId int64 + if len(userIds) > 0 { + uId = userIds[0] + } + //filter exclusive specs + specs := FilterExclusiveSpecs(s.RelatedSpecs, uId) + + centerIds := make([]string, len(specs)) + for i, v := range specs { + centerIds[i] = v.AiCenterCode + } + return centerIds +} + +func FilterExclusiveSpecs(r []*Specification, userId int64) []*Specification { + if userId == 0 { + return r + } + specs := make([]*Specification, 0, len(r)) + specMap := make(map[int64]string, 0) + for i := 0; i < len(r); i++ { + spec := r[i] + if _, has := specMap[spec.ID]; has { + continue + } + if !spec.IsExclusive { + specs = append(specs, spec) + specMap[spec.ID] = "" + continue + } + orgs := strings.Split(spec.ExclusiveOrg, ";") + for _, org := range orgs { + isMember, _ := IsOrganizationMemberByOrgName(org, userId) + if isMember { + specs = append(specs, spec) + specMap[spec.ID] = "" + break + } + } + } + return specs +} + +func DistinctSpecs(r []*Specification) []*Specification { + specs := make([]*Specification, 0, len(r)) + sourceSpecIdMap := make(map[string]string, 0) + for i := 0; i < len(r); i++ { + spec := r[i] + if spec.SourceSpecId == "" { + specs = append(specs, spec) + continue + } + if _, has := sourceSpecIdMap[spec.SourceSpecId]; has { + continue + } + specs = append(specs, spec) + sourceSpecIdMap[spec.SourceSpecId] = "" + } + return specs +} + func InsertResourceSpecification(r ResourceSpecification) (int64, error) { return x.Insert(&r) } diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index b6f62560a..6b2ea6288 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -105,8 +105,6 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() - centerID, centerName := getCentersParamter(ctx, req) - var datasetGrampus, modelGrampus []models.GrampusDataset var codeGrampus models.GrampusDataset if ProcessorTypeNPU == req.ProcessType { @@ -138,8 +136,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str ResourceSpecId: req.Spec.SourceSpecId, ImageId: req.ImageId, ImageUrl: req.ImageUrl, - CenterID: centerID, - CenterName: centerName, + CenterID: req.Spec.GetAvailableCenterIds(ctx.User.ID), ReplicaNum: 1, Datasets: datasetGrampus, Models: modelGrampus, diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go index 8f4182d87..5070d7c1e 100644 --- a/services/cloudbrain/resource/resource_specification.go +++ b/services/cloudbrain/resource/resource_specification.go @@ -246,10 +246,10 @@ func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.S return nil, err } //filter exclusive specs - specs := filterExclusiveSpecs(r, userId) + specs := models.FilterExclusiveSpecs(r, userId) //distinct by sourceSpecId - specs = distinctSpecs(specs) + specs = models.DistinctSpecs(specs) return specs, err } @@ -265,50 +265,6 @@ func FindAvailableSpecs4Show(userId int64, opts models.FindSpecsOptions) ([]*api return result, nil } -func filterExclusiveSpecs(r []*models.Specification, userId int64) []*models.Specification { - specs := make([]*models.Specification, 0, len(r)) - specMap := make(map[int64]string, 0) - for i := 0; i < len(r); i++ { - spec := r[i] - if _, has := specMap[spec.ID]; has { - continue - } - if !spec.IsExclusive { - specs = append(specs, spec) - specMap[spec.ID] = "" - continue - } - orgs := strings.Split(spec.ExclusiveOrg, ";") - for _, org := range orgs { - isMember, _ := models.IsOrganizationMemberByOrgName(org, userId) - if isMember { - specs = append(specs, spec) - specMap[spec.ID] = "" - break - } - } - } - return specs -} - -func distinctSpecs(r []*models.Specification) []*models.Specification { - specs := make([]*models.Specification, 0, len(r)) - sourceSpecIdMap := make(map[string]string, 0) - for i := 0; i < len(r); i++ { - spec := r[i] - if spec.SourceSpecId == "" { - specs = append(specs, spec) - continue - } - if _, has := sourceSpecIdMap[spec.SourceSpecId]; has { - continue - } - specs = append(specs, spec) - sourceSpecIdMap[spec.SourceSpecId] = "" - } - return specs -} - func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) (*models.Specification, error) { if specId == 0 { return nil, nil From a56551d5e50bce11116003a6c1513e5a1cae4d78 Mon Sep 17 00:00:00 2001 From: liuzx Date: Wed, 7 Dec 2022 17:25:22 +0800 Subject: [PATCH 2/6] fix-3255 --- routers/api/v1/repo/cloudbrain_dashboard.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/routers/api/v1/repo/cloudbrain_dashboard.go b/routers/api/v1/repo/cloudbrain_dashboard.go index 7fe5d603c..0d68fff30 100755 --- a/routers/api/v1/repo/cloudbrain_dashboard.go +++ b/routers/api/v1/repo/cloudbrain_dashboard.go @@ -968,6 +968,8 @@ func GetWaittingTop(ctx *context.Context) { taskDetail.RepoID = ciTasks[i].RepoID if ciTasks[i].Repo != nil { taskDetail.RepoName = ciTasks[i].Repo.OwnerName + "/" + ciTasks[i].Repo.Name + } else { + taskDetail.RepoName = "" } WaitTimeInt := time.Now().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix() taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt) @@ -975,6 +977,13 @@ func GetWaittingTop(ctx *context.Context) { if WaitTimeInt < 0 { taskDetail.WaitTime = "00:00:00" } + + taskDetail.ID = ciTasks[i].Cloudbrain.ID + taskDetail.ComputeResource = ciTasks[i].Cloudbrain.ComputeResource + taskDetail.JobType = ciTasks[i].Cloudbrain.JobType + taskDetail.JobID = ciTasks[i].Cloudbrain.JobID + taskDetail.Type = ciTasks[i].Cloudbrain.Type + tasks = append(tasks, taskDetail) } ctx.JSON(http.StatusOK, map[string]interface{}{ @@ -1001,6 +1010,12 @@ func GetRunningTop(ctx *context.Context) { taskDetail.RepoName = ciTasks[i].Repo.OwnerName + "/" + ciTasks[i].Repo.Name } + taskDetail.ID = ciTasks[i].Cloudbrain.ID + taskDetail.ComputeResource = ciTasks[i].Cloudbrain.ComputeResource + taskDetail.JobType = ciTasks[i].Cloudbrain.JobType + taskDetail.JobID = ciTasks[i].Cloudbrain.JobID + taskDetail.Type = ciTasks[i].Cloudbrain.Type + tasks = append(tasks, taskDetail) } ctx.JSON(http.StatusOK, map[string]interface{}{ From 516ba2e108c19a53b85f0edca8a97e32e012c467 Mon Sep 17 00:00:00 2001 From: liuzx Date: Wed, 7 Dec 2022 17:53:02 +0800 Subject: [PATCH 3/6] fix-3255 --- models/cloudbrain_static.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/models/cloudbrain_static.go b/models/cloudbrain_static.go index a213f179c..40d7a2a2e 100644 --- a/models/cloudbrain_static.go +++ b/models/cloudbrain_static.go @@ -183,6 +183,17 @@ func GetWaittingTop() ([]*CloudbrainInfo, error) { Find(&cloudbrains); err != nil { log.Info("find error.") } + + var ids []int64 + for _, task := range cloudbrains { + ids = append(ids, task.RepoID) + } + repositoryMap, err := GetRepositoriesMapByIDs(ids) + if err == nil { + for _, task := range cloudbrains { + task.Repo = repositoryMap[task.RepoID] + } + } return cloudbrains, nil } @@ -199,6 +210,16 @@ func GetRunningTop() ([]*CloudbrainInfo, error) { Find(&cloudbrains); err != nil { log.Info("find error.") } + var ids []int64 + for _, task := range cloudbrains { + ids = append(ids, task.RepoID) + } + repositoryMap, err := GetRepositoriesMapByIDs(ids) + if err == nil { + for _, task := range cloudbrains { + task.Repo = repositoryMap[task.RepoID] + } + } return cloudbrains, nil } From f1728176e000384a56029c0aeac8d3ae12e30032 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Fri, 9 Dec 2022 12:38:22 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E6=B8=85=E7=90=86=E7=AD=96=E7=95=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 30 +++++++++++++++++++++++--- modules/setting/setting.go | 2 ++ services/cloudbrain/clear.go | 41 +++++++++++++++++++++++------------- 3 files changed, 55 insertions(+), 18 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 5b427c010..aeed8629c 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1906,6 +1906,12 @@ func GetCloudbrainByID(id string) (*Cloudbrain, error) { return getRepoCloudBrain(cb) } +func IsCloudbrainExistByJobName(jobName string)(bool,error){ + return x.Unscoped().Exist(&Cloudbrain{ + JobName: jobName, + }) +} + func GetCloudbrainByIDWithDeleted(id string) (*Cloudbrain, error) { idInt64, _ := strconv.ParseInt(id, 10, 64) cb := &Cloudbrain{ID: idInt64} @@ -2051,19 +2057,37 @@ func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) { Find(&cloudbrains) } -func GetCloudBrainOneStoppedJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { +func GetCloudBrainOneStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) endTimeBefore := time.Now().Unix() - int64(days)*24*3600 missEndTimeBefore := endTimeBefore - 24*3600 - return cloudbrains, x.Cols("id,job_name,job_id"). + return cloudbrains, x.Unscoped().Cols("id,job_name,job_id"). In("status", JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted, ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed, ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed). - Where("(((end_time is null or end_time=0) and updated_unix Date: Fri, 9 Dec 2022 13:00:05 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/cloudbrain/clear.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/cloudbrain/clear.go b/services/cloudbrain/clear.go index 259ee2592..6172a8203 100644 --- a/services/cloudbrain/clear.go +++ b/services/cloudbrain/clear.go @@ -48,7 +48,7 @@ func ClearCloudbrainResultSpace() { log.Warn("Failed to set cloudbrain cleared status", err) } //如果云脑表处理完了,通过遍历minio对象处理历史垃圾数据,如果存在的话 - if len(tasks) < setting.ClearStrategy.BatchSize { + if len(tasks) < setting.ClearStrategy.BatchSize+setting.ClearStrategy.DebugJobSize { clearLocalHistoryTrashFile() clearMinioHistoryTrashFile() From 2c1ba17372a285d1a4eda1df89255b4afc86a743 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Fri, 9 Dec 2022 17:44:33 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/cloudbrain/clear.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/services/cloudbrain/clear.go b/services/cloudbrain/clear.go index 6172a8203..44613ee3c 100644 --- a/services/cloudbrain/clear.go +++ b/services/cloudbrain/clear.go @@ -69,7 +69,8 @@ func clearMinioHistoryTrashFile() { SortModTimeAscend(miniofiles) for _, file := range miniofiles { - if file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { + if file.Name()!="" && file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { + has,err:=models.IsCloudbrainExistByJobName(file.Name()) if err==nil && !has { dirPath := setting.CBCodePathPrefix + file.Name() + "/" @@ -98,7 +99,7 @@ func clearLocalHistoryTrashFile() { SortModTimeAscend(files) for _, file := range files { //清理n天前的历史垃圾数据,清理job目录 - if file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { + if file.Name()!="" && file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { has,err:=models.IsCloudbrainExistByJobName(file.Name()) if err==nil && !has{ os.RemoveAll(setting.JobPath + file.Name()) @@ -125,6 +126,10 @@ func SortModTimeAscend(files []os.FileInfo) { } func DeleteCloudbrainOneJobStorage(jobName string) error { + + if jobName==""{ + return nil + } //delete local localJobPath := setting.JobPath + jobName err := os.RemoveAll(localJobPath)