@@ -2313,3 +2313,9 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) {
        In("id", ids).
        Find(&cloudbrains)
 }
+
+func GetCloudbrainWithDeletedByIDs(ids []int64) ([]*Cloudbrain, error) {
+    cloudbrains := make([]*Cloudbrain, 0)
+    return cloudbrains, x.
+        In("id", ids).Unscoped().Find(&cloudbrains)
+}
@@ -83,3 +83,26 @@ func GetCloudbrainSpecByID(cloudbrainId int64) (*CloudbrainSpec, error) {
    }
    return r, nil
 }
+
+func FindNoSpecHistoricTask(page, pageSize int) ([]*Cloudbrain, error) {
+    r := make([]*Cloudbrain, 0)
+    err := x.Unscoped().
+        Where(" 1=1 and not exists (select 1 from cloudbrain_spec where cloudbrain.id = cloudbrain_spec.cloudbrain_id)").
+        Limit(pageSize, (page-1)*pageSize).
+        OrderBy("cloudbrain.id").
+        Find(&r)
+    if err != nil {
+        return nil, err
+    }
+    return r, nil
+}
+
+func CountNoSpecHistoricTask() (int64, error) {
+    n, err := x.Unscoped().
+        Where(" 1=1 and not exists (select 1 from cloudbrain_spec where cloudbrain.id = cloudbrain_spec.cloudbrain_id)").
+        Count(&Cloudbrain{})
+    if err != nil {
+        return 0, err
+    }
+    return n, nil
+}
@@ -147,6 +147,21 @@ type FindSpecsOptions struct {
    Cluster      string
    AiCenterCode string
    SpecId       int64
+   QueueCode      string
+   SourceSpecId   string
+   AccCardsNum    int
+   UseAccCardsNum bool
+   AccCardType    string
+   CpuCores       int
+   UseCpuCores    bool
+   MemGiB         float32
+   UseMemGiB      bool
+   GPUMemGiB      float32
+   UseGPUMemGiB   bool
+   ShareMemGiB    float32
+   UseShareMemGiB bool
+   // If true, find specs whether or not they are used in a scene; if false, only find specs used in a scene.
+   RequestAll bool
 }
 
 type Specification struct {
@@ -316,9 +331,10 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS
    return sess.Commit()
 }
 
-func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) {
+//FindSpecs
+func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) {
    var cond = builder.NewCond()
-   if opts.JobType != "" {
+   if !opts.RequestAll && opts.JobType != "" {
        cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType})
    }
    if opts.ComputeResource != "" {
@@ -333,17 +349,108 @@ func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) {
    if opts.SpecId > 0 {
        cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId})
    }
-   cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"}))
+   if opts.QueueCode != "" {
+       cond = cond.And(builder.Eq{"resource_queue.queue_code": opts.QueueCode})
+   }
+   if opts.SourceSpecId != "" {
+       cond = cond.And(builder.Eq{"resource_specification.source_spec_id": opts.SourceSpecId})
+   }
+   if opts.UseAccCardsNum {
+       cond = cond.And(builder.Eq{"resource_specification.acc_cards_num": opts.AccCardsNum})
+   }
+   if opts.AccCardType != "" {
+       cond = cond.And(builder.Eq{"resource_queue.acc_card_type": opts.AccCardType})
+   }
+   if opts.UseCpuCores {
+       cond = cond.And(builder.Eq{"resource_specification.cpu_cores": opts.CpuCores})
+   }
+   if opts.UseMemGiB {
+       cond = cond.And(builder.Eq{"resource_specification.mem_gi_b": opts.MemGiB})
+   }
+   if opts.UseGPUMemGiB {
+       cond = cond.And(builder.Eq{"resource_specification.gpu_mem_gi_b": opts.GPUMemGiB})
+   }
+   if opts.UseShareMemGiB {
+       cond = cond.And(builder.Eq{"resource_specification.share_mem_gi_b": opts.ShareMemGiB})
+   }
    r := make([]*Specification, 0)
-   err := x.Where(cond).
-       Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id").
-       Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id").
-       Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id").
-       OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc").
+   s := x.Where(cond).
+       Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id")
+   if !opts.RequestAll {
+       s = s.Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id").
+           Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id")
+   }
+   err := s.OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc").
        Unscoped().Find(&r)
    if err != nil {
        return nil, err
    }
    return r, nil
 }
+
+func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specification, error) {
+   sess := x.NewSession()
+   defer sess.Close()
+   sess.Begin()
+   param := ResourceQueue{
+       QueueCode:       queue.QueueCode,
+       Cluster:         queue.Cluster,
+       AiCenterCode:    queue.AiCenterCode,
+       ComputeResource: queue.ComputeResource,
+       AccCardType:     queue.AccCardType,
+   }
+   _, err := sess.Get(&param)
+   if err != nil {
+       sess.Rollback()
+       return nil, err
+   }
+   if param.ID == 0 {
+       _, err = sess.InsertOne(&queue)
+       if err != nil {
+           sess.Rollback()
+           return nil, err
+       }
+   } else {
+       queue = param
+   }
+   spec.QueueId = queue.ID
+   _, err = sess.InsertOne(&spec)
+   if err != nil {
+       sess.Rollback()
+       return nil, err
+   }
+   sess.Commit()
+   return &Specification{
+       ID:              spec.ID,
+       SourceSpecId:    spec.SourceSpecId,
+       AccCardsNum:     spec.AccCardsNum,
+       AccCardType:     queue.AccCardType,
+       CpuCores:        spec.CpuCores,
+       MemGiB:          spec.MemGiB,
+       GPUMemGiB:       spec.GPUMemGiB,
+       ShareMemGiB:     spec.ShareMemGiB,
+       ComputeResource: queue.ComputeResource,
+       UnitPrice:       spec.UnitPrice,
+       QueueId:         queue.ID,
+       QueueCode:       queue.QueueCode,
+       Cluster:         queue.Cluster,
+       AiCenterCode:    queue.AiCenterCode,
+       AiCenterName:    queue.AiCenterName,
+   }, nil
+}
+
+func GetCloudbrainOneAccCardType(queueCode string) string {
+   switch queueCode {
+   case "a100":
+       return "A100"
+   case "openidebug":
+       return "T4"
+   case "openidgx":
+       return "V100"
+   }
+   return ""
+}
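A rough caller-side sketch of the widened FindSpecs contract (the function and the concrete values below are hypothetical; models.GPU, models.OpenICluster and models.AICenterOfCloudBrainOne are constants already used elsewhere in this patch). The Use* flags are what let a zero value act as an explicit filter instead of "ignore this field", and RequestAll skips the resource_scene join entirely:

// Hypothetical caller, illustration only: match a spec by its hardware shape.
package example

import "code.gitea.io/gitea/models"

func findGpuSpecByShape(queueCode string, cards, cpu int, memGiB float32) (*models.Specification, error) {
	specs, err := models.FindSpecs(models.FindSpecsOptions{
		ComputeResource: models.GPU,
		Cluster:         models.OpenICluster,
		AiCenterCode:    models.AICenterOfCloudBrainOne,
		QueueCode:       queueCode,
		AccCardsNum:     cards,
		UseAccCardsNum:  true,
		CpuCores:        cpu,
		UseCpuCores:     true,
		MemGiB:          memGiB,
		UseMemGiB:       memGiB > 0,
		RequestAll:      true, // match specs even if they are not bound to any scene
	})
	if err != nil {
		return nil, err
	}
	if len(specs) == 0 {
		return nil, nil
	}
	return specs[0], nil
}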
@@ -22,6 +22,7 @@ type CreateModelArtsNotebookForm struct {
    Description string `form:"description"`
    Flavor      string `form:"flavor" binding:"Required"`
    ImageId     string `form:"image_id" binding:"Required"`
+   SpecId      int64  `form:"spec_id" binding:"Required"`
 }
 
 func (f *CreateModelArtsNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
@@ -46,6 +47,7 @@ type CreateModelArtsTrainJobForm struct {
    VersionName string `form:"version_name" binding:"Required"`
    FlavorName  string `form:"flaver_names" binding:"Required"`
    EngineName  string `form:"engine_names" binding:"Required"`
+   SpecId      int64  `form:"spec_id" binding:"Required"`
 }
 
 type CreateModelArtsInferenceJobForm struct {
@@ -71,6 +73,7 @@ type CreateModelArtsInferenceJobForm struct {
    ModelName    string `form:"model_name" binding:"Required"`
    ModelVersion string `form:"model_version" binding:"Required"`
    CkptName     string `form:"ckpt_name" binding:"Required"`
+   SpecId       int64  `form:"spec_id" binding:"Required"`
 }
 
 func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
@@ -84,7 +84,6 @@ type GenerateTrainJobReq struct {
    BootFileUrl      string
    DataUrl          string
    TrainUrl         string
-   FlavorCode       string
    LogUrl           string
    PoolID           string
    WorkServerNumber int
@@ -96,6 +95,7 @@ type GenerateTrainJobReq struct {
    BranchName     string
    PreVersionId   int64
    PreVersionName string
+   FlavorCode     string
    FlavorName     string
    VersionCount   int
    EngineName     string
@@ -103,6 +103,7 @@ type GenerateTrainJobReq struct {
    UserImageUrl string
    UserCommand  string
    DatasetName  string
+   Spec         *models.Specification
 }
 
 type GenerateInferenceJobReq struct {
@@ -115,7 +116,6 @@ type GenerateInferenceJobReq struct {
    BootFileUrl      string
    DataUrl          string
    TrainUrl         string
-   FlavorCode       string
    LogUrl           string
    PoolID           string
    WorkServerNumber int
@@ -134,6 +134,7 @@ type GenerateInferenceJobReq struct {
    ModelVersion string
    CkptName     string
    ResultUrl    string
+   Spec         *models.Specification
 }
 
 type VersionInfo struct {
@@ -256,7 +257,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin
    return nil
 }
 
-func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error {
+func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error {
    if poolInfos == nil {
        json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
    }
@@ -270,7 +271,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
    jobResult, err := createNotebook2(models.CreateNotebook2Params{
        JobName:     jobName,
        Description: description,
-       Flavor:      flavor,
+       Flavor:      spec.SourceSpecId,
        Duration:    autoStopDurationMs,
        ImageID:     imageId,
        PoolID:      poolInfos.PoolInfo[0].PoolId,
@@ -292,7 +293,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
        RepoID:         ctx.Repo.Repository.ID,
        JobID:          jobResult.ID,
        JobName:        jobName,
-       FlavorCode:     flavor,
+       FlavorCode:     spec.SourceSpecId,
        DisplayJobName: displayJobName,
        JobType:        string(models.JobTypeDebug),
        Type:           models.TypeCloudBrainTwo,
@@ -302,6 +303,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
        Description: description,
        CreatedUnix: createTime,
        UpdatedUnix: createTime,
+       Spec:        spec,
    })
 
    if err != nil {
@@ -335,7 +337,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
            PoolID:        req.PoolID,
            CreateVersion: true,
            Flavor: models.Flavor{
-               Code: req.FlavorCode,
+               Code: req.Spec.SourceSpecId,
            },
            Parameter:    req.Parameters,
            UserImageUrl: req.UserImageUrl,
@@ -357,7 +359,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
            PoolID:        req.PoolID,
            CreateVersion: true,
            Flavor: models.Flavor{
-               Code: req.FlavorCode,
+               Code: req.Spec.SourceSpecId,
            },
            Parameter: req.Parameters,
        },
@@ -391,7 +393,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
        BootFile:         req.BootFile,
        DataUrl:          req.DataUrl,
        LogUrl:           req.LogUrl,
-       FlavorCode:       req.FlavorCode,
+       FlavorCode:       req.Spec.SourceSpecId,
        Description:      req.Description,
        WorkServerNumber: req.WorkServerNumber,
        FlavorName:       req.FlavorName,
@@ -400,6 +402,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
        TotalVersionCount: req.TotalVersionCount,
        CreatedUnix:       createTime,
        UpdatedUnix:       createTime,
+       Spec:              req.Spec,
    })
 
    if createErr != nil {
@@ -451,7 +454,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
            LogUrl: req.LogUrl,
            PoolID: req.PoolID,
            Flavor: models.Flavor{
-               Code: req.FlavorCode,
+               Code: req.Spec.SourceSpecId,
            },
            Parameter:    req.Parameters,
            PreVersionId: req.PreVersionId,
@@ -472,7 +475,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
            LogUrl: req.LogUrl,
            PoolID: req.PoolID,
            Flavor: models.Flavor{
-               Code: req.FlavorCode,
+               Code: req.Spec.SourceSpecId,
            },
            Parameter:    req.Parameters,
            PreVersionId: req.PreVersionId,
@@ -524,7 +527,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
        DataUrl:          req.DataUrl,
        LogUrl:           req.LogUrl,
        PreVersionId:     req.PreVersionId,
-       FlavorCode:       req.FlavorCode,
+       FlavorCode:       req.Spec.SourceSpecId,
        Description:      req.Description,
        WorkServerNumber: req.WorkServerNumber,
        FlavorName:       req.FlavorName,
@@ -533,6 +536,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
        VersionCount: VersionListCount + 1,
        CreatedUnix:  createTime,
        UpdatedUnix:  createTime,
+       Spec:         req.Spec,
    })
    if createErr != nil {
        log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error())
@@ -716,7 +720,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
            PoolID:        req.PoolID,
            CreateVersion: true,
            Flavor: models.Flavor{
-               Code: req.FlavorCode,
+               Code: req.Spec.SourceSpecId,
            },
            Parameter: req.Parameters,
        },
@@ -753,7 +757,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
        BootFile:         req.BootFile,
        DataUrl:          req.DataUrl,
        LogUrl:           req.LogUrl,
-       FlavorCode:       req.FlavorCode,
+       FlavorCode:       req.Spec.SourceSpecId,
        Description:      req.Description,
        WorkServerNumber: req.WorkServerNumber,
        FlavorName:       req.FlavorName,
@@ -769,6 +773,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
        ResultUrl:   req.ResultUrl,
        CreatedUnix: createTime,
        UpdatedUnix: createTime,
+       Spec:        req.Spec,
    })
 
    if err != nil {
@@ -8,6 +8,8 @@ import (
    "code.gitea.io/gitea/routers/response"
    "code.gitea.io/gitea/services/cloudbrain/resource"
    "net/http"
+   "strconv"
+   "strings"
 )
 
 const (
@@ -246,3 +248,37 @@ func UpdateResourceScene(ctx *context.Context, req models.ResourceSceneReq) {
    }
    ctx.JSON(http.StatusOK, response.Success())
 }
+
+func RefreshHistorySpec(ctx *context.Context) {
+   scope := ctx.Query("scope")
+   list := ctx.Query("list")
+
+   var scopeAll = false
+   if scope == "all" {
+       scopeAll = true
+   }
+   var ids = make([]int64, 0)
+   if list != "" {
+       strs := strings.Split(list, "|")
+       for _, s := range strs {
+           i, err := strconv.ParseInt(s, 10, 64)
+           if err != nil {
+               ctx.JSON(http.StatusOK, response.ServerError(err.Error()))
+               return
+           }
+           ids = append(ids, i)
+       }
+   }
+
+   total, success, err := resource.RefreshHistorySpec(scopeAll, ids)
+   if err != nil {
+       log.Error("RefreshHistorySpec error. %v", err)
+       ctx.JSON(http.StatusOK, response.ServerError(err.Error()))
+       return
+   }
+   r := make(map[string]interface{}, 0)
+   r["success"] = success
+   r["total"] = total
+   ctx.JSON(http.StatusOK, response.SuccessWithData(r))
+}
@@ -6,6 +6,7 @@
 package private
 
 import (
+   "code.gitea.io/gitea/routers/admin"
    "strings"
 
    "code.gitea.io/gitea/routers/repo"
@@ -51,6 +52,7 @@ func RegisterRoutes(m *macaron.Macaron) {
        m.Get("/tool/org_stat", OrgStatisticManually)
        m.Post("/tool/update_repo_visit/:date", UpdateRepoVisit)
        m.Post("/task/history_handle/duration", repo.HandleTaskWithNoDuration)
+       m.Post("/resources/specification/handle_historical_task", admin.RefreshHistorySpec)
    }, CheckInternalToken)
 }
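The new route sits in the internal-token group, so it is meant to be hit by an operator or maintenance script rather than the public API. A rough sketch of such a call follows; the http://localhost:3000 base URL, the /api/internal prefix and the bearer-style internal token header are assumptions about the deployment and about how CheckInternalToken validates requests, so adjust them to the actual setup. "scope=all" back-fills every spec-less task, while "list=1|2|3" limits the refresh to the given cloudbrain IDs.

// Hypothetical operator-side call, illustration only.
package main

import (
	"fmt"
	"net/http"
)

func main() {
	// Assumed base URL and internal-API prefix; replace <INTERNAL_TOKEN> with the server's token.
	url := "http://localhost:3000/api/internal/resources/specification/handle_historical_task?scope=all"
	req, err := http.NewRequest(http.MethodPost, url, nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer <INTERNAL_TOKEN>")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}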
@@ -122,89 +122,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
        ctx.Data["QueuesDetail"] = queuesDetail
    }
 
-   cloudbrain.InitSpecialPool()
-
-   if gpuInfos == nil {
-       json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos)
-   }
-   ctx.Data["gpu_types"] = gpuInfos.GpuInfo
-
-   if trainGpuInfos == nil {
-       json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos)
-   }
-   ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo
-
-   if inferenceGpuInfos == nil && setting.InferenceGpuTypes != "" {
-       json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos)
-   }
-   if inferenceGpuInfos != nil {
-       ctx.Data["inference_gpu_types"] = inferenceGpuInfos.GpuInfo
-   }
-
-   if benchmarkGpuInfos == nil {
-       json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos)
-   }
-   ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo
-
-   if benchmarkResourceSpecs == nil {
-       json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &benchmarkResourceSpecs)
-   }
-   ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec
-
-   if cloudbrain.ResourceSpecs == nil {
-       json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs)
-   }
-   ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec
-
-   if cloudbrain.TrainResourceSpecs == nil {
-       json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
-   }
-   ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec
-
-   if cloudbrain.InferenceResourceSpecs == nil && setting.InferenceResourceSpecs != "" {
-       json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs)
-   }
-   if cloudbrain.InferenceResourceSpecs != nil {
-       ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec
-   }
-
    prepareCloudbrainOneSpecs(ctx)
 
-   if cloudbrain.SpecialPools != nil {
-       var debugGpuTypes []*models.GpuInfo
-       var trainGpuTypes []*models.GpuInfo
-       for _, pool := range cloudbrain.SpecialPools.Pools {
-           isOrgMember, _ := models.IsOrganizationMemberByOrgName(pool.Org, ctx.User.ID)
-           if isOrgMember {
-               for _, jobType := range pool.JobType {
-                   if jobType == string(models.JobTypeDebug) {
-                       debugGpuTypes = append(debugGpuTypes, pool.Pool...)
-                       if pool.ResourceSpec != nil {
-                           ctx.Data["resource_specs"] = pool.ResourceSpec
-                       }
-                   } else if jobType == string(models.JobTypeTrain) {
-                       trainGpuTypes = append(trainGpuTypes, pool.Pool...)
-                       if pool.ResourceSpec != nil {
-                           ctx.Data["train_resource_specs"] = pool.ResourceSpec
-                       }
-                   }
-               }
-               break
-           }
-       }
-       if len(debugGpuTypes) > 0 {
-           ctx.Data["gpu_types"] = debugGpuTypes
-       }
-       if len(trainGpuTypes) > 0 {
-           ctx.Data["train_gpu_types"] = trainGpuTypes
-       }
-   }
-
    ctx.Data["params"] = ""
    ctx.Data["branchName"] = ctx.Repo.BranchName
 
@@ -229,8 +148,6 @@ func prepareCloudbrainOneSpecs(ctx *context.Context) {
        AiCenterCode: models.AICenterOfCloudBrainOne,
    })
    ctx.Data["debug_specs"] = debugSpecs
-   b, _ := json.Marshal(debugSpecs)
-   log.Info("%s", string(b))
 
    trainSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
        JobType:         models.JobTypeTrain,
@@ -247,6 +164,14 @@ func prepareCloudbrainOneSpecs(ctx *context.Context) {
        AiCenterCode: models.AICenterOfCloudBrainOne,
    })
    ctx.Data["inference_specs"] = inferenceSpecs
+
+   benchmarkSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
+       JobType:         models.JobTypeBenchmark,
+       ComputeResource: models.GPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainOne,
+   })
+   ctx.Data["benchmark_specs"] = benchmarkSpecs
 }
 
 func CloudBrainNew(ctx *context.Context) {
@@ -348,18 +273,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
        command = commandTrain
    }
 
-   errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId)
-   if errStr != "" {
-       cloudBrainNewDataPrepare(ctx)
-       ctx.RenderWithErr(errStr, tpl, &form)
-       return
-   }
-
    if branchName == "" {
        branchName = cloudbrain.DefaultBranchName
    }
-   errStr = loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath)
+   errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath)
    if errStr != "" {
        cloudBrainNewDataPrepare(ctx)
        ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form)
@@ -375,7 +292,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
        AiCenterCode: models.AICenterOfCloudBrainOne})
    if err != nil || spec == nil {
        cloudBrainNewDataPrepare(ctx)
-       ctx.RenderWithErr("Illegal resource specification", tpl, &form)
+       ctx.RenderWithErr("Resource specification not available", tpl, &form)
        return
    }
@@ -534,7 +451,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
        AiCenterCode: models.AICenterOfCloudBrainOne})
    if err != nil || spec == nil {
        cloudBrainNewDataPrepare(ctx)
-       ctx.RenderWithErr("Illegal resource specification", tpl, &form)
+       ctx.RenderWithErr("Resource specification not available", tpl, &form)
        return
    }
    req := cloudbrain.GenerateCloudBrainTaskReq{
@@ -2447,7 +2364,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
        AiCenterCode: models.AICenterOfCloudBrainOne})
    if err != nil || spec == nil {
        cloudBrainNewDataPrepare(ctx)
-       ctx.RenderWithErr("Illegal resource specification", tplCloudBrainBenchmarkNew, &form)
+       ctx.RenderWithErr("Resource specification not available", tplCloudBrainBenchmarkNew, &form)
        return
    }
@@ -2587,7 +2504,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
        AiCenterCode: models.AICenterOfCloudBrainOne})
    if err != nil || spec == nil {
        cloudBrainNewDataPrepare(ctx)
-       ctx.RenderWithErr("Illegal resource specification", tpl, &form)
+       ctx.RenderWithErr("Resource specification not available", tpl, &form)
        return
    }
    req := cloudbrain.GenerateCloudBrainTaskReq{
@@ -2,6 +2,7 @@ package repo
 
 import (
    "archive/zip"
+   "code.gitea.io/gitea/services/cloudbrain/resource"
    "encoding/json"
    "errors"
    "fmt"
@@ -141,11 +142,7 @@ func notebookNewDataPrepare(ctx *context.Context) error {
    }
    ctx.Data["images"] = modelarts.ImageInfos.ImageInfo
 
-   if modelarts.FlavorInfos == nil {
-       json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
-   }
-   ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
-   setSpecBySpecialPoolConfig(ctx, string(models.JobTypeDebug))
+   prepareCloudbrainTwoDebugSpecs(ctx)
 
    ctx.Data["datasetType"] = models.TypeCloudBrainTwo
@@ -155,6 +152,16 @@ func notebookNewDataPrepare(ctx *context.Context) error {
    return nil
 }
 
+func prepareCloudbrainTwoDebugSpecs(ctx *context.Context) {
+   noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
+       JobType:         models.JobTypeDebug,
+       ComputeResource: models.NPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainTwo,
+   })
+   ctx.Data["Specs"] = noteBookSpecs
+}
+
 func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
    ctx.Data["PageIsNotebook"] = true
    jobName := form.JobName
@@ -205,7 +212,6 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
    jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
    uuid := form.Attachment
    description := form.Description
-   flavor := form.Flavor
    imageId := form.ImageId
    repo := ctx.Repo.Repository
 
@@ -241,14 +247,17 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
        }
    }
 
-   errStr := checkModelArtsSpecialPool(ctx, flavor, string(models.JobTypeDebug))
-   if errStr != "" {
+   spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
+       JobType:         models.JobTypeDebug,
+       ComputeResource: models.NPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainTwo})
+   if err != nil || spec == nil {
        notebookNewDataPrepare(ctx)
-       ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsNotebookNew, &form)
+       ctx.RenderWithErr("Resource specification not available", tplModelArtsNotebookNew, &form)
        return
    }
 
-   err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
+   err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec)
    if err != nil {
        log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
        notebookNewDataPrepare(ctx)
@@ -728,14 +737,7 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
    }
    ctx.Data["engine_versions"] = versionInfos.Version
 
-   var flavorInfos modelarts.Flavor
-   if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
-       ctx.ServerError("json.Unmarshal failed:", err)
-       return err
-   }
-   ctx.Data["flavor_infos"] = flavorInfos.Info
-   setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
+   prepareCloudbrainTwoTrainSpecs(ctx)
 
    ctx.Data["params"] = ""
    ctx.Data["branchName"] = ctx.Repo.BranchName
@@ -753,6 +755,16 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
    return nil
 }
 
+func prepareCloudbrainTwoTrainSpecs(ctx *context.Context) {
+   noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
+       JobType:         models.JobTypeTrain,
+       ComputeResource: models.NPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainTwo,
+   })
+   ctx.Data["Specs"] = noteBookSpecs
+}
+
 func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) {
    modelarts.InitSpecialPool()
@@ -835,13 +847,7 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts
    }
    ctx.Data["engine_versions"] = versionInfos.Version
 
-   var flavorInfos modelarts.Flavor
-   if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
-       ctx.ServerError("json.Unmarshal failed:", err)
-       return err
-   }
-   ctx.Data["flavor_infos"] = flavorInfos.Info
-   setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
+   prepareCloudbrainTwoTrainSpecs(ctx)
 
    configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
    if err != nil {
@@ -1020,13 +1026,7 @@ func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrai
    }
    ctx.Data["engine_versions"] = versionInfos.Version
 
-   var flavorInfos modelarts.Flavor
-   if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
-       ctx.ServerError("json.Unmarshal failed:", err)
-       return err
-   }
-   ctx.Data["flavor_infos"] = flavorInfos.Info
-   setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
+   prepareCloudbrainTwoTrainSpecs(ctx)
 
    var Parameters modelarts.Parameters
    if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil {
@@ -1079,7 +1079,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
    workServerNumber := form.WorkServerNumber
    engineID := form.EngineID
    bootFile := strings.TrimSpace(form.BootFile)
-   flavorCode := form.Flavor
    params := form.Params
    poolID := form.PoolID
    //isSaveParam := form.IsSaveParam
@@ -1117,10 +1116,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
        return
    }
 
-   errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
-   if errStr != "" {
+   spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
+       JobType:         models.JobTypeTrain,
+       ComputeResource: models.NPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainTwo})
+   if err != nil || spec == nil {
        trainJobErrorNewDataPrepare(ctx, form)
-       ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
+       ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobNew, &form)
        return
    }
    //Determine whether the task name of the task in the project is duplicated
@@ -1283,7 +1286,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
        BootFileUrl:      codeObsPath + bootFile,
        BootFile:         bootFile,
        TrainUrl:         outputObsPath,
-       FlavorCode:       flavorCode,
        WorkServerNumber: workServerNumber,
        EngineID:         int64(engineID),
        LogUrl:           logObsPath,
@@ -1299,6 +1301,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
        VersionCount:      VersionCount,
        TotalVersionCount: modelarts.TotalVersionCount,
        DatasetName:       datasetNames,
+       Spec:              spec,
    }
    userCommand, userImageUrl := getUserCommand(engineID, req)
    req.UserCommand = userCommand
@@ -1384,7 +1387,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
    workServerNumber := form.WorkServerNumber
    engineID := form.EngineID
    bootFile := strings.TrimSpace(form.BootFile)
-   flavorCode := form.Flavor
    params := form.Params
    poolID := form.PoolID
    //isSaveParam := form.IsSaveParam
@@ -1414,10 +1416,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
        return
    }
 
-   errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
-   if errStr != "" {
+   spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
+       JobType:         models.JobTypeTrain,
+       ComputeResource: models.NPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainTwo})
+   if err != nil || spec == nil {
        versionErrorDataPrepare(ctx, form)
-       ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
+       ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobVersionNew, &form)
        return
    }
@@ -1571,7 +1577,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
        BootFileUrl:      codeObsPath + bootFile,
        BootFile:         bootFile,
        TrainUrl:         outputObsPath,
-       FlavorCode:       flavorCode,
        WorkServerNumber: workServerNumber,
        IsLatestVersion:  isLatestVersion,
        EngineID:         int64(engineID),
@@ -1588,6 +1593,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
        PreVersionName:    PreVersionName,
        TotalVersionCount: latestTask.TotalVersionCount + 1,
        DatasetName:       datasetNames,
+       Spec:              spec,
    }
    userCommand, userImageUrl := getUserCommand(engineID, req)
    req.UserCommand = userCommand
@@ -2016,7 +2022,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
    workServerNumber := form.WorkServerNumber
    engineID := form.EngineID
    bootFile := strings.TrimSpace(form.BootFile)
-   flavorCode := form.Flavor
    params := form.Params
    poolID := form.PoolID
    repo := ctx.Repo.Repository
@@ -2078,13 +2083,16 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
        }
    }
 
-   errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeInference))
-   if errStr != "" {
+   spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
+       JobType:         models.JobTypeInference,
+       ComputeResource: models.NPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainTwo})
+   if err != nil || spec == nil {
        inferenceJobErrorNewDataPrepare(ctx, form)
-       ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
+       ctx.RenderWithErr("Resource specification not available", tplModelArtsInferenceJobNew, &form)
        return
    }
    //todo: del the codeLocalPath
    _, err = ioutil.ReadDir(codeLocalPath)
    if err == nil {
@@ -2170,7 +2178,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
        BootFileUrl:      codeObsPath + bootFile,
        BootFile:         bootFile,
        TrainUrl:         trainUrl,
-       FlavorCode:       flavorCode,
        WorkServerNumber: workServerNumber,
        EngineID:         int64(engineID),
        LogUrl:           logObsPath,
@@ -2369,14 +2376,7 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error {
    }
    ctx.Data["engine_versions"] = versionInfos.Version
 
-   var flavorInfos modelarts.Flavor
-   if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
-       ctx.ServerError("json.Unmarshal failed:", err)
-       return err
-   }
-   ctx.Data["flavor_infos"] = flavorInfos.Info
-   setSpecBySpecialPoolConfig(ctx, string(models.JobTypeInference))
+   prepareCloudbrainTwoInferenceSpecs(ctx)
 
    ctx.Data["params"] = ""
    ctx.Data["branchName"] = ctx.Repo.BranchName
@@ -2407,6 +2407,16 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error {
    return nil
 }
 
+func prepareCloudbrainTwoInferenceSpecs(ctx *context.Context) {
+   noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
+       JobType:         models.JobTypeInference,
+       ComputeResource: models.NPU,
+       Cluster:         models.OpenICluster,
+       AiCenterCode:    models.AICenterOfCloudBrainTwo,
+   })
+   ctx.Data["Specs"] = noteBookSpecs
+}
+
 func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) error {
    ctx.Data["PageIsCloudBrain"] = true
@@ -2,12 +2,17 @@ package resource
 
 import (
    "code.gitea.io/gitea/models"
+   "code.gitea.io/gitea/modules/cloudbrain"
    "code.gitea.io/gitea/modules/grampus"
    "code.gitea.io/gitea/modules/log"
+   "code.gitea.io/gitea/modules/setting"
    "code.gitea.io/gitea/routers/response"
    "code.gitea.io/gitea/services/admin/operate_log"
+   "encoding/json"
+   "errors"
    "fmt"
    "strings"
+   "time"
 )
 
 func AddResourceSpecification(doerId int64, req models.ResourceSpecificationReq) error {
@@ -186,7 +191,7 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod
 }
 
 func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) {
-   r, err := models.FindAvailableSpecs(opts)
+   r, err := models.FindSpecs(opts)
    if err != nil {
        log.Error("FindAvailableSpecs error.%v", err)
        return nil, err
@@ -270,3 +275,260 @@ func GetCloudbrainSpec(cloudbrainId int64) (*models.Specification, error) {
    }
    return c.ConvertToSpecification(), nil
 }
+
+func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) {
+   var success int64
+   var total int64
+
+   if !scopeAll {
+       if ids == nil || len(ids) == 0 {
+           return 0, 0, nil
+       }
+       total = int64(len(ids))
+       tasks, err := models.GetCloudbrainWithDeletedByIDs(ids)
+       if err != nil {
+           return total, 0, err
+       }
+       for _, task := range tasks {
+           err = RefreshOneHistorySpec(task)
+           if err != nil {
+               log.Error("RefreshOneHistorySpec error.%v", err)
+               continue
+           }
+           success++
+       }
+   } else {
+       page := 1
+       pageSize := 100
+       n, err := models.CountNoSpecHistoricTask()
+       if err != nil {
+           log.Error("FindNoSpecHistoricTask CountNoSpecHistoricTask error. e=%v", err)
+           return 0, 0, err
+       }
+       total = n
+       for i := 0; i < 1000; i++ {
+           list, err := models.FindNoSpecHistoricTask(page, pageSize)
+           if err != nil {
+               log.Error("FindNoSpecHistoricTask error.page=%d pageSize=%d e=%v", page, pageSize, err)
+               return total, success, err
+           }
+           if len(list) == 0 {
+               log.Info("RefreshHistorySpec. list is empty")
+               break
+           }
+           for _, task := range list {
+               time.Sleep(1 * time.Second)
+               err = RefreshOneHistorySpec(task)
+               if err != nil {
+                   log.Error("RefreshOneHistorySpec error.%v", err)
+                   continue
+               }
+               success++
+           }
+           if len(list) < pageSize {
+               log.Info("RefreshHistorySpec. list < pageSize")
+               break
+           }
+       }
+   }
+   return total, success, nil
+}
+
+func RefreshOneHistorySpec(task *models.Cloudbrain) error {
+   var spec *models.Specification
+   var err error
+   switch task.Type {
+   case models.TypeCloudBrainOne:
+       spec, err = getCloudbrainOneSpec(task)
+   }
+   if err != nil {
+       log.Error("find spec error,task.ID=%d err=%v", task.ID, err)
+       return err
+   }
+   if spec == nil {
+       log.Error("find spec failed,task.ID=%d", task.ID)
+       return errors.New("find spec failed")
+   }
+   return InsertCloudbrainSpec(task.ID, spec)
+}
+
+func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error) {
+   //find from remote
+   result, err := cloudbrain.GetJob(task.JobID)
+   if err != nil {
+       log.Error("getCloudbrainOneSpec error. %v", err)
+       return nil, err
+   }
+   if result != nil {
+       jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
+       memSize, _ := models.ParseMemSizeFromGrampus(jobRes.Resource.Memory)
+       if task.ComputeResource == "CPU/GPU" {
+           task.ComputeResource = models.GPU
+       }
+       var shmMB float32
+       if jobRes.Config.TaskRoles != nil && len(jobRes.Config.TaskRoles) > 0 {
+           shmMB = float32(jobRes.Config.TaskRoles[0].ShmMB) / 1024
+       }
+       opt := models.FindSpecsOptions{
+           ComputeResource: task.ComputeResource,
+           Cluster:         models.OpenICluster,
+           AiCenterCode:    models.AICenterOfCloudBrainOne,
+           QueueCode:       task.GpuQueue,
+           AccCardsNum:     jobRes.Resource.NvidiaComGpu,
+           UseAccCardsNum:  true,
+           CpuCores:        jobRes.Resource.CPU,
+           UseCpuCores:     true,
+           MemGiB:          memSize,
+           UseMemGiB:       memSize > 0,
+           ShareMemGiB:     shmMB,
+           UseShareMemGiB:  shmMB > 0,
+           RequestAll:      true,
+       }
+       specs, err := models.FindSpecs(opt)
+       if err != nil {
+           log.Error("getCloudbrainOneSpec from remote error,%v", err)
+           return nil, err
+       }
+       if len(specs) == 1 {
+           return specs[0], nil
+       }
+       if len(specs) == 0 {
+           s, err := InitQueueAndSpec(opt, "云脑一", "处理历史云脑任务时自动添加")
+           if err != nil {
+               log.Error("getCloudbrainOneSpec InitQueueAndSpec error.err=%v", err)
+               return nil, nil
+           }
+           return s, nil
+       }
+       if len(specs) > 1 {
+           log.Error("Too many results matched.size=%d opt=%+v", len(specs), opt)
+           return nil, nil
+       }
+   } else {
+       //find from config
+       var specConfig *models.ResourceSpec
+       hasSpec := false
+       if task.JobType == string(models.JobTypeTrain) {
+           if cloudbrain.TrainResourceSpecs == nil {
+               json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
+           }
+           for _, tmp := range cloudbrain.TrainResourceSpecs.ResourceSpec {
+               if tmp.Id == task.ResourceSpecId {
+                   hasSpec = true
+                   specConfig = tmp
+                   break
+               }
+           }
+       } else if task.JobType == string(models.JobTypeInference) {
+           if cloudbrain.InferenceResourceSpecs == nil {
+               json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs)
+           }
+           for _, tmp := range cloudbrain.InferenceResourceSpecs.ResourceSpec {
+               if tmp.Id == task.ResourceSpecId {
+                   hasSpec = true
+                   specConfig = tmp
+                   break
+               }
+           }
+       } else {
+           if cloudbrain.ResourceSpecs == nil {
+               json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs)
+           }
+           for _, tmp := range cloudbrain.ResourceSpecs.ResourceSpec {
+               if tmp.Id == task.ResourceSpecId {
+                   hasSpec = true
+                   specConfig = tmp
+                   break
+               }
+           }
+       }
+       if !hasSpec && cloudbrain.SpecialPools != nil {
+           for _, specialPool := range cloudbrain.SpecialPools.Pools {
+               if specialPool.ResourceSpec != nil {
+                   for _, spec := range specialPool.ResourceSpec {
+                       if task.ResourceSpecId == spec.Id {
+                           hasSpec = true
+                           specConfig = spec
+                           break
+                       }
+                   }
+               }
+           }
+       }
+       if specConfig == nil {
+           log.Error("getCloudbrainOneSpec from config failed,task.ResourceSpecId=%d", task.ResourceSpecId)
+           return nil, nil
+       }
+       opt := models.FindSpecsOptions{
+           JobType:         models.JobType(task.JobType),
+           ComputeResource: task.ComputeResource,
+           Cluster:         models.OpenICluster,
+           AiCenterCode:    models.AICenterOfCloudBrainOne,
+           QueueCode:       task.GpuQueue,
+           AccCardsNum:     specConfig.GpuNum,
+           UseAccCardsNum:  true,
+           CpuCores:        specConfig.CpuNum,
+           UseCpuCores:     true,
+           MemGiB:          float32(specConfig.MemMiB) / 1024,
+           UseMemGiB:       true,
+           ShareMemGiB:     float32(specConfig.ShareMemMiB) / 1024,
+           UseShareMemGiB:  true,
+           RequestAll:      true,
+       }
+       specs, err := models.FindSpecs(opt)
+       if err != nil {
+           log.Error("getCloudbrainOneSpec from config error,%v", err)
+           return nil, err
+       }
+       if len(specs) > 1 {
+           log.Error("Too many results matched.size=%d opt=%+v", len(specs), opt)
+           return nil, nil
+       }
+       if len(specs) == 0 {
+           s, err := InitQueueAndSpec(opt, "云脑一", "处理历史云脑任务时自动添加")
+           if err != nil {
+               log.Error("getCloudbrainOneSpec InitQueueAndSpec error.err=%v", err)
+               return nil, nil
+           }
+           return s, nil
+       }
+       return specs[0], nil
+   }
+   return nil, nil
+}
+
+func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error {
+   return nil
+}
+
+func RefreshC2NetSpec(task *models.Cloudbrain) error {
+   return nil
+}
+
+func InitQueueAndSpec(opt models.FindSpecsOptions, aiCenterName string, remark string) (*models.Specification, error) {
+   return models.InitQueueAndSpec(models.ResourceQueue{
+       QueueCode:       opt.QueueCode,
+       Cluster:         opt.Cluster,
+       AiCenterCode:    opt.AiCenterCode,
+       AiCenterName:    aiCenterName,
+       ComputeResource: opt.ComputeResource,
+       AccCardType:     models.GetCloudbrainOneAccCardType(opt.QueueCode),
+       Remark:          remark,
+   }, models.ResourceSpecification{
+       AccCardsNum: opt.AccCardsNum,
+       CpuCores:    opt.CpuCores,
+       MemGiB:      opt.MemGiB,
+       GPUMemGiB:   opt.GPUMemGiB,
+       ShareMemGiB: opt.ShareMemGiB,
+       Status:      models.SpecOffShelf,
+   })
+}