From 5a04a20bd281e06fedd20f90db10a204d4121d0f Mon Sep 17 00:00:00 2001 From: zouap Date: Thu, 2 Jun 2022 15:42:26 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=95=8C=E9=9D=A2=E4=BA=A4?= =?UTF-8?q?=E4=BA=92=E5=8A=9F=E8=83=BD=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zouap --- models/ai_model_manage.go | 1 + routers/repo/ai_model_convert.go | 99 +++++++++++++++++++- routers/repo/ai_model_manage.go | 4 + templates/repo/modelmanage/convertIndex.tmpl | 35 +++++++ 4 files changed, 138 insertions(+), 1 deletion(-) diff --git a/models/ai_model_manage.go b/models/ai_model_manage.go index a60a7011f..463c47f16 100644 --- a/models/ai_model_manage.go +++ b/models/ai_model_manage.go @@ -47,6 +47,7 @@ type AiModelConvert struct { RepoId int64 `xorm:"INDEX NULL"` ModelId string `xorm:"NOT NULL"` ModelVersion string `xorm:"NOT NULL"` + ModelPath string `xorm:"NOT NULL"` DestFormat int `xorm:"NOT NULL DEFAULT 0"` NetOutputFormat int `xorm:"NULL"` UserId int64 `xorm:"NOT NULL"` diff --git a/routers/repo/ai_model_convert.go b/routers/repo/ai_model_convert.go index 459c3b3cd..fb6520b58 100644 --- a/routers/repo/ai_model_convert.go +++ b/routers/repo/ai_model_convert.go @@ -1,7 +1,11 @@ package repo import ( + "encoding/json" + "errors" + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -14,6 +18,19 @@ const ( PYTORCH_ENGINE = 0 TENSORFLOW_ENGINE = 1 MINDSPORE_ENGIN = 2 + ModelMountPath = "/model" + CodeMountPath = "/code" + DataSetMountPath = "/dataset" + LogFile = "log.txt" + DefaultBranchName = "master" + SubTaskName = "task1" + GpuQueue = "openidgx" + Success = "S000" + GPU_PYTORCH_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap" +) + +var ( + TrainResourceSpecs *models.ResourceSpecs ) func SaveModelConvert(ctx *context.Context) { @@ -25,6 +42,7 
// NOTE(review): the two physical lines that follow are a whitespace-collapsed fragment of a
// unified diff (git format-patch output), not standalone Go. They are kept byte-identical:
// changing code inside the hunks would desynchronise the "@@ -a,b +c,d @@" line counts and the
// diffstat in the patch header.
//
// SaveModelConvert hunks: read the new "modelPath" query parameter and copy it into the
// ModelPath field of the record being built.
//
// createTrainJob(modelConvertId, modelId, SrcEngine, ctx, modelPath) error replaces the previous
// three-parameter version that returned nothing. Only SrcEngine == PYTORCH_ENGINE does any work;
// any other value falls through to "return nil". In the PyTorch branch it:
//   1. builds codePath as setting.JobPath + modelConvertId + CodeMountPath, then calls
//      downloadCode (branch DefaultBranchName) and uploadCodeToMinio;
//   2. builds a job-local model directory path, then calls mkModelPath and uploadCodeToMinio on it;
//   3. unmarshals setting.TrainResourceSpecs into the package-level TrainResourceSpecs the first
//      time it is nil, and takes ResourceSpec[1];
//   4. calls cloudbrain.CreateJob with one task role and three host-path volumes: code
//      (read-write), dataset (read-only), model (read-write);
//   5. returns the CreateJob error, or errors.New(jobResult.Msg) when jobResult.Code != Success;
//      on success the returned jobId is only logged.
//
// Review findings, left unfixed for the reason given in the first note:
//   - "modelPath := ..." inside the if-block shadows the modelPath parameter, so the value passed
//     by the caller is never read and dataActualPath is derived from the job-local path.
//     Presumably the parameter was meant to be used there; confirm.
//   - The modelId parameter is not referenced anywhere in the visible body.
//   - The error from models.GetRepositoryByID is discarded with "_", the error returned by
//     json.Unmarshal is ignored, and downloadCode / uploadCodeToMinio / mkModelPath are called as
//     bare statements, so any result they return is not inspected.
//   - TrainResourceSpecs.ResourceSpec[1] is read without a nil or length check; this looks like it
//     would panic if the setting fails to parse or holds fewer than two entries (assumes
//     ResourceSpec is a slice; verify against the models package).
//   - log.Error("CreateJob failed:", ...) has no format verbs for its two extra arguments, and the
//     second log.Error call has two %s verbs but three arguments (assuming log.Error is
//     printf-style, as the %s usage suggests).
//   - jobResult.Payload["jobId"].(string) is a single-value type assertion; it panics if the key is
//     absent or the value is not a string.
//
// getModelConvertCommand(name) returns
//   "python /code/convert_pytorch.py > " + ModelMountPath + "/" + name + "-" + LogFile
// Only stdout is redirected, and "/code/" is written literally rather than via CodeMountPath.
//
// QueryModelFileForPredict hunk: adds a models.TypeCloudBrainOne branch that drops the first
// len(setting.Attachment.Minio.Bucket)+1 bytes of model.Path, lists objects through
// storage.GetAllObjectByBucketAndPrefixMinio and writes them as JSON. The second return value
// is discarded with "_", and the slice expression panics if model.Path is shorter than that offset.
@@ func SaveModelConvert(ctx *context.Context) { name := ctx.Query("name") desc := ctx.Query("desc") modelId := ctx.Query("modelId") + modelPath := ctx.Query("modelPath") SrcEngine := ctx.QueryInt("SrcEngine") InputShape := ctx.Query("inputshape") InputDataFormat := ctx.Query("inputdataformat") @@ -40,6 +58,7 @@ func SaveModelConvert(ctx *context.Context) { SrcEngine: SrcEngine, RepoId: ctx.Repo.Repository.ID, ModelId: modelId, + ModelPath: modelPath, DestFormat: DestFormat, NetOutputFormat: NetOutputFormat, InputShape: InputShape, @@ -52,11 +71,89 @@ func SaveModelConvert(ctx *context.Context) { }) } -func createTrainJob(modelId string, SrcEngine int, ctx *context.Context) { +func createTrainJob(modelConvertId string, modelId string, SrcEngine int, ctx *context.Context, modelPath string) error { repo, _ := models.GetRepositoryByID(ctx.Repo.Repository.RepoID) if SrcEngine == PYTORCH_ENGINE { + codePath := setting.JobPath + modelConvertId + CodeMountPath + downloadCode(repo, codePath, DefaultBranchName) + uploadCodeToMinio(codePath+"/", modelConvertId, CodeMountPath+"/") + modelPath := setting.JobPath + modelConvertId + ModelMountPath + "/" + mkModelPath(modelPath) + uploadCodeToMinio(modelPath, modelConvertId, ModelMountPath+"/") + command := getModelConvertCommand(modelConvertId) + dataActualPath := setting.Attachment.Minio.RealPath + modelPath + + if TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) + } + resourceSpec := TrainResourceSpecs.ResourceSpec[1] + jobResult, err := cloudbrain.CreateJob(modelConvertId, models.CreateJobParams{ + JobName: modelConvertId, + RetryCount: 1, + GpuType: GpuQueue, + Image: GPU_PYTORCH_IMAGE, + TaskRoles: []models.TaskRole{ + { + Name: SubTaskName, + TaskNumber: 1, + MinSucceededTaskCount: 1, + MinFailedTaskCount: 1, + CPUNumber: resourceSpec.CpuNum, + GPUNumber: resourceSpec.GpuNum, + MemoryMB: resourceSpec.MemMiB, + ShmMB: resourceSpec.ShareMemMiB, + Command: command, + 
NeedIBDevice: false, + IsMainRole: false, + UseNNI: false, + }, + }, + Volumes: []models.Volume{ + { + HostPath: models.StHostPath{ + Path: codePath, + MountPath: CodeMountPath, + ReadOnly: false, + }, + }, + { + HostPath: models.StHostPath{ + Path: dataActualPath, + MountPath: DataSetMountPath, + ReadOnly: true, + }, + }, + { + HostPath: models.StHostPath{ + Path: modelPath, + MountPath: ModelMountPath, + ReadOnly: false, + }, + }, + }, + }) + if err != nil { + log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) + return err + } + if jobResult.Code != Success { + log.Error("CreateJob(%s) failed:%s", modelConvertId, jobResult.Msg, ctx.Data["MsgID"]) + return errors.New(jobResult.Msg) + } + + var jobID = jobResult.Payload["jobId"].(string) + log.Info("jobId=" + jobID) } + + return nil +} + +func getModelConvertCommand(name string) string { + var command string + bootFile := "convert_pytorch.py" + command += "python /code/" + bootFile + " > " + ModelMountPath + "/" + name + "-" + LogFile + return command } func DeleteModelConvert(ctx *context.Context) { diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index a13095d52..b0937aa6d 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -791,6 +791,10 @@ func QueryModelFileForPredict(ctx *context.Context) { prefix := model.Path[len(setting.Bucket)+1:] fileinfos, _ := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, prefix) ctx.JSON(http.StatusOK, fileinfos) + } else if model.Type == models.TypeCloudBrainOne { + prefix := model.Path[len(setting.Attachment.Minio.Bucket)+1:] + fileinfos, _ := storage.GetAllObjectByBucketAndPrefixMinio(setting.Attachment.Minio.Bucket, prefix) + ctx.JSON(http.StatusOK, fileinfos) } } diff --git a/templates/repo/modelmanage/convertIndex.tmpl b/templates/repo/modelmanage/convertIndex.tmpl index 13181b3be..da7f8b404 100644 --- a/templates/repo/modelmanage/convertIndex.tmpl +++ 
b/templates/repo/modelmanage/convertIndex.tmpl @@ -208,6 +208,17 @@ +
+ + +
   @@ -310,6 +321,7 @@ document.getElementById("formId").reset(); $('#choice_model').dropdown('clear') $('#choice_version').dropdown('clear') + $('#choice_file').dropdown('clear') $('.ui.dimmer').css({"background-color":""}) $('.ui.error.message').text() $('.ui.error.message').css('display','none') @@ -333,6 +345,14 @@ onChange:function(value){ console.log("model version:" + value); $('#choice_version input[name="ModelVersion"]').val(value) + loadModelFile(value); + } + }) + + $('#choice_file').dropdown({ + onChange:function(value){ + console.log("model file:" + value); + $('#choice_file input[name="ModelFile"]').val(value) } }) }) @@ -353,6 +373,20 @@ loadModelVersion(nameList[0]) }) } + function loadModelFile(modelId){ + console.log("modelId=" + modelId); + $.get(`${repolink}/modelmanage/query_modelfile_for_predict?ID=${modelId}`, (data) => { + const n_length = data.length + let file_html='' + for (let i=0;i${data[i].FileName}
` + file_html += '' + } + $("#model-file").append(file_html) + $('#choice_file .default.text').text(data[0].FileName) + $('#choice_file input[name="ModelFile"]').val(data[0].FileName) + }) + } function loadModelVersion(value){ console.log("value=" + value); if(value ==null || value ==""){ @@ -370,6 +404,7 @@ $(".ui.dropdown.selection.search.width70").removeClass("loading") $('#choice_version .default.text').text(versionList[0].Version) $('#choice_version input[name="ModelVersion"]').val(versionList[0].ID) + loadModelFile(versionList[0].ID); } }