| @@ -41,35 +41,36 @@ type AiModelManage struct { | |||
| } | |||
| type AiModelConvert struct { | |||
| ID string `xorm:"pk"` | |||
| Name string `xorm:"INDEX NOT NULL"` | |||
| Status string `xorm:"NULL"` | |||
| SrcEngine int `xorm:"NOT NULL DEFAULT 0"` | |||
| RepoId int64 `xorm:"INDEX NULL"` | |||
| ModelId string `xorm:"NOT NULL"` | |||
| ModelName string `xorm:"NULL"` | |||
| ModelVersion string `xorm:"NOT NULL"` | |||
| ModelPath string `xorm:"NULL"` | |||
| DestFormat int `xorm:"NOT NULL DEFAULT 0"` | |||
| NetOutputFormat int `xorm:"NULL"` | |||
| UserId int64 `xorm:"NOT NULL"` | |||
| CloudBrainTaskId string `xorm:"NULL"` | |||
| ContainerID string | |||
| ContainerIp string | |||
| RunTime int64 `xorm:"NULL"` | |||
| TrainJobDuration string | |||
| InputShape string `xorm:"varchar(2000)"` | |||
| InputDataFormat string `xorm:"NOT NULL"` | |||
| Description string `xorm:"varchar(2000)"` | |||
| Path string `xorm:"varchar(400) NOT NULL"` | |||
| CreatedUnix timeutil.TimeStamp `xorm:"created"` | |||
| UpdatedUnix timeutil.TimeStamp `xorm:"updated"` | |||
| StartTime timeutil.TimeStamp | |||
| EndTime timeutil.TimeStamp | |||
| UserName string | |||
| UserRelAvatarLink string | |||
| IsCanOper bool | |||
| IsCanDelete bool | |||
| ID string `xorm:"pk"` | |||
| Name string `xorm:"INDEX NOT NULL"` | |||
| Status string `xorm:"NULL"` | |||
| SrcEngine int `xorm:"NOT NULL DEFAULT 0"` | |||
| RepoId int64 `xorm:"INDEX NULL"` | |||
| ModelId string `xorm:"NOT NULL"` | |||
| ModelName string `xorm:"NULL"` | |||
| ModelVersion string `xorm:"NOT NULL"` | |||
| ModelPath string `xorm:"NULL"` | |||
| DestFormat int `xorm:"NOT NULL DEFAULT 0"` | |||
| NetOutputFormat int `xorm:"NULL"` | |||
| UserId int64 `xorm:"NOT NULL"` | |||
| CloudBrainTaskId string `xorm:"NULL"` | |||
| ModelArtsVersionId string `xorm:"NULL"` | |||
| ContainerID string | |||
| ContainerIp string | |||
| RunTime int64 `xorm:"NULL"` | |||
| TrainJobDuration string | |||
| InputShape string `xorm:"varchar(2000)"` | |||
| InputDataFormat string `xorm:"NOT NULL"` | |||
| Description string `xorm:"varchar(2000)"` | |||
| Path string `xorm:"varchar(400) NOT NULL"` | |||
| CreatedUnix timeutil.TimeStamp `xorm:"created"` | |||
| UpdatedUnix timeutil.TimeStamp `xorm:"updated"` | |||
| StartTime timeutil.TimeStamp | |||
| EndTime timeutil.TimeStamp | |||
| UserName string | |||
| UserRelAvatarLink string | |||
| IsCanOper bool | |||
| IsCanDelete bool | |||
| } | |||
| type AiModelQueryOptions struct { | |||
| @@ -109,6 +110,21 @@ func ModelComputeAndSetDuration(task *AiModelConvert, result JobResultPayload) { | |||
| task.TrainJobDuration = ConvertDurationToStr(d) | |||
| } | |||
| func UpdateModelConvertModelArts(id string, CloudBrainTaskId string, VersionId string) error { | |||
| var sess *xorm.Session | |||
| sess = x.ID(id) | |||
| defer sess.Close() | |||
| re, err := sess.Cols("cloud_brain_task_id,model_arts_version_id").Update(&AiModelConvert{ | |||
| CloudBrainTaskId: CloudBrainTaskId, | |||
| ModelArtsVersionId: VersionId, | |||
| }) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| log.Info("success to update cloud_brain_task_id from db.re=" + fmt.Sprint((re))) | |||
| return nil | |||
| } | |||
| func UpdateModelConvertCBTI(id string, CloudBrainTaskId string) error { | |||
| var sess *xorm.Session | |||
| sess = x.ID(id) | |||
| @@ -908,6 +908,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| m.Group("/modelmanage", func() { | |||
| m.Get("/:id", repo.GetCloudbrainModelConvertTask) | |||
| m.Get("/:id/log", repo.CloudbrainForModelConvertGetLog) | |||
| m.Get("/:id/modelartlog", repo.TrainJobForModelConvertGetLog) | |||
| m.Get("/:id/model_list", repo.CloudBrainModelConvertList) | |||
| }, reqRepoReader(models.UnitTypeModelManage)) | |||
| m.Group("/modelarts", func() { | |||
| @@ -199,6 +199,68 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||
| } | |||
| func TrainJobForModelConvertGetLog(ctx *context.APIContext) { | |||
| var ( | |||
| err error | |||
| ) | |||
| var jobID = ctx.Params(":id") | |||
| var baseLine = ctx.Query("base_line") | |||
| var order = ctx.Query("order") | |||
| var lines = ctx.Query("lines") | |||
| lines_int, err := strconv.Atoi(lines) | |||
| if err != nil { | |||
| log.Error("change lines(%d) string to int failed", lines_int) | |||
| } | |||
| if order != modelarts.OrderDesc && order != modelarts.OrderAsc { | |||
| log.Error("order(%s) check failed", order) | |||
| ctx.JSON(http.StatusBadRequest, map[string]interface{}{ | |||
| "err_msg": "order check failed", | |||
| }) | |||
| return | |||
| } | |||
| resultLogFile, result, err := trainJobForModelConvertGetLogContent(jobID, baseLine, order, lines_int) | |||
| if err != nil { | |||
| log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error()) | |||
| // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||
| return | |||
| } | |||
| ctx.Data["log_file_name"] = resultLogFile.LogFileList[0] | |||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
| "JobID": jobID, | |||
| "LogFileName": resultLogFile.LogFileList[0], | |||
| "StartLine": result.StartLine, | |||
| "EndLine": result.EndLine, | |||
| "Content": result.Content, | |||
| "Lines": result.Lines, | |||
| }) | |||
| } | |||
| func trainJobForModelConvertGetLogContent(jobID string, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) { | |||
| task, err := models.QueryModelConvertById(jobID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||
| return nil, nil, err | |||
| } | |||
| resultLogFile, err := modelarts.GetTrainJobLogFileNames(task.CloudBrainTaskId, task.ModelArtsVersionId) | |||
| if err != nil { | |||
| log.Error("GetTrainJobLogFileNames(%s) failed:%v", task.CloudBrainTaskId, err.Error()) | |||
| return nil, nil, err | |||
| } | |||
| result, err := modelarts.GetTrainJobLog(task.CloudBrainTaskId, task.ModelArtsVersionId, baseLine, resultLogFile.LogFileList[0], order, lines) | |||
| if err != nil { | |||
| log.Error("GetTrainJobLog(%s) failed:%v", task.CloudBrainTaskId, err.Error()) | |||
| return nil, nil, err | |||
| } | |||
| return resultLogFile, result, err | |||
| } | |||
| func TrainJobGetLog(ctx *context.APIContext) { | |||
| var ( | |||
| err error | |||
| @@ -195,8 +195,8 @@ func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context | |||
| BranchName: DefaultBranchName, | |||
| } | |||
| result, err := modelarts.GenerateModelConvertTrainJob(req) | |||
| log.Info("jobId=" + fmt.Sprint(result.JobID)) | |||
| models.UpdateModelConvertCBTI(modelConvert.ID, fmt.Sprint(result.JobID)) | |||
| log.Info("jobId=" + fmt.Sprint(result.JobID) + " versionid=" + fmt.Sprint(result.VersionID)) | |||
| models.UpdateModelConvertModelArts(modelConvert.ID, fmt.Sprint(result.JobID), fmt.Sprint(result.VersionID)) | |||
| } | |||
| func downloadConvertCode(repopath string, codePath, branchName string) error { | |||
| @@ -368,6 +368,13 @@ func DeleteModelConvert(ctx *context.Context) { | |||
| } | |||
| } | |||
| func isCloudBrainTask(task *models.AiModelConvert) bool { | |||
| if task.SrcEngine == PYTORCH_ENGINE { | |||
| return true | |||
| } | |||
| return false | |||
| } | |||
| func StopModelConvert(ctx *context.Context) { | |||
| id := ctx.Params(":id") | |||
| log.Info("stop model convert start.id=" + id) | |||
| @@ -384,35 +391,47 @@ func ShowModelConvertInfo(ctx *context.Context) { | |||
| if err == nil { | |||
| ctx.Data["task"] = job | |||
| } | |||
| result, err := cloudbrain.GetJob(job.CloudBrainTaskId) | |||
| if err != nil { | |||
| log.Info("error:" + err.Error()) | |||
| ctx.Data["error"] = err.Error() | |||
| return | |||
| } | |||
| if result != nil { | |||
| jobRes, _ := models.ConvertToJobResultPayload(result.Payload) | |||
| ctx.Data["result"] = jobRes | |||
| taskRoles := jobRes.TaskRoles | |||
| taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
| ctx.Data["taskRes"] = taskRes | |||
| ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics | |||
| job.Status = jobRes.JobStatus.State | |||
| if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) { | |||
| job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP | |||
| job.ContainerID = taskRes.TaskStatuses[0].ContainerID | |||
| job.Status = taskRes.TaskStatuses[0].State | |||
| if isCloudBrainTask(job) { | |||
| ctx.Data["npu_display"] = "none" | |||
| ctx.Data["gpu_display"] = "block" | |||
| result, err := cloudbrain.GetJob(job.CloudBrainTaskId) | |||
| if err != nil { | |||
| log.Info("error:" + err.Error()) | |||
| ctx.Data["error"] = err.Error() | |||
| return | |||
| } | |||
| if jobRes.JobStatus.State != string(models.JobWaiting) { | |||
| models.ModelComputeAndSetDuration(job, jobRes) | |||
| err = models.UpdateModelConvert(job) | |||
| if err != nil { | |||
| log.Error("UpdateModelConvert failed:", err) | |||
| if result != nil { | |||
| jobRes, _ := models.ConvertToJobResultPayload(result.Payload) | |||
| ctx.Data["result"] = jobRes | |||
| taskRoles := jobRes.TaskRoles | |||
| taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
| ctx.Data["taskRes"] = taskRes | |||
| ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics | |||
| ctx.Data["AppExitDiagnostics"] = jobRes.JobStatus.AppExitDiagnostics | |||
| job.Status = jobRes.JobStatus.State | |||
| if jobRes.JobStatus.State != string(models.JobWaiting) && jobRes.JobStatus.State != string(models.JobFailed) { | |||
| job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP | |||
| job.ContainerID = taskRes.TaskStatuses[0].ContainerID | |||
| job.Status = taskRes.TaskStatuses[0].State | |||
| } | |||
| if jobRes.JobStatus.State != string(models.JobWaiting) { | |||
| models.ModelComputeAndSetDuration(job, jobRes) | |||
| err = models.UpdateModelConvert(job) | |||
| if err != nil { | |||
| log.Error("UpdateModelConvert failed:", err) | |||
| } | |||
| } | |||
| } | |||
| } else { | |||
| ctx.Data["npu_display"] = "block" | |||
| ctx.Data["gpu_display"] = "none" | |||
| ctx.Data["ExitDiagnostics"] = "" | |||
| ctx.Data["AppExitDiagnostics"] = "" | |||
| } | |||
| ctx.HTML(200, tplModelConvertInfo) | |||
| } | |||
| @@ -215,8 +215,9 @@ td, th { | |||
| <div class="content-pad"> | |||
| <div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);"> | |||
| <a class="active item" data-tab="first">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | |||
| <a class="item" data-tab="second" onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a> | |||
| <a class="item" data-tab="third" onclick="loadLog()">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
| <a id="gpuruntimeinfo" style="display: {{$.gpu_display}};" class="item" data-tab="second" onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a> | |||
| <a id="gpulog" style="display: {{$.gpu_display}};" class="item" data-tab="third" onclick="loadLog()">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
| <a id="npulog" style="display: {{$.npu_display}};" class="item" data-tab="five" onclick="loadLog()">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
| <a class="item" data-tab="four" onclick="loadModelFile()">{{$.i18n.Tr "repo.model_download"}}</a> | |||
| </div> | |||
| <div class="ui tab active" data-tab="first"> | |||
| @@ -417,7 +418,7 @@ td, th { | |||
| <div id="header"></div> | |||
| </div> | |||
| <div class="ui attached log" id="log" style="height: 390px !important; overflow: auto;"> | |||
| <input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}"> | |||
| <input type="hidden" id="json_value" value="{{$.AppExitDiagnostics}}"> | |||
| <input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}"> | |||
| <span id="info_display" class="info_text"> | |||
| @@ -443,6 +444,25 @@ td, th { | |||
| </div> | |||
| <div class="ui tab" data-tab="five"> | |||
| <div style="position: relative;"> | |||
| <span> | |||
| <a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;" class="log_top" data-version="V0001"><i class="icon-to-top"></i></a> | |||
| </span> | |||
| <span> | |||
| <a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;" class="log_bottom" data-version="V0001"><i class="icon-to-bottom"></i></a> | |||
| </span> | |||
| <div id="log_npu_message" class="ui message message" style="display: none;"> | |||
| <div id="log_npu_header"></div> | |||
| </div> | |||
| <div class="ui attached log" onscroll="fn()" id="log_npu" style="height: 300px !important; overflow: auto;"> | |||
| <input type="hidden" name="end_line" value> | |||
| <input type="hidden" name="start_line" value> | |||
| <pre id="log_file_npu"></pre> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="ui tab" data-tab="four"> | |||
| <input type="hidden" name="model" value="-1"> | |||
| <input type="hidden" name="modelback" value="-1"> | |||
| @@ -455,6 +475,8 @@ td, th { | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @@ -712,4 +734,136 @@ td, th { | |||
| document.getElementById("info_display").innerHTML=html; | |||
| } | |||
// Wraps fn so that it only runs once `delay` milliseconds have passed without
// another call to the wrapper. Each call cancels the previously scheduled run,
// so fn receives the arguments of the most recent call only.
function debounce(fn,delay){
    let pending;
    return (...callArgs) => {
        // A run is already scheduled: cancel it before scheduling a new one.
        if (pending) {
            clearTimeout(pending);
        }
        // Restart the countdown with the latest arguments.
        pending = setTimeout(() => {
            fn.apply(this, callArgs);
        }, delay);
    };
}
// Debounced scroll handler; the #log_npu element calls it from its onscroll
// attribute.
const fn = debounce(logScroll, 500)

// Loads more NPU log lines when #log_npu is scrolled to one of its edges.
//
// At the bottom edge it requests the 50 lines after the stored end_line and
// appends them; within 10px of the top it requests the 50 lines before the
// stored start_line and prepends them. A short notice is shown when the server
// reports that no further lines exist (Lines == 0).
//
// version_name is accepted for signature compatibility but is not read; the
// request URLs always use V0001.
function logScroll(version_name) {
    const container = document.querySelector(`#log_npu`)
    const scrollTop = container.scrollTop
    const scrollHeight = container.scrollHeight
    const clientHeight = container.clientHeight
    const scrollLeft = container.scrollLeft

    // Shows text in the notice banner for one second.
    const showNotice = (text) => {
        $(`#log_npu_header`).text(text)
        $(`#log_npu_message`).css('display', 'block')
        setTimeout(function(){
            $(`#log_npu_message`).css('display', 'none')
        }, 1000)
    }
    // Builds a <pre> holding the log text. textContent keeps job output from
    // being parsed as HTML, so markup inside the log cannot inject elements.
    const makeChunk = (content) => {
        const pre = document.createElement('pre')
        pre.textContent = content
        return pre
    }
    const logUrl = (baseLine, order) => `/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${baseLine}&lines=50&order=${order}`

    // Bottom edge: allow one pixel of rounding slack in either direction.
    const distanceToBottom = parseInt(scrollTop) + clientHeight - scrollHeight
    if(Math.abs(distanceToBottom) <= 1 && parseInt(scrollTop)!==0 && scrollLeft==0){
        let end_line = $(`#log_npu input[name=end_line]`).val()
        $.get(logUrl(end_line, 'desc'), (data) => {
            if (data.Lines == 0){
                showNotice('您已翻阅至日志底部')
                return
            }
            // Same end line as already shown: nothing new to append.
            if(end_line===data.EndLine){
                return
            }
            $(`#log_npu input[name=end_line]`).val(data.EndLine)
            $(`#log_npu`).append(makeChunk(data.Content))
        }).fail(function(err) {
            console.log(err);
        });
    }

    // Top edge: a range check also matches fractional scroll offsets, which an
    // exact match against the integers 0..10 would miss.
    if(scrollTop >= 0 && scrollTop <= 10 && scrollLeft==0){
        let start_line = $(`#log_npu input[name=start_line]`).val()
        $.get(logUrl(start_line, 'asc'), (data) => {
            if (data.Lines == 0){
                showNotice('您已翻阅至日志顶部')
                return
            }
            // Remember the new first line so the next request continues from it.
            $(`#log_npu input[name=start_line]`).val(data.StartLine)
            $(`#log_npu`).prepend(makeChunk(data.Content))
        }).fail(function(err) {
            console.log(err);
        });
    }
}
// Animates dom's vertical scroll position from currentY to targetY.
//
// Every step is scheduled with a 1ms timeout and moves one tenth of the
// remaining distance (rounded up). Once the remaining distance is within 10px
// the element jumps straight to targetY. currentX is the horizontal offset to
// hold during the animation and defaults to 0.
function scrollAnimation(dom, currentY, targetY, currentX) {
    const remaining = targetY - currentY;
    const x = currentX || 0;
    setTimeout(() => {
        // One step covers a tenth of the remaining distance, so the step size
        // shrinks as the target gets closer.
        const nextY = currentY + Math.ceil(remaining / 10);
        dom.scrollTo(x, nextY);
        if (remaining > 10 || remaining < -10) {
            // Still far away: schedule another step, keeping the same x so the
            // horizontal position does not snap back to 0.
            scrollAnimation(dom, nextY, targetY, x)
        } else {
            // Close enough: finish exactly on the target.
            dom.scrollTo(x, targetY)
        }
    }, 1)
}
// Jumps the NPU log view to the start of the log: drops the chunks loaded so
// far, requests the first 50 lines, records their start/end line numbers in the
// hidden inputs, shows a short notice and scrolls the container to the top.
$('.log_top').click(function(){
    const logContentDom = document.querySelector(`#log_npu`)
    $(`#log_file_npu`).siblings('pre').remove()
    $.get(`/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=&lines=50&order=asc`, (data) => {
        // Track the loaded range so scrolling can continue from it.
        $(`#log_npu input[name=end_line]`).val(data.EndLine)
        $(`#log_npu input[name=start_line]`).val(data.StartLine)
        // textContent keeps job output from being parsed as HTML.
        const chunk = document.createElement('pre')
        chunk.textContent = data.Content
        $(`#log_npu`).prepend(chunk)
        $(`#log_npu_header`).text('您已翻阅至日志顶部')
        $(`#log_npu_message`).css('display', 'block')
        setTimeout(function(){
            $(`#log_npu_message`).css('display', 'none')
        }, 1000)
        scrollAnimation(logContentDom, logContentDom.scrollTop, 0);
    }).fail(function(err) {
        console.log(err);
    })
})
// Jumps the NPU log view to the end of the log: drops the chunks loaded so far,
// requests the last 50 lines, records their start/end line numbers in the
// hidden inputs, then asks once more for lines after that range. The follow-up
// either shows the "reached the bottom" notice (Lines == 0) or appends the
// additional lines. The container is scrolled to the bottom meanwhile.
$('.log_bottom').click(function(e){
    const logContentDom = document.querySelector(`#log_npu`)
    const logUrl = (baseLine) => `/api/v1/repos/${userName}/${repoPath}/modelmanage/${jobID}/modelartlog?version_name=V0001&base_line=${baseLine}&lines=50&order=desc`
    // textContent keeps job output from being parsed as HTML.
    const makeChunk = (content) => {
        const pre = document.createElement('pre')
        pre.textContent = content
        return pre
    }

    $(`#log_file_npu`).siblings('pre').remove()
    $.get(logUrl(''), (data) => {
        // Track the loaded range so scrolling can continue from it.
        $(`#log_npu input[name=end_line]`).val(data.EndLine)
        $(`#log_npu input[name=start_line]`).val(data.StartLine)
        $(`#log_npu`).append(makeChunk(data.Content))

        // End line of the page just shown. The follow-up response is compared
        // with this value (previously an undefined variable was read here).
        const shownEndLine = data.EndLine
        $.get(logUrl(shownEndLine), (more) => {
            if (more.Lines == 0){
                $(`#log_npu_header`).text('您已翻阅至日志底部')
                $(`#log_npu_message`).css('display', 'block')
                setTimeout(function(){
                    $(`#log_npu_message`).css('display', 'none')
                }, 1000)
                return
            }
            // Same end line as already shown: nothing new to append.
            if(shownEndLine===more.EndLine){
                return
            }
            $(`#log_npu input[name=end_line]`).val(more.EndLine)
            $(`#log_npu`).append(makeChunk(more.Content))
        }).fail(function(err) {
            console.log(err);
        });
        scrollAnimation(logContentDom, logContentDom.scrollTop+1, logContentDom.scrollHeight - logContentDom.clientHeight);
    }).fail(function(err) {
        console.log(err);
    })
})
| </script> | |||