Browse Source

Merge remote-tracking branch 'origin/V20220718' into zouap

tags/v1.22.7.1
zouap 3 years ago
parent
commit
036214e88d
10 changed files with 241 additions and 69 deletions
  1. +6
    -5
      models/cloudbrain.go
  2. +64
    -2
      modules/cloudbrain/cloudbrain.go
  3. +7
    -0
      modules/setting/setting.go
  4. +13
    -0
      modules/templates/helper.go
  5. +20
    -4
      routers/api/v1/repo/cloudbrain_dashboard.go
  6. +27
    -15
      routers/api/v1/repo/modelarts.go
  7. +97
    -5
      routers/repo/cloudbrain.go
  8. +6
    -1
      templates/repo/grampus/trainjob/show.tmpl
  9. +1
    -1
      templates/repo/modelarts/trainjob/show.tmpl
  10. +0
    -36
      templates/repo/modelarts/trainjob/version_new.tmpl

+ 6
- 5
models/cloudbrain.go View File

@@ -570,11 +570,12 @@ type SpecialPools struct {
Pools []*SpecialPool `json:"pools"` Pools []*SpecialPool `json:"pools"`
} }
type SpecialPool struct { type SpecialPool struct {
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
} }


type ImageInfosModelArts struct { type ImageInfosModelArts struct {


+ 64
- 2
modules/cloudbrain/cloudbrain.go View File

@@ -17,7 +17,7 @@ import (
) )


const ( const (
Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` //CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
CodeMountPath = "/code" CodeMountPath = "/code"
@@ -42,6 +42,7 @@ const (
var ( var (
ResourceSpecs *models.ResourceSpecs ResourceSpecs *models.ResourceSpecs
TrainResourceSpecs *models.ResourceSpecs TrainResourceSpecs *models.ResourceSpecs
SpecialPools *models.SpecialPools
) )


type GenerateCloudBrainTaskReq struct { type GenerateCloudBrainTaskReq struct {
@@ -70,6 +71,11 @@ type GenerateCloudBrainTaskReq struct {
ResourceSpecId int ResourceSpecId int
} }


// GetCloudbrainDebugCommand assembles the shell command line for a debug task.
// The command installs jupyterlab 3 with pip, stops the ssh service and then
// launches jupyter-lab. setting.CullIdleTimeout and setting.CullInterval are
// spliced into the idle-culling flags of that launch.
func GetCloudbrainDebugCommand() string {
	idleTimeout := setting.CullIdleTimeout
	cullInterval := setting.CullInterval

	// The pieces below concatenate to exactly one command line; every piece
	// after the first starts with the separating space.
	return `pip3 install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;/usr/local/bin/python /usr/local/bin/jupyter-lab` +
		` --ServerApp.shutdown_no_activity_timeout=` + idleTimeout +
		` --TerminalManager.cull_inactive_timeout=` + idleTimeout +
		` --TerminalManager.cull_interval=` + cullInterval +
		` --MappingKernelManager.cull_idle_timeout=` + idleTimeout +
		` --MappingKernelManager.cull_interval=` + cullInterval +
		` --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True` +
		` --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80` +
		` --ServerApp.token="" --ServerApp.allow_origin="self https://cloudbrain.pcl.ac.cn" `
}

func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
if !ctx.IsSigned { if !ctx.IsSigned {
return false return false
@@ -222,6 +228,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
for _, spec := range TrainResourceSpecs.ResourceSpec { for _, spec := range TrainResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id { if req.ResourceSpecId == spec.Id {
resourceSpec = spec resourceSpec = spec
break
} }
} }
} else { } else {
@@ -231,10 +238,29 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
for _, spec := range ResourceSpecs.ResourceSpec { for _, spec := range ResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id { if req.ResourceSpecId == spec.Id {
resourceSpec = spec resourceSpec = spec
break
} }
} }


} }
//如果没有匹配到spec信息,尝试从专属资源池获取
if resourceSpec == nil && SpecialPools != nil {
for _, specialPool := range SpecialPools.Pools {
if resourceSpec != nil {
break
}
if specialPool.ResourceSpec != nil {
if IsElementExist(specialPool.JobType, req.JobType) && IsQueueInSpecialtPool(specialPool.Pool, req.GpuQueue) {
for _, spec := range specialPool.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}
}
}
}
}


if resourceSpec == nil { if resourceSpec == nil {
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"])
@@ -486,7 +512,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e
GPUNumber: resourceSpec.GpuNum, GPUNumber: resourceSpec.GpuNum,
MemoryMB: resourceSpec.MemMiB, MemoryMB: resourceSpec.MemMiB,
ShmMB: resourceSpec.ShareMemMiB, ShmMB: resourceSpec.ShareMemMiB,
Command: Command,
Command: GetCloudbrainDebugCommand(),//Command,
NeedIBDevice: false, NeedIBDevice: false,
IsMainRole: false, IsMainRole: false,
UseNNI: false, UseNNI: false,
@@ -538,3 +564,39 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e


return nil return nil
} }

// InitSpecialPool lazily decodes the JSON text held in setting.SpecialPools
// into the package-level SpecialPools variable. It is a no-op when the pools
// are already loaded or the setting is empty.
//
// If the setting cannot be decoded, the error is logged and SpecialPools is
// reset to nil, so callers never observe a partially decoded configuration.
func InitSpecialPool() {
	if SpecialPools != nil || setting.SpecialPools == "" {
		return
	}
	if err := json.Unmarshal([]byte(setting.SpecialPools), &SpecialPools); err != nil {
		log.Error("unmarshal special pools setting failed:%v", err)
		// Unmarshal may have filled in some fields before failing; discard them.
		SpecialPools = nil
	}
}

// IsResourceSpecInSpecialPool reports whether resourceSpecId is acceptable for
// the given list of resource specs. A nil or empty list always yields true;
// otherwise the id must equal the Id of at least one entry.
func IsResourceSpecInSpecialPool(resourceSpecs []*models.ResourceSpec, resourceSpecId int) bool {
	// len of a nil slice is 0, so this covers both the nil and the empty case.
	if len(resourceSpecs) == 0 {
		return true
	}
	for i := range resourceSpecs {
		if resourceSpecs[i].Id == resourceSpecId {
			return true
		}
	}
	return false
}

// IsQueueInSpecialtPool reports whether any GpuInfo entry in pool has a Queue
// equal to queue. A nil or empty pool yields false.
func IsQueueInSpecialtPool(pool []*models.GpuInfo, queue string) bool {
	for i := range pool {
		if pool[i].Queue == queue {
			return true
		}
	}
	return false
}

// IsElementExist reports whether str occurs in s, using exact string
// comparison. A nil or empty slice yields false.
func IsElementExist(s []string, str string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == str {
			return true
		}
	}
	return false
}

+ 7
- 0
modules/setting/setting.go View File

@@ -460,12 +460,15 @@ var (
CBCodePathPrefix string CBCodePathPrefix string
JobType string JobType string
GpuTypes string GpuTypes string
SpecialPools string
DebugServerHost string DebugServerHost string
ResourceSpecs string ResourceSpecs string
MaxDuration int64 MaxDuration int64
TrainGpuTypes string TrainGpuTypes string
TrainResourceSpecs string TrainResourceSpecs string
MaxDatasetNum int MaxDatasetNum int
CullIdleTimeout string
CullInterval string


//benchmark config //benchmark config
IsBenchmarkEnabled bool IsBenchmarkEnabled bool
@@ -1311,7 +1314,11 @@ func NewContext() {
MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400)
TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("") TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("")
TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("") TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("")
SpecialPools = sec.Key("SPECIAL_POOL").MustString("")
MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5) MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5)
CullIdleTimeout = sec.Key("CULL_IDLE_TIMEOUT").MustString("900")
CullInterval = sec.Key("CULL_INTERVAL").MustString("60")


sec = Cfg.Section("benchmark") sec = Cfg.Section("benchmark")
IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false)


+ 13
- 0
modules/templates/helper.go View File

@@ -18,6 +18,7 @@ import (
"path/filepath" "path/filepath"
"regexp" "regexp"
"runtime" "runtime"
"strconv"
"strings" "strings"
texttmpl "text/template" texttmpl "text/template"
"time" "time"
@@ -327,6 +328,7 @@ func NewFuncMap() []template.FuncMap {
}, },
"GetRefType": GetRefType, "GetRefType": GetRefType,
"GetRefName": GetRefName, "GetRefName": GetRefName,
"MB2GB": MB2GB,
}} }}
} }


@@ -785,3 +787,14 @@ func GetRefName(ref string) string {
reg := regexp.MustCompile(REF_TYPE_PATTERN) reg := regexp.MustCompile(REF_TYPE_PATTERN)
return reg.ReplaceAllString(ref, "") return reg.ReplaceAllString(ref, "")
} }

// MB2GB divides size by 1024 and renders the quotient as a decimal string
// with at most two fractional digits. Trailing zeros, and a decimal point left
// bare by removing them, are stripped (e.g. 1024 -> "1", 1536 -> "1.5").
func MB2GB(size int64) string {
	quotient := float64(size) / 1024
	text := strconv.FormatFloat(quotient, 'f', 2, 64)
	// The 'f' format with precision 2 always contains a decimal point, so
	// trimming zeros on the right can never eat into the integer part.
	text = strings.TrimRight(text, "0")
	return strings.TrimSuffix(text, ".")
}

+ 20
- 4
routers/api/v1/repo/cloudbrain_dashboard.go View File

@@ -752,10 +752,26 @@ func GetCloudbrainsDetailData(ctx *context.Context) {
taskDetail.RepoAlias = ciTasks[i].Repo.OwnerName + "/" + ciTasks[i].Repo.Alias taskDetail.RepoAlias = ciTasks[i].Repo.OwnerName + "/" + ciTasks[i].Repo.Alias
} }
if ciTasks[i].Cloudbrain.Status == string(models.JobWaiting) { if ciTasks[i].Cloudbrain.Status == string(models.JobWaiting) {
WaitTimeInt := time.Now().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix()
taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt)
if WaitTimeInt < 0 {
taskDetail.WaitTime = "00:00:00"
if ciTasks[i].Cloudbrain.DeletedAt != nilTime {
WaitTimeInt := ciTasks[i].Cloudbrain.UpdatedUnix.AsTime().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix()
taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt)
if WaitTimeInt < 0 {
taskDetail.WaitTime = "00:00:00"
}
} else {
if ciTasks[i].Cloudbrain.StartTime.AsTime().Unix() == 0 {
WaitTimeInt := time.Now().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix()
taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt)
if WaitTimeInt < 0 {
taskDetail.WaitTime = "00:00:00"
}
} else {
WaitTimeInt := ciTasks[i].Cloudbrain.StartTime.AsTime().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix()
taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt)
if WaitTimeInt < 0 {
taskDetail.WaitTime = "00:00:00"
}
}
} }
} else if ciTasks[i].Cloudbrain.Status == string(models.JobStopped) && ciTasks[i].Cloudbrain.StartTime.AsTime().Unix() == 0 { } else if ciTasks[i].Cloudbrain.Status == string(models.JobStopped) && ciTasks[i].Cloudbrain.StartTime.AsTime().Unix() == 0 {
WaitTimeInt := ciTasks[i].Cloudbrain.EndTime.AsTime().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix() WaitTimeInt := ciTasks[i].Cloudbrain.EndTime.AsTime().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix()


+ 27
- 15
routers/api/v1/repo/modelarts.go View File

@@ -7,8 +7,10 @@ package repo


import ( import (
"code.gitea.io/gitea/modules/grampus" "code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/setting"
"encoding/json" "encoding/json"
"net/http" "net/http"
"path"
"strconv" "strconv"
"strings" "strings"


@@ -263,39 +265,49 @@ func TrainJobGetLog(ctx *context.APIContext) {
return return
} }


resultLogFile, result, err := trainJobGetLogContent(jobID, versionName, baseLine, order, lines_int)
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
if err != nil {
log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
return
}
resultLogFile, result, err := trainJobGetLogContent(jobID, task.VersionID, baseLine, order, lines_int)
if err != nil { if err != nil {
log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error()) log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
// ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
return return
} }


prefix := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, modelarts.LogPath, versionName), "/") + "/job"
_, err = storage.GetObsLogFileName(prefix)
var canLogDownload bool
if err != nil {
canLogDownload = false
} else {
canLogDownload = true
}

ctx.Data["log_file_name"] = resultLogFile.LogFileList[0] ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]


ctx.JSON(http.StatusOK, map[string]interface{}{ ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,
"LogFileName": resultLogFile.LogFileList[0],
"StartLine": result.StartLine,
"EndLine": result.EndLine,
"Content": result.Content,
"Lines": result.Lines,
"JobID": jobID,
"LogFileName": resultLogFile.LogFileList[0],
"StartLine": result.StartLine,
"EndLine": result.EndLine,
"Content": result.Content,
"Lines": result.Lines,
"CanLogDownload": canLogDownload,
}) })
} }


func trainJobGetLogContent(jobID string, versionName string, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
if err != nil {
log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
return nil, nil, err
}
func trainJobGetLogContent(jobID string, versionID int64, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {


resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(versionID, 10))
if err != nil { if err != nil {
log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error()) log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
return nil, nil, err return nil, nil, err
} }


result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, resultLogFile.LogFileList[0], order, lines)
result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(versionID, 10), baseLine, resultLogFile.LogFileList[0], order, lines)
if err != nil { if err != nil {
log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error()) log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
return nil, nil, err return nil, nil, err


+ 97
- 5
routers/repo/cloudbrain.go View File

@@ -2,7 +2,6 @@ package repo


import ( import (
"bufio" "bufio"
"code.gitea.io/gitea/modules/grampus"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
@@ -16,6 +15,8 @@ import (
"time" "time"
"unicode/utf8" "unicode/utf8"


"code.gitea.io/gitea/modules/grampus"

"code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/timeutil"
"github.com/unknwon/i18n" "github.com/unknwon/i18n"


@@ -135,7 +136,7 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
} }


ctx.Data["attachments"] = attachs ctx.Data["attachments"] = attachs
ctx.Data["command"] = cloudbrain.Command
ctx.Data["command"] = cloudbrain.GetCloudbrainDebugCommand()
ctx.Data["code_path"] = cloudbrain.CodeMountPath ctx.Data["code_path"] = cloudbrain.CodeMountPath
ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath
ctx.Data["model_path"] = cloudbrain.ModelMountPath ctx.Data["model_path"] = cloudbrain.ModelMountPath
@@ -149,6 +150,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {


ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType


cloudbrain.InitSpecialPool()

if gpuInfos == nil { if gpuInfos == nil {
json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos)
} }
@@ -178,6 +181,45 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
} }
ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec

if cloudbrain.SpecialPools != nil {
var debugGpuTypes []*models.GpuInfo
var trainGpuTypes []*models.GpuInfo

for _, pool := range cloudbrain.SpecialPools.Pools {
org, _ := models.GetOrgByName(pool.Org)
if org != nil {
isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID)
if isOrgMember {
for _, jobType := range pool.JobType {
if jobType == string(models.JobTypeDebug) {
debugGpuTypes = append(debugGpuTypes, pool.Pool...)
if pool.ResourceSpec != nil {
ctx.Data["resource_specs"] = pool.ResourceSpec
}
} else if jobType == string(models.JobTypeTrain) {
trainGpuTypes = append(trainGpuTypes, pool.Pool...)
if pool.ResourceSpec != nil {
ctx.Data["train_resource_specs"] = pool.ResourceSpec
}
}
}
break
}
}

}

if len(debugGpuTypes) > 0 {
ctx.Data["gpu_types"] = debugGpuTypes
}

if len(trainGpuTypes) > 0 {
ctx.Data["train_gpu_types"] = trainGpuTypes
}

}

ctx.Data["params"] = "" ctx.Data["params"] = ""
ctx.Data["branchName"] = ctx.Repo.BranchName ctx.Data["branchName"] = ctx.Repo.BranchName


@@ -217,6 +259,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
repo := ctx.Repo.Repository repo := ctx.Repo.Repository
tpl := tplCloudBrainNew tpl := tplCloudBrainNew


if jobType == string(models.JobTypeTrain) {
tpl = tplCloudBrainTrainJobNew
}

tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
if err == nil { if err == nil {
if len(tasks) != 0 { if len(tasks) != 0 {
@@ -269,7 +315,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
return return
} }


command := cloudbrain.Command
command := cloudbrain.GetCloudbrainDebugCommand()
if jobType == string(models.JobTypeTrain) { if jobType == string(models.JobTypeTrain) {
tpl = tplCloudBrainTrainJobNew tpl = tplCloudBrainTrainJobNew
commandTrain, err := getTrainJobCommand(form) commandTrain, err := getTrainJobCommand(form)
@@ -282,6 +328,14 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
command = commandTrain command = commandTrain
} }


errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId)

if errStr != "" {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(errStr, tpl, &form)
return
}

if branchName == "" { if branchName == "" {
branchName = cloudbrain.DefaultBranchName branchName = cloudbrain.DefaultBranchName
} }
@@ -334,6 +388,42 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
} }
} }


// checkCloudBrainSpecialPool checks whether the submitted job parameters are
// allowed with respect to the configured special (exclusive) resource pools.
//
// It returns an empty string when the check passes, and the translated
// "repo.grampus.no_operate_right" message when the parameters match at least
// one special pool but the current user is a member of none of the matching
// pools' organizations.
func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string {
	// No special pools configured: nothing to check.
	if cloudbrain.SpecialPools == nil {
		return ""
	}

	matchedAnyPool := false
	for _, pool := range cloudbrain.SpecialPools.Pools {
		if !cloudbrain.IsElementExist(pool.JobType, jobType) || !cloudbrain.IsQueueInSpecialtPool(pool.Pool, queue) {
			continue
		}
		if !cloudbrain.IsResourceSpecInSpecialPool(pool.ResourceSpec, resourceSpecId) {
			continue
		}
		matchedAnyPool = true

		// Lookup errors are ignored; a pool whose organization cannot be
		// loaded simply does not grant access.
		org, _ := models.GetOrgByName(pool.Org)
		if org == nil {
			continue
		}
		if isMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID); isMember {
			// The parameters match a special pool the user belongs to: check passes.
			return ""
		}
	}

	// A pool matched but the user is not in the corresponding organization.
	// The UI already filters the selectable options, so normal UI operation
	// should not reach this branch.
	if matchedAnyPool {
		return ctx.Tr("repo.grampus.no_operate_right")
	}

	// No special pool matched the parameters: check passes.
	return ""
}

func CloudBrainRestart(ctx *context.Context) { func CloudBrainRestart(ctx *context.Context) {
var ID = ctx.Params(":id") var ID = ctx.Params(":id")
var resultCode = "0" var resultCode = "0"
@@ -573,7 +663,9 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo
if task.TrainJobDuration == "" { if task.TrainJobDuration == "" {
if task.Duration == 0 { if task.Duration == 0 {
var duration int64 var duration int64
if task.Status == string(models.JobRunning) {
if task.Status == string(models.JobWaiting) {
duration = 0
} else if task.Status == string(models.JobRunning) {
duration = time.Now().Unix() - int64(task.CreatedUnix) duration = time.Now().Unix() - int64(task.CreatedUnix)
} else { } else {
duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix)
@@ -2094,7 +2186,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
repo := ctx.Repo.Repository repo := ctx.Repo.Repository


tpl := tplCloudBrainBenchmarkNew tpl := tplCloudBrainBenchmarkNew
command := cloudbrain.Command
command := cloudbrain.GetCloudbrainDebugCommand()


tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
if err == nil { if err == nil {


+ 6
- 1
templates/repo/grampus/trainjob/show.tmpl View File

@@ -480,8 +480,13 @@


</div> </div>
<div id="dir_list{{.VersionName}}"> <div id="dir_list{{.VersionName}}">

</div> </div>
{{if eq .ComputeResource "CPU/GPU"}}
<div style="display:flex;align-items: center;justify-content: end;color: #f2711c;">
<i class="ri-error-warning-line" style="margin-right:0.5rem;"></i>
<span>{{$.i18n.Tr "repo.file_limit_100"}}</span>
</div>
{{end}}
</div> </div>


</div> </div>


+ 1
- 1
templates/repo/modelarts/trainjob/show.tmpl View File

@@ -488,7 +488,7 @@
<div class="ui tab" data-tab="second{{$k}}"> <div class="ui tab" data-tab="second{{$k}}">
<div> <div>
<a id="{{.VersionName}}-log-down" <a id="{{.VersionName}}-log-down"
class='{{if and (.CanModify) (eq .Status "KILLED" "FAILED" "START_FAILED" "STOPPED" "COMPLETED") }}ti-download-file{{else}}disabled{{end}}'
class='{{if and ($.CanLogDownload) (eq .Status "KILLED" "FAILED" "START_FAILED" "STOPPED" "COMPLETED") }}ti-download-file{{else}}disabled{{end}}'
href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_log_file?version_name={{.VersionName}}"> href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_log_file?version_name={{.VersionName}}">
<i class="ri-download-cloud-2-line"></i> <i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span> <span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>


+ 0
- 36
templates/repo/modelarts/trainjob/version_new.tmpl View File

@@ -446,24 +446,6 @@
] ]


}, },
work_server_number: {
identifier : 'work_server_number',
rules: [
{
type : 'integer[1..25]',
prompt : '计算节点需要在1-25之间,请您键入正确的值'
}
]
},
run_para_list:{
identifier : 'run_para_list',
rules: [
{
type: 'maxLength[255]',
prompt : '所有字符最长不超过255个字符。'
}
]
},
}, },
}) })


@@ -512,24 +494,6 @@
] ]


}, },
work_server_number: {
identifier : 'work_server_number',
rules: [
{
type : 'integer[1..25]',
prompt : '计算节点需要在1-25之间,请您键入正确的值'
}
]
},
run_para_list:{
identifier : 'run_para_list',
rules: [
{
type: 'maxLength[255]',
prompt : '所有字符最长不超过255个字符。'
}
]
},
}, },
onSuccess: function(){ onSuccess: function(){
// $('.ui.page.dimmer').dimmer('show') // $('.ui.page.dimmer').dimmer('show')


Loading…
Cancel
Save