| @@ -107,6 +107,7 @@ const ( | |||||
| GrampusStatusSucceeded = "SUCCEEDED" | GrampusStatusSucceeded = "SUCCEEDED" | ||||
| GrampusStatusStopped = "STOPPED" | GrampusStatusStopped = "STOPPED" | ||||
| GrampusStatusUnknown = "UNKNOWN" | GrampusStatusUnknown = "UNKNOWN" | ||||
| GrampusStatusWaiting = "WAITING" | |||||
| ) | ) | ||||
| type Cloudbrain struct { | type Cloudbrain struct { | ||||
| @@ -1734,7 +1735,7 @@ func GetCloudbrainInferenceJobCountByUserID(userID int64) (int, error) { | |||||
| } | } | ||||
| func GetGrampusCountByUserID(userID int64, jobType, computeResource string) (int, error) { | func GetGrampusCountByUserID(userID int64, jobType, computeResource string) (int, error) { | ||||
| count, err := x.In("status", GrampusStatusPending, GrampusStatusRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeC2Net).And("compute_resource = ?", computeResource).Count(new(Cloudbrain)) | |||||
| count, err := x.In("status", GrampusStatusWaiting, GrampusStatusRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeC2Net).And("compute_resource = ?", computeResource).Count(new(Cloudbrain)) | |||||
| return int(count), err | return int(count), err | ||||
| } | } | ||||
| @@ -15,7 +15,7 @@ const ( | |||||
| ProcessorTypeNPU = "npu.huawei.com/NPU" | ProcessorTypeNPU = "npu.huawei.com/NPU" | ||||
| ProcessorTypeGPU = "nvidia.com/gpu" | ProcessorTypeGPU = "nvidia.com/gpu" | ||||
| CommandPrepareScript = "pwd;cd /tmp;mkdir -p output;mkdir -p code;mkdir -p dataset;wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" + | |||||
| CommandPrepareScript = "pwd;cd /cache;mkdir -p output;mkdir -p code;mkdir -p dataset;wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" + | |||||
| "unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;" | "unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;" | ||||
| //CommandPrepareScript = "bash;pwd;apt-get -y update;apt-get -y upgrade;apt-get -y install wget;apt-get -y install unzip;" + | //CommandPrepareScript = "bash;pwd;apt-get -y update;apt-get -y upgrade;apt-get -y install wget;apt-get -y install unzip;" + | ||||
| @@ -72,7 +72,9 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||||
| ResourceSpecId: req.ResourceSpecId, | ResourceSpecId: req.ResourceSpecId, | ||||
| ImageId: req.ImageId, | ImageId: req.ImageId, | ||||
| ImageUrl: req.ImageUrl, | ImageUrl: req.ImageUrl, | ||||
| ReplicaNum: 0, | |||||
| ReplicaNum: 1, | |||||
| CenterName: []string{"成都智算"}, | |||||
| CenterID: []string{"chengdu"}, | |||||
| }, | }, | ||||
| }, | }, | ||||
| }) | }) | ||||
| @@ -111,6 +113,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||||
| TotalVersionCount: req.TotalVersionCount, | TotalVersionCount: req.TotalVersionCount, | ||||
| CreatedUnix: createTime, | CreatedUnix: createTime, | ||||
| UpdatedUnix: createTime, | UpdatedUnix: createTime, | ||||
| AiCenter: jobResult.JobInfo.Tasks[0].CenterID[0] + "+" + jobResult.JobInfo.Tasks[0].CenterName[0], | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| @@ -130,8 +133,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||||
| } | } | ||||
| func TransTrainJobStatus(status string) string { | func TransTrainJobStatus(status string) string { | ||||
| if status == "pending" { | |||||
| status = "waiting" | |||||
| if status == models.GrampusStatusPending { | |||||
| status = models.GrampusStatusWaiting | |||||
| } | } | ||||
| return strings.ToUpper(status) | return strings.ToUpper(status) | ||||
| @@ -81,6 +81,7 @@ func getToken() error { | |||||
| } | } | ||||
| TOKEN = result.Token | TOKEN = result.Token | ||||
| log.Info(TOKEN) | |||||
| return nil | return nil | ||||
| } | } | ||||
| @@ -615,7 +615,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo | |||||
| if strings.HasSuffix(datasetName, ".tar.gz") { | if strings.HasSuffix(datasetName, ".tar.gz") { | ||||
| toolUnzip = "tar -zxvf " | toolUnzip = "tar -zxvf " | ||||
| } | } | ||||
| commandUnzip := "cd /tmp/dataset;" + toolUnzip + datasetName + ";cd /tmp/code;unzip -q master.zip;" | |||||
| commandUnzip := "cd /cache/dataset;" + toolUnzip + datasetName + ";cd /cache/code;unzip -q master.zip;" | |||||
| command += commandUnzip | command += commandUnzip | ||||
| //exec code | //exec code | ||||
| @@ -638,7 +638,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo | |||||
| } | } | ||||
| } | } | ||||
| commandCode := "cd /tmp/code/" + strings.ToLower(repoName) + ";python " + bootFile + paramCode + ";" | |||||
| commandCode := "cd /cache/code/" + strings.ToLower(repoName) + ";python " + bootFile + paramCode + ";" | |||||
| command += commandCode | command += commandCode | ||||
| //get exec result | //get exec result | ||||
| @@ -647,10 +647,10 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo | |||||
| //upload models | //upload models | ||||
| if processorType == grampus.ProcessorTypeNPU { | if processorType == grampus.ProcessorTypeNPU { | ||||
| commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + "/tmp/output/;" | |||||
| commandUpload := "cd /cache/script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + "/tmp/output/;" | |||||
| command += commandUpload | command += commandUpload | ||||
| } else if processorType == grampus.ProcessorTypeGPU { | } else if processorType == grampus.ProcessorTypeGPU { | ||||
| commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + "/tmp/output/;" | |||||
| commandUpload := "cd /cache/script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + "/tmp/output/;" | |||||
| command += commandUpload | command += commandUpload | ||||
| } | } | ||||