package grampus import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/timeutil" "strings" ) const ( storageTypeOBS = "obs" WorkPath = "/home/ma-user/work" CodePath = "/code/" DatasetPath = "/dataset" OutputPath = "/output/" ResultPath = "/result/" LogPath = "/log/" JobPath = "/job/" OrderDesc = "desc" //向下查询 OrderAsc = "asc" //向上查询 Lines = 500 TrainUrl = "train_url" DataUrl = "data_url" ResultUrl = "result_url" CkptUrl = "ckpt_url" DeviceTarget = "device_target" Ascend = "Ascend" PerPage = 10 IsLatestVersion = "1" NotLatestVersion = "0" VersionCount = 1 SortByCreateTime = "create_time" ConfigTypeCustom = "custom" TotalVersionCount = 1 ProcessorTypeNPU = "npu.huawei.com/NPU" ProcessorTypeGPU = "nvidia.com/gpu" CommandPrepareScript = "pwd;cd /tmp;wget https://git.openi.org.cn/lewis/script_for_grampus/archive/master.zip;unzip master.zip;cd script_for_grampus;" ScriptSyncObsCodeAndDataset = "sync_obs_code_and_dataset.py" ) var ( poolInfos *models.PoolInfos FlavorInfos *models.FlavorInfos ImageInfos *models.ImageInfosModelArts ) type GenerateTrainJobReq struct { JobName string Command string ResourceSpecId string ImageUrl string //与image_id二选一,都有的情况下优先image_url ImageId string DisplayJobName string Uuid string Description string CodeObsPath string BootFile string BootFileUrl string DataUrl string TrainUrl string WorkServerNumber int EngineID int64 CommitID string IsLatestVersion string BranchName string PreVersionId int64 PreVersionName string FlavorName string VersionCount int EngineName string TotalVersionCount int ComputeResource string DatasetName string Params string } func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { createTime := timeutil.TimeStampNow() jobResult, err := createJob(models.CreateGrampusJobRequest{ Name: req.JobName, Tasks: []models.GrampusTasks{ { Name: req.JobName, Command: req.Command, ResourceSpecId: req.ResourceSpecId, ImageId: req.ImageId, ImageUrl: req.ImageUrl, }, }, }) if err != nil { log.Error("createJob failed: %v", err.Error()) return err } jobID := jobResult.JobInfo.JobID err = models.CreateCloudbrain(&models.Cloudbrain{ Status: TransTrainJobStatus(jobResult.JobInfo.Status), UserID: ctx.User.ID, RepoID: ctx.Repo.Repository.ID, JobID: jobID, JobName: req.JobName, DisplayJobName: req.DisplayJobName, JobType: string(models.JobTypeTrain), Type: models.TypeCloudBrainGrampus, Uuid: req.Uuid, DatasetName: req.DatasetName, CommitID: req.CommitID, IsLatestVersion: req.IsLatestVersion, ComputeResource: req.ComputeResource, ImageID: req.ImageId, TrainUrl: req.TrainUrl, BranchName: req.BranchName, Parameters: req.Params, BootFile: req.BootFile, DataUrl: req.DataUrl, FlavorCode: req.ResourceSpecId, Description: req.Description, WorkServerNumber: req.WorkServerNumber, FlavorName: req.FlavorName, EngineName: req.EngineName, VersionCount: req.VersionCount, TotalVersionCount: req.TotalVersionCount, CreatedUnix: createTime, UpdatedUnix: createTime, }) if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error()) return err } var actionType models.ActionType if req.ComputeResource == models.NPUResource { actionType = models.ActionCreateTrainTask } else if req.ComputeResource == models.GPUResource { actionType = models.ActionCreateGPUTrainTask } notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType) return nil } func TransTrainJobStatus(status string) string { if status == "pending" { status = "waiting" } return strings.ToUpper(status) }