|
- package grampus
-
- import (
- "code.gitea.io/gitea/models"
- "code.gitea.io/gitea/modules/context"
- "code.gitea.io/gitea/modules/log"
- "code.gitea.io/gitea/modules/notification"
- "code.gitea.io/gitea/modules/timeutil"
- "strings"
- )
-
// storageTypeOBS is the storage type identifier "obs".
const storageTypeOBS = "obs"

// Path fragments. WorkPath is an absolute directory; the remaining values are
// slash-delimited segments.
// NOTE(review): presumably these are joined onto a job-specific prefix —
// confirm against the callers, which are outside this section.
const (
	WorkPath    = "/home/ma-user/work"
	CodePath    = "/code/"
	DatasetPath = "/dataset"
	OutputPath  = "/output/"
	ResultPath  = "/result/"
	LogPath     = "/log/"
	JobPath     = "/job/"
)

// Query ordering and sizing values.
const (
	OrderDesc = "desc" // query downward
	OrderAsc  = "asc"  // query upward
	Lines     = 500
	PerPage   = 10

	SortByCreateTime = "create_time"
)

// String labels such as "train_url" and "device_target".
// NOTE(review): these look like parameter names handed to a training job —
// verify where they are consumed.
const (
	TrainUrl     = "train_url"
	DataUrl      = "data_url"
	ResultUrl    = "result_url"
	CkptUrl      = "ckpt_url"
	DeviceTarget = "device_target"
	Ascend       = "Ascend"

	ConfigTypeCustom = "custom"
)

// Version bookkeeping values.
const (
	IsLatestVersion   = "1"
	NotLatestVersion  = "0"
	VersionCount      = 1
	TotalVersionCount = 1
)

// Processor type identifiers for NPU and GPU resources.
const (
	ProcessorTypeNPU = "npu.huawei.com/NPU"
	ProcessorTypeGPU = "nvidia.com/gpu"
)

// Shell bootstrap used with the helper scripts.
const (
	// CommandPrepareScript is a shell snippet that prints the working
	// directory, downloads the script_for_grampus archive into /tmp, unpacks
	// it and changes into the extracted directory. It ends with a trailing
	// ';' so further commands can be appended directly.
	CommandPrepareScript = "pwd;cd /tmp;wget https://git.openi.org.cn/lewis/script_for_grampus/archive/master.zip;unzip master.zip;cd script_for_grampus;"
	// ScriptSyncObsCodeAndDataset is the file name of a Python script.
	// NOTE(review): presumably shipped inside the archive fetched by
	// CommandPrepareScript — confirm.
	ScriptSyncObsCodeAndDataset = "sync_obs_code_and_dataset.py"
)
-
- var (
- poolInfos *models.PoolInfos
- FlavorInfos *models.FlavorInfos
- ImageInfos *models.ImageInfosModelArts
- )
-
// GenerateTrainJobReq carries everything GenerateTrainJob needs to submit a
// training job and to persist the matching Cloudbrain record.
//
// Field order is significant for unkeyed composite literals and must not be
// changed.
type GenerateTrainJobReq struct {
	// JobName is used as both the job name and the name of its single task.
	// Command is the task command. ResourceSpecId is sent with the task and
	// is also stored as the record's FlavorCode.
	JobName, Command, ResourceSpecId string
	// ImageUrl and ImageId are alternatives: supply one of them. When both
	// are set, ImageUrl takes precedence.
	ImageUrl, ImageId string

	// The remaining fields are copied onto the stored record, except where
	// noted as not read by GenerateTrainJob.
	DisplayJobName, Uuid, Description string
	// CodeObsPath is not read by GenerateTrainJob.
	CodeObsPath string
	// BootFile is stored on the record; BootFileUrl is not read by
	// GenerateTrainJob.
	BootFile, BootFileUrl string
	DataUrl, TrainUrl     string
	WorkServerNumber      int
	// EngineID is not read by GenerateTrainJob.
	EngineID                               int64
	CommitID, IsLatestVersion, BranchName  string
	// PreVersionId and PreVersionName are not read by GenerateTrainJob.
	PreVersionId   int64
	PreVersionName string
	FlavorName     string
	VersionCount   int
	EngineName     string
	// TotalVersionCount is stored on the record alongside VersionCount.
	TotalVersionCount int
	// ComputeResource also selects which action type is used for the
	// notification. Params is stored as the record's Parameters.
	ComputeResource, DatasetName, Params string
}
-
- func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
- createTime := timeutil.TimeStampNow()
- jobResult, err := createJob(models.CreateGrampusJobRequest{
- Name: req.JobName,
- Tasks: []models.GrampusTasks{
- {
- Name: req.JobName,
- Command: req.Command,
- ResourceSpecId: req.ResourceSpecId,
- ImageId: req.ImageId,
- ImageUrl: req.ImageUrl,
- },
- },
- })
- if err != nil {
- log.Error("createJob failed: %v", err.Error())
- return err
- }
-
- jobID := jobResult.JobInfo.JobID
- err = models.CreateCloudbrain(&models.Cloudbrain{
- Status: TransTrainJobStatus(jobResult.JobInfo.Status),
- UserID: ctx.User.ID,
- RepoID: ctx.Repo.Repository.ID,
- JobID: jobID,
- JobName: req.JobName,
- DisplayJobName: req.DisplayJobName,
- JobType: string(models.JobTypeTrain),
- Type: models.TypeCloudBrainGrampus,
- Uuid: req.Uuid,
- DatasetName: req.DatasetName,
- CommitID: req.CommitID,
- IsLatestVersion: req.IsLatestVersion,
- ComputeResource: req.ComputeResource,
- ImageID: req.ImageId,
- TrainUrl: req.TrainUrl,
- BranchName: req.BranchName,
- Parameters: req.Params,
- BootFile: req.BootFile,
- DataUrl: req.DataUrl,
- FlavorCode: req.ResourceSpecId,
- Description: req.Description,
- WorkServerNumber: req.WorkServerNumber,
- FlavorName: req.FlavorName,
- EngineName: req.EngineName,
- VersionCount: req.VersionCount,
- TotalVersionCount: req.TotalVersionCount,
- CreatedUnix: createTime,
- UpdatedUnix: createTime,
- })
-
- if err != nil {
- log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
- return err
- }
-
- var actionType models.ActionType
- if req.ComputeResource == models.NPUResource {
- actionType = models.ActionCreateTrainTask
- } else if req.ComputeResource == models.GPUResource {
- actionType = models.ActionCreateGPUTrainTask
- }
- notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType)
-
- return nil
- }
-
// TransTrainJobStatus converts a job status string into the upper-case form
// stored on the Cloudbrain record.
//
// The exact, lower-case value "pending" is reported as "WAITING"; every other
// input is returned upper-cased via strings.ToUpper. The "pending" comparison
// is case-sensitive, so "PENDING" or "Pending" yield "PENDING".
func TransTrainJobStatus(status string) string {
	if status == "pending" {
		return "WAITING"
	}
	return strings.ToUpper(status)
}
|