You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

modelarts.go 18 kB

4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. package modelarts
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "path"
  7. "strconv"
  8. "code.gitea.io/gitea/models"
  9. "code.gitea.io/gitea/modules/context"
  10. "code.gitea.io/gitea/modules/log"
  11. "code.gitea.io/gitea/modules/notification"
  12. "code.gitea.io/gitea/modules/setting"
  13. "code.gitea.io/gitea/modules/storage"
  14. )
  15. const (
  16. //notebook
  17. storageTypeOBS = "obs"
  18. autoStopDuration = 4 * 60 * 60
  19. autoStopDurationMs = 4 * 60 * 60 * 1000
  20. DataSetMountPath = "/home/ma-user/work"
  21. NotebookEnv = "Python3"
  22. NotebookType = "Ascend"
  23. FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"
  24. //train-job
  25. // ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
  26. // Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}"
  27. // EngineVersions = "{\"version\":[{\"id\":118,\"value\":\"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64\"}," +
  28. // "{\"id\":119,\"value\":\"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64\"}," +
  29. // "{\"id\":120,\"value\":\"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64\"}," +
  30. // "{\"id\":117,\"value\":\"TF-1.15-c75-python3.7-euleros2.8-aarch64\"}" +
  31. // "]}"
  32. // TrainJobFlavorInfo = "{\"flavor\":[{\"code\":\"modelarts.bm.910.arm.public.2\",\"value\":\"Ascend : 2 * Ascend 910 CPU:48 核 512GiB\"}," +
  33. // "{\"code\":\"modelarts.bm.910.arm.public.8\",\"value\":\"Ascend : 8 * Ascend 910 CPU:192 核 2048GiB\"}," +
  34. // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," +
  35. // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" +
  36. // "]}"
  37. CodePath = "/code/"
  38. OutputPath = "/output/"
  39. ResultPath = "/result/"
  40. LogPath = "/log/"
  41. JobPath = "/job/"
  42. OrderDesc = "desc" //向下查询
  43. OrderAsc = "asc" //向上查询
  44. Lines = 500
  45. TrainUrl = "train_url"
  46. DataUrl = "data_url"
  47. ResultUrl = "result_url"
  48. CkptUrl = "ckpt_url"
  49. PerPage = 10
  50. IsLatestVersion = "1"
  51. NotLatestVersion = "0"
  52. DebugType = -1
  53. VersionCount = 1
  54. SortByCreateTime = "create_time"
  55. ConfigTypeCustom = "custom"
  56. TotalVersionCount = 1
  57. )
  58. var (
  59. poolInfos *models.PoolInfos
  60. FlavorInfos *models.FlavorInfos
  61. ImageInfos *models.ImageInfosModelArts
  62. )
  63. type GenerateTrainJobReq struct {
  64. JobName string
  65. Uuid string
  66. Description string
  67. CodeObsPath string
  68. BootFile string
  69. BootFileUrl string
  70. DataUrl string
  71. TrainUrl string
  72. FlavorCode string
  73. LogUrl string
  74. PoolID string
  75. WorkServerNumber int
  76. EngineID int64
  77. Parameters []models.Parameter
  78. CommitID string
  79. IsLatestVersion string
  80. Params string
  81. BranchName string
  82. PreVersionId int64
  83. PreVersionName string
  84. FlavorName string
  85. VersionCount int
  86. EngineName string
  87. TotalVersionCount int
  88. }
  89. type GenerateTrainJobVersionReq struct {
  90. JobName string
  91. Uuid string
  92. Description string
  93. CodeObsPath string
  94. BootFile string
  95. BootFileUrl string
  96. DataUrl string
  97. TrainUrl string
  98. FlavorCode string
  99. LogUrl string
  100. PoolID string
  101. WorkServerNumber int
  102. EngineID int64
  103. Parameters []models.Parameter
  104. Params string
  105. PreVersionId int64
  106. CommitID string
  107. BranchName string
  108. FlavorName string
  109. EngineName string
  110. PreVersionName string
  111. TotalVersionCount int
  112. }
  113. type GenerateInferenceJobReq struct {
  114. JobName string
  115. Uuid string
  116. Description string
  117. CodeObsPath string
  118. BootFile string
  119. BootFileUrl string
  120. DataUrl string
  121. TrainUrl string
  122. FlavorCode string
  123. LogUrl string
  124. PoolID string
  125. WorkServerNumber int
  126. EngineID int64
  127. Parameters []models.Parameter
  128. CommitID string
  129. Params string
  130. BranchName string
  131. FlavorName string
  132. EngineName string
  133. LabelName string
  134. IsLatestVersion string
  135. VersionCount int
  136. TotalVersionCount int
  137. ModelName string
  138. ModelVersion string
  139. CkptName string
  140. ResultUrl string
  141. }
  142. type VersionInfo struct {
  143. Version []struct {
  144. ID int `json:"id"`
  145. Value string `json:"value"`
  146. } `json:"version"`
  147. }
  148. type Flavor struct {
  149. Info []struct {
  150. Code string `json:"code"`
  151. Value string `json:"value"`
  152. } `json:"flavor"`
  153. }
  154. type Engine struct {
  155. Info []struct {
  156. ID int `json:"id"`
  157. Value string `json:"value"`
  158. } `json:"engine"`
  159. }
  160. type ResourcePool struct {
  161. Info []struct {
  162. ID string `json:"id"`
  163. Value string `json:"value"`
  164. } `json:"resource_pool"`
  165. }
  166. // type Parameter struct {
  167. // Label string `json:"label"`
  168. // Value string `json:"value"`
  169. // }
  170. // type Parameters struct {
  171. // Parameter []Parameter `json:"parameter"`
  172. // }
  173. type Parameters struct {
  174. Parameter []struct {
  175. Label string `json:"label"`
  176. Value string `json:"value"`
  177. } `json:"parameter"`
  178. }
  179. func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor string) error {
  180. var dataActualPath string
  181. if uuid != "" {
  182. dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  183. } else {
  184. userPath := setting.UserBasePath + ctx.User.Name + "/"
  185. isExist, err := storage.ObsHasObject(userPath)
  186. if err != nil {
  187. log.Error("ObsHasObject failed:%v", err.Error(), ctx.Data["MsgID"])
  188. return err
  189. }
  190. if !isExist {
  191. if err = storage.ObsCreateObject(userPath); err != nil {
  192. log.Error("ObsCreateObject failed:%v", err.Error(), ctx.Data["MsgID"])
  193. return err
  194. }
  195. }
  196. dataActualPath = setting.Bucket + "/" + userPath
  197. }
  198. if poolInfos == nil {
  199. json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
  200. }
  201. jobResult, err := CreateJob(models.CreateNotebookParams{
  202. JobName: jobName,
  203. Description: description,
  204. ProfileID: setting.ProfileID,
  205. Flavor: flavor,
  206. Pool: models.Pool{
  207. ID: poolInfos.PoolInfo[0].PoolId,
  208. Name: poolInfos.PoolInfo[0].PoolName,
  209. Type: poolInfos.PoolInfo[0].PoolType,
  210. },
  211. Spec: models.Spec{
  212. Storage: models.Storage{
  213. Type: storageTypeOBS,
  214. Location: models.Location{
  215. Path: dataActualPath,
  216. },
  217. },
  218. AutoStop: models.AutoStop{
  219. Enable: true,
  220. Duration: autoStopDuration,
  221. },
  222. },
  223. })
  224. if err != nil {
  225. log.Error("CreateJob failed: %v", err.Error())
  226. return err
  227. }
  228. err = models.CreateCloudbrain(&models.Cloudbrain{
  229. Status: string(models.JobWaiting),
  230. UserID: ctx.User.ID,
  231. RepoID: ctx.Repo.Repository.ID,
  232. JobID: jobResult.ID,
  233. JobName: jobName,
  234. JobType: string(models.JobTypeDebug),
  235. Type: models.TypeCloudBrainTwo,
  236. Uuid: uuid,
  237. ComputeResource: models.NPUResource,
  238. })
  239. if err != nil {
  240. return err
  241. }
  242. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobResult.ID, jobName, models.ActionCreateDebugNPUTask)
  243. return nil
  244. }
  245. func GenerateNotebook2(ctx *context.Context, jobName, uuid, description, flavor, imageId string) error {
  246. if poolInfos == nil {
  247. json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
  248. }
  249. imageName, err := GetNotebookImageName(imageId)
  250. if err != nil {
  251. log.Error("GetNotebookImageName failed: %v", err.Error())
  252. return err
  253. }
  254. jobResult, err := createNotebook2(models.CreateNotebook2Params{
  255. JobName: jobName,
  256. Description: description,
  257. Flavor: flavor,
  258. Duration: autoStopDurationMs,
  259. ImageID: imageId,
  260. PoolID: poolInfos.PoolInfo[0].PoolId,
  261. Feature: models.NotebookFeature,
  262. Volume: models.VolumeReq{
  263. Capacity: setting.Capacity,
  264. Category: models.EVSCategory,
  265. Ownership: models.ManagedOwnership,
  266. },
  267. WorkspaceID: "0",
  268. })
  269. if err != nil {
  270. log.Error("createNotebook2 failed: %v", err.Error())
  271. return err
  272. }
  273. err = models.CreateCloudbrain(&models.Cloudbrain{
  274. Status: jobResult.Status,
  275. UserID: ctx.User.ID,
  276. RepoID: ctx.Repo.Repository.ID,
  277. JobID: jobResult.ID,
  278. JobName: jobName,
  279. JobType: string(models.JobTypeDebug),
  280. Type: models.TypeCloudBrainTwo,
  281. Uuid: uuid,
  282. ComputeResource: models.NPUResource,
  283. Image: imageName,
  284. Description: description,
  285. })
  286. if err != nil {
  287. return err
  288. }
  289. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobResult.ID, jobName, models.ActionCreateDebugNPUTask)
  290. return nil
  291. }
  292. func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
  293. jobResult, err := createTrainJob(models.CreateTrainJobParams{
  294. JobName: req.JobName,
  295. Description: req.Description,
  296. Config: models.Config{
  297. WorkServerNum: req.WorkServerNumber,
  298. AppUrl: req.CodeObsPath,
  299. BootFileUrl: req.BootFileUrl,
  300. DataUrl: req.DataUrl,
  301. EngineID: req.EngineID,
  302. TrainUrl: req.TrainUrl,
  303. LogUrl: req.LogUrl,
  304. PoolID: req.PoolID,
  305. CreateVersion: true,
  306. Flavor: models.Flavor{
  307. Code: req.FlavorCode,
  308. },
  309. Parameter: req.Parameters,
  310. },
  311. })
  312. if err != nil {
  313. log.Error("CreateJob failed: %v", err.Error())
  314. return err
  315. }
  316. attach, err := models.GetAttachmentByUUID(req.Uuid)
  317. if err != nil {
  318. log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
  319. return err
  320. }
  321. jobId := strconv.FormatInt(jobResult.JobID, 10)
  322. err = models.CreateCloudbrain(&models.Cloudbrain{
  323. Status: TransTrainJobStatus(jobResult.Status),
  324. UserID: ctx.User.ID,
  325. RepoID: ctx.Repo.Repository.ID,
  326. JobID: jobId,
  327. JobName: req.JobName,
  328. JobType: string(models.JobTypeTrain),
  329. Type: models.TypeCloudBrainTwo,
  330. VersionID: jobResult.VersionID,
  331. VersionName: jobResult.VersionName,
  332. Uuid: req.Uuid,
  333. DatasetName: attach.Name,
  334. CommitID: req.CommitID,
  335. IsLatestVersion: req.IsLatestVersion,
  336. ComputeResource: models.NPUResource,
  337. EngineID: req.EngineID,
  338. TrainUrl: req.TrainUrl,
  339. BranchName: req.BranchName,
  340. Parameters: req.Params,
  341. BootFile: req.BootFile,
  342. DataUrl: req.DataUrl,
  343. LogUrl: req.LogUrl,
  344. FlavorCode: req.FlavorCode,
  345. Description: req.Description,
  346. WorkServerNumber: req.WorkServerNumber,
  347. FlavorName: req.FlavorName,
  348. EngineName: req.EngineName,
  349. VersionCount: req.VersionCount,
  350. TotalVersionCount: req.TotalVersionCount,
  351. })
  352. if err != nil {
  353. log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
  354. return err
  355. }
  356. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.JobName, models.ActionCreateTrainTask)
  357. return nil
  358. }
  359. func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
  360. jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{
  361. Description: req.Description,
  362. Config: models.TrainJobVersionConfig{
  363. WorkServerNum: req.WorkServerNumber,
  364. AppUrl: req.CodeObsPath,
  365. BootFileUrl: req.BootFileUrl,
  366. DataUrl: req.DataUrl,
  367. EngineID: req.EngineID,
  368. TrainUrl: req.TrainUrl,
  369. LogUrl: req.LogUrl,
  370. PoolID: req.PoolID,
  371. Flavor: models.Flavor{
  372. Code: req.FlavorCode,
  373. },
  374. Parameter: req.Parameters,
  375. PreVersionId: req.PreVersionId,
  376. },
  377. }, jobId)
  378. if err != nil {
  379. log.Error("CreateJob failed: %v", err.Error())
  380. return err
  381. }
  382. attach, err := models.GetAttachmentByUUID(req.Uuid)
  383. if err != nil {
  384. log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
  385. return err
  386. }
  387. var jobTypes []string
  388. jobTypes = append(jobTypes, string(models.JobTypeTrain))
  389. repo := ctx.Repo.Repository
  390. VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
  391. RepoID: repo.ID,
  392. Type: models.TypeCloudBrainTwo,
  393. JobTypes: jobTypes,
  394. JobID: strconv.FormatInt(jobResult.JobID, 10),
  395. })
  396. if err != nil {
  397. ctx.ServerError("Cloudbrain", err)
  398. return err
  399. }
  400. //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount
  401. err = models.CreateCloudbrain(&models.Cloudbrain{
  402. Status: TransTrainJobStatus(jobResult.Status),
  403. UserID: ctx.User.ID,
  404. RepoID: ctx.Repo.Repository.ID,
  405. JobID: strconv.FormatInt(jobResult.JobID, 10),
  406. JobName: req.JobName,
  407. JobType: string(models.JobTypeTrain),
  408. Type: models.TypeCloudBrainTwo,
  409. VersionID: jobResult.VersionID,
  410. VersionName: jobResult.VersionName,
  411. Uuid: req.Uuid,
  412. DatasetName: attach.Name,
  413. CommitID: req.CommitID,
  414. IsLatestVersion: req.IsLatestVersion,
  415. PreVersionName: req.PreVersionName,
  416. ComputeResource: models.NPUResource,
  417. EngineID: req.EngineID,
  418. TrainUrl: req.TrainUrl,
  419. BranchName: req.BranchName,
  420. Parameters: req.Params,
  421. BootFile: req.BootFile,
  422. DataUrl: req.DataUrl,
  423. LogUrl: req.LogUrl,
  424. PreVersionId: req.PreVersionId,
  425. FlavorCode: req.FlavorCode,
  426. Description: req.Description,
  427. WorkServerNumber: req.WorkServerNumber,
  428. FlavorName: req.FlavorName,
  429. EngineName: req.EngineName,
  430. TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1,
  431. VersionCount: VersionListCount + 1,
  432. })
  433. if err != nil {
  434. log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
  435. return err
  436. }
  437. //将训练任务的上一版本的isLatestVersion设置为"0"
  438. err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
  439. if err != nil {
  440. ctx.ServerError("Update IsLatestVersion failed", err)
  441. return err
  442. }
  443. return err
  444. }
  445. func TransTrainJobStatus(status int) string {
  446. switch status {
  447. case 0:
  448. return "UNKNOWN"
  449. case 1:
  450. return "INIT"
  451. case 2:
  452. return "IMAGE_CREATING"
  453. case 3:
  454. return "IMAGE_FAILED"
  455. case 4:
  456. return "SUBMIT_TRYING"
  457. case 5:
  458. return "SUBMIT_FAILED"
  459. case 6:
  460. return "DELETE_FAILED"
  461. case 7:
  462. return "WAITING"
  463. case 8:
  464. return "RUNNING"
  465. case 9:
  466. return "KILLING"
  467. case 10:
  468. return "COMPLETED"
  469. case 11:
  470. return "FAILED"
  471. case 12:
  472. return "KILLED"
  473. case 13:
  474. return "CANCELED"
  475. case 14:
  476. return "LOST"
  477. case 15:
  478. return "SCALING"
  479. case 16:
  480. return "SUBMIT_MODEL_FAILED"
  481. case 17:
  482. return "DEPLOY_SERVICE_FAILED"
  483. case 18:
  484. return "CHECK_INIT"
  485. case 19:
  486. return "CHECK_RUNNING"
  487. case 20:
  488. return "CHECK_RUNNING_COMPLETED"
  489. case 21:
  490. return "CHECK_FAILED"
  491. default:
  492. return strconv.Itoa(status)
  493. }
  494. }
  495. func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) {
  496. talVersionCountToString := fmt.Sprintf("%04d", TotalVersionCount)
  497. VersionOutputPath = "V" + talVersionCountToString
  498. return VersionOutputPath
  499. }
  500. func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) {
  501. jobResult, err := createInferenceJob(models.CreateInferenceJobParams{
  502. JobName: req.JobName,
  503. Description: req.Description,
  504. InfConfig: models.InfConfig{
  505. WorkServerNum: req.WorkServerNumber,
  506. AppUrl: req.CodeObsPath,
  507. BootFileUrl: req.BootFileUrl,
  508. DataUrl: req.DataUrl,
  509. EngineID: req.EngineID,
  510. // TrainUrl: req.TrainUrl,
  511. LogUrl: req.LogUrl,
  512. PoolID: req.PoolID,
  513. CreateVersion: true,
  514. Flavor: models.Flavor{
  515. Code: req.FlavorCode,
  516. },
  517. Parameter: req.Parameters,
  518. },
  519. })
  520. if err != nil {
  521. log.Error("CreateJob failed: %v", err.Error())
  522. return err
  523. }
  524. attach, err := models.GetAttachmentByUUID(req.Uuid)
  525. if err != nil {
  526. log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
  527. return err
  528. }
  529. jobID := strconv.FormatInt(jobResult.JobID, 10)
  530. err = models.CreateCloudbrain(&models.Cloudbrain{
  531. Status: TransTrainJobStatus(jobResult.Status),
  532. UserID: ctx.User.ID,
  533. RepoID: ctx.Repo.Repository.ID,
  534. JobID: jobID,
  535. JobName: req.JobName,
  536. JobType: string(models.JobTypeInference),
  537. Type: models.TypeCloudBrainTwo,
  538. VersionID: jobResult.VersionID,
  539. VersionName: jobResult.VersionName,
  540. Uuid: req.Uuid,
  541. DatasetName: attach.Name,
  542. CommitID: req.CommitID,
  543. EngineID: req.EngineID,
  544. TrainUrl: req.TrainUrl,
  545. BranchName: req.BranchName,
  546. Parameters: req.Params,
  547. BootFile: req.BootFile,
  548. DataUrl: req.DataUrl,
  549. LogUrl: req.LogUrl,
  550. FlavorCode: req.FlavorCode,
  551. Description: req.Description,
  552. WorkServerNumber: req.WorkServerNumber,
  553. FlavorName: req.FlavorName,
  554. EngineName: req.EngineName,
  555. LabelName: req.LabelName,
  556. IsLatestVersion: req.IsLatestVersion,
  557. ComputeResource: models.NPUResource,
  558. VersionCount: req.VersionCount,
  559. TotalVersionCount: req.TotalVersionCount,
  560. ModelName: req.ModelName,
  561. ModelVersion: req.ModelVersion,
  562. CkptName: req.CkptName,
  563. ResultUrl: req.ResultUrl,
  564. })
  565. if err != nil {
  566. log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
  567. return err
  568. }
  569. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.JobName, models.ActionCreateInferenceTask)
  570. return nil
  571. }
  572. func GetNotebookImageName(imageId string) (string, error) {
  573. var validImage = false
  574. var imageName = ""
  575. if ImageInfos == nil {
  576. json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos)
  577. }
  578. for _, imageInfo := range ImageInfos.ImageInfo {
  579. if imageInfo.Id == imageId {
  580. validImage = true
  581. imageName = imageInfo.Value
  582. }
  583. }
  584. if !validImage {
  585. log.Error("the image id(%s) is invalid", imageId)
  586. return imageName, errors.New("the image id is invalid")
  587. }
  588. return imageName, nil
  589. }