You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

cloudbrain.go 17 kB

5 years ago
5 years ago
5 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
5 years ago
5 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
5 years ago
3 years ago
5 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. package cloudbrain
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "strconv"
  6. "code.gitea.io/gitea/modules/timeutil"
  7. "code.gitea.io/gitea/modules/storage"
  8. "code.gitea.io/gitea/models"
  9. "code.gitea.io/gitea/modules/context"
  10. "code.gitea.io/gitea/modules/log"
  11. "code.gitea.io/gitea/modules/notification"
  12. "code.gitea.io/gitea/modules/setting"
  13. )
// Constants shared by the cloudbrain task helpers in this file: shell command
// templates, volume mount paths and a few fixed identifiers.
const (
	// Commented-out command definitions (not compiled):
	//Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
	//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
	// CommandBenchmark is a shell command line that changes into /benchmark and
	// runs run_bk.sh, bracketed by start/end echo markers.
	CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
	// CodeMountPath is the MountPath used for the code volume.
	CodeMountPath = "/code"
	// DataSetMountPath is the MountPath for dataset volumes: a single dataset is
	// mounted directly here, several datasets are mounted at DataSetMountPath/<name>.
	DataSetMountPath = "/dataset"
	// ModelMountPath is the MountPath used for the model volume.
	ModelMountPath = "/model"
	// LogFile is not referenced in this file; presumably the job log file name — TODO confirm.
	LogFile = "log.txt"
	// BenchMarkMountPath is the MountPath used for the (read-only) benchmark volume.
	BenchMarkMountPath = "/benchmark"
	// BenchMarkResourceID is not referenced in this file; meaning to be confirmed against callers.
	BenchMarkResourceID = 1
	// Snn4imagenetMountPath is the MountPath used for the (read-only) snn4imagenet volume.
	Snn4imagenetMountPath = "/snn4imagenet"
	// BrainScoreMountPath is the MountPath used for the (read-only) brainscore volume.
	BrainScoreMountPath = "/brainscore"
	// TaskInfoName is not referenced in this file; purpose to be confirmed against callers.
	TaskInfoName = "/taskInfo"
	// Snn4imagenetCommand is a format string with two %s placeholders
	// (--modelname and --modeldescription).
	Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s'`
	// BrainScoreCommand is a format string with three %s placeholders (-b, -n and -d).
	BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s'`
	// SubTaskName is the name given to the single task role of a created job and
	// stored on the Cloudbrain record.
	SubTaskName = "task1"
	// Success is the result code a CreateJob response is compared against.
	Success = "S000"
	// DefaultBranchName is not referenced in this file; presumably the fallback branch — TODO confirm.
	DefaultBranchName = "master"
)
// Package-level caches of configuration that is stored as JSON in the settings.
var (
	// ResourceSpecs is decoded on first use from setting.ResourceSpecs
	// (see GenerateTask and RestartTask).
	ResourceSpecs *models.ResourceSpecs
	// TrainResourceSpecs is decoded on first use from setting.TrainResourceSpecs
	// (see GenerateTask, train jobs only).
	TrainResourceSpecs *models.ResourceSpecs
	// SpecialPools is decoded from setting.SpecialPools by InitSpecialPool.
	SpecialPools *models.SpecialPools
)
// GenerateCloudBrainTaskReq carries everything GenerateTask needs to submit a
// job and to persist the matching Cloudbrain record.
type GenerateCloudBrainTaskReq struct {
	// Ctx supplies the acting user, the repository and Data["MsgID"] for logging.
	Ctx *context.Context
	// DisplayJobName is stored on the record and passed to the notification.
	DisplayJobName string
	// JobName is sent to CreateJob and stored on the record.
	JobName string
	// Image and Command are forwarded to the job's parameters / task role.
	Image   string
	Command string
	// Host paths mounted at CodeMountPath, ModelMountPath, BenchMarkMountPath,
	// Snn4imagenetMountPath and BrainScoreMountPath respectively.
	CodePath         string
	ModelPath        string
	BenchmarkPath    string
	Snn4ImageNetPath string
	BrainScorePath   string
	// JobType selects the resource spec list (train vs. other) and the notification action.
	JobType string
	// GpuQueue is sent as the job's GpuType and matched against special pool queues.
	GpuQueue    string
	Description string
	BranchName  string
	BootFile    string
	Params      string
	CommitID    string
	// Uuids is stored as the record's Uuid and is the key looked up in
	// DatasetInfos when exactly one dataset is present.
	Uuids        string
	DatasetNames string
	// DatasetInfos: each entry's DataLocalPath is mounted read-only under DataSetMountPath.
	DatasetInfos         map[string]models.DatasetInfo
	BenchmarkTypeID      int
	BenchmarkChildTypeID int
	// ResourceSpecId is the ID matched against the configured resource specs.
	ResourceSpecId int
}
  63. func GetCloudbrainDebugCommand() string {
  64. var command = `pip3 install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;/usr/local/bin/python /usr/local/bin/jupyter-lab --ServerApp.shutdown_no_activity_timeout=` + setting.CullIdleTimeout + ` --TerminalManager.cull_inactive_timeout=` + setting.CullIdleTimeout + ` --TerminalManager.cull_interval=` + setting.CullInterval + ` --MappingKernelManager.cull_idle_timeout=` + setting.CullIdleTimeout + ` --MappingKernelManager.cull_interval=` + setting.CullInterval + ` --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --ServerApp.token="" --ServerApp.allow_origin="self https://cloudbrain.pcl.ac.cn" `
  65. return command
  66. }
  67. func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
  68. if !ctx.IsSigned {
  69. return false
  70. }
  71. if err != nil {
  72. return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin()
  73. } else {
  74. return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID
  75. }
  76. }
  77. func CanDeleteJob(ctx *context.Context, job *models.Cloudbrain) bool {
  78. return isAdminOrOwnerOrJobCreater(ctx, job, nil)
  79. }
  80. func CanCreateOrDebugJob(ctx *context.Context) bool {
  81. if !ctx.IsSigned {
  82. return false
  83. }
  84. return ctx.Repo.CanWrite(models.UnitTypeCloudBrain)
  85. }
  86. func CanModifyJob(ctx *context.Context, job *models.Cloudbrain) bool {
  87. return isAdminOrJobCreater(ctx, job, nil)
  88. }
  89. func isAdminOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
  90. if !ctx.IsSigned {
  91. return false
  92. }
  93. if err != nil {
  94. return ctx.IsUserSiteAdmin()
  95. } else {
  96. return ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID
  97. }
  98. }
  99. func isAdminOrImageCreater(ctx *context.Context, image *models.Image, err error) bool {
  100. if !ctx.IsSigned {
  101. return false
  102. }
  103. if err != nil {
  104. return ctx.IsUserSiteAdmin()
  105. } else {
  106. return ctx.IsUserSiteAdmin() || ctx.User.ID == image.UID
  107. }
  108. }
  109. func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) {
  110. var ID = ctx.Params(":id")
  111. job, err := models.GetCloudbrainByID(ID)
  112. if err != nil {
  113. log.Error("GetCloudbrainByID failed:%v", err.Error())
  114. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  115. }
  116. ctx.Cloudbrain = job
  117. if !isAdminOrOwnerOrJobCreater(ctx, job, err) {
  118. log.Error("!isAdminOrOwnerOrJobCreater error:%v", err.Error())
  119. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  120. }
  121. }
  122. func AdminOrJobCreaterRight(ctx *context.Context) {
  123. var ID = ctx.Params(":id")
  124. job, err := models.GetCloudbrainByID(ID)
  125. if err != nil {
  126. log.Error("GetCloudbrainByID failed:%v", err.Error())
  127. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  128. }
  129. ctx.Cloudbrain = job
  130. if !isAdminOrJobCreater(ctx, job, err) {
  131. log.Error("!isAdminOrJobCreater error:%v", err.Error())
  132. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  133. }
  134. }
  135. func AdminOrOwnerOrJobCreaterRightForTrain(ctx *context.Context) {
  136. var jobID = ctx.Params(":jobid")
  137. job, err := models.GetCloudbrainByJobID(jobID)
  138. if err != nil {
  139. log.Error("GetCloudbrainByJobID failed:%v", err.Error())
  140. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  141. }
  142. ctx.Cloudbrain = job
  143. if !isAdminOrOwnerOrJobCreater(ctx, job, err) {
  144. log.Error("!isAdminOrOwnerOrJobCreater failed:%v", err.Error())
  145. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  146. }
  147. }
  148. func AdminOrJobCreaterRightForTrain(ctx *context.Context) {
  149. var jobID = ctx.Params(":jobid")
  150. job, err := models.GetCloudbrainByJobID(jobID)
  151. if err != nil {
  152. log.Error("GetCloudbrainByJobID failed:%v", err.Error())
  153. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  154. }
  155. ctx.Cloudbrain = job
  156. if !isAdminOrJobCreater(ctx, job, err) {
  157. log.Error("!isAdminOrJobCreater errot:%v", err.Error())
  158. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  159. }
  160. }
  161. func AdminOrImageCreaterRight(ctx *context.Context) {
  162. id, err := strconv.ParseInt(ctx.Params(":id"), 10, 64)
  163. var image *models.Image
  164. if err != nil {
  165. log.Error("Get Image by ID failed:%v", err.Error())
  166. } else {
  167. image, err = models.GetImageByID(id)
  168. if err != nil {
  169. log.Error("Get Image by ID failed:%v", err.Error())
  170. return
  171. }
  172. }
  173. if !isAdminOrImageCreater(ctx, image, err) {
  174. log.Error("!isAdminOrImageCreater error:%v", err.Error())
  175. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  176. }
  177. }
  178. func GenerateTask(req GenerateCloudBrainTaskReq) error {
  179. var resourceSpec *models.ResourceSpec
  180. var versionCount int
  181. if req.JobType == string(models.JobTypeTrain) {
  182. versionCount = 1
  183. if TrainResourceSpecs == nil {
  184. json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs)
  185. }
  186. for _, spec := range TrainResourceSpecs.ResourceSpec {
  187. if req.ResourceSpecId == spec.Id {
  188. resourceSpec = spec
  189. break
  190. }
  191. }
  192. } else {
  193. if ResourceSpecs == nil {
  194. json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
  195. }
  196. for _, spec := range ResourceSpecs.ResourceSpec {
  197. if req.ResourceSpecId == spec.Id {
  198. resourceSpec = spec
  199. break
  200. }
  201. }
  202. }
  203. //如果没有匹配到spec信息,尝试从专属资源池获取
  204. if resourceSpec == nil && SpecialPools != nil {
  205. for _, specialPool := range SpecialPools.Pools {
  206. if resourceSpec != nil {
  207. break
  208. }
  209. if specialPool.ResourceSpec != nil {
  210. if IsElementExist(specialPool.JobType, req.JobType) && IsQueueInSpecialtPool(specialPool.Pool, req.GpuQueue) {
  211. for _, spec := range specialPool.ResourceSpec {
  212. if req.ResourceSpecId == spec.Id {
  213. resourceSpec = spec
  214. break
  215. }
  216. }
  217. }
  218. }
  219. }
  220. }
  221. if resourceSpec == nil {
  222. log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"])
  223. return errors.New("no such resourceSpec")
  224. }
  225. volumes := []models.Volume{
  226. {
  227. HostPath: models.StHostPath{
  228. Path: req.CodePath,
  229. MountPath: CodeMountPath,
  230. ReadOnly: false,
  231. },
  232. },
  233. {
  234. HostPath: models.StHostPath{
  235. Path: req.ModelPath,
  236. MountPath: ModelMountPath,
  237. ReadOnly: false,
  238. },
  239. },
  240. {
  241. HostPath: models.StHostPath{
  242. Path: req.BenchmarkPath,
  243. MountPath: BenchMarkMountPath,
  244. ReadOnly: true,
  245. },
  246. },
  247. {
  248. HostPath: models.StHostPath{
  249. Path: req.Snn4ImageNetPath,
  250. MountPath: Snn4imagenetMountPath,
  251. ReadOnly: true,
  252. },
  253. },
  254. {
  255. HostPath: models.StHostPath{
  256. Path: req.BrainScorePath,
  257. MountPath: BrainScoreMountPath,
  258. ReadOnly: true,
  259. },
  260. },
  261. }
  262. if len(req.DatasetInfos) == 1 {
  263. volumes = append(volumes, models.Volume{
  264. HostPath: models.StHostPath{
  265. Path: req.DatasetInfos[req.Uuids].DataLocalPath,
  266. MountPath: DataSetMountPath,
  267. ReadOnly: true,
  268. },
  269. })
  270. } else {
  271. for _, dataset := range req.DatasetInfos {
  272. volumes = append(volumes, models.Volume{
  273. HostPath: models.StHostPath{
  274. Path: dataset.DataLocalPath,
  275. MountPath: DataSetMountPath + "/" + dataset.Name,
  276. ReadOnly: true,
  277. },
  278. })
  279. }
  280. }
  281. createTime := timeutil.TimeStampNow()
  282. jobResult, err := CreateJob(req.JobName, models.CreateJobParams{
  283. JobName: req.JobName,
  284. RetryCount: 1,
  285. GpuType: req.GpuQueue,
  286. Image: req.Image,
  287. TaskRoles: []models.TaskRole{
  288. {
  289. Name: SubTaskName,
  290. TaskNumber: 1,
  291. MinSucceededTaskCount: 1,
  292. MinFailedTaskCount: 1,
  293. CPUNumber: resourceSpec.CpuNum,
  294. GPUNumber: resourceSpec.GpuNum,
  295. MemoryMB: resourceSpec.MemMiB,
  296. ShmMB: resourceSpec.ShareMemMiB,
  297. Command: req.Command,
  298. NeedIBDevice: false,
  299. IsMainRole: false,
  300. UseNNI: false,
  301. },
  302. },
  303. Volumes: volumes,
  304. })
  305. if err != nil {
  306. log.Error("CreateJob failed:", err.Error(), req.Ctx.Data["MsgID"])
  307. return err
  308. }
  309. if jobResult.Code != Success {
  310. log.Error("CreateJob(%s) failed:%s", req.JobName, jobResult.Msg, req.Ctx.Data["MsgID"])
  311. return errors.New(jobResult.Msg)
  312. }
  313. var jobID = jobResult.Payload["jobId"].(string)
  314. err = models.CreateCloudbrain(&models.Cloudbrain{
  315. Status: string(models.JobWaiting),
  316. UserID: req.Ctx.User.ID,
  317. RepoID: req.Ctx.Repo.Repository.ID,
  318. JobID: jobID,
  319. JobName: req.JobName,
  320. DisplayJobName: req.DisplayJobName,
  321. SubTaskName: SubTaskName,
  322. JobType: req.JobType,
  323. Type: models.TypeCloudBrainOne,
  324. Uuid: req.Uuids,
  325. Image: req.Image,
  326. GpuQueue: req.GpuQueue,
  327. ResourceSpecId: req.ResourceSpecId,
  328. ComputeResource: models.GPUResource,
  329. BenchmarkTypeID: req.BenchmarkTypeID,
  330. BenchmarkChildTypeID: req.BenchmarkChildTypeID,
  331. Description: req.Description,
  332. IsLatestVersion: "1",
  333. VersionCount: versionCount,
  334. BranchName: req.BranchName,
  335. BootFile: req.BootFile,
  336. DatasetName: req.DatasetNames,
  337. Parameters: req.Params,
  338. CreatedUnix: createTime,
  339. UpdatedUnix: createTime,
  340. CommitID: req.CommitID,
  341. })
  342. if err != nil {
  343. return err
  344. }
  345. task, err := models.GetCloudbrainByJobID(jobID)
  346. if err != nil {
  347. log.Error("GetCloudbrainByJobID failed: %v", err.Error())
  348. return err
  349. }
  350. stringId := strconv.FormatInt(task.ID, 10)
  351. if IsBenchmarkJob(req.JobType) {
  352. notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateBenchMarkTask)
  353. } else if string(models.JobTypeTrain) == req.JobType {
  354. notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateGPUTrainTask)
  355. } else {
  356. notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugGPUTask)
  357. }
  358. return nil
  359. }
  360. func IsBenchmarkJob(jobType string) bool {
  361. return string(models.JobTypeBenchmark) == jobType || string(models.JobTypeBrainScore) == jobType || string(models.JobTypeSnn4imagenet) == jobType
  362. }
  363. func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) error {
  364. jobName := task.JobName
  365. var resourceSpec *models.ResourceSpec
  366. if ResourceSpecs == nil {
  367. json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
  368. }
  369. for _, spec := range ResourceSpecs.ResourceSpec {
  370. if task.ResourceSpecId == spec.Id {
  371. resourceSpec = spec
  372. }
  373. }
  374. if resourceSpec == nil {
  375. log.Error("no such resourceSpecId(%d)", task.ResourceSpecId, ctx.Data["MsgID"])
  376. return errors.New("no such resourceSpec")
  377. }
  378. datasetInfos, _, err := models.GetDatasetInfo(task.Uuid)
  379. if err != nil {
  380. log.Error("GetDatasetInfo failed:%v", err, ctx.Data["MsgID"])
  381. return err
  382. }
  383. volumes := []models.Volume{
  384. {
  385. HostPath: models.StHostPath{
  386. Path: storage.GetMinioPath(jobName, CodeMountPath+"/"),
  387. MountPath: CodeMountPath,
  388. ReadOnly: false,
  389. },
  390. },
  391. {
  392. HostPath: models.StHostPath{
  393. Path: storage.GetMinioPath(jobName, ModelMountPath+"/"),
  394. MountPath: ModelMountPath,
  395. ReadOnly: false,
  396. },
  397. },
  398. {
  399. HostPath: models.StHostPath{
  400. Path: storage.GetMinioPath(jobName, BenchMarkMountPath+"/"),
  401. MountPath: BenchMarkMountPath,
  402. ReadOnly: true,
  403. },
  404. },
  405. {
  406. HostPath: models.StHostPath{
  407. Path: storage.GetMinioPath(jobName, Snn4imagenetMountPath+"/"),
  408. MountPath: Snn4imagenetMountPath,
  409. ReadOnly: true,
  410. },
  411. },
  412. {
  413. HostPath: models.StHostPath{
  414. Path: storage.GetMinioPath(jobName, BrainScoreMountPath+"/"),
  415. MountPath: BrainScoreMountPath,
  416. ReadOnly: true,
  417. },
  418. },
  419. }
  420. if len(datasetInfos) == 1 {
  421. volumes = append(volumes, models.Volume{
  422. HostPath: models.StHostPath{
  423. Path: datasetInfos[task.Uuid].DataLocalPath,
  424. MountPath: DataSetMountPath,
  425. ReadOnly: true,
  426. },
  427. })
  428. } else {
  429. for _, dataset := range datasetInfos {
  430. volumes = append(volumes, models.Volume{
  431. HostPath: models.StHostPath{
  432. Path: dataset.DataLocalPath,
  433. MountPath: DataSetMountPath + "/" + dataset.Name,
  434. ReadOnly: true,
  435. },
  436. })
  437. }
  438. }
  439. createTime := timeutil.TimeStampNow()
  440. jobResult, err := CreateJob(jobName, models.CreateJobParams{
  441. JobName: jobName,
  442. RetryCount: 1,
  443. GpuType: task.GpuQueue,
  444. Image: task.Image,
  445. TaskRoles: []models.TaskRole{
  446. {
  447. Name: SubTaskName,
  448. TaskNumber: 1,
  449. MinSucceededTaskCount: 1,
  450. MinFailedTaskCount: 1,
  451. CPUNumber: resourceSpec.CpuNum,
  452. GPUNumber: resourceSpec.GpuNum,
  453. MemoryMB: resourceSpec.MemMiB,
  454. ShmMB: resourceSpec.ShareMemMiB,
  455. Command: GetCloudbrainDebugCommand(),//Command,
  456. NeedIBDevice: false,
  457. IsMainRole: false,
  458. UseNNI: false,
  459. },
  460. },
  461. Volumes: volumes,
  462. })
  463. if err != nil {
  464. log.Error("CreateJob failed:%v", err.Error(), ctx.Data["MsgID"])
  465. return err
  466. }
  467. if jobResult.Code != Success {
  468. log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg, ctx.Data["MsgID"])
  469. return errors.New(jobResult.Msg)
  470. }
  471. var jobID = jobResult.Payload["jobId"].(string)
  472. newTask := &models.Cloudbrain{
  473. Status: string(models.JobWaiting),
  474. UserID: task.UserID,
  475. RepoID: task.RepoID,
  476. JobID: jobID,
  477. JobName: task.JobName,
  478. DisplayJobName: task.DisplayJobName,
  479. SubTaskName: task.SubTaskName,
  480. JobType: task.JobType,
  481. Type: task.Type,
  482. Uuid: task.Uuid,
  483. DatasetName: task.DatasetName,
  484. Image: task.Image,
  485. GpuQueue: task.GpuQueue,
  486. ResourceSpecId: task.ResourceSpecId,
  487. ComputeResource: task.ComputeResource,
  488. CreatedUnix: createTime,
  489. UpdatedUnix: createTime,
  490. BranchName: task.BranchName,
  491. }
  492. err = models.RestartCloudbrain(task, newTask)
  493. if err != nil {
  494. log.Error("RestartCloudbrain(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"])
  495. return err
  496. }
  497. stringId := strconv.FormatInt(newTask.ID, 10)
  498. *newID = stringId
  499. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, task.DisplayJobName, models.ActionCreateDebugGPUTask)
  500. return nil
  501. }
  502. func InitSpecialPool() {
  503. if SpecialPools == nil && setting.SpecialPools != "" {
  504. json.Unmarshal([]byte(setting.SpecialPools), &SpecialPools)
  505. }
  506. }
  507. func IsResourceSpecInSpecialPool(resourceSpecs []*models.ResourceSpec, resourceSpecId int) bool {
  508. if resourceSpecs == nil || len(resourceSpecs) == 0 {
  509. return true
  510. }
  511. for _, v := range resourceSpecs {
  512. if v.Id == resourceSpecId {
  513. return true
  514. }
  515. }
  516. return false
  517. }
  518. func IsQueueInSpecialtPool(pool []*models.GpuInfo, queue string) bool {
  519. for _, v := range pool {
  520. if v.Queue == queue {
  521. return true
  522. }
  523. }
  524. return false
  525. }
  526. func IsElementExist(s []string, str string) bool {
  527. for _, v := range s {
  528. if v == str {
  529. return true
  530. }
  531. }
  532. return false
  533. }