You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 5.7 kB

5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. package cloudbrain
  2. import (
  3. "errors"
  4. "strconv"
  5. "code.gitea.io/gitea/modules/setting"
  6. "code.gitea.io/gitea/models"
  7. "code.gitea.io/gitea/modules/context"
  8. "code.gitea.io/gitea/modules/log"
  9. )
  10. const (
  11. Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
  12. CodeMountPath = "/code"
  13. DataSetMountPath = "/dataset"
  14. ModelMountPath = "/model"
  15. BenchMarkMountPath = "/benchmark"
  16. Snn4imagenetMountPath = "/snn4imagenet"
  17. BrainScoreMountPath = "/brainscore"
  18. TaskInfoName = "/taskInfo"
  19. SubTaskName = "task1"
  20. Success = "S000"
  21. )
  22. var (
  23. ResourceSpecs *models.ResourceSpecs
  24. )
  25. func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
  26. log.Info("is repo owner:" + strconv.FormatBool(ctx.IsUserRepoOwner()))
  27. log.Info("is user admin:" + strconv.FormatBool(ctx.IsUserSiteAdmin()))
  28. if err != nil {
  29. return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin()
  30. } else {
  31. log.Info("is job creator:" + strconv.FormatBool(ctx.User.ID == job.UserID))
  32. return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID
  33. }
  34. }
  35. func CanDeleteDebugJob(ctx *context.Context, job *models.Cloudbrain) bool {
  36. if job.Status != string(models.JobStopped) && job.Status != string(models.JobFailed) && job.Status != string(models.ModelArtsStartFailed) && job.Status != string(models.ModelArtsCreateFailed) {
  37. return false
  38. }
  39. if !ctx.IsSigned {
  40. return false
  41. }
  42. return isAdminOrOwnerOrJobCreater(ctx, job, nil)
  43. }
  44. func CanDeleteTrainJob(ctx *context.Context, job *models.Cloudbrain) bool {
  45. if !ctx.IsSigned {
  46. return false
  47. }
  48. return isAdminOrOwnerOrJobCreater(ctx, job, nil)
  49. }
  50. func CanCreateOrDebugJob(ctx *context.Context) bool {
  51. if !ctx.IsSigned {
  52. return false
  53. }
  54. return ctx.Repo.CanWrite(models.UnitTypeCloudBrain)
  55. }
  56. func CanModifyJob(ctx *context.Context, job *models.Cloudbrain) bool {
  57. return isAdminOrJobCreater(ctx, job, nil)
  58. }
  59. func isAdminOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
  60. if err != nil {
  61. return ctx.IsUserSiteAdmin()
  62. } else {
  63. return ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID
  64. }
  65. }
  66. func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) {
  67. var jobID = ctx.Params(":jobid")
  68. job, err := models.GetCloudbrainByJobID(jobID)
  69. if !isAdminOrOwnerOrJobCreater(ctx, job, err) {
  70. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  71. }
  72. }
  73. func AdminOrJobCreaterRight(ctx *context.Context) {
  74. var jobID = ctx.Params(":jobid")
  75. job, err := models.GetCloudbrainByJobID(jobID)
  76. if !isAdminOrJobCreater(ctx, job, err) {
  77. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  78. }
  79. }
  80. func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue string, resourceSpecId int) error {
  81. dataActualPath := setting.Attachment.Minio.RealPath +
  82. setting.Attachment.Minio.Bucket + "/" +
  83. setting.Attachment.Minio.BasePath +
  84. models.AttachmentRelativePath(uuid) +
  85. uuid
  86. var resourceSpec *models.ResourceSpec
  87. for _, spec := range ResourceSpecs.ResourceSpec {
  88. if resourceSpecId == spec.Id {
  89. resourceSpec = spec
  90. }
  91. }
  92. if resourceSpec == nil {
  93. log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"])
  94. return errors.New("no such resourceSpec")
  95. }
  96. jobResult, err := CreateJob(jobName, models.CreateJobParams{
  97. JobName: jobName,
  98. RetryCount: 1,
  99. GpuType: gpuQueue,
  100. Image: image,
  101. TaskRoles: []models.TaskRole{
  102. {
  103. Name: SubTaskName,
  104. TaskNumber: 1,
  105. MinSucceededTaskCount: 1,
  106. MinFailedTaskCount: 1,
  107. CPUNumber: resourceSpec.CpuNum,
  108. GPUNumber: resourceSpec.GpuNum,
  109. MemoryMB: resourceSpec.MemMiB,
  110. ShmMB: resourceSpec.ShareMemMiB,
  111. Command: command,
  112. NeedIBDevice: false,
  113. IsMainRole: false,
  114. UseNNI: false,
  115. },
  116. },
  117. Volumes: []models.Volume{
  118. {
  119. HostPath: models.StHostPath{
  120. Path: codePath,
  121. MountPath: CodeMountPath,
  122. ReadOnly: false,
  123. },
  124. },
  125. {
  126. HostPath: models.StHostPath{
  127. Path: dataActualPath,
  128. MountPath: DataSetMountPath,
  129. ReadOnly: true,
  130. },
  131. },
  132. {
  133. HostPath: models.StHostPath{
  134. Path: modelPath,
  135. MountPath: ModelMountPath,
  136. ReadOnly: false,
  137. },
  138. },
  139. {
  140. HostPath: models.StHostPath{
  141. Path: benchmarkPath,
  142. MountPath: BenchMarkMountPath,
  143. ReadOnly: true,
  144. },
  145. },
  146. {
  147. HostPath: models.StHostPath{
  148. Path: snn4imagenetPath,
  149. MountPath: Snn4imagenetMountPath,
  150. ReadOnly: true,
  151. },
  152. },
  153. {
  154. HostPath: models.StHostPath{
  155. Path: brainScorePath,
  156. MountPath: BrainScoreMountPath,
  157. ReadOnly: true,
  158. },
  159. },
  160. },
  161. })
  162. if err != nil {
  163. log.Error("CreateJob failed:", err.Error())
  164. return err
  165. }
  166. if jobResult.Code != Success {
  167. log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg)
  168. return errors.New(jobResult.Msg)
  169. }
  170. var jobID = jobResult.Payload["jobId"].(string)
  171. err = models.CreateCloudbrain(&models.Cloudbrain{
  172. Status: string(models.JobWaiting),
  173. UserID: ctx.User.ID,
  174. RepoID: ctx.Repo.Repository.ID,
  175. JobID: jobID,
  176. JobName: jobName,
  177. SubTaskName: SubTaskName,
  178. JobType: jobType,
  179. Type: models.TypeCloudBrainOne,
  180. Uuid: uuid,
  181. })
  182. if err != nil {
  183. return err
  184. }
  185. return nil
  186. }