You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

cloudbrain.go 8.0 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. package models
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "time"
  7. "code.gitea.io/gitea/modules/setting"
  8. "code.gitea.io/gitea/modules/timeutil"
  9. "xorm.io/builder"
  10. )
  11. type CloudbrainStatus string
  12. const (
  13. JobWaiting CloudbrainStatus = "WAITING"
  14. JobStopped CloudbrainStatus = "STOPPED"
  15. JobSucceeded CloudbrainStatus = "SUCCEEDED"
  16. JobFailed CloudbrainStatus = "FAILED"
  17. )
  18. type Cloudbrain struct {
  19. ID int64 `xorm:"pk autoincr"`
  20. JobID string `xorm:"INDEX NOT NULL"`
  21. JobName string
  22. Status string `xorm:"INDEX"`
  23. UserID int64 `xorm:"INDEX"`
  24. RepoID int64 `xorm:"INDEX"`
  25. CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
  26. UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  27. User *User `xorm:"-"`
  28. Repo *Repository `xorm:"-"`
  29. }
  30. type CloudBrainLoginResult struct {
  31. Code string
  32. Msg string
  33. Payload map[string]interface{}
  34. }
  35. type TaskRole struct {
  36. Name string `json:"name"`
  37. TaskNumber int8 `json:"taskNumber"`
  38. MinSucceededTaskCount int8 `json:"minSucceededTaskCount"`
  39. MinFailedTaskCount int8 `json:"minFailedTaskCount"`
  40. CPUNumber int8 `json:"cpuNumber"`
  41. GPUNumber int8 `json:"gpuNumber"`
  42. MemoryMB int `json:"memoryMB"`
  43. ShmMB int `json:"shmMB"`
  44. Command string `json:"command"`
  45. NeedIBDevice bool `json:"needIBDevice"`
  46. IsMainRole bool `json:"isMainRole"`
  47. }
  48. type CreateJobParams struct {
  49. JobName string `json:"jobName"`
  50. RetryCount int8 `json:"retryCount"`
  51. GpuType string `json:"gpuType"`
  52. Image string `json:"image"`
  53. TaskRoles []TaskRole `json:"taskRoles"`
  54. }
  55. type CreateJobResult struct {
  56. Code string
  57. Msg string
  58. Payload map[string]interface{}
  59. }
  60. type GetJobResult struct {
  61. Code string `json:"code"`
  62. Msg string `json:"msg"`
  63. Payload map[string]interface{} `json:"payload"`
  64. }
  65. type CloudbrainsOptions struct {
  66. ListOptions
  67. RepoID int64 // include all repos if empty
  68. UserID int64
  69. JobID int64
  70. SortType string
  71. CloudbrainIDs []int64
  72. // JobStatus CloudbrainStatus
  73. }
  74. type TaskPod struct {
  75. TaskRoleStatus struct {
  76. Name string `json:"name"`
  77. } `json:"taskRoleStatus"`
  78. TaskStatuses []struct {
  79. TaskIndex int `json:"taskIndex"`
  80. PodUID string `json:"podUid"`
  81. PodIP string `json:"podIp"`
  82. PodName string `json:"podName"`
  83. ContainerID string `json:"containerId"`
  84. ContainerIP string `json:"containerIp"`
  85. ContainerGpus string `json:"containerGpus"`
  86. State string `json:"state"`
  87. StartAt time.Time `json:"startAt"`
  88. FinishedAt time.Time `json:"finishedAt"`
  89. ExitCode int `json:"exitCode"`
  90. ExitDiagnostics string `json:"exitDiagnostics"`
  91. RetriedCount int `json:"retriedCount"`
  92. } `json:"taskStatuses"`
  93. }
  94. func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  95. data, _ := json.Marshal(input)
  96. var taskPod TaskPod
  97. err := json.Unmarshal(data, &taskPod)
  98. return taskPod, err
  99. }
  100. type JobResultPayload struct {
  101. ID string `json:"id"`
  102. Name string `json:"name"`
  103. Platform string `json:"platform"`
  104. JobStatus struct {
  105. Username string `json:"username"`
  106. State string `json:"state"`
  107. SubState string `json:"subState"`
  108. ExecutionType string `json:"executionType"`
  109. Retries int `json:"retries"`
  110. CreatedTime int64 `json:"createdTime"`
  111. CompletedTime int64 `json:"completedTime"`
  112. AppID string `json:"appId"`
  113. AppProgress string `json:"appProgress"`
  114. AppTrackingURL string `json:"appTrackingUrl"`
  115. AppLaunchedTime int64 `json:"appLaunchedTime"`
  116. AppCompletedTime interface{} `json:"appCompletedTime"`
  117. AppExitCode int `json:"appExitCode"`
  118. AppExitDiagnostics string `json:"appExitDiagnostics"`
  119. AppExitType interface{} `json:"appExitType"`
  120. VirtualCluster string `json:"virtualCluster"`
  121. } `json:"jobStatus"`
  122. TaskRoles map[string]interface{} `json:"taskRoles"`
  123. Resource struct {
  124. CPU int `json:"cpu"`
  125. Memory string `json:"memory"`
  126. NvidiaComGpu int `json:"nvidia.com/gpu"`
  127. } `json:"resource"`
  128. Config struct {
  129. Image string `json:"image"`
  130. JobID string `json:"jobId"`
  131. GpuType string `json:"gpuType"`
  132. JobName string `json:"jobName"`
  133. JobType string `json:"jobType"`
  134. TaskRoles []struct {
  135. Name string `json:"name"`
  136. ShmMB int `json:"shmMB"`
  137. Command string `json:"command"`
  138. MemoryMB int `json:"memoryMB"`
  139. CPUNumber int `json:"cpuNumber"`
  140. GpuNumber int `json:"gpuNumber"`
  141. IsMainRole bool `json:"isMainRole"`
  142. TaskNumber int `json:"taskNumber"`
  143. NeedIBDevice bool `json:"needIBDevice"`
  144. MinFailedTaskCount int `json:"minFailedTaskCount"`
  145. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  146. } `json:"taskRoles"`
  147. RetryCount int `json:"retryCount"`
  148. } `json:"config"`
  149. Userinfo struct {
  150. User string `json:"user"`
  151. OrgID string `json:"org_id"`
  152. } `json:"userinfo"`
  153. }
  154. func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  155. data, _ := json.Marshal(input)
  156. var jobResultPayload JobResultPayload
  157. err := json.Unmarshal(data, &jobResultPayload)
  158. return jobResultPayload, err
  159. }
  160. func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) {
  161. sess := x.NewSession()
  162. defer sess.Close()
  163. var cond = builder.NewCond()
  164. if opts.RepoID > 0 {
  165. cond.And(
  166. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  167. )
  168. }
  169. if opts.UserID > 0 {
  170. cond.And(
  171. builder.Eq{"cloudbrain.user_id": opts.UserID},
  172. )
  173. }
  174. if (opts.JobID) > 0 {
  175. cond.And(
  176. builder.Eq{"cloudbrain.job_id": opts.JobID},
  177. )
  178. }
  179. // switch opts.JobStatus {
  180. // case JobWaiting:
  181. // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
  182. // case JobFailed:
  183. // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
  184. // case JobStopped:
  185. // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
  186. // case JobSucceeded:
  187. // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
  188. // }
  189. if len(opts.CloudbrainIDs) > 0 {
  190. cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  191. }
  192. count, err := sess.Where(cond).Count(new(Cloudbrain))
  193. if err != nil {
  194. return nil, 0, fmt.Errorf("Count: %v", err)
  195. }
  196. if opts.Page >= 0 && opts.PageSize > 0 {
  197. var start int
  198. if opts.Page == 0 {
  199. start = 0
  200. } else {
  201. start = (opts.Page - 1) * opts.PageSize
  202. }
  203. sess.Limit(opts.PageSize, start)
  204. }
  205. sess.OrderBy("cloudbrain.created_unix DESC")
  206. cloudbrains := make([]*Cloudbrain, 0, setting.UI.IssuePagingNum)
  207. if err := sess.Find(&cloudbrains); err != nil {
  208. return nil, 0, fmt.Errorf("Find: %v", err)
  209. }
  210. sess.Close()
  211. return cloudbrains, count, nil
  212. }
  213. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  214. if _, err = x.Insert(cloudbrain); err != nil {
  215. return err
  216. }
  217. return nil
  218. }
  219. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  220. has, err := x.Get(cb)
  221. if err != nil {
  222. return nil, err
  223. } else if !has {
  224. return nil, errors.New("cloudbrain task is not found")
  225. }
  226. return cb, nil
  227. }
  228. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  229. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  230. return getRepoCloudBrain(cb)
  231. }
  232. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  233. cb := &Cloudbrain{JobID: jobID}
  234. return getRepoCloudBrain(cb)
  235. }
  236. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  237. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  238. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  239. return
  240. }