You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

cloudbrain.go 11 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. package models
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "time"
  7. "xorm.io/xorm"
  8. "code.gitea.io/gitea/modules/setting"
  9. "code.gitea.io/gitea/modules/timeutil"
  10. "xorm.io/builder"
  11. )
  // CloudbrainStatus is the state of a cloudbrain job, represented by an
  // upper-case string.
  type CloudbrainStatus string

  // Job states known to this package.
  const (
  	JobWaiting   = CloudbrainStatus("WAITING")
  	JobStopped   = CloudbrainStatus("STOPPED")
  	JobSucceeded = CloudbrainStatus("SUCCEEDED")
  	JobFailed    = CloudbrainStatus("FAILED")
  	JobRunning   = CloudbrainStatus("RUNNING")
  )
  20. type Cloudbrain struct {
  21. ID int64 `xorm:"pk autoincr"`
  22. JobID string `xorm:"INDEX NOT NULL"`
  23. JobName string `xorm:"INDEX"`
  24. Status string `xorm:"INDEX"`
  25. UserID int64 `xorm:"INDEX"`
  26. RepoID int64 `xorm:"INDEX"`
  27. SubTaskName string `xorm:"INDEX"`
  28. ContainerID string
  29. ContainerIp string
  30. CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
  31. UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  32. DeletedAt time.Time `xorm:"deleted"`
  33. CanDebug bool `xorm:"-"`
  34. User *User `xorm:"-"`
  35. Repo *Repository `xorm:"-"`
  36. }
  // CloudBrainLoginResult holds a login response: a result code, a message
  // and a free-form payload. The fields carry no json tags, so encoding/json
  // uses the Go field names as keys.
  type CloudBrainLoginResult struct {
  	Code, Msg string
  	Payload   map[string]interface{}
  }
  // TaskRole is the JSON description of a single task role. It is the element
  // type of CreateJobParams.TaskRoles.
  type TaskRole struct {
  	Name                  string `json:"name"`
  	TaskNumber            int8   `json:"taskNumber"`
  	MinSucceededTaskCount int8   `json:"minSucceededTaskCount"`
  	MinFailedTaskCount    int8   `json:"minFailedTaskCount"`
  	CPUNumber             int8   `json:"cpuNumber"`
  	GPUNumber             int8   `json:"gpuNumber"`
  	MemoryMB              int    `json:"memoryMB"`
  	ShmMB                 int    `json:"shmMB"`
  	Command               string `json:"command"`
  	NeedIBDevice          bool   `json:"needIBDevice"`
  	IsMainRole            bool   `json:"isMainRole"`
  	UseNNI                bool   `json:"useNNI"`
  }
  // StHostPath is the JSON form of a host-path mount: a path, the mount path
  // it maps to, and a read-only flag.
  type StHostPath struct {
  	Path      string `json:"path"`
  	MountPath string `json:"mountPath"`
  	ReadOnly  bool   `json:"readOnly"`
  }
  61. type Volume struct {
  62. HostPath StHostPath `json:"hostPath"`
  63. }
  64. type CreateJobParams struct {
  65. JobName string `json:"jobName"`
  66. RetryCount int8 `json:"retryCount"`
  67. GpuType string `json:"gpuType"`
  68. Image string `json:"image"`
  69. TaskRoles []TaskRole `json:"taskRoles"`
  70. Volumes []Volume `json:"volumes"`
  71. }
  // CreateJobResult is the decoded response to a job-creation request: a
  // result code, a message and a free-form payload.
  type CreateJobResult struct {
  	Code    string                 `json:"code"`
  	Msg     string                 `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  // GetJobResult is the decoded response to a job query: a result code, a
  // message and a free-form payload. ConvertToJobResultPayload and
  // ConvertToTaskPod accept maps of the same shape as Payload.
  type GetJobResult struct {
  	Code    string                 `json:"code"`
  	Msg     string                 `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  82. type GetImagesResult struct {
  83. Code string `json:"code"`
  84. Msg string `json:"msg"`
  85. Payload map[string]*ImageInfo `json:"payload"`
  86. }
  87. type CloudbrainsOptions struct {
  88. ListOptions
  89. RepoID int64 // include all repos if empty
  90. UserID int64
  91. JobID int64
  92. SortType string
  93. CloudbrainIDs []int64
  94. // JobStatus CloudbrainStatus
  95. }
  // TaskPod is the decoded status of one task role and of the tasks that
  // belong to it.
  type TaskPod struct {
  	TaskRoleStatus struct {
  		Name string `json:"name"`
  	} `json:"taskRoleStatus"`
  	TaskStatuses []struct {
  		TaskIndex       int       `json:"taskIndex"`
  		PodUID          string    `json:"podUid"`
  		PodIP           string    `json:"podIp"`
  		PodName         string    `json:"podName"`
  		ContainerID     string    `json:"containerId"`
  		ContainerIP     string    `json:"containerIp"`
  		ContainerGpus   string    `json:"containerGpus"`
  		State           string    `json:"state"`
  		StartAt         time.Time `json:"startAt"`
  		FinishedAt      time.Time `json:"finishedAt"`
  		ExitCode        int       `json:"exitCode"`
  		ExitDiagnostics string    `json:"exitDiagnostics"`
  		RetriedCount    int       `json:"retriedCount"`

  		// StartTime and FinishedTime are display strings derived from StartAt
  		// and FinishedAt by ConvertToTaskPod.
  		StartTime    string
  		FinishedTime string
  	} `json:"taskStatuses"`
  }

  // TaskInfo identifies a task by user name, task name and code name.
  type TaskInfo struct {
  	Username string `json:"username"`
  	TaskName string `json:"task_name"`
  	CodeName string `json:"code_name"`
  }

  // ConvertToTaskPod converts a generic JSON object into a TaskPod by
  // re-encoding it and decoding the result into the typed struct.
  //
  // For every decoded task status, StartTime and FinishedTime are set to
  // StartAt and FinishedAt shifted to UTC+8 and formatted as
  // "2006-01-02 15:04:05". An input without task statuses is valid and yields
  // a TaskPod with no statuses.
  //
  // A non-nil error means encoding or decoding failed; the returned TaskPod
  // must not be relied upon in that case.
  func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  	var taskPod TaskPod

  	data, err := json.Marshal(input)
  	if err != nil {
  		return taskPod, err
  	}
  	if err = json.Unmarshal(data, &taskPod); err != nil {
  		return taskPod, err
  	}

  	const layout = "2006-01-02 15:04:05"
  	// Display times use a fixed UTC+8 offset, independent of the server's
  	// local time zone.
  	utc8 := time.FixedZone("UTC+8", 8*3600)
  	for i := range taskPod.TaskStatuses {
  		status := &taskPod.TaskStatuses[i]
  		status.StartTime = status.StartAt.In(utc8).Format(layout)
  		status.FinishedTime = status.FinishedAt.In(utc8).Format(layout)
  	}
  	return taskPod, nil
  }
  // JobResultPayload is the typed form of the payload returned for a job
  // query: identity, job status, task roles, resources, submitted
  // configuration and user information.
  type JobResultPayload struct {
  	ID        string `json:"id"`
  	Name      string `json:"name"`
  	Platform  string `json:"platform"`
  	JobStatus struct {
  		Username           string      `json:"username"`
  		State              string      `json:"state"`
  		SubState           string      `json:"subState"`
  		ExecutionType      string      `json:"executionType"`
  		Retries            int         `json:"retries"`
  		CreatedTime        int64       `json:"createdTime"`
  		CompletedTime      int64       `json:"completedTime"`
  		AppID              string      `json:"appId"`
  		AppProgress        string      `json:"appProgress"`
  		AppTrackingURL     string      `json:"appTrackingUrl"`
  		AppLaunchedTime    int64       `json:"appLaunchedTime"`
  		AppCompletedTime   interface{} `json:"appCompletedTime"`
  		AppExitCode        int         `json:"appExitCode"`
  		AppExitDiagnostics string      `json:"appExitDiagnostics"`
  		AppExitType        interface{} `json:"appExitType"`
  		VirtualCluster     string      `json:"virtualCluster"`

  		// StartTime and EndTime are display strings derived from CreatedTime
  		// and CompletedTime by ConvertToJobResultPayload.
  		StartTime string
  		EndTime   string
  	} `json:"jobStatus"`
  	TaskRoles map[string]interface{} `json:"taskRoles"`
  	Resource  struct {
  		CPU          int    `json:"cpu"`
  		Memory       string `json:"memory"`
  		NvidiaComGpu int    `json:"nvidia.com/gpu"`
  	} `json:"resource"`
  	Config struct {
  		Image     string `json:"image"`
  		JobID     string `json:"jobId"`
  		GpuType   string `json:"gpuType"`
  		JobName   string `json:"jobName"`
  		JobType   string `json:"jobType"`
  		TaskRoles []struct {
  			Name                  string `json:"name"`
  			ShmMB                 int    `json:"shmMB"`
  			Command               string `json:"command"`
  			MemoryMB              int    `json:"memoryMB"`
  			CPUNumber             int    `json:"cpuNumber"`
  			GpuNumber             int    `json:"gpuNumber"`
  			IsMainRole            bool   `json:"isMainRole"`
  			TaskNumber            int    `json:"taskNumber"`
  			NeedIBDevice          bool   `json:"needIBDevice"`
  			MinFailedTaskCount    int    `json:"minFailedTaskCount"`
  			MinSucceededTaskCount int    `json:"minSucceededTaskCount"`
  		} `json:"taskRoles"`
  		RetryCount int `json:"retryCount"`
  	} `json:"config"`
  	Userinfo struct {
  		User  string `json:"user"`
  		OrgID string `json:"org_id"`
  	} `json:"userinfo"`
  }

  // ConvertToJobResultPayload converts a generic JSON object into a
  // JobResultPayload by re-encoding it and decoding the result into the typed
  // struct.
  //
  // On success JobStatus.StartTime and JobStatus.EndTime are set from
  // CreatedTime and CompletedTime, which are divided by 1000 before being
  // interpreted as Unix seconds, and formatted as "2006-01-02 15:04:05" in the
  // process's local time zone.
  //
  // A non-nil error means encoding or decoding failed; the display times are
  // left empty and the returned payload must not be relied upon.
  func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  	var jobResultPayload JobResultPayload

  	data, err := json.Marshal(input)
  	if err != nil {
  		// Report the encoding failure itself rather than the confusing
  		// "unexpected end of JSON input" that decoding nil data would give.
  		return jobResultPayload, err
  	}
  	if err = json.Unmarshal(data, &jobResultPayload); err != nil {
  		return jobResultPayload, err
  	}

  	const layout = "2006-01-02 15:04:05"
  	status := &jobResultPayload.JobStatus
  	status.StartTime = time.Unix(status.CreatedTime/1000, 0).Format(layout)
  	status.EndTime = time.Unix(status.CompletedTime/1000, 0).Format(layout)
  	return jobResultPayload, nil
  }
  // ImagesResultPayload is a list of image descriptions.
  //
  // NOTE(review): Images is decoded from the "taskStatuses" JSON key, which
  // looks copied from TaskPod - confirm against the actual response format
  // before changing it.
  type ImagesResultPayload struct {
  	Images []struct {
  		ID          int    `json:"id"`
  		Name        string `json:"name"`
  		Place       string `json:"place"`
  		Description string `json:"description"`
  		Provider    string `json:"provider"`
  		Createtime  string `json:"createtime"`
  		Remark      string `json:"remark"`
  	} `json:"taskStatuses"`
  }
  // ImageInfo describes one image. It is the value type of
  // GetImagesResult.Payload. PlaceView has no json tag, so encoding/json uses
  // the Go field name as its key.
  type ImageInfo struct {
  	ID          int    `json:"id"`
  	Name        string `json:"name"`
  	Place       string `json:"place"`
  	Description string `json:"description"`
  	Provider    string `json:"provider"`
  	Createtime  string `json:"createtime"`
  	Remark      string `json:"remark"`
  	PlaceView   string
  }
  // CommitImageParams is the JSON body of an image-commit request: an IP, a
  // task container ID, and the tag and description for the resulting image.
  type CommitImageParams struct {
  	Ip               string `json:"ip"`
  	TaskContainerId  string `json:"taskContainerId"`
  	ImageTag         string `json:"imageTag"`
  	ImageDescription string `json:"imageDescription"`
  }
  // CommitImageResult is the decoded response to an image-commit request: a
  // result code, a message and a free-form payload.
  type CommitImageResult struct {
  	Code    string                 `json:"code"`
  	Msg     string                 `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  // StopJobResult is the decoded response to a stop-job request: a result
  // code and a message.
  type StopJobResult struct {
  	Code string `json:"code"`
  	Msg  string `json:"msg"`
  }
  231. func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) {
  232. sess := x.NewSession()
  233. defer sess.Close()
  234. var cond = builder.NewCond()
  235. if opts.RepoID > 0 {
  236. cond = cond.And(
  237. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  238. )
  239. }
  240. if opts.UserID > 0 {
  241. cond = cond.And(
  242. builder.Eq{"cloudbrain.user_id": opts.UserID},
  243. )
  244. }
  245. if (opts.JobID) > 0 {
  246. cond = cond.And(
  247. builder.Eq{"cloudbrain.job_id": opts.JobID},
  248. )
  249. }
  250. // switch opts.JobStatus {
  251. // case JobWaiting:
  252. // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
  253. // case JobFailed:
  254. // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
  255. // case JobStopped:
  256. // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
  257. // case JobSucceeded:
  258. // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
  259. // }
  260. if len(opts.CloudbrainIDs) > 0 {
  261. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  262. }
  263. count, err := sess.Where(cond).Count(new(Cloudbrain))
  264. if err != nil {
  265. return nil, 0, fmt.Errorf("Count: %v", err)
  266. }
  267. if opts.Page >= 0 && opts.PageSize > 0 {
  268. var start int
  269. if opts.Page == 0 {
  270. start = 0
  271. } else {
  272. start = (opts.Page - 1) * opts.PageSize
  273. }
  274. sess.Limit(opts.PageSize, start)
  275. }
  276. sess.OrderBy("cloudbrain.created_unix DESC")
  277. cloudbrains := make([]*Cloudbrain, 0, setting.UI.IssuePagingNum)
  278. if err := sess.Where(cond).Find(&cloudbrains); err != nil {
  279. return nil, 0, fmt.Errorf("Find: %v", err)
  280. }
  281. sess.Close()
  282. return cloudbrains, count, nil
  283. }
  284. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  285. if _, err = x.Insert(cloudbrain); err != nil {
  286. return err
  287. }
  288. return nil
  289. }
  290. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  291. has, err := x.Get(cb)
  292. if err != nil {
  293. return nil, err
  294. } else if !has {
  295. return nil, errors.New("cloudbrain task is not found")
  296. }
  297. return cb, nil
  298. }
  299. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  300. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  301. return getRepoCloudBrain(cb)
  302. }
  303. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  304. cb := &Cloudbrain{JobID: jobID}
  305. return getRepoCloudBrain(cb)
  306. }
  307. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  308. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  309. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  310. return
  311. }
  312. func UpdateJob(job *Cloudbrain) error {
  313. return updateJob(x, job)
  314. }
  315. func updateJob(e Engine, job *Cloudbrain) error {
  316. var sess *xorm.Session
  317. sess = e.Where("job_id = ?", job.JobID)
  318. _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
  319. return err
  320. }
  321. func DeleteJob(job *Cloudbrain) error {
  322. return deleteJob(x, job)
  323. }
  324. func deleteJob(e Engine, job *Cloudbrain) error {
  325. _, err := e.ID(job.ID).Delete(job)
  326. return err
  327. }