You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 32 kB

4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007
  1. package models
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "time"
  7. "xorm.io/builder"
  8. "xorm.io/xorm"
  9. "code.gitea.io/gitea/modules/log"
  10. "code.gitea.io/gitea/modules/setting"
  11. "code.gitea.io/gitea/modules/timeutil"
  12. )
// CloudbrainStatus is the string type of the Job* state constants below.
type CloudbrainStatus string

// JobType is the string type of the JobType* constants below.
type JobType string

// ModelArtsJobStatus is the string type of the ModelArts* state constants below.
type ModelArtsJobStatus string

const (
	// Cloudbrain job states.
	JobWaiting   CloudbrainStatus = "WAITING"
	JobStopped   CloudbrainStatus = "STOPPED"
	JobSucceeded CloudbrainStatus = "SUCCEEDED"
	JobFailed    CloudbrainStatus = "FAILED"
	JobRunning   CloudbrainStatus = "RUNNING"

	// Job kinds; "DEBUG" is also the column default of Cloudbrain.JobType.
	JobTypeDebug        JobType = "DEBUG"
	JobTypeBenchmark    JobType = "BENCHMARK"
	JobTypeSnn4imagenet JobType = "SNN4IMAGENET"
	JobTypeBrainScore   JobType = "BRAINSCORE"
	JobTypeTrain        JobType = "TRAIN"

	// ModelArts job states.
	ModelArtsCreateQueue  ModelArtsJobStatus = "CREATE_QUEUING" // queuing for free resources before creation
	ModelArtsCreating     ModelArtsJobStatus = "CREATING"       // being created
	ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED"  // creation failed
	ModelArtsStartQueuing ModelArtsJobStatus = "START_QUEUING"  // queuing for free resources before start
	ModelArtsReadyToStart ModelArtsJobStatus = "READY_TO_START" // waiting to start on free resources
	ModelArtsStarting     ModelArtsJobStatus = "STARTING"       // starting
	ModelArtsRestarting   ModelArtsJobStatus = "RESTARTING"     // restarting
	ModelArtsStartFailed  ModelArtsJobStatus = "START_FAILED"   // start failed
	ModelArtsRunning      ModelArtsJobStatus = "RUNNING"        // running
	ModelArtsStopping     ModelArtsJobStatus = "STOPPING"       // stopping
	ModelArtsStopped      ModelArtsJobStatus = "STOPPED"        // stopped
	ModelArtsUnavailable  ModelArtsJobStatus = "UNAVAILABLE"    // faulty
	ModelArtsDeleted      ModelArtsJobStatus = "DELETED"        // deleted
	ModelArtsResizing     ModelArtsJobStatus = "RESIZING"       // specification change in progress
	ModelArtsResizFailed  ModelArtsJobStatus = "RESIZE_FAILED"  // specification change failed
)
// Cloudbrain is the xorm-mapped record of one cloudbrain task.
// Fields tagged `xorm:"-"` are excluded from the table mapping, so they are
// only populated when code fills them in explicitly.
type Cloudbrain struct {
	ID          int64  `xorm:"pk autoincr"`
	JobID       string `xorm:"INDEX NOT NULL"`
	JobType     string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
	JobName     string `xorm:"INDEX"`
	Status      string `xorm:"INDEX"`
	UserID      int64  `xorm:"INDEX"`
	RepoID      int64  `xorm:"INDEX"`
	SubTaskName string `xorm:"INDEX"`
	ContainerID string
	ContainerIp string
	// CreatedUnix and UpdatedUnix carry xorm's created/updated tags.
	CreatedUnix      timeutil.TimeStamp `xorm:"INDEX created"`
	UpdatedUnix      timeutil.TimeStamp `xorm:"INDEX updated"`
	Duration         int64              `xorm:"INDEX duration"`
	TrainJobDuration string
	// DeletedAt carries xorm's deleted tag (soft-delete marker).
	DeletedAt time.Time `xorm:"deleted"`
	CanDebug  bool      `xorm:"-"`
	CanDel    bool      `xorm:"-"`
	// NOTE(review): the meaning of the Type values is not visible in this file
	// section — confirm against the callers before relying on it.
	Type        int   `xorm:"INDEX DEFAULT 0"`
	VersionID   int64 `xorm:"INDEX DEFAULT 0"`
	VersionName string
	Uuid        string
	DatasetName string

	// User and Repo are not persisted with this record.
	User *User       `xorm:"-"`
	Repo *Repository `xorm:"-"`
}
// CloudbrainInfo embeds a Cloudbrain record together with a User, both
// tagged `xorm:"extends"`; it is the element type returned by Cloudbrains.
type CloudbrainInfo struct {
	Cloudbrain `xorm:"extends"`
	User       `xorm:"extends"`
}
// CloudBrainLoginResult holds a code, a message and a generic payload map.
// It has no JSON tags, so encoding/json matches keys against the field names.
// Presumably the reply to a cloudbrain login request — confirm against the caller.
type CloudBrainLoginResult struct {
	Code    string
	Msg     string
	Payload map[string]interface{}
}
// TaskRole describes one task role of a job; it is the element type of
// CreateJobParams.TaskRoles and is serialized with the camelCase keys in its tags.
type TaskRole struct {
	Name                  string `json:"name"`
	TaskNumber            int    `json:"taskNumber"`
	MinSucceededTaskCount int    `json:"minSucceededTaskCount"`
	MinFailedTaskCount    int    `json:"minFailedTaskCount"`
	CPUNumber             int    `json:"cpuNumber"`
	GPUNumber             int    `json:"gpuNumber"`
	MemoryMB              int    `json:"memoryMB"`
	ShmMB                 int    `json:"shmMB"`
	Command               string `json:"command"`
	NeedIBDevice          bool   `json:"needIBDevice"`
	IsMainRole            bool   `json:"isMainRole"`
	UseNNI                bool   `json:"useNNI"`
}
// StHostPath is the "hostPath" object of a Volume: a path, the path it is
// mounted at, and a read-only flag.
type StHostPath struct {
	Path      string `json:"path"`
	MountPath string `json:"mountPath"`
	ReadOnly  bool   `json:"readOnly"`
}

// Volume wraps a single StHostPath; it is the element type of CreateJobParams.Volumes.
type Volume struct {
	HostPath StHostPath `json:"hostPath"`
}
// CreateJobParams is the JSON body describing a job to create: its name,
// retry count, GPU type, image, task roles and volumes.
type CreateJobParams struct {
	JobName    string     `json:"jobName"`
	RetryCount int8       `json:"retryCount"`
	GpuType    string     `json:"gpuType"`
	Image      string     `json:"image"`
	TaskRoles  []TaskRole `json:"taskRoles"`
	Volumes    []Volume   `json:"volumes"`
}
// CreateJobResult is a code/msg/payload JSON envelope with an untyped payload.
// Presumably the reply to a create-job request — confirm against the caller.
type CreateJobResult struct {
	Code    string                 `json:"code"`
	Msg     string                 `json:"msg"`
	Payload map[string]interface{} `json:"payload"`
}

// GetJobResult is a code/msg/payload JSON envelope with an untyped payload.
// ConvertToJobResultPayload accepts a map of this payload's shape.
type GetJobResult struct {
	Code    string                 `json:"code"`
	Msg     string                 `json:"msg"`
	Payload map[string]interface{} `json:"payload"`
}
// GetImagesResult is a code/msg/payload JSON envelope whose payload is a
// typed GetImagesPayload.
type GetImagesResult struct {
	Code    string           `json:"code"`
	Msg     string           `json:"msg"`
	Payload GetImagesPayload `json:"payload"`
}

// GetImagesPayload is one page of images: a count, an optional page total,
// and the image entries found under the "rows" key.
type GetImagesPayload struct {
	Count      int          `json:"count"`
	TotalPages int          `json:"totalPages,omitempty"`
	ImageInfo  []*ImageInfo `json:"rows"`
}
// CloudbrainsOptions carries the filters used by Cloudbrains.
// In the visible part of Cloudbrains, RepoID, UserID and JobID are applied
// only when > 0, Type when >= 0, JobType when non-empty and CloudbrainIDs
// when non-empty.
type CloudbrainsOptions struct {
	ListOptions
	RepoID int64 // include all repos if empty
	UserID int64
	// NOTE(review): JobID is int64 here while Cloudbrain.JobID is a string
	// column — confirm the comparison on cloudbrain.job_id behaves as intended.
	JobID         int64
	SortType      string
	CloudbrainIDs []int64
	// JobStatus CloudbrainStatus
	Type    int
	JobType string
}
  139. type TaskPod struct {
  140. TaskRoleStatus struct {
  141. Name string `json:"name"`
  142. } `json:"taskRoleStatus"`
  143. //TaskStatuses []struct {
  144. // TaskIndex int `json:"taskIndex"`
  145. // PodUID string `json:"podUid"`
  146. // PodIP string `json:"podIp"`
  147. // PodName string `json:"podName"`
  148. // ContainerID string `json:"containerId"`
  149. // ContainerIP string `json:"containerIp"`
  150. // ContainerGpus string `json:"containerGpus"`
  151. // State string `json:"state"`
  152. // StartAt time.Time `json:"startAt"`
  153. // FinishedAt time.Time `json:"finishedAt"`
  154. // ExitCode int `json:"exitCode"`
  155. // ExitDiagnostics string `json:"exitDiagnostics"`
  156. // RetriedCount int `json:"retriedCount"`
  157. // StartTime string
  158. // FinishedTime string
  159. //} `json:"taskStatuses"`
  160. TaskStatuses []TaskStatuses `json:"taskStatuses"`
  161. }
  162. type TaskStatuses struct {
  163. TaskIndex int `json:"taskIndex"`
  164. PodUID string `json:"podUid"`
  165. PodIP string `json:"podIp"`
  166. PodName string `json:"podName"`
  167. ContainerID string `json:"containerId"`
  168. ContainerIP string `json:"containerIp"`
  169. ContainerGpus string `json:"containerGpus"`
  170. State string `json:"state"`
  171. StartAt time.Time `json:"startAt"`
  172. FinishedAt time.Time `json:"finishedAt"`
  173. ExitCode int `json:"exitCode"`
  174. ExitDiagnostics string `json:"exitDiagnostics"`
  175. RetriedCount int `json:"retriedCount"`
  176. StartTime string
  177. FinishedTime string
  178. }
  179. type TaskInfo struct {
  180. Username string `json:"username"`
  181. TaskName string `json:"task_name"`
  182. CodeName string `json:"code_name"`
  183. BenchmarkCategory []string `json:"selected_category"`
  184. CodeLink string `json:"code_link"`
  185. GpuType string `json:"gpu_type"`
  186. }
  187. func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  188. data, _ := json.Marshal(input)
  189. var taskPod TaskPod
  190. err := json.Unmarshal(data, &taskPod)
  191. taskPod.TaskStatuses[0].StartTime = time.Unix(taskPod.TaskStatuses[0].StartAt.Unix()+8*3600, 0).UTC().Format("2006-01-02 15:04:05")
  192. taskPod.TaskStatuses[0].FinishedTime = time.Unix(taskPod.TaskStatuses[0].FinishedAt.Unix()+8*3600, 0).UTC().Format("2006-01-02 15:04:05")
  193. //if the task is not finished or stopped,the cloudbrain renturns 0001-01-01 08:00:00, the finishedTime shows with -
  194. if strings.HasPrefix(taskPod.TaskStatuses[0].FinishedTime, "0001") {
  195. taskPod.TaskStatuses[0].FinishedTime = "-"
  196. }
  197. return taskPod, err
  198. }
  199. type JobResultPayload struct {
  200. ID string `json:"id"`
  201. Name string `json:"name"`
  202. Platform string `json:"platform"`
  203. JobStatus struct {
  204. Username string `json:"username"`
  205. State string `json:"state"`
  206. SubState string `json:"subState"`
  207. ExecutionType string `json:"executionType"`
  208. Retries int `json:"retries"`
  209. CreatedTime int64 `json:"createdTime"`
  210. CompletedTime int64 `json:"completedTime"`
  211. AppID string `json:"appId"`
  212. AppProgress string `json:"appProgress"`
  213. AppTrackingURL string `json:"appTrackingUrl"`
  214. AppLaunchedTime int64 `json:"appLaunchedTime"`
  215. AppCompletedTime interface{} `json:"appCompletedTime"`
  216. AppExitCode int `json:"appExitCode"`
  217. AppExitDiagnostics string `json:"appExitDiagnostics"`
  218. AppExitType interface{} `json:"appExitType"`
  219. VirtualCluster string `json:"virtualCluster"`
  220. StartTime string
  221. EndTime string
  222. } `json:"jobStatus"`
  223. TaskRoles map[string]interface{} `json:"taskRoles"`
  224. Resource struct {
  225. CPU int `json:"cpu"`
  226. Memory string `json:"memory"`
  227. NvidiaComGpu int `json:"nvidia.com/gpu"`
  228. } `json:"resource"`
  229. Config struct {
  230. Image string `json:"image"`
  231. JobID string `json:"jobId"`
  232. GpuType string `json:"gpuType"`
  233. JobName string `json:"jobName"`
  234. JobType string `json:"jobType"`
  235. TaskRoles []struct {
  236. Name string `json:"name"`
  237. ShmMB int `json:"shmMB"`
  238. Command string `json:"command"`
  239. MemoryMB int `json:"memoryMB"`
  240. CPUNumber int `json:"cpuNumber"`
  241. GpuNumber int `json:"gpuNumber"`
  242. IsMainRole bool `json:"isMainRole"`
  243. TaskNumber int `json:"taskNumber"`
  244. NeedIBDevice bool `json:"needIBDevice"`
  245. MinFailedTaskCount int `json:"minFailedTaskCount"`
  246. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  247. } `json:"taskRoles"`
  248. RetryCount int `json:"retryCount"`
  249. } `json:"config"`
  250. Userinfo struct {
  251. User string `json:"user"`
  252. OrgID string `json:"org_id"`
  253. } `json:"userinfo"`
  254. }
  255. func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  256. data, _ := json.Marshal(input)
  257. var jobResultPayload JobResultPayload
  258. err := json.Unmarshal(data, &jobResultPayload)
  259. jobResultPayload.JobStatus.StartTime = time.Unix(jobResultPayload.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05")
  260. jobResultPayload.JobStatus.EndTime = time.Unix(jobResultPayload.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05")
  261. if jobResultPayload.JobStatus.State == string(JobWaiting) {
  262. jobResultPayload.JobStatus.StartTime = "-"
  263. jobResultPayload.JobStatus.EndTime = "-"
  264. }
  265. return jobResultPayload, err
  266. }
// ImagesResultPayload holds a list of image descriptions.
// NOTE(review): the list is read from the JSON key "taskStatuses", which looks
// copied from TaskPod — confirm the key against the service response.
type ImagesResultPayload struct {
	Images []struct {
		ID          int    `json:"id"`
		Name        string `json:"name"`
		Place       string `json:"place"`
		Description string `json:"description"`
		Provider    string `json:"provider"`
		Createtime  string `json:"createtime"`
		Remark      string `json:"remark"`
	} `json:"taskStatuses"`
}
// ImageInfo describes one image; it is the element type of
// GetImagesPayload.ImageInfo.
type ImageInfo struct {
	ID          int    `json:"id"`
	Name        string `json:"name"`
	Place       string `json:"place"`
	Description string `json:"description"`
	Provider    string `json:"provider"`
	Createtime  string `json:"createtime"`
	Remark      string `json:"remark"`
	IsPublic    int    `json:"isPublic"`
	// PlaceView has no JSON tag; presumably a display form of Place filled in
	// by the caller — confirm.
	PlaceView string
}
// Categories is a JSON object whose "category" key holds a list of Category.
type Categories struct {
	Category []*Category `json:"category"`
}

// Category is an id/value pair.
type Category struct {
	Id    int    `json:"id"`
	Value string `json:"value"`
}
// GpuInfos is a JSON object whose "gpu_type" key holds a list of GpuInfo.
type GpuInfos struct {
	GpuInfo []*GpuInfo `json:"gpu_type"`
}

// GpuInfo is an id/value pair plus a queue name.
type GpuInfo struct {
	Id    int    `json:"id"`
	Value string `json:"value"`
	Queue string `json:"queue"`
}
// ResourceSpecs is a JSON object holding a list of ResourceSpec.
// NOTE(review): the key is spelled "resorce_specs"; it must match whatever
// produces the JSON, so do not correct it here without changing the source too.
type ResourceSpecs struct {
	ResourceSpec []*ResourceSpec `json:"resorce_specs"`
}

// ResourceSpec is one resource specification: CPU and GPU counts plus
// memory and shared-memory sizes (MiB, per the field names).
type ResourceSpec struct {
	Id          int `json:"id"`
	CpuNum      int `json:"cpu"`
	GpuNum      int `json:"gpu"`
	MemMiB      int `json:"memMiB"`
	ShareMemMiB int `json:"shareMemMiB"`
}
// FlavorInfos is a JSON object whose "flavor_info" key holds a list of FlavorInfo.
type FlavorInfos struct {
	FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

// FlavorInfo is an id/value pair plus a description.
type FlavorInfo struct {
	Id    int    `json:"id"`
	Value string `json:"value"`
	Desc  string `json:"desc"`
}
// PoolInfos is a JSON object whose "pool_info" key holds a list of PoolInfo.
type PoolInfos struct {
	PoolInfo []*PoolInfo `json:"pool_info"`
}

// PoolInfo is the id, name and type of one pool.
type PoolInfo struct {
	PoolId   string `json:"pool_id"`
	PoolName string `json:"pool_name"`
	PoolType string `json:"pool_type"`
}
// CommitImageParams is the JSON body naming a container (ip and container id)
// and the tag and description of the image to commit from it.
type CommitImageParams struct {
	Ip               string `json:"ip"`
	TaskContainerId  string `json:"taskContainerId"`
	ImageTag         string `json:"imageTag"`
	ImageDescription string `json:"imageDescription"`
}

// CommitImageResult is a code/msg/payload JSON envelope with an untyped payload.
type CommitImageResult struct {
	Code    string                 `json:"code"`
	Msg     string                 `json:"msg"`
	Payload map[string]interface{} `json:"payload"`
}
// CloudBrainResult is a code/msg JSON envelope without a payload.
type CloudBrainResult struct {
	Code string `json:"code"`
	Msg  string `json:"msg"`
}
// CreateNotebookParams is the JSON body describing a notebook to create.
// JobName is serialized under the key "name".
type CreateNotebookParams struct {
	JobName     string    `json:"name"`
	Description string    `json:"description"`
	ProfileID   string    `json:"profile_id"`
	Flavor      string    `json:"flavor"`
	Spec        Spec      `json:"spec"`
	Workspace   Workspace `json:"workspace"`
	Pool        Pool      `json:"pool"`
}

// Pool is the "pool" object of CreateNotebookParams.
type Pool struct {
	ID   string `json:"id"`
	Name string `json:"name"`
	Type string `json:"type"`
}

// Workspace is the "workspace" object of CreateNotebookParams.
type Workspace struct {
	ID string `json:"id"`
}

// Spec is the "spec" object of CreateNotebookParams: storage and auto-stop settings.
type Spec struct {
	Storage  Storage  `json:"storage"`
	AutoStop AutoStop `json:"auto_stop"`
}

// AutoStop is the "auto_stop" object of Spec.
// NOTE(review): the unit of Duration is not stated here; the response types
// annotate their duration field as seconds — confirm it is the same.
type AutoStop struct {
	Enable   bool `json:"enable"`
	Duration int  `json:"duration"`
}

// Storage is the "storage" object of Spec.
type Storage struct {
	Type     string   `json:"type"`
	Location Location `json:"location"`
}

// Location is the "location" object of Storage.
type Location struct {
	Path string `json:"path"`
}
// NotebookResult carries only the error_code/error_msg pair of a notebook reply.
type NotebookResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg  string `json:"error_msg"`
}
// CreateNotebookResult is the JSON reply describing a notebook: the error
// pair, identity, status, timestamps (kept as strings), profile and flavor details.
type CreateNotebookResult struct {
	ErrorCode             string `json:"error_code"`
	ErrorMsg              string `json:"error_msg"`
	ID                    string `json:"id"`
	Name                  string `json:"name"`
	Description           string `json:"description"`
	Status                string `json:"status"`
	CreationTimestamp     string `json:"creation_timestamp"`
	LatestUpdateTimestamp string `json:"latest_update_timestamp"`
	Profile               struct {
		ID          string `json:"id"`
		Name        string `json:"name"`
		Description string `json:"description"`
		DeType      string `json:"de_type"`
		FlavorType  string `json:"flavor_type"`
	} `json:"profile"`
	Flavor        string `json:"flavor"`
	FlavorDetails struct {
		Name          string `json:"name"`
		Status        string `json:"status"`
		QueuingNum    int    `json:"queuing_num"`
		QueueLeftTime int    `json:"queue_left_time"` // seconds
		Duration      int    `json:"duration"`        // auto-stop time, seconds
	} `json:"flavor_details"`
}
// GetNotebookResult is the JSON reply describing a notebook, including its
// queuing information and access annotations. The untagged string fields
// (CreateTime, LatestUpdateTime, BeginTime, EndTime) sit next to timestamp
// fields; presumably they are display forms filled in by the caller — confirm.
type GetNotebookResult struct {
	ErrorCode             string `json:"error_code"`
	ErrorMsg              string `json:"error_msg"`
	ID                    string `json:"id"`
	Name                  string `json:"name"`
	Description           string `json:"description"`
	Status                string `json:"status"`
	CreationTimestamp     string `json:"creation_timestamp"`
	CreateTime            string
	LatestUpdateTimestamp string `json:"latest_update_timestamp"`
	LatestUpdateTime      string
	Profile               struct {
		ID          string `json:"id"`
		Name        string `json:"name"`
		Description string `json:"description"`
		DeType      string `json:"de_type"`
		FlavorType  string `json:"flavor_type"`
	} `json:"profile"`
	Flavor        string `json:"flavor"`
	FlavorDetails struct {
		Name          string `json:"name"`
		Status        string `json:"status"`
		QueuingNum    int    `json:"queuing_num"`
		QueueLeftTime int    `json:"queue_left_time"` // seconds
		Duration      int    `json:"duration"`        // auto-stop time, seconds
	} `json:"flavor_details"`
	QueuingInfo struct {
		ID             string `json:"id"`
		Name           string `json:"name"`
		Flavor         string `json:"flavor"`
		DeType         string `json:"de_type"`
		Status         string `json:"status"`
		BeginTimestamp int    `json:"begin_timestamp"` // time the instance entered the queue
		BeginTime      string
		RemainTime     int `json:"remain_time"` // remaining time of the instance
		EndTimestamp   int `json:"end_timestamp"`
		EndTime        string
		Rank           int `json:"rank"` // rank of the instance in the queue
	} `json:"queuing_info"`
	Spec struct {
		Annotations struct {
			TargetDomain string `json:"target_domain"`
			Url          string `json:"url"`
		} `json:"annotations"`
	} `json:"spec"`
}
// GetTokenParams is the JSON body of a token request. It nests as
// auth -> {identity -> {methods, password -> user -> {name, password, domain}},
// scope -> project -> name}.
type GetTokenParams struct {
	Auth Auth `json:"auth"`
}

// Auth is the "auth" object of GetTokenParams.
type Auth struct {
	Identity Identity `json:"identity"`
	Scope    Scope    `json:"scope"`
}

// Scope is the "scope" object of Auth.
type Scope struct {
	Project Project `json:"project"`
}

// Project is the "project" object of Scope.
type Project struct {
	Name string `json:"name"`
}

// Identity is the "identity" object of Auth.
type Identity struct {
	Methods  []string `json:"methods"`
	Password Password `json:"password"`
}

// Password is the "password" object of Identity; it wraps the user credentials.
type Password struct {
	User NotebookUser `json:"user"`
}

// NotebookUser is the "user" object of Password.
// Password is serialized as-is under the key "password".
type NotebookUser struct {
	Name     string `json:"name"`
	Password string `json:"password"`
	Domain   Domain `json:"domain"`
}

// Domain is the "domain" object of NotebookUser.
type Domain struct {
	Name string `json:"name"`
}
// Action names; presumably the values accepted by NotebookAction.Action —
// confirm against the caller.
const (
	ActionStart   = "start"
	ActionStop    = "stop"
	ActionRestart = "restart"
	ActionQueue   = "queue"
	ActionDequeue = "dequeue"
)

// NotebookAction is a JSON body holding a single "action" string.
type NotebookAction struct {
	Action string `json:"action"`
}

// NotebookActionResult is the JSON reply carrying the error pair plus the
// current status and the previous state.
type NotebookActionResult struct {
	ErrorCode     string `json:"error_code"`
	ErrorMsg      string `json:"error_msg"`
	CurrentStatus string `json:"current_status"`
	PreviousState string `json:"previous_state"`
}
// NotebookGetJobTokenResult is the JSON reply carrying the error pair and a token.
type NotebookGetJobTokenResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg  string `json:"error_msg"`
	Token     string `json:"token"`
}

// NotebookDelResult is a JSON reply carrying only an instance id.
type NotebookDelResult struct {
	InstanceID string `json:"instance_id"`
}
// CreateTrainJobParams is the JSON body describing a training job to create.
type CreateTrainJobParams struct {
	JobName     string `json:"job_name"`
	Description string `json:"job_desc"`
	Config      Config `json:"config"`
	WorkspaceID string `json:"workspace_id"`
}

// Config is the "config" object of CreateTrainJobParams.
// The commented-out fields are kept from the original as a record of keys
// that are currently not sent.
type Config struct {
	WorkServerNum int         `json:"worker_server_num"`
	AppUrl        string      `json:"app_url"`       // code directory of the training job
	BootFileUrl   string      `json:"boot_file_url"` // boot file of the training job; must be under the code directory
	Parameter     []Parameter `json:"parameter"`
	DataUrl       string      `json:"data_url"` // OBS path URL of the dataset the training job needs
	//DatasetID string `json:"dataset_id"`
	//DataVersionID string `json:"dataset_version_id"`
	//DataSource []DataSource `json:"data_source"`
	//SpecID int64 `json:"spec_id"`
	EngineID int64 `json:"engine_id"`
	//ModelID int64 `json:"model_id"`
	TrainUrl string `json:"train_url"` // OBS path URL for the training job's output files
	LogUrl   string `json:"log_url"`
	//UserImageUrl string `json:"user_image_url"`
	//UserCommand string `json:"user_command"`
	CreateVersion bool `json:"create_version"`
	//Volumes []Volumes `json:"volumes"`
	Flavor Flavor `json:"flavor"`
	PoolID string `json:"pool_id"`
}
// CreateConfigParams is the JSON body describing a training-job configuration
// to create. It carries the same job settings as Config (minus create_version)
// plus the configuration's name and description.
type CreateConfigParams struct {
	ConfigName    string      `json:"config_name"`
	Description   string      `json:"config_desc"`
	WorkServerNum int         `json:"worker_server_num"`
	AppUrl        string      `json:"app_url"`       // code directory of the training job
	BootFileUrl   string      `json:"boot_file_url"` // boot file of the training job; must be under the code directory
	Parameter     []Parameter `json:"parameter"`
	DataUrl       string      `json:"data_url"` // OBS path URL of the dataset the training job needs
	//DatasetID string `json:"dataset_id"`
	//DataVersionID string `json:"dataset_version_id"`
	//DataSource []DataSource `json:"data_source"`
	//SpecID int64 `json:"spec_id"`
	EngineID int64 `json:"engine_id"`
	//ModelID int64 `json:"model_id"`
	TrainUrl string `json:"train_url"` // OBS path URL for the training job's output files
	LogUrl   string `json:"log_url"`
	//UserImageUrl string `json:"user_image_url"`
	//UserCommand string `json:"user_command"`
	//CreateVersion bool `json:"create_version"`
	//Volumes []Volumes `json:"volumes"`
	Flavor Flavor `json:"flavor"`
	PoolID string `json:"pool_id"`
}
// Parameter is a label/value pair; it is the element type of the "parameter"
// lists in the training-job types.
type Parameter struct {
	Label string `json:"label"`
	Value string `json:"value"`
}

// Parameters is a JSON object whose "parameter" key holds a list of Parameter.
type Parameters struct {
	Parameter []Parameter `json:"parameter"`
}
// DataSource names a dataset by id and version, with a type and a data URL.
// Within this file section it is referenced only from commented-out fields.
type DataSource struct {
	DatasetID      string `json:"dataset_id"`
	DatasetVersion string `json:"dataset_version"`
	Type           string `json:"type"`
	DataUrl        string `json:"data_url"`
}

// Volumes pairs an Nfs mount with a HostPath mount.
// Within this file section it is referenced only from commented-out fields.
type Volumes struct {
	Nfs      Nfs      `json:"nfs"`
	HostPath HostPath `json:"host_path"`
}

// Nfs is the "nfs" object of Volumes: an id, source and destination paths
// and a read-only flag.
type Nfs struct {
	ID         string `json:"id"`
	SourcePath string `json:"src_path"`
	DestPath   string `json:"dest_path"`
	ReadOnly   bool   `json:"read_only"`
}

// HostPath is the "host_path" object of Volumes: source and destination
// paths and a read-only flag.
type HostPath struct {
	SourcePath string `json:"src_path"`
	DestPath   string `json:"dest_path"`
	ReadOnly   bool   `json:"read_only"`
}

// Flavor is the "flavor" object of the training-job types; it holds a single code.
type Flavor struct {
	Code string `json:"code"`
}
// CreateTrainJobResult is the JSON reply carrying the error pair, a success
// flag, and the identity (job/version/resource ids and names), status and
// creation time of a training job.
type CreateTrainJobResult struct {
	ErrorCode   string `json:"error_code"`
	ErrorMsg    string `json:"error_msg"`
	IsSuccess   bool   `json:"is_success"`
	JobName     string `json:"job_name"`
	JobID       int64  `json:"job_id"`
	Status      int    `json:"status"`
	CreateTime  int64  `json:"create_time"`
	VersionID   int64  `json:"version_id"`
	ResourceID  string `json:"resource_id"`
	VersionName string `json:"version_name"`
}

// CreateTrainJobConfigResult is the JSON reply carrying only the error pair
// and a success flag.
type CreateTrainJobConfigResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg  string `json:"error_msg"`
	IsSuccess bool   `json:"is_success"`
}
// GetResourceSpecsResult is the JSON reply listing resource specifications,
// with the error pair, a success flag and a total count.
type GetResourceSpecsResult struct {
	ErrorCode      string  `json:"error_code"`
	ErrorMsg       string  `json:"error_msg"`
	IsSuccess      bool    `json:"is_success"`
	SpecTotalCount int     `json:"spec_total_count"`
	Specs          []Specs `json:"specs"`
}

// Specs is one entry of GetResourceSpecsResult.Specs.
// IsNoResource is read from the key "no_resource".
type Specs struct {
	Core          string `json:"core"`
	Cpu           string `json:"cpu"`
	IsNoResource  bool   `json:"no_resource"`
	GpuType       string `json:"gpu_type"`
	SpecID        int64  `json:"spec_id"`
	GpuNum        int    `json:"gpu_num"`
	SpecCode      string `json:"spec_code"`
	Storage       string `json:"storage"`
	MaxNum        int    `json:"max_num"`
	UnitNum       int    `json:"unit_num"`
	InterfaceType int    `json:"interface_type"`
}
// GetConfigListResult is the JSON reply listing training-job configurations,
// with the error pair, a success flag and a total count. The list is read
// from the key "configs".
type GetConfigListResult struct {
	ErrorCode        string       `json:"error_code"`
	ErrorMsg         string       `json:"error_msg"`
	IsSuccess        bool         `json:"is_success"`
	ConfigTotalCount int          `json:"config_total_count"`
	ParaConfigs      []ParaConfig `json:"configs"`
}

// ParaConfig is one entry of GetConfigListResult.ParaConfigs.
type ParaConfig struct {
	ConfigName    string `json:"config_name"`
	ConfigDesc    string `json:"config_desc"`
	CreateTime    int64  `json:"create_time"`
	EngineType    int    `json:"engine_type"`
	EngineName    string `json:"engine_name"`
	EngineId      int64  `json:"engine_id"`
	EngineVersion string `json:"engine_version"`
	UserImageUrl  string `json:"user_image_url"`
	UserCommand   string `json:"user_command"`
	// Result has no JSON tag; presumably the caller attaches the configuration's
	// detail here after a separate lookup — confirm.
	Result GetConfigResult
}
// GetConfigResult is the JSON reply describing one training-job configuration:
// the error pair, a success flag, and the same settings as CreateConfigParams.
type GetConfigResult struct {
	ErrorCode     string      `json:"error_code"`
	ErrorMsg      string      `json:"error_msg"`
	IsSuccess     bool        `json:"is_success"`
	ConfigName    string      `json:"config_name"`
	Description   string      `json:"config_desc"`
	WorkServerNum int         `json:"worker_server_num"`
	AppUrl        string      `json:"app_url"`       // code directory of the training job
	BootFileUrl   string      `json:"boot_file_url"` // boot file of the training job; must be under the code directory
	Parameter     []Parameter `json:"parameter"`
	DataUrl       string      `json:"data_url"` // OBS path URL of the dataset the training job needs
	//DatasetID string `json:"dataset_id"`
	//DataVersionID string `json:"dataset_version_id"`
	//DataSource []DataSource `json:"data_source"`
	//SpecID int64 `json:"spec_id"`
	EngineID int64 `json:"engine_id"`
	//ModelID int64 `json:"model_id"`
	TrainUrl string `json:"train_url"` // OBS path URL for the training job's output files
	LogUrl   string `json:"log_url"`
	//UserImageUrl string `json:"user_image_url"`
	//UserCommand string `json:"user_command"`
	//CreateVersion bool `json:"create_version"`
	//Volumes []Volumes `json:"volumes"`
	Flavor Flavor `json:"flavor"`
	PoolID string `json:"pool_id"`
}
// ErrorResult is a JSON error reply with a success flag.
// NOTE(review): ErrorMsg is read from "error_message" here, while the other
// result types in this file use "error_msg" — confirm this difference is intended.
type ErrorResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg  string `json:"error_message"`
	IsSuccess bool   `json:"is_success"`
}
// GetTrainJobResult is the JSON reply describing one training-job version.
// The untagged fields (Status, CreateTime, TrainJobDuration, DatasetName) sit
// next to raw values; presumably they are display forms filled in by the
// caller — confirm.
type GetTrainJobResult struct {
	IsSuccess        bool   `json:"is_success"`
	JobName          string `json:"job_name"`
	JobID            int64  `json:"job_id"`
	Description      string `json:"job_desc"`
	IntStatus        int    `json:"status"`
	Status           string
	LongCreateTime   int64 `json:"create_time"`
	CreateTime       string
	Duration         int64       `json:"duration"` // running time of the training job, in milliseconds
	TrainJobDuration string      // running time of the training job, formatted hh:mm:ss
	VersionID        int64       `json:"version_id"`
	ResourceID       string      `json:"resource_id"`
	VersionName      string      `json:"version_name"`
	PreVersionID     int64       `json:"pre_version_id"`
	WorkServerNum    int         `json:"worker_server_num"`
	AppUrl           string      `json:"app_url"`       // code directory of the training job
	BootFileUrl      string      `json:"boot_file_url"` // boot file of the training job; must be under the code directory
	Parameter        []Parameter `json:"parameter"`
	DataUrl          string      `json:"data_url"` // OBS path URL of the dataset the training job needs
	//DatasetID string `json:"dataset_id"`
	//DataVersionID string `json:"dataset_version_id"`
	//DataSource []DataSource `json:"data_source"`
	//SpecID int64 `json:"spec_id"`
	EngineID      int64  `json:"engine_id"`
	EngineName    string `json:"engine_name"`
	EngineVersion string `json:"engine_version"`
	//ModelID int64 `json:"model_id"`
	TrainUrl string `json:"train_url"` // OBS path URL for the training job's output files
	LogUrl   string `json:"log_url"`
	//UserImageUrl string `json:"user_image_url"`
	//UserCommand string `json:"user_command"`
	//Volumes []Volumes `json:"volumes"`
	Flavor       Flavor `json:"flavor"`
	PoolID       string `json:"pool_id"`
	PoolName     string `json:"pool_name"`
	NasMountPath string `json:"nas_mount_path"`
	NasShareAddr string `json:"nas_share_addr"`
	DatasetName  string
}
// GetTrainJobLogResult is the JSON reply carrying a chunk of log content:
// the error pair, a success flag, the content, a line count, and the start
// and end line markers (kept as strings).
type GetTrainJobLogResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg  string `json:"error_msg"`
	IsSuccess bool   `json:"is_success"`
	Content   string `json:"content"`
	Lines     int    `json:"lines"`
	StartLine string `json:"start_line"`
	EndLine   string `json:"end_line"`
}

// GetTrainJobLogFileNamesResult is the JSON reply carrying the error pair,
// a success flag and a list of log file names.
type GetTrainJobLogFileNamesResult struct {
	ErrorCode   string   `json:"error_code"`
	ErrorMsg    string   `json:"error_msg"`
	IsSuccess   bool     `json:"is_success"`
	LogFileList []string `json:"log_file_list"`
}
  727. type TrainJobResult struct {
  728. ErrorCode string `json:"error_code"`
  729. ErrorMsg string `json:"error_msg"`
  730. IsSuccess bool `json:"is_success"`
  731. }
  732. type LogFile struct {
  733. Name string
  734. }
  735. func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
  736. sess := x.NewSession()
  737. defer sess.Close()
  738. var cond = builder.NewCond()
  739. if opts.RepoID > 0 {
  740. cond = cond.And(
  741. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  742. )
  743. }
  744. if opts.UserID > 0 {
  745. cond = cond.And(
  746. builder.Eq{"cloudbrain.user_id": opts.UserID},
  747. )
  748. }
  749. if (opts.JobID) > 0 {
  750. cond = cond.And(
  751. builder.Eq{"cloudbrain.job_id": opts.JobID},
  752. )
  753. }
  754. if (opts.Type) >= 0 {
  755. cond = cond.And(
  756. builder.Eq{"cloudbrain.type": opts.Type},
  757. )
  758. }
  759. if (opts.JobType) != "" {
  760. cond = cond.And(
  761. builder.Eq{"cloudbrain.job_type": opts.JobType},
  762. )
  763. }
  764. // switch opts.JobStatus {
  765. // case JobWaiting:
  766. // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
  767. // case JobFailed:
  768. // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
  769. // case JobStopped:
  770. // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
  771. // case JobSucceeded:
  772. // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
  773. // }
  774. if len(opts.CloudbrainIDs) > 0 {
  775. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  776. }
  777. count, err := sess.Where(cond).Count(new(Cloudbrain))
  778. if err != nil {
  779. return nil, 0, fmt.Errorf("Count: %v", err)
  780. }
  781. if opts.Page >= 0 && opts.PageSize > 0 {
  782. var start int
  783. if opts.Page == 0 {
  784. start = 0
  785. } else {
  786. start = (opts.Page - 1) * opts.PageSize
  787. }
  788. sess.Limit(opts.PageSize, start)
  789. }
  790. sess.OrderBy("cloudbrain.created_unix DESC")
  791. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  792. if err := sess.Table(&Cloudbrain{}).Where(cond).
  793. Join("left", "`user`", "cloudbrain.user_id = `user`.id").
  794. Find(&cloudbrains); err != nil {
  795. return nil, 0, fmt.Errorf("Find: %v", err)
  796. }
  797. sess.Close()
  798. return cloudbrains, count, nil
  799. }
  800. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  801. if _, err = x.Insert(cloudbrain); err != nil {
  802. return err
  803. }
  804. return nil
  805. }
  806. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  807. has, err := x.Get(cb)
  808. if err != nil {
  809. return nil, err
  810. } else if !has {
  811. return nil, ErrJobNotExist{}
  812. }
  813. return cb, nil
  814. }
  815. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  816. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  817. return getRepoCloudBrain(cb)
  818. }
  819. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  820. cb := &Cloudbrain{JobID: jobID}
  821. return getRepoCloudBrain(cb)
  822. }
  823. func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) {
  824. cloudBrains := make([]*Cloudbrain, 0)
  825. err := x.Cols("job_id", "status", "type").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains)
  826. return cloudBrains, err
  827. }
  828. func GetCloudbrainsNeededStopByRepoID(repoID int64) ([]*Cloudbrain, error) {
  829. cloudBrains := make([]*Cloudbrain, 0)
  830. err := x.Cols("job_id", "status", "type").Where("repo_id=? AND status !=?", repoID, string(JobStopped)).Find(&cloudBrains)
  831. return cloudBrains, err
  832. }
  833. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  834. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  835. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  836. return
  837. }
  838. func SetTrainJobStatusByJobID(jobID string, status string, duration int64, trainjobduration string) (err error) {
  839. cb := &Cloudbrain{JobID: jobID, Status: string(status), Duration: duration, TrainJobDuration: trainjobduration}
  840. _, err = x.Cols("status", "duration", "train_job_duration").Where("cloudbrain.job_id=?", jobID).Update(cb)
  841. return
  842. }
  843. func UpdateJob(job *Cloudbrain) error {
  844. return updateJob(x, job)
  845. }
  846. func updateJob(e Engine, job *Cloudbrain) error {
  847. var sess *xorm.Session
  848. sess = e.Where("job_id = ?", job.JobID)
  849. _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
  850. return err
  851. }
  852. // func UpdateTrainJob(job *CloudbrainInfo) error {
  853. // return updateTrainJob(x, job)
  854. // }
  855. // func updateTrainJob(e Engine, job *CloudbrainInfo) error {
  856. // var sess *xorm.Session
  857. // sess = e.Where("job_id = ?", job.Cloudbrain.JobID)
  858. // _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
  859. // return err
  860. // }
  861. func DeleteJob(job *Cloudbrain) error {
  862. return deleteJob(x, job)
  863. }
  864. func deleteJob(e Engine, job *Cloudbrain) error {
  865. _, err := e.ID(job.ID).Delete(job)
  866. return err
  867. }
  868. func GetCloudbrainByName(jobName string) (*Cloudbrain, error) {
  869. cb := &Cloudbrain{JobName: jobName}
  870. return getRepoCloudBrain(cb)
  871. }
  872. func CanDelJob(isSigned bool, user *User, job *CloudbrainInfo) bool {
  873. if !isSigned || (job.Status != string(JobStopped) && job.Status != string(JobFailed) && job.Status != string(ModelArtsStartFailed) && job.Status != string(ModelArtsCreateFailed)) {
  874. return false
  875. }
  876. repo, err := GetRepositoryByID(job.RepoID)
  877. if err != nil {
  878. log.Error("GetRepositoryByID failed:%v", err.Error())
  879. return false
  880. }
  881. permission, _ := GetUserRepoPermission(repo, user)
  882. if err != nil {
  883. log.Error("GetUserRepoPermission failed:%v", err.Error())
  884. return false
  885. }
  886. if (user.ID == job.UserID && permission.AccessMode >= AccessModeWrite) || user.IsAdmin || permission.AccessMode >= AccessModeAdmin {
  887. return true
  888. }
  889. return false
  890. }