You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

cloudbrain.go 30 kB

5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904
  1. package models
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "time"
  6. "xorm.io/xorm"
  7. "code.gitea.io/gitea/modules/setting"
  8. "code.gitea.io/gitea/modules/timeutil"
  9. "xorm.io/builder"
  10. )
  // CloudbrainStatus is the string form of a cloudbrain job state.
  type CloudbrainStatus string

  // JobType is the string form of a cloudbrain job kind.
  type JobType string

  // ModelArtsJobStatus is the string form of a ModelArts job state.
  type ModelArtsJobStatus string

  // Cloudbrain job states.
  const (
  	JobWaiting   CloudbrainStatus = "WAITING"
  	JobStopped   CloudbrainStatus = "STOPPED"
  	JobSucceeded CloudbrainStatus = "SUCCEEDED"
  	JobFailed    CloudbrainStatus = "FAILED"
  	JobRunning   CloudbrainStatus = "RUNNING"
  )

  // Cloudbrain job kinds.
  const (
  	JobTypeDebug        JobType = "DEBUG"
  	JobTypeBenchmark    JobType = "BENCHMARK"
  	JobTypeSnn4imagenet JobType = "SNN4IMAGENET"
  )

  // ModelArts job states.
  const (
  	ModelArtsCreateQueue  ModelArtsJobStatus = "CREATE_QUEUING" // queued for creation on free resources
  	ModelArtsCreating     ModelArtsJobStatus = "CREATING"       // being created
  	ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED"  // creation failed
  	ModelArtsStartQueuing ModelArtsJobStatus = "START_QUEUING"  // queued for start on free resources
  	ModelArtsReadyToStart ModelArtsJobStatus = "READY_TO_START" // free resources, waiting to start
  	ModelArtsStarting     ModelArtsJobStatus = "STARTING"       // starting
  	ModelArtsRestarting   ModelArtsJobStatus = "RESTARTING"     // restarting
  	ModelArtsStartFailed  ModelArtsJobStatus = "START_FAILED"   // start failed
  	ModelArtsRunning      ModelArtsJobStatus = "RUNNING"        // running
  	ModelArtsStopping     ModelArtsJobStatus = "STOPPING"       // stopping
  	ModelArtsStopped      ModelArtsJobStatus = "STOPPED"        // stopped
  	ModelArtsUnavailable  ModelArtsJobStatus = "UNAVAILABLE"    // faulty
  	ModelArtsDeleted      ModelArtsJobStatus = "DELETED"        // deleted
  	ModelArtsResizing     ModelArtsJobStatus = "RESIZING"       // specification change in progress
  	ModelArtsResizFailed  ModelArtsJobStatus = "RESIZE_FAILED"  // specification change failed
  )
  39. type Cloudbrain struct {
  40. ID int64 `xorm:"pk autoincr"`
  41. JobID string `xorm:"INDEX NOT NULL"`
  42. JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
  43. JobName string `xorm:"INDEX"`
  44. Status string `xorm:"INDEX"`
  45. UserID int64 `xorm:"INDEX"`
  46. RepoID int64 `xorm:"INDEX"`
  47. SubTaskName string `xorm:"INDEX"`
  48. ContainerID string
  49. ContainerIp string
  50. CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
  51. UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  52. DeletedAt time.Time `xorm:"deleted"`
  53. CanDebug bool `xorm:"-"`
  54. Type int `xorm:"INDEX DEFAULT 0"`
  55. VersionID int64 `xorm:"INDEX DEFAULT 0"`
  56. VersionName string
  57. Uuid string
  58. User *User `xorm:"-"`
  59. Repo *Repository `xorm:"-"`
  60. }
  // CloudBrainLoginResult holds the code, message and free-form payload of a
  // cloudbrain login response. The fields have no JSON tags.
  type CloudBrainLoginResult struct {
  	Code    string
  	Msg     string
  	Payload map[string]interface{}
  }
  // TaskRole is one entry of CreateJobParams.TaskRoles.
  type TaskRole struct {
  	Name                  string `json:"name"`
  	TaskNumber            int    `json:"taskNumber"`
  	MinSucceededTaskCount int    `json:"minSucceededTaskCount"`
  	MinFailedTaskCount    int    `json:"minFailedTaskCount"`
  	CPUNumber             int    `json:"cpuNumber"`
  	GPUNumber             int    `json:"gpuNumber"`
  	MemoryMB              int    `json:"memoryMB"`
  	ShmMB                 int    `json:"shmMB"`
  	Command               string `json:"command"`
  	NeedIBDevice          bool   `json:"needIBDevice"`
  	IsMainRole            bool   `json:"isMainRole"`
  	UseNNI                bool   `json:"useNNI"`
  }

  // StHostPath pairs a path with its mount path and a read-only flag.
  type StHostPath struct {
  	Path      string `json:"path"`
  	MountPath string `json:"mountPath"`
  	ReadOnly  bool   `json:"readOnly"`
  }

  // Volume wraps a StHostPath under the "hostPath" JSON key.
  type Volume struct {
  	HostPath StHostPath `json:"hostPath"`
  }

  // CreateJobParams is the JSON body describing a job to create: its name,
  // retry count, GPU type, image, task roles and volumes.
  type CreateJobParams struct {
  	JobName    string     `json:"jobName"`
  	RetryCount int8       `json:"retryCount"`
  	GpuType    string     `json:"gpuType"`
  	Image      string     `json:"image"`
  	TaskRoles  []TaskRole `json:"taskRoles"`
  	Volumes    []Volume   `json:"volumes"`
  }
  // CreateJobResult is the decoded response to a create-job request: a code,
  // a message and a free-form payload.
  type CreateJobResult struct {
  	Code    string                 `json:"code"`
  	Msg     string                 `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }

  // GetJobResult is the decoded response to a get-job request. It has the
  // same fields and JSON keys as CreateJobResult.
  type GetJobResult struct {
  	Code    string                 `json:"code"`
  	Msg     string                 `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  106. type GetImagesResult struct {
  107. Code string `json:"code"`
  108. Msg string `json:"msg"`
  109. Payload GetImagesPayload `json:"payload"`
  110. }
  111. type GetImagesPayload struct {
  112. Count int `json:"count"`
  113. TotalPages int `json:"totalPages,omitempty"`
  114. ImageInfo []*ImageInfo `json:"rows"`
  115. }
  116. type CloudbrainsOptions struct {
  117. ListOptions
  118. RepoID int64 // include all repos if empty
  119. UserID int64
  120. JobID int64
  121. SortType string
  122. CloudbrainIDs []int64
  123. // JobStatus CloudbrainStatus
  124. Type int
  125. }
  // TaskPod is the decoded form of one task role entry: the role name plus
  // the status of each of its tasks.
  type TaskPod struct {
  	TaskRoleStatus struct {
  		Name string `json:"name"`
  	} `json:"taskRoleStatus"`
  	TaskStatuses []struct {
  		TaskIndex       int       `json:"taskIndex"`
  		PodUID          string    `json:"podUid"`
  		PodIP           string    `json:"podIp"`
  		PodName         string    `json:"podName"`
  		ContainerID     string    `json:"containerId"`
  		ContainerIP     string    `json:"containerIp"`
  		ContainerGpus   string    `json:"containerGpus"`
  		State           string    `json:"state"`
  		StartAt         time.Time `json:"startAt"`
  		FinishedAt      time.Time `json:"finishedAt"`
  		ExitCode        int       `json:"exitCode"`
  		ExitDiagnostics string    `json:"exitDiagnostics"`
  		RetriedCount    int       `json:"retriedCount"`
  		// StartTime and FinishedTime are display strings filled in by
  		// ConvertToTaskPod for the first entry only.
  		StartTime    string
  		FinishedTime string
  	} `json:"taskStatuses"`
  }

  // TaskInfo describes a task by user name, task name, code name and link,
  // selected benchmark categories and GPU type.
  type TaskInfo struct {
  	Username          string   `json:"username"`
  	TaskName          string   `json:"task_name"`
  	CodeName          string   `json:"code_name"`
  	BenchmarkCategory []string `json:"selected_category"`
  	CodeLink          string   `json:"code_link"`
  	GpuType           string   `json:"gpu_type"`
  }

  // ConvertToTaskPod re-encodes input as JSON and decodes it into a TaskPod.
  //
  // When at least one task status is present, the first entry's StartTime and
  // FinishedTime are set to StartAt and FinishedAt shifted forward by eight
  // hours and formatted in UTC as "2006-01-02 15:04:05". Later entries are not
  // touched.
  //
  // An error is returned when input cannot be encoded or the JSON does not fit
  // TaskPod; the returned TaskPod is then the zero value.
  func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  	data, err := json.Marshal(input)
  	if err != nil {
  		return TaskPod{}, err
  	}

  	var taskPod TaskPod
  	if err := json.Unmarshal(data, &taskPod); err != nil {
  		return TaskPod{}, err
  	}

  	// Without this guard a payload that carries no task statuses would
  	// panic on the index expression below.
  	if len(taskPod.TaskStatuses) == 0 {
  		return taskPod, nil
  	}

  	const (
  		layout        = "2006-01-02 15:04:05"
  		offsetSeconds = 8 * 3600
  	)
  	first := &taskPod.TaskStatuses[0]
  	first.StartTime = time.Unix(first.StartAt.Unix()+offsetSeconds, 0).UTC().Format(layout)
  	first.FinishedTime = time.Unix(first.FinishedAt.Unix()+offsetSeconds, 0).UTC().Format(layout)
  	return taskPod, nil
  }
  // JobResultPayload is the decoded payload of a job query: identity, job
  // status, task roles, resources, the submitted config and user info.
  type JobResultPayload struct {
  	ID        string `json:"id"`
  	Name      string `json:"name"`
  	Platform  string `json:"platform"`
  	JobStatus struct {
  		Username           string      `json:"username"`
  		State              string      `json:"state"`
  		SubState           string      `json:"subState"`
  		ExecutionType      string      `json:"executionType"`
  		Retries            int         `json:"retries"`
  		CreatedTime        int64       `json:"createdTime"`
  		CompletedTime      int64       `json:"completedTime"`
  		AppID              string      `json:"appId"`
  		AppProgress        string      `json:"appProgress"`
  		AppTrackingURL     string      `json:"appTrackingUrl"`
  		AppLaunchedTime    int64       `json:"appLaunchedTime"`
  		AppCompletedTime   interface{} `json:"appCompletedTime"`
  		AppExitCode        int         `json:"appExitCode"`
  		AppExitDiagnostics string      `json:"appExitDiagnostics"`
  		AppExitType        interface{} `json:"appExitType"`
  		VirtualCluster     string      `json:"virtualCluster"`
  		// StartTime and EndTime are display strings filled in by
  		// ConvertToJobResultPayload from CreatedTime and CompletedTime.
  		StartTime string
  		EndTime   string
  	} `json:"jobStatus"`
  	TaskRoles map[string]interface{} `json:"taskRoles"`
  	Resource  struct {
  		CPU          int    `json:"cpu"`
  		Memory       string `json:"memory"`
  		NvidiaComGpu int    `json:"nvidia.com/gpu"`
  	} `json:"resource"`
  	Config struct {
  		Image     string `json:"image"`
  		JobID     string `json:"jobId"`
  		GpuType   string `json:"gpuType"`
  		JobName   string `json:"jobName"`
  		JobType   string `json:"jobType"`
  		TaskRoles []struct {
  			Name                  string `json:"name"`
  			ShmMB                 int    `json:"shmMB"`
  			Command               string `json:"command"`
  			MemoryMB              int    `json:"memoryMB"`
  			CPUNumber             int    `json:"cpuNumber"`
  			GpuNumber             int    `json:"gpuNumber"`
  			IsMainRole            bool   `json:"isMainRole"`
  			TaskNumber            int    `json:"taskNumber"`
  			NeedIBDevice          bool   `json:"needIBDevice"`
  			MinFailedTaskCount    int    `json:"minFailedTaskCount"`
  			MinSucceededTaskCount int    `json:"minSucceededTaskCount"`
  		} `json:"taskRoles"`
  		RetryCount int `json:"retryCount"`
  	} `json:"config"`
  	Userinfo struct {
  		User  string `json:"user"`
  		OrgID string `json:"org_id"`
  	} `json:"userinfo"`
  }

  // ConvertToJobResultPayload re-encodes input as JSON and decodes it into a
  // JobResultPayload.
  //
  // On success JobStatus.StartTime and JobStatus.EndTime are derived from
  // CreatedTime and CompletedTime: each value is divided by 1000, passed to
  // time.Unix as seconds, and formatted as "2006-01-02 15:04:05" in the
  // process's local time zone.
  //
  // An error is returned when input cannot be encoded or the JSON does not fit
  // JobResultPayload; the returned payload is then the zero value.
  func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  	data, err := json.Marshal(input)
  	if err != nil {
  		return JobResultPayload{}, err
  	}

  	var jobResultPayload JobResultPayload
  	if err := json.Unmarshal(data, &jobResultPayload); err != nil {
  		return JobResultPayload{}, err
  	}

  	const layout = "2006-01-02 15:04:05"
  	status := &jobResultPayload.JobStatus
  	status.StartTime = time.Unix(status.CreatedTime/1000, 0).Format(layout)
  	status.EndTime = time.Unix(status.CompletedTime/1000, 0).Format(layout)
  	return jobResultPayload, nil
  }
  // ImagesResultPayload holds a list of image descriptions.
  //
  // NOTE(review): the list is read from the "taskStatuses" JSON key, which
  // looks copied from TaskPod — confirm against the service response.
  type ImagesResultPayload struct {
  	Images []struct {
  		ID          int    `json:"id"`
  		Name        string `json:"name"`
  		Place       string `json:"place"`
  		Description string `json:"description"`
  		Provider    string `json:"provider"`
  		Createtime  string `json:"createtime"`
  		Remark      string `json:"remark"`
  	} `json:"taskStatuses"`
  }

  // ImageInfo describes one image. PlaceView has no JSON tag.
  type ImageInfo struct {
  	ID          int    `json:"id"`
  	Name        string `json:"name"`
  	Place       string `json:"place"`
  	Description string `json:"description"`
  	Provider    string `json:"provider"`
  	Createtime  string `json:"createtime"`
  	Remark      string `json:"remark"`
  	IsPublic    int    `json:"isPublic"`
  	PlaceView   string
  }
  // Categories is a list of Category values under the "category" key.
  type Categories struct {
  	Category []*Category `json:"category"`
  }

  // Category is an id/value pair.
  type Category struct {
  	Id    int    `json:"id"`
  	Value string `json:"value"`
  }

  // GpuInfos is a list of GpuInfo values under the "gpu_type" key.
  type GpuInfos struct {
  	GpuInfo []*GpuInfo `json:"gpu_type"`
  }

  // GpuInfo is an id/value pair with an associated queue name.
  type GpuInfo struct {
  	Id    int    `json:"id"`
  	Value string `json:"value"`
  	Queue string `json:"queue"`
  }

  // ResourceSpecs is a list of ResourceSpec values.
  //
  // NOTE(review): the JSON key is spelled "resorce_specs"; whatever produces
  // this document presumably uses the same spelling — confirm before changing.
  type ResourceSpecs struct {
  	ResourceSpec []*ResourceSpec `json:"resorce_specs"`
  }

  // ResourceSpec describes one resource specification: CPU and GPU counts
  // plus memory and shared-memory sizes.
  type ResourceSpec struct {
  	Id          int `json:"id"`
  	CpuNum      int `json:"cpu"`
  	GpuNum      int `json:"gpu"`
  	MemMiB      int `json:"memMiB"`
  	ShareMemMiB int `json:"shareMemMiB"`
  }

  // FlavorInfos is a list of FlavorInfo values under the "flavor_info" key.
  type FlavorInfos struct {
  	FlavorInfo []*FlavorInfo `json:"flavor_info"`
  }

  // FlavorInfo is an id/value pair.
  type FlavorInfo struct {
  	Id    int    `json:"id"`
  	Value string `json:"value"`
  }

  // PoolInfos is a list of PoolInfo values under the "pool_info" key.
  type PoolInfos struct {
  	PoolInfo []*PoolInfo `json:"pool_info"`
  }

  // PoolInfo identifies a pool by id, name and type.
  type PoolInfo struct {
  	PoolId   string `json:"pool_id"`
  	PoolName string `json:"pool_name"`
  	PoolType string `json:"pool_type"`
  }
  // CommitImageParams is the JSON body of a commit-image request: an IP, a
  // task container id, and the tag and description for the image.
  type CommitImageParams struct {
  	Ip               string `json:"ip"`
  	TaskContainerId  string `json:"taskContainerId"`
  	ImageTag         string `json:"imageTag"`
  	ImageDescription string `json:"imageDescription"`
  }

  // CommitImageResult is the decoded response to a commit-image request.
  type CommitImageResult struct {
  	Code    string                 `json:"code"`
  	Msg     string                 `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }

  // CloudBrainResult is a response that carries only a code and a message.
  type CloudBrainResult struct {
  	Code string `json:"code"`
  	Msg  string `json:"msg"`
  }
  // CreateNotebookParams is the JSON body of a create-notebook request.
  type CreateNotebookParams struct {
  	JobName     string    `json:"name"`
  	Description string    `json:"description"`
  	ProfileID   string    `json:"profile_id"`
  	Flavor      string    `json:"flavor"`
  	Spec        Spec      `json:"spec"`
  	Workspace   Workspace `json:"workspace"`
  	Pool        Pool      `json:"pool"`
  }

  // Pool identifies a pool by id, name and type.
  type Pool struct {
  	ID   string `json:"id"`
  	Name string `json:"name"`
  	Type string `json:"type"`
  }

  // Workspace identifies a workspace by id.
  type Workspace struct {
  	ID string `json:"id"`
  }

  // Spec groups the storage and auto-stop settings of a notebook.
  type Spec struct {
  	Storage  Storage  `json:"storage"`
  	AutoStop AutoStop `json:"auto_stop"`
  }

  // AutoStop holds an enable flag and a duration.
  type AutoStop struct {
  	Enable   bool `json:"enable"`
  	Duration int  `json:"duration"`
  }

  // Storage holds a storage type and its location.
  type Storage struct {
  	Type     string   `json:"type"`
  	Location Location `json:"location"`
  }

  // Location holds a path.
  type Location struct {
  	Path string `json:"path"`
  }
  // NotebookResult carries only the error code and message of a notebook
  // response.
  type NotebookResult struct {
  	ErrorCode string `json:"error_code"`
  	ErrorMsg  string `json:"error_msg"`
  }

  // CreateNotebookResult is the decoded response to a create-notebook request.
  type CreateNotebookResult struct {
  	ErrorCode             string `json:"error_code"`
  	ErrorMsg              string `json:"error_msg"`
  	ID                    string `json:"id"`
  	Name                  string `json:"name"`
  	Description           string `json:"description"`
  	Status                string `json:"status"`
  	CreationTimestamp     string `json:"creation_timestamp"`
  	LatestUpdateTimestamp string `json:"latest_update_timestamp"`
  	Profile               struct {
  		ID          string `json:"id"`
  		Name        string `json:"name"`
  		Description string `json:"description"`
  		DeType      string `json:"de_type"`
  		FlavorType  string `json:"flavor_type"`
  	} `json:"profile"`
  	Flavor        string `json:"flavor"`
  	FlavorDetails struct {
  		Name          string `json:"name"`
  		Status        string `json:"status"`
  		QueuingNum    int    `json:"queuing_num"`
  		QueueLeftTime int    `json:"queue_left_time"` // seconds
  		Duration      int    `json:"duration"`        // auto-stop time, seconds
  	} `json:"flavor_details"`
  }

  // GetNotebookResult is the decoded response to a get-notebook request.
  // CreateTime, LatestUpdateTime, BeginTime and EndTime have no JSON tags.
  type GetNotebookResult struct {
  	ErrorCode             string `json:"error_code"`
  	ErrorMsg              string `json:"error_msg"`
  	ID                    string `json:"id"`
  	Name                  string `json:"name"`
  	Description           string `json:"description"`
  	Status                string `json:"status"`
  	CreationTimestamp     string `json:"creation_timestamp"`
  	CreateTime            string
  	LatestUpdateTimestamp string `json:"latest_update_timestamp"`
  	LatestUpdateTime      string
  	Profile               struct {
  		ID          string `json:"id"`
  		Name        string `json:"name"`
  		Description string `json:"description"`
  		DeType      string `json:"de_type"`
  		FlavorType  string `json:"flavor_type"`
  	} `json:"profile"`
  	Flavor        string `json:"flavor"`
  	FlavorDetails struct {
  		Name          string `json:"name"`
  		Status        string `json:"status"`
  		QueuingNum    int    `json:"queuing_num"`
  		QueueLeftTime int    `json:"queue_left_time"` // seconds
  		Duration      int    `json:"duration"`        // auto-stop time, seconds
  	} `json:"flavor_details"`
  	QueuingInfo struct {
  		ID             string `json:"id"`
  		Name           string `json:"name"`
  		Flavor         string `json:"flavor"`
  		DeType         string `json:"de_type"`
  		Status         string `json:"status"`
  		BeginTimestamp int    `json:"begin_timestamp"` // time the instance entered the queue
  		BeginTime      string
  		RemainTime     int `json:"remain_time"` // remaining time of the instance
  		EndTimestamp   int `json:"end_timestamp"`
  		EndTime        string
  		Rank           int `json:"rank"` // rank of the instance in the queue
  	} `json:"queuing_info"`
  	Spec struct {
  		Annotations struct {
  			TargetDomain string `json:"target_domain"`
  			Url          string `json:"url"`
  		} `json:"annotations"`
  	} `json:"spec"`
  }
  // GetTokenParams is the JSON body of a token request; everything is nested
  // under the "auth" key.
  type GetTokenParams struct {
  	Auth Auth `json:"auth"`
  }

  // Auth combines the identity being authenticated with the requested scope.
  type Auth struct {
  	Identity Identity `json:"identity"`
  	Scope    Scope    `json:"scope"`
  }

  // Scope wraps the project the token is scoped to.
  type Scope struct {
  	Project Project `json:"project"`
  }

  // Project holds a project name.
  type Project struct {
  	Name string `json:"name"`
  }

  // Identity lists the authentication methods and the password credentials.
  type Identity struct {
  	Methods  []string `json:"methods"`
  	Password Password `json:"password"`
  }

  // Password wraps the user whose credentials are submitted.
  type Password struct {
  	User NotebookUser `json:"user"`
  }

  // NotebookUser holds a user name, password and domain.
  type NotebookUser struct {
  	Name     string `json:"name"`
  	Password string `json:"password"`
  	Domain   Domain `json:"domain"`
  }

  // Domain holds a domain name.
  type Domain struct {
  	Name string `json:"name"`
  }
  // Action names accepted in NotebookAction.Action.
  const (
  	ActionStart   = "start"
  	ActionStop    = "stop"
  	ActionRestart = "restart"
  	ActionQueue   = "queue"
  	ActionDequeue = "dequeue"
  )

  // NotebookAction is the JSON body naming the action to apply to a notebook.
  type NotebookAction struct {
  	Action string `json:"action"`
  }

  // NotebookActionResult is the decoded response to a notebook action: error
  // details plus the current status and previous state.
  type NotebookActionResult struct {
  	ErrorCode     string `json:"error_code"`
  	ErrorMsg      string `json:"error_msg"`
  	CurrentStatus string `json:"current_status"`
  	PreviousState string `json:"previous_state"`
  }

  // NotebookGetJobTokenResult is the decoded response carrying a job token.
  type NotebookGetJobTokenResult struct {
  	ErrorCode string `json:"error_code"`
  	ErrorMsg  string `json:"error_msg"`
  	Token     string `json:"token"`
  }

  // NotebookDelResult is the decoded response to a notebook deletion.
  type NotebookDelResult struct {
  	InstanceID string `json:"instance_id"`
  }
  // CreateTrainJobParams is the JSON body of a create-train-job request.
  type CreateTrainJobParams struct {
  	JobName     string `json:"job_name"`
  	Description string `json:"job_desc"`
  	Config      Config `json:"config"`
  	WorkspaceID string `json:"workspace_id"`
  }

  // Config is the "config" section of CreateTrainJobParams.
  type Config struct {
  	WorkServerNum int         `json:"worker_server_num"`
  	AppUrl        string      `json:"app_url"`       // code directory of the training job
  	BootFileUrl   string      `json:"boot_file_url"` // boot file of the training job; must be under the code directory
  	Parameter     []Parameter `json:"parameter"`
  	DataUrl       string      `json:"data_url"` // OBS path URL of the dataset the training job needs
  	//DatasetID string `json:"dataset_id"`
  	//DataVersionID string `json:"dataset_version_id"`
  	//DataSource []DataSource `json:"data_source"`
  	//SpecID int64 `json:"spec_id"`
  	EngineID int64 `json:"engine_id"`
  	//ModelID int64 `json:"model_id"`
  	TrainUrl string `json:"train_url"` // OBS path URL for the training job's output files
  	LogUrl   string `json:"log_url"`
  	//UserImageUrl string `json:"user_image_url"`
  	//UserCommand string `json:"user_command"`
  	CreateVersion bool `json:"create_version"`
  	//Volumes []Volumes `json:"volumes"`
  	Flavor Flavor `json:"flavor"`
  	PoolID string `json:"pool_id"`
  }

  // CreateConfigParams is the JSON body of a create-config request. Its
  // fields match Config with a name and description added and without
  // CreateVersion.
  type CreateConfigParams struct {
  	ConfigName    string      `json:"config_name"`
  	Description   string      `json:"config_desc"`
  	WorkServerNum int         `json:"worker_server_num"`
  	AppUrl        string      `json:"app_url"`       // code directory of the training job
  	BootFileUrl   string      `json:"boot_file_url"` // boot file of the training job; must be under the code directory
  	Parameter     []Parameter `json:"parameter"`
  	DataUrl       string      `json:"data_url"` // OBS path URL of the dataset the training job needs
  	//DatasetID string `json:"dataset_id"`
  	//DataVersionID string `json:"dataset_version_id"`
  	//DataSource []DataSource `json:"data_source"`
  	//SpecID int64 `json:"spec_id"`
  	EngineID int64 `json:"engine_id"`
  	//ModelID int64 `json:"model_id"`
  	TrainUrl string `json:"train_url"` // OBS path URL for the training job's output files
  	LogUrl   string `json:"log_url"`
  	//UserImageUrl string `json:"user_image_url"`
  	//UserCommand string `json:"user_command"`
  	//CreateVersion bool `json:"create_version"`
  	//Volumes []Volumes `json:"volumes"`
  	Flavor Flavor `json:"flavor"`
  	PoolID string `json:"pool_id"`
  }

  // Parameter is a label/value pair.
  type Parameter struct {
  	Label string `json:"label"`
  	Value string `json:"value"`
  }

  // Parameters is a list of Parameter values under the "parameter" key.
  type Parameters struct {
  	Parameter []Parameter `json:"parameter"`
  }

  // DataSource identifies a dataset by id and version, with a type and URL.
  type DataSource struct {
  	DatasetID      string `json:"dataset_id"`
  	DatasetVersion string `json:"dataset_version"`
  	Type           string `json:"type"`
  	DataUrl        string `json:"data_url"`
  }

  // Volumes pairs an Nfs entry with a HostPath entry.
  type Volumes struct {
  	Nfs      Nfs      `json:"nfs"`
  	HostPath HostPath `json:"host_path"`
  }

  // Nfs holds an id, source and destination paths and a read-only flag.
  type Nfs struct {
  	ID         string `json:"id"`
  	SourcePath string `json:"src_path"`
  	DestPath   string `json:"dest_path"`
  	ReadOnly   bool   `json:"read_only"`
  }

  // HostPath holds source and destination paths and a read-only flag.
  type HostPath struct {
  	SourcePath string `json:"src_path"`
  	DestPath   string `json:"dest_path"`
  	ReadOnly   bool   `json:"read_only"`
  }

  // Flavor holds a flavor code.
  type Flavor struct {
  	Code string `json:"code"`
  }
  545. type CreateTrainJobResult struct {
  546. ErrorCode string `json:"error_code"`
  547. ErrorMsg string `json:"error_msg"`
  548. IsSuccess bool `json:"is_success"`
  549. JobName string `json:"job_name"`
  550. JobID int64 `json:"job_id"`
  551. Status int `json:"status"`
  552. CreateTime int64 `json:"create_time"`
  553. VersionID int64 `json:"version_id"`
  554. ResourceID string `json:"resource_id"`
  555. VersionName string `json:"version_name"`
  556. }
  557. type CreateTrainJobConfigResult struct {
  558. ErrorCode string `json:"error_code"`
  559. ErrorMsg string `json:"error_msg"`
  560. IsSuccess bool `json:"is_success"`
  561. }
  562. type GetResourceSpecsResult struct {
  563. ErrorCode string `json:"error_code"`
  564. ErrorMsg string `json:"error_msg"`
  565. IsSuccess bool `json:"is_success"`
  566. SpecTotalCount int `json:"spec_total_count"`
  567. Specs []Specs `json:"specs"`
  568. }
  569. type Specs struct {
  570. Core string `json:"core"`
  571. Cpu string `json:"cpu"`
  572. IsNoResource bool `json:"no_resource"`
  573. GpuType string `json:"gpu_type"`
  574. SpecID int64 `json:"spec_id"`
  575. GpuNum int `json:"gpu_num"`
  576. SpecCode string `json:"spec_code"`
  577. Storage string `json:"storage"`
  578. MaxNum int `json:"max_num"`
  579. UnitNum int `json:"unit_num"`
  580. InterfaceType int `json:"interface_type"`
  581. }
  582. type GetConfigListResult struct {
  583. ErrorCode string `json:"error_code"`
  584. ErrorMsg string `json:"error_msg"`
  585. IsSuccess bool `json:"is_success"`
  586. ConfigTotalCount int `json:"config_total_count"`
  587. ParaConfigs []ParaConfig `json:"configs"`
  588. }
  589. type ParaConfig struct {
  590. ConfigName string `json:"config_name"`
  591. ConfigDesc string `json:"config_desc"`
  592. CreateTime int64 `json:"create_time"`
  593. EngineType int `json:"engine_type"`
  594. EngineName string `json:"engine_name"`
  595. EngineId int64 `json:"engine_id"`
  596. EngineVersion string `json:"engine_version"`
  597. UserImageUrl string `json:"user_image_url"`
  598. UserCommand string `json:"user_command"`
  599. Result GetConfigResult
  600. }
  601. type GetConfigResult struct {
  602. ErrorCode string `json:"error_code"`
  603. ErrorMsg string `json:"error_msg"`
  604. IsSuccess bool `json:"is_success"`
  605. ConfigName string `json:"config_name"`
  606. Description string `json:"config_desc"`
  607. WorkServerNum int `json:"worker_server_num"`
  608. AppUrl string `json:"app_url"` //训练作业的代码目录
  609. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  610. Parameter []Parameter `json:"parameter"`
  611. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  612. //DatasetID string `json:"dataset_id"`
  613. //DataVersionID string `json:"dataset_version_id"`
  614. //DataSource []DataSource `json:"data_source"`
  615. //SpecID int64 `json:"spec_id"`
  616. EngineID int64 `json:"engine_id"`
  617. //ModelID int64 `json:"model_id"`
  618. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  619. LogUrl string `json:"log_url"`
  620. //UserImageUrl string `json:"user_image_url"`
  621. //UserCommand string `json:"user_command"`
  622. //CreateVersion bool `json:"create_version"`
  623. //Volumes []Volumes `json:"volumes"`
  624. Flavor Flavor `json:"flavor"`
  625. PoolID string `json:"pool_id"`
  626. }
  627. type ErrorResult struct {
  628. ErrorCode string `json:"error_code"`
  629. ErrorMsg string `json:"error_message"`
  630. IsSuccess bool `json:"is_success"`
  631. }
  632. type GetTrainJobResult struct {
  633. IsSuccess bool `json:"is_success"`
  634. JobName string `json:"job_name"`
  635. JobID int64 `json:"job_id"`
  636. Description string `json:"job_desc"`
  637. IntStatus int `json:"status"`
  638. Status string
  639. LongCreateTime int64 `json:"create_time"`
  640. CreateTime string
  641. Duration int64 `json:"duration"` //训练作业的运行时间,单位为毫秒
  642. VersionID int64 `json:"version_id"`
  643. ResourceID string `json:"resource_id"`
  644. VersionName string `json:"version_name"`
  645. PreVersionID int64 `json:"pre_version_id"`
  646. WorkServerNum int `json:"worker_server_num"`
  647. AppUrl string `json:"app_url"` //训练作业的代码目录
  648. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  649. Parameter []Parameter `json:"parameter"`
  650. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  651. //DatasetID string `json:"dataset_id"`
  652. //DataVersionID string `json:"dataset_version_id"`
  653. //DataSource []DataSource `json:"data_source"`
  654. //SpecID int64 `json:"spec_id"`
  655. EngineID int64 `json:"engine_id"`
  656. EngineName string `json:"engine_name"`
  657. EngineVersion string `json:"engine_version"`
  658. //ModelID int64 `json:"model_id"`
  659. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  660. LogUrl string `json:"log_url"`
  661. //UserImageUrl string `json:"user_image_url"`
  662. //UserCommand string `json:"user_command"`
  663. //Volumes []Volumes `json:"volumes"`
  664. Flavor Flavor `json:"flavor"`
  665. PoolID string `json:"pool_id"`
  666. PoolName string `json:"pool_name"`
  667. NasMountPath string `json:"nas_mount_path"`
  668. NasShareAddr string `json:"nas_share_addr"`
  669. DatasetName string
  670. }
  671. type GetTrainJobLogResult struct {
  672. ErrorCode string `json:"error_code"`
  673. ErrorMsg string `json:"error_msg"`
  674. IsSuccess bool `json:"is_success"`
  675. Content string `json:"content"`
  676. Lines int `json:"lines"`
  677. StartLine string `json:"start_line"`
  678. EndLine string `json:"end_line"`
  679. }
  680. type GetTrainJobLogFileNamesResult struct {
  681. ErrorCode string `json:"error_code"`
  682. ErrorMsg string `json:"error_msg"`
  683. IsSuccess bool `json:"is_success"`
  684. LogFileList []string `json:"log_file_list"`
  685. }
  686. type TrainJobResult struct {
  687. ErrorCode string `json:"error_code"`
  688. ErrorMsg string `json:"error_msg"`
  689. IsSuccess bool `json:"is_success"`
  690. }
  691. type LogFile struct {
  692. Name string
  693. }
  694. func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) {
  695. sess := x.NewSession()
  696. defer sess.Close()
  697. var cond = builder.NewCond()
  698. if opts.RepoID > 0 {
  699. cond = cond.And(
  700. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  701. )
  702. }
  703. if opts.UserID > 0 {
  704. cond = cond.And(
  705. builder.Eq{"cloudbrain.user_id": opts.UserID},
  706. )
  707. }
  708. if (opts.JobID) > 0 {
  709. cond = cond.And(
  710. builder.Eq{"cloudbrain.job_id": opts.JobID},
  711. )
  712. }
  713. if (opts.Type) >= 0 {
  714. cond = cond.And(
  715. builder.Eq{"cloudbrain.type": opts.Type},
  716. )
  717. }
  718. // switch opts.JobStatus {
  719. // case JobWaiting:
  720. // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
  721. // case JobFailed:
  722. // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
  723. // case JobStopped:
  724. // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
  725. // case JobSucceeded:
  726. // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
  727. // }
  728. if len(opts.CloudbrainIDs) > 0 {
  729. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  730. }
  731. count, err := sess.Where(cond).Count(new(Cloudbrain))
  732. if err != nil {
  733. return nil, 0, fmt.Errorf("Count: %v", err)
  734. }
  735. if opts.Page >= 0 && opts.PageSize > 0 {
  736. var start int
  737. if opts.Page == 0 {
  738. start = 0
  739. } else {
  740. start = (opts.Page - 1) * opts.PageSize
  741. }
  742. sess.Limit(opts.PageSize, start)
  743. }
  744. sess.OrderBy("cloudbrain.created_unix DESC")
  745. cloudbrains := make([]*Cloudbrain, 0, setting.UI.IssuePagingNum)
  746. if err := sess.Where(cond).Find(&cloudbrains); err != nil {
  747. return nil, 0, fmt.Errorf("Find: %v", err)
  748. }
  749. sess.Close()
  750. return cloudbrains, count, nil
  751. }
  752. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  753. if _, err = x.Insert(cloudbrain); err != nil {
  754. return err
  755. }
  756. return nil
  757. }
  758. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  759. has, err := x.Get(cb)
  760. if err != nil {
  761. return nil, err
  762. } else if !has {
  763. return nil, ErrJobNotExist{}
  764. }
  765. return cb, nil
  766. }
  767. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  768. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  769. return getRepoCloudBrain(cb)
  770. }
  771. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  772. cb := &Cloudbrain{JobID: jobID}
  773. return getRepoCloudBrain(cb)
  774. }
  775. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  776. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  777. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  778. return
  779. }
  780. func UpdateJob(job *Cloudbrain) error {
  781. return updateJob(x, job)
  782. }
  783. func updateJob(e Engine, job *Cloudbrain) error {
  784. var sess *xorm.Session
  785. sess = e.Where("job_id = ?", job.JobID)
  786. _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
  787. return err
  788. }
  789. func DeleteJob(job *Cloudbrain) error {
  790. return deleteJob(x, job)
  791. }
  792. func deleteJob(e Engine, job *Cloudbrain) error {
  793. _, err := e.ID(job.ID).Delete(job)
  794. return err
  795. }
  796. func GetCloudbrainByName(jobName string) (*Cloudbrain, error) {
  797. cb := &Cloudbrain{JobName: jobName}
  798. return getRepoCloudBrain(cb)
  799. }