You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

octopusHttp.go 20 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
  1. package octopusHttp
  2. import (
  3. "bytes"
  4. "context"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. common2 "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/entity"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
  11. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
  12. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
  14. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  15. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
  16. omodel "gitlink.org.cn/JointCloud/pcm-octopus/http/model"
  17. "gitlink.org.cn/JointCloud/pcm-openi/common"
  18. "mime/multipart"
  19. "net/http"
  20. "strconv"
  21. "strings"
  22. "time"
  23. )
  // General-purpose string constants used by the Octopus HTTP adapter:
  // the resource pool name, query-parameter keys, separators, and the unit and
  // label strings attached to resource usage values.
  // Several entries (for example BALANCE, RATE, PERHOUR, KILOBYTE, STORAGE, DISK,
  // VRAM, RMB, POINT, RUNNINGTASK, RUNNING, Forward_Slash) are not referenced in
  // the visible portion of this file.
  24. const (
  // Resource pool name sent with job-creation and resource-spec requests.
  25. RESOURCE_POOL = "grampus-pool"
  // Query-parameter keys attached to the resource-spec and job-detail requests.
  26. Param_Token = "token"
  27. Param_Addr = "addr"
  28. Forward_Slash = "/"
  // Separator between key and value in the "key,value" param/env entries.
  29. COMMA = ","
  // UNDERSCORE and TASK_NAME_PREFIX are combined with a random suffix to name jobs.
  30. UNDERSCORE = "_"
  31. TASK_NAME_PREFIX = "trainJob"
  // Python and SemiColon are used to build the job command line in Execute.
  32. Python = "python "
  33. SemiColon = ";"
  34. BALANCE = "balance"
  35. RATE = "rate"
  36. PERHOUR = "per-hour"
  // Units used for card counts, memory, and CPU cores in genClusterResources.
  37. NUMBER = "number"
  38. KILOBYTE = "kb"
  39. GIGABYTE = "gb"
  40. CPUCORE = "core"
  41. STORAGE = "STORAGE"
  42. DISK = "disk"
  // MEMORY, RAM and CPU are upper-cased to form usage type/name labels.
  43. MEMORY = "memory"
  44. RAM = "ram"
  45. VRAM = "vram"
  46. RMB = "rmb"
  47. POINT = "point"
  48. RUNNINGTASK = "RUNNING_TASK"
  49. RUNNING = "RUNNING"
  50. CPU = "cpu"
  // Separator used to split the memory quantity string (e.g. a value containing "Gi").
  51. Gi = "Gi"
  52. )
  // NotImplementError is the error message returned by the adapter methods
  // in this file that have no implementation yet.
  53. const (
  54. NotImplementError = "not implemented"
  55. )
  // API paths that are appended directly to the adapter's server string
  // (no separator is inserted, so the server value presumably ends with "/" — confirm).
  // MyAlgorithmListUrl is not referenced in the visible portion of this file.
  56. const (
  57. MyAlgorithmListUrl = "api/v1/algorithm/myAlgorithmList"
  58. ResourcespecsUrl = "api/v1/resource/specs"
  59. CreateTrainJobUrl = "api/v1/job/create"
  60. TrainJobDetail = "api/v1/job/detail"
  61. )
  62. // compute source
  // ComputeSourceToCardType maps a compute-source identifier to the card type
  // label reported for it. genClusterResources looks the identifier up here;
  // identifiers missing from the map yield an empty card type.
  63. var (
  64. ComputeSourceToCardType = map[string]string{
  65. "nvidia-a100": "GPU",
  66. "nvidia-a100-80g": "GPU",
  67. "mr-v100": "ILUVATAR-GPGPU",
  68. "bi-v100": "ILUVATAR-GPGPU",
  69. "MR-V50": "ILUVATAR-GPGPU",
  70. "BI-V100": "ILUVATAR-GPGPU",
  71. "BI-V150": "ILUVATAR-GPGPU",
  72. "MR-V100": "ILUVATAR-GPGPU",
  73. "cambricon.com/mlu": "MLU",
  74. "hygon.com/dcu": "DCU",
  75. "huawei.com/Ascend910": "NPU",
  76. "enflame.com/gcu": "GCU",
  77. "ILUVATAR-GPGPU": "ILUVATAR-GPGPU",
  78. "MXN260": "METAX-GPGPU",
  79. }
  80. )
  // OctopusHttp holds the connection settings and token used by the
  // HTTP-based Octopus adapter methods in this file.
  81. type OctopusHttp struct {
  // server is the base URL that the API paths are appended to.
  82. server string
  // host is sent as the "addr" query parameter on every request.
  83. host string
  // platform stores the name passed to NewOctopusHttp.
  84. platform string
  // participantId is reported as the ClusterId in GetResourceSpecs.
  85. participantId int64
  // token supplies the access token via its Get method.
  86. token *Token
  87. }
  // NewOctopusHttp builds an OctopusHttp for the given participant id and
  // platform name, creating its token from server, host, user and pwd.
  88. func NewOctopusHttp(id int64, name, server, host string, user string, pwd string) *OctopusHttp {
  // NOTE(review): the error returned by NewToken is discarded. If NewToken can
  // return a nil token on failure, later o.token.Get() calls may dereference nil — confirm.
  89. token, _ := NewToken(server, host, user, pwd)
  90. return &OctopusHttp{platform: name, participantId: id, server: server, host: host, token: token}
  91. }
  92. // executor
  93. func (o *OctopusHttp) Execute(ctx context.Context, option *option.AiOption, mode int) (interface{}, error) {
  94. switch mode {
  95. case executor.SUBMIT_MODE_JOINT_CLOUD:
  96. case executor.SUBMIT_MODE_STORAGE_SCHEDULE:
  97. // cmd
  98. if option.AlgorithmId == "" {
  99. return nil, errors.New("algorithmId is empty")
  100. }
  101. if option.Cmd != "" {
  102. option.Cmd = option.Cmd + SemiColon + Python + option.AlgorithmId
  103. } else {
  104. option.Cmd = Python + option.AlgorithmId
  105. }
  106. option.ResourceId = "964fdee2db544928bfea74dac12a924f"
  107. task, err := o.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
  108. if err != nil {
  109. return nil, err
  110. }
  111. return task, nil
  112. }
  113. return nil, nil
  114. }
  // Stop is currently a no-op: it ignores id and always returns nil.
  115. func (o *OctopusHttp) Stop(ctx context.Context, id string) error {
  116. return nil
  117. }
  118. func (o *OctopusHttp) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
  119. // octopus提交任务
  120. reqUrl := o.server + CreateTrainJobUrl
  121. token, err := o.token.Get()
  122. if err != nil {
  123. return nil, err
  124. }
  125. // python参数
  126. var prms []struct {
  127. Key string `json:"key"`
  128. Value string `json:"value"`
  129. }
  130. for _, param := range params {
  131. var p struct {
  132. Key string `json:"key"`
  133. Value string `json:"value"`
  134. }
  135. s := strings.Split(param, COMMA)
  136. p.Key = s[0]
  137. p.Value = s[1]
  138. prms = append(prms, p)
  139. }
  140. //环境变量
  141. envMap := make(map[string]string)
  142. for _, env := range envs {
  143. s := strings.Split(env, COMMA)
  144. envMap[s[0]] = s[1]
  145. }
  146. param := &omodel.CreateTrainJobParam{
  147. //DataSetId: datasetsId,
  148. //DataSetVersion: VERSION,
  149. //AlgorithmId: algorithmId,
  150. //AlgorithmVersion: VERSION,
  151. Name: TASK_NAME_PREFIX + UNDERSCORE + utils.RandomString(10),
  152. ImageId: imageId,
  153. IsDistributed: false,
  154. ResourcePool: RESOURCE_POOL,
  155. Config: []*omodel.CreateTrainJobConf{
  156. {
  157. Command: cmd,
  158. ResourceSpecId: resourceId,
  159. MinFailedTaskCount: 1,
  160. MinSucceededTaskCount: 1,
  161. TaskNumber: 1,
  162. Parameters: prms,
  163. Envs: envMap,
  164. },
  165. },
  166. }
  167. resp := &entity.OctResp{}
  168. req := common.GetRestyRequest(common.TIMEOUT)
  169. _, err = req.
  170. SetHeader("Authorization", "Bearer "+token).
  171. SetQueryString("token=" + token).
  172. SetQueryString("addr=" + o.host).
  173. SetBody(param).
  174. SetResult(resp).
  175. Post(reqUrl)
  176. if err != nil {
  177. return nil, err
  178. }
  179. return resp, nil
  180. }
  181. // collector
  182. func (o *OctopusHttp) resourceSpecs(ctx context.Context) (*entity.OctResp, error) {
  183. resourcespecsUrl := o.server + ResourcespecsUrl
  184. token, err := o.token.Get()
  185. if err != nil {
  186. return nil, err
  187. }
  188. param := omodel.ResourceSpecParam{
  189. ResourcePool: RESOURCE_POOL,
  190. }
  191. b, _ := json.Marshal(param)
  192. byt := bytes.NewBuffer(b)
  193. resp := &entity.OctResp{}
  194. req := common.GetRestyRequest(common.TIMEOUT)
  195. r, _ := http.NewRequest("GET", resourcespecsUrl, byt)
  196. req.RawRequest = r
  197. req.URL = resourcespecsUrl
  198. _, err = req.
  199. SetHeader("Content-Type", "application/json").
  200. SetQueryParam(Param_Token, token).
  201. SetQueryParam(Param_Addr, o.host).
  202. SetBody(byt).
  203. SetResult(resp).
  204. Send()
  205. if err != nil {
  206. return nil, err
  207. }
  208. return resp, nil
  209. }
  210. func (o *OctopusHttp) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
  211. resp, err := o.resourceSpecs(ctx)
  212. if err != nil {
  213. return nil, err
  214. }
  215. if resp.Code != http.StatusOK {
  216. if resp.Data != nil {
  217. marshal, err := json.Marshal(resp.Data)
  218. if err != nil {
  219. return nil, err
  220. }
  221. errormdl := &omodel.Error{}
  222. err = json.Unmarshal(marshal, errormdl)
  223. if err != nil {
  224. return nil, err
  225. }
  226. return nil, errors.New(errormdl.Message)
  227. }
  228. } else {
  229. if resp.Data != nil {
  230. spec := &entity.OctResourceSpecs{}
  231. marshal, err := json.Marshal(resp.Data)
  232. if err != nil {
  233. return nil, err
  234. }
  235. err = json.Unmarshal(marshal, spec)
  236. if err != nil {
  237. return nil, err
  238. }
  239. }
  240. }
  241. return nil, nil
  242. }
  // GetDatasetsSpecs is a stub: it always returns (nil, nil).
  243. func (o *OctopusHttp) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) {
  244. return nil, nil
  245. }
  // GetAlgorithms is not implemented; it always returns an error with the
  // NotImplementError message.
  246. func (o *OctopusHttp) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) {
  247. return nil, errors.New(NotImplementError)
  248. }
  // GetTrainingTaskLog is not implemented; it always returns an empty string
  // and an error with the NotImplementError message.
  249. func (o *OctopusHttp) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
  250. return "", errors.New(NotImplementError)
  251. }
  252. func (o *OctopusHttp) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
  253. if taskId == "" {
  254. return nil, errors.New("empty taskId")
  255. }
  256. resp, err := o.getTrainingTask(ctx, taskId)
  257. if err != nil {
  258. return nil, err
  259. }
  260. if resp.Code != http.StatusOK {
  261. if resp.Data != nil {
  262. marshal, err := json.Marshal(resp.Data)
  263. if err != nil {
  264. return nil, err
  265. }
  266. errormdl := &omodel.Error{}
  267. err = json.Unmarshal(marshal, errormdl)
  268. if err != nil {
  269. return nil, err
  270. }
  271. return nil, errors.New(errormdl.Message)
  272. }
  273. } else {
  274. if resp.Data != nil {
  275. job := &entity.OctTrainJob{}
  276. marshal, err := json.Marshal(resp.Data)
  277. if err != nil {
  278. return nil, err
  279. }
  280. err = json.Unmarshal(marshal, job)
  281. if err != nil {
  282. return nil, err
  283. }
  284. var task collector.Task
  285. task.Id = job.TrainJob.Id
  286. if job.TrainJob.StartedAt != 0 {
  287. task.Start = time.Unix(int64(job.TrainJob.StartedAt), 0).Format(constants.Layout)
  288. }
  289. if job.TrainJob.CompletedAt != 0 {
  290. task.End = time.Unix(int64(job.TrainJob.CompletedAt), 0).Format(constants.Layout)
  291. }
  292. switch job.TrainJob.Status {
  293. case "succeeded":
  294. task.Status = constants.Completed
  295. case "failed":
  296. task.Status = constants.Failed
  297. case "running":
  298. task.Status = constants.Running
  299. case "stopped":
  300. task.Status = constants.Stopped
  301. case "pending":
  302. task.Status = constants.Pending
  303. default:
  304. task.Status = "undefined"
  305. }
  306. return &task, nil
  307. }
  308. }
  309. return nil, errors.New("failed to get trainjob")
  310. }
  311. func (o *OctopusHttp) getTrainingTask(ctx context.Context, taskId string) (*entity.OctResp, error) {
  312. taskDetailsUrl := o.server + TrainJobDetail
  313. token, err := o.token.Get()
  314. if err != nil {
  315. return nil, err
  316. }
  317. param := omodel.TrainJobDetailParam{
  318. JobId: taskId,
  319. }
  320. b, _ := json.Marshal(param)
  321. byt := bytes.NewBuffer(b)
  322. resp := &entity.OctResp{}
  323. req := common.GetRestyRequest(common.TIMEOUT)
  324. r, _ := http.NewRequest("GET", taskDetailsUrl, byt)
  325. req.RawRequest = r
  326. req.URL = taskDetailsUrl
  327. _, err = req.
  328. SetHeader("Content-Type", "application/json").
  329. SetQueryParam(Param_Token, token).
  330. SetQueryParam(Param_Addr, o.host).
  331. SetBody(byt).
  332. SetResult(resp).
  333. Send()
  334. if err != nil {
  335. return nil, errors.New("failed to invoke taskDetails")
  336. }
  337. return resp, nil
  338. }
  // DownloadAlgorithmCode is not implemented; it always returns an empty
  // string and an error with the NotImplementError message.
  339. func (o *OctopusHttp) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
  340. return "", errors.New(NotImplementError)
  341. }
  // UploadAlgorithmCode is currently a no-op: it ignores all arguments and
  // always returns nil.
  342. func (o *OctopusHttp) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
  343. return nil
  344. }
  345. func (o OctopusHttp) GetComputeCards(ctx context.Context) ([]string, error) {
  346. return nil, errors.New(NotImplementError)
  347. }
  // GetUserBalance is not implemented; it always returns 0 and an error with
  // the NotImplementError message.
  348. func (o *OctopusHttp) GetUserBalance(ctx context.Context) (float64, error) {
  349. return 0, errors.New(NotImplementError)
  350. }
  351. func (o *OctopusHttp) GetResourceSpecs(ctx context.Context, resrcType string) (*collector.ResourceSpec, error) {
  352. resp, err := o.resourceSpecs(ctx)
  353. if err != nil {
  354. return nil, err
  355. }
  356. res := &collector.ResourceSpec{
  357. ClusterId: strconv.FormatInt(o.participantId, 10),
  358. Tag: resrcType,
  359. }
  360. if resp.Code != http.StatusOK {
  361. if resp.Data != nil {
  362. marshal, err := json.Marshal(resp.Data)
  363. if err != nil {
  364. return nil, err
  365. }
  366. errormdl := &omodel.Error{}
  367. err = json.Unmarshal(marshal, errormdl)
  368. if err != nil {
  369. return nil, err
  370. }
  371. return nil, errors.New(errormdl.Message)
  372. }
  373. } else {
  374. if resp.Data != nil {
  375. specs := &entity.OctResourceSpecs{}
  376. marshal, err := json.Marshal(resp.Data)
  377. if err != nil {
  378. return nil, err
  379. }
  380. err = json.Unmarshal(marshal, specs)
  381. if err != nil {
  382. return nil, err
  383. }
  384. clusterResources, err := genSpecs(specs, resrcType)
  385. if err != nil {
  386. return nil, err
  387. }
  388. res.Resources = clusterResources
  389. }
  390. }
  391. return res, nil
  392. }
  393. func genSpecs(specs *entity.OctResourceSpecs, resrcType string) ([]interface{}, error) {
  394. res := make([]interface{}, 0)
  395. if resrcType == "Inference" {
  396. return res, nil
  397. } else if resrcType == "Train" {
  398. if specs.MapResourceSpecIdList.Train.ResourceSpecs == nil {
  399. return res, nil
  400. } else {
  401. for _, s := range specs.MapResourceSpecIdList.Train.ResourceSpecs {
  402. spec := &omodel.Spec{}
  403. marshal, err := json.Marshal(s)
  404. if err != nil {
  405. return nil, err
  406. }
  407. err = json.Unmarshal(marshal, spec)
  408. if err != nil {
  409. return nil, err
  410. }
  411. resType, err := chooseResourceType(spec)
  412. if err != nil {
  413. return nil, err
  414. }
  415. if resType == nil {
  416. continue
  417. }
  418. res = append(res, resType)
  419. }
  420. }
  421. }
  422. return res, nil
  423. }
  424. func chooseResourceType(spec *omodel.Spec) (*collector.ClusterResource, error) {
  425. if spec.ResourceQuantity.NvidiaA100 != "" {
  426. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaA100")
  427. if err != nil {
  428. return nil, err
  429. }
  430. cres, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA100, spec)
  431. if err != nil {
  432. return nil, err
  433. }
  434. return cres, nil
  435. } else if spec.ResourceQuantity.NvidiaA10080G != "" {
  436. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaA10080G")
  437. if err != nil {
  438. return nil, err
  439. }
  440. cres, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA10080G, spec)
  441. if err != nil {
  442. return nil, err
  443. }
  444. return cres, nil
  445. } else if spec.ResourceQuantity.MrV100 != "" {
  446. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MrV100")
  447. if err != nil {
  448. return nil, err
  449. }
  450. cres, err := genClusterResources(tag, spec.ResourceQuantity.MrV100, spec)
  451. if err != nil {
  452. return nil, err
  453. }
  454. return cres, nil
  455. } else if spec.ResourceQuantity.BiV100 != "" {
  456. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "BiV100")
  457. if err != nil {
  458. return nil, err
  459. }
  460. cres, err := genClusterResources(tag, spec.ResourceQuantity.BiV100, spec)
  461. if err != nil {
  462. return nil, err
  463. }
  464. return cres, nil
  465. } else if spec.ResourceQuantity.MRV50 != "" {
  466. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MRV50")
  467. if err != nil {
  468. return nil, err
  469. }
  470. cres, err := genClusterResources(tag, spec.ResourceQuantity.MRV50, spec)
  471. if err != nil {
  472. return nil, err
  473. }
  474. return cres, nil
  475. } else if spec.ResourceQuantity.BIV100 != "" {
  476. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaA100")
  477. if err != nil {
  478. return nil, err
  479. }
  480. cres, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA100, spec)
  481. if err != nil {
  482. return nil, err
  483. }
  484. return cres, nil
  485. } else if spec.ResourceQuantity.BIV150 != "" {
  486. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "BIV150")
  487. if err != nil {
  488. return nil, err
  489. }
  490. cres, err := genClusterResources(tag, spec.ResourceQuantity.BIV150, spec)
  491. if err != nil {
  492. return nil, err
  493. }
  494. return cres, nil
  495. } else if spec.ResourceQuantity.MRV100 != "" {
  496. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MRV100")
  497. if err != nil {
  498. return nil, err
  499. }
  500. cres, err := genClusterResources(tag, spec.ResourceQuantity.MRV100, spec)
  501. if err != nil {
  502. return nil, err
  503. }
  504. return cres, nil
  505. } else if spec.ResourceQuantity.CambriconComMlu != "" {
  506. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "CambriconComMlu")
  507. if err != nil {
  508. return nil, err
  509. }
  510. cres, err := genClusterResources(tag, spec.ResourceQuantity.CambriconComMlu, spec)
  511. if err != nil {
  512. return nil, err
  513. }
  514. return cres, nil
  515. } else if spec.ResourceQuantity.HygonComDcu != "" {
  516. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "HygonComDcu")
  517. if err != nil {
  518. return nil, err
  519. }
  520. cres, err := genClusterResources(tag, spec.ResourceQuantity.HygonComDcu, spec)
  521. if err != nil {
  522. return nil, err
  523. }
  524. return cres, nil
  525. } else if spec.ResourceQuantity.HuaweiComAscend910 != "" {
  526. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "HuaweiComAscend910")
  527. if err != nil {
  528. return nil, err
  529. }
  530. cres, err := genClusterResources(tag, spec.ResourceQuantity.HuaweiComAscend910, spec)
  531. if err != nil {
  532. return nil, err
  533. }
  534. return cres, nil
  535. } else if spec.ResourceQuantity.EnflameComGcu != "" {
  536. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "EnflameComGcu")
  537. if err != nil {
  538. return nil, err
  539. }
  540. cres, err := genClusterResources(tag, spec.ResourceQuantity.EnflameComGcu, spec)
  541. if err != nil {
  542. return nil, err
  543. }
  544. return cres, nil
  545. } else if spec.ResourceQuantity.MXN260 != "" {
  546. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MXN260")
  547. if err != nil {
  548. return nil, err
  549. }
  550. cres, err := genClusterResources(tag, spec.ResourceQuantity.MXN260, spec)
  551. if err != nil {
  552. return nil, err
  553. }
  554. return cres, nil
  555. } else if spec.ResourceQuantity.NvidiaV100 != "" {
  556. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaV100")
  557. if err != nil {
  558. return nil, err
  559. }
  560. cres, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaV100, spec)
  561. if err != nil {
  562. return nil, err
  563. }
  564. return cres, nil
  565. } else if spec.ResourceQuantity.MetaxTechComGpu != "" {
  566. tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MetaxTechComGpu")
  567. if err != nil {
  568. return nil, err
  569. }
  570. cres, err := genClusterResources(tag, spec.ResourceQuantity.MetaxTechComGpu, spec)
  571. if err != nil {
  572. return nil, err
  573. }
  574. return cres, nil
  575. }
  576. return nil, nil
  577. }
  578. func genClusterResources(cType string, cNum string, s *omodel.Spec) (*collector.ClusterResource, error) {
  579. cres := &collector.ClusterResource{}
  580. bres := make([]*collector.Usage, 0)
  581. var cardNum int64
  582. var cpuCore int64
  583. var memGi int64
  584. cardNum, err := strconv.ParseInt(cNum, 10, 64)
  585. if err != nil {
  586. cardNum = 0
  587. }
  588. cpuCore, err = strconv.ParseInt(s.ResourceQuantity.Cpu, 10, 64)
  589. if err != nil {
  590. cpuCore = 0
  591. }
  592. if s.ResourceQuantity.Memory != "" {
  593. gi := strings.Split(s.ResourceQuantity.Memory, Gi)
  594. if len(gi) != 2 {
  595. return nil, fmt.Errorf("s.ResourceQuantity.Memory convert error: %s", s.ResourceQuantity.Memory)
  596. }
  597. mGi, err := strconv.ParseInt(gi[0], 10, 64)
  598. if err != nil {
  599. memGi = 0
  600. } else {
  601. memGi = mGi
  602. }
  603. } else {
  604. memGi = 0
  605. }
  606. card := &collector.Usage{
  607. Type: ComputeSourceToCardType[cType],
  608. Name: strings.ToUpper(cType),
  609. Total: &collector.UnitValue{Unit: NUMBER, Value: cardNum},
  610. Available: &collector.UnitValue{Unit: NUMBER, Value: cardNum},
  611. }
  612. cpu := &collector.Usage{
  613. Type: strings.ToUpper(CPU),
  614. Name: strings.ToUpper(CPU),
  615. Total: &collector.UnitValue{Unit: CPUCORE, Value: cpuCore},
  616. Available: &collector.UnitValue{Unit: CPUCORE, Value: cpuCore},
  617. }
  618. mem := &collector.Usage{
  619. Type: strings.ToUpper(MEMORY),
  620. Name: strings.ToUpper(RAM),
  621. Total: &collector.UnitValue{Unit: GIGABYTE, Value: memGi},
  622. Available: &collector.UnitValue{Unit: GIGABYTE, Value: memGi},
  623. }
  624. bres = append(bres, cpu)
  625. bres = append(bres, mem)
  626. cres.Resource = card
  627. cres.BaseResources = bres
  628. return cres, nil
  629. }
  630. // inference
  // GetClusterInferUrl is not implemented; it always returns an error with the
  // NotImplementError message.
  631. func (o *OctopusHttp) GetClusterInferUrl(ctx context.Context, option *option.InferOption) (*inference.ClusterInferUrl, error) {
  632. return nil, errors.New(NotImplementError)
  633. }
  // GetInferDeployInstanceList is not implemented; it always returns an error
  // with the NotImplementError message.
  634. func (o *OctopusHttp) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
  635. return nil, errors.New(NotImplementError)
  636. }
  // StartInferDeployInstance is a stub: it ignores id and always returns false.
  637. func (o *OctopusHttp) StartInferDeployInstance(ctx context.Context, id string) bool {
  638. return false
  639. }
  // StopInferDeployInstance is a stub: it ignores id and always returns false.
  640. func (o *OctopusHttp) StopInferDeployInstance(ctx context.Context, id string) bool {
  641. return false
  642. }
  // GetInferDeployInstance is not implemented; it always returns an error with
  // the NotImplementError message.
  643. func (o *OctopusHttp) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
  644. return nil, errors.New(NotImplementError)
  645. }
  // CreateInferDeployInstance is not implemented; it always returns an empty
  // string and an error with the NotImplementError message.
  646. func (o *OctopusHttp) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
  647. return "", errors.New(NotImplementError)
  648. }
  // CheckModelExistence is a stub: it ignores its arguments and always
  // returns false.
  649. func (o *OctopusHttp) CheckModelExistence(ctx context.Context, modelName string, modelType string) bool {
  650. return false
  651. }
  // GetImageInferResult is not implemented; it always returns an empty string
  // and an error with the NotImplementError message.
  652. func (o *OctopusHttp) GetImageInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
  653. return "", errors.New(NotImplementError)
  654. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.