You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

resource_specification.go 15 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. package models
  2. import (
  3. "code.gitea.io/gitea/modules/timeutil"
  4. "fmt"
  5. "xorm.io/builder"
  6. )
  7. const (
  8. SpecNotVerified int = iota + 1
  9. SpecOnShelf
  10. SpecOffShelf
  11. )
// ResourceSpecification is the xorm-mapped record that the functions in this
// file insert into, update in and read from the resource_specification table.
type ResourceSpecification struct {
	ID           int64  `xorm:"pk autoincr"`
	QueueId      int64  `xorm:"INDEX"` // joined against resource_queue.id by the queries in this file
	SourceSpecId string `xorm:"INDEX"`
	AccCardsNum  int
	CpuCores     int
	MemGiB       float32
	GPUMemGiB    float32
	ShareMemGiB  float32
	UnitPrice    int
	// Status is assigned the Spec* constants (e.g. SpecOnShelf, SpecOffShelf).
	Status          int
	IsAutomaticSync bool
	CreatedTime     timeutil.TimeStamp `xorm:"created"`
	CreatedBy       int64
	UpdatedTime     timeutil.TimeStamp `xorm:"updated"`
	UpdatedBy       int64
}
  29. func (r ResourceSpecification) ConvertToRes() *ResourceSpecificationRes {
  30. return &ResourceSpecificationRes{
  31. ID: r.ID,
  32. SourceSpecId: r.SourceSpecId,
  33. AccCardsNum: r.AccCardsNum,
  34. CpuCores: r.CpuCores,
  35. MemGiB: r.MemGiB,
  36. ShareMemGiB: r.ShareMemGiB,
  37. GPUMemGiB: r.GPUMemGiB,
  38. UnitPrice: r.UnitPrice,
  39. Status: r.Status,
  40. UpdatedTime: r.UpdatedTime,
  41. }
  42. }
// ResourceSpecificationReq carries the input for creating or changing a
// resource specification; ToDTO turns it into a ResourceSpecification.
type ResourceSpecificationReq struct {
	QueueId         int64 `binding:"Required"`
	SourceSpecId    string
	AccCardsNum     int
	CpuCores        int
	MemGiB          float32
	GPUMemGiB       float32
	ShareMemGiB     float32
	UnitPrice       int
	Status          int
	IsAutomaticSync bool
	// CreatorId is copied to both CreatedBy and UpdatedBy by ToDTO.
	CreatorId int64
}
  56. func (r ResourceSpecificationReq) ToDTO() ResourceSpecification {
  57. return ResourceSpecification{
  58. QueueId: r.QueueId,
  59. SourceSpecId: r.SourceSpecId,
  60. AccCardsNum: r.AccCardsNum,
  61. CpuCores: r.CpuCores,
  62. MemGiB: r.MemGiB,
  63. GPUMemGiB: r.GPUMemGiB,
  64. ShareMemGiB: r.ShareMemGiB,
  65. UnitPrice: r.UnitPrice,
  66. Status: r.Status,
  67. IsAutomaticSync: r.IsAutomaticSync,
  68. CreatedBy: r.CreatorId,
  69. UpdatedBy: r.CreatorId,
  70. }
  71. }
// SearchResourceSpecificationOptions holds the paging settings and optional
// filters consumed by SearchResourceSpecification.
type SearchResourceSpecificationOptions struct {
	// ListOptions is declared elsewhere; SearchResourceSpecification reads
	// opts.Page and opts.PageSize, presumably promoted from here.
	ListOptions
	QueueId int64  // filter on resource_specification.queue_id, applied only when > 0
	Status  int    // filter on resource_specification.status, applied only when > 0
	Cluster string // filter on resource_queue.cluster, applied only when non-empty
}
// SearchResourceBriefSpecificationOptions groups a queue ID and a cluster
// name. It is not used by any function visible in this file.
// NOTE(review): presumably the filter set for a "brief" specification listing
// defined elsewhere — confirm against the caller.
type SearchResourceBriefSpecificationOptions struct {
	QueueId int64
	Cluster string
}
// ResourceSpecAndQueueListRes pairs a list of converted spec/queue entries
// with a total count. It is built by NewResourceSpecAndQueueListRes.
type ResourceSpecAndQueueListRes struct {
	TotalSize int64
	List      []*ResourceSpecAndQueueRes
}
  86. func NewResourceSpecAndQueueListRes(totalSize int64, list []ResourceSpecAndQueue) *ResourceSpecAndQueueListRes {
  87. resList := make([]*ResourceSpecAndQueueRes, len(list))
  88. for i, v := range list {
  89. resList[i] = v.ConvertToRes()
  90. }
  91. return &ResourceSpecAndQueueListRes{
  92. TotalSize: totalSize,
  93. List: resList,
  94. }
  95. }
// ResourceSpecificationRes is the reduced view of a ResourceSpecification
// produced by ResourceSpecification.ConvertToRes. Its TableName method maps it
// to the resource_specification table.
type ResourceSpecificationRes struct {
	ID           int64
	SourceSpecId string
	AccCardsNum  int
	CpuCores     int
	MemGiB       float32
	GPUMemGiB    float32
	ShareMemGiB  float32
	UnitPrice    int
	Status       int
	UpdatedTime  timeutil.TimeStamp
}
  108. func (ResourceSpecificationRes) TableName() string {
  109. return "resource_specification"
  110. }
// ResourceSpecAndQueueRes bundles the converted specification and the
// converted queue of one ResourceSpecAndQueue row.
type ResourceSpecAndQueueRes struct {
	Spec  *ResourceSpecificationRes
	Queue *ResourceQueueRes
}
// ResourceSpecAndQueue is the row type filled by SearchResourceSpecification,
// which joins resource_specification with resource_queue. Both embedded
// structs are tagged `xorm:"extends"`.
type ResourceSpecAndQueue struct {
	ResourceSpecification `xorm:"extends"`
	ResourceQueue         `xorm:"extends"`
}
  119. func (*ResourceSpecAndQueue) TableName() string {
  120. return "resource_specification"
  121. }
  122. func (r ResourceSpecAndQueue) ConvertToRes() *ResourceSpecAndQueueRes {
  123. return &ResourceSpecAndQueueRes{
  124. Spec: r.ResourceSpecification.ConvertToRes(),
  125. Queue: r.ResourceQueue.ConvertToRes(),
  126. }
  127. }
// FindSpecsOptions lists the optional filters understood by FindSpecs.
//
// String filters are applied when non-empty and SpecId when > 0. Each numeric
// field whose zero value could be a legitimate filter has a companion Use*
// flag: FindSpecs adds the equality filter only when the flag is true.
type FindSpecsOptions struct {
	// JobType is matched against resource_scene.job_type; FindSpecs ignores
	// it when RequestAll is true.
	JobType         JobType
	ComputeResource string
	Cluster         string
	AiCenterCode    string
	SpecId          int64
	QueueCode       string
	SourceSpecId    string
	AccCardsNum     int
	UseAccCardsNum  bool
	AccCardType     string
	CpuCores        int
	UseCpuCores     bool
	MemGiB          float32
	UseMemGiB       bool
	GPUMemGiB       float32
	UseGPUMemGiB    bool
	ShareMemGiB     float32
	UseShareMemGiB  bool
	// RequestAll selects which specs FindSpecs considers. When true, specs
	// are returned whether or not they are used in a scene. When false, only
	// specs used in a scene are returned.
	RequestAll bool
}
// Specification is a flattened view that combines fields of a resource
// specification with fields of the queue it belongs to. FindSpecs loads it
// from the joined tables and BuildSpecification assembles it in memory.
type Specification struct {
	ID              int64
	SourceSpecId    string
	AccCardsNum     int
	AccCardType     string
	CpuCores        int
	MemGiB          float32
	GPUMemGiB       float32
	ShareMemGiB     float32
	ComputeResource string
	UnitPrice       int
	QueueId         int64
	QueueCode       string
	Cluster         string
	AiCenterCode    string
	AiCenterName    string
	// IsExclusive and ExclusiveOrg are not set by BuildSpecification.
	IsExclusive  bool
	ExclusiveOrg string
}
  169. func (Specification) TableName() string {
  170. return "resource_specification"
  171. }
  172. func InsertResourceSpecification(r ResourceSpecification) (int64, error) {
  173. return x.Insert(&r)
  174. }
  175. func UpdateResourceSpecificationById(queueId int64, spec ResourceSpecification) (int64, error) {
  176. return x.ID(queueId).Update(&spec)
  177. }
  178. func UpdateSpecUnitPriceById(id int64, unitPrice int) error {
  179. _, err := x.Exec("update resource_specification set unit_price = ? ,updated_time = ? where id = ?", unitPrice, timeutil.TimeStampNow(), id)
  180. return err
  181. }
  182. func SearchResourceSpecification(opts SearchResourceSpecificationOptions) (int64, []ResourceSpecAndQueue, error) {
  183. var cond = builder.NewCond()
  184. if opts.Page <= 0 {
  185. opts.Page = 1
  186. }
  187. if opts.QueueId > 0 {
  188. cond = cond.And(builder.Eq{"resource_specification.queue_id": opts.QueueId})
  189. }
  190. if opts.Status > 0 {
  191. cond = cond.And(builder.Eq{"resource_specification.status": opts.Status})
  192. }
  193. if opts.Cluster != "" {
  194. cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster})
  195. }
  196. //cond = cond.And(builder.Or(builder.Eq{"resource_queue.deleted_time": 0}).Or(builder.IsNull{"resource_queue.deleted_time"}))
  197. n, err := x.Where(cond).Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id").
  198. Unscoped().Count(&ResourceSpecAndQueue{})
  199. if err != nil {
  200. return 0, nil, err
  201. }
  202. r := make([]ResourceSpecAndQueue, 0)
  203. err = x.Where(cond).
  204. Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id").
  205. Desc("resource_specification.id").
  206. Limit(opts.PageSize, (opts.Page-1)*opts.PageSize).
  207. Unscoped().Find(&r)
  208. if err != nil {
  209. return 0, nil, err
  210. }
  211. return n, r, nil
  212. }
  213. func GetSpecScenes(specId int64) ([]ResourceSceneBriefRes, error) {
  214. r := make([]ResourceSceneBriefRes, 0)
  215. err := x.Where("resource_scene_spec.spec_id = ?", specId).
  216. Join("INNER", "resource_scene_spec", "resource_scene_spec.scene_id = resource_scene.id").
  217. Find(&r)
  218. if err != nil {
  219. return nil, err
  220. }
  221. return r, nil
  222. }
  223. func ResourceSpecOnShelf(id int64, unitPrice int) error {
  224. _, err := x.Exec("update resource_specification set unit_price = ?,updated_time = ?,status = ? where id = ?", unitPrice, timeutil.TimeStampNow(), SpecOnShelf, id)
  225. return err
  226. }
  227. func ResourceSpecOffShelf(id int64) (int64, error) {
  228. sess := x.NewSession()
  229. var err error
  230. defer func() {
  231. if err != nil {
  232. sess.Rollback()
  233. }
  234. sess.Close()
  235. }()
  236. //delete scene spec relation
  237. if _, err = sess.Where("spec_id = ?", id).Delete(&ResourceSceneSpec{}); err != nil {
  238. return 0, err
  239. }
  240. param := ResourceSpecification{
  241. Status: SpecOffShelf,
  242. }
  243. n, err := sess.Where("id = ? and status = ?", id, SpecOnShelf).Update(&param)
  244. if err != nil {
  245. return 0, err
  246. }
  247. sess.Commit()
  248. return n, err
  249. }
  250. func GetResourceSpecification(r *ResourceSpecification) (*ResourceSpecification, error) {
  251. has, err := x.Get(r)
  252. if err != nil {
  253. return nil, err
  254. } else if !has {
  255. return nil, nil
  256. }
  257. return r, nil
  258. }
  259. func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceSpecification, existIds []int64) error {
  260. sess := x.NewSession()
  261. var err error
  262. defer func() {
  263. if err != nil {
  264. sess.Rollback()
  265. }
  266. sess.Close()
  267. }()
  268. //delete specs and scene that no longer exists
  269. deleteIds := make([]int64, 0)
  270. cond := builder.NewCond()
  271. cond = cond.And(builder.NotIn("resource_specification.id", existIds)).And(builder.Eq{"resource_queue.cluster": C2NetCluster})
  272. if err := sess.Cols("resource_specification.id").Table("resource_specification").
  273. Where(cond).Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id").
  274. Find(&deleteIds); err != nil {
  275. return err
  276. }
  277. if len(deleteIds) > 0 {
  278. if _, err = sess.In("id", deleteIds).Update(&ResourceSpecification{Status: SpecOffShelf}); err != nil {
  279. return err
  280. }
  281. if _, err = sess.In("spec_id", deleteIds).Delete(&ResourceSceneSpec{}); err != nil {
  282. return err
  283. }
  284. }
  285. //update exists specs
  286. if len(updateList) > 0 {
  287. for _, v := range updateList {
  288. if _, err = sess.ID(v.ID).Update(&v); err != nil {
  289. return err
  290. }
  291. }
  292. }
  293. //insert new specs
  294. if len(insertList) > 0 {
  295. if _, err = sess.Insert(insertList); err != nil {
  296. return err
  297. }
  298. }
  299. return sess.Commit()
  300. }
  301. //FindSpecs
  302. func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) {
  303. var cond = builder.NewCond()
  304. if !opts.RequestAll && opts.JobType != "" {
  305. cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType})
  306. }
  307. if opts.ComputeResource != "" {
  308. cond = cond.And(builder.Eq{"resource_queue.compute_resource": opts.ComputeResource})
  309. }
  310. if opts.Cluster != "" {
  311. cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster})
  312. }
  313. if opts.AiCenterCode != "" {
  314. cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode})
  315. }
  316. if opts.SpecId > 0 {
  317. cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId})
  318. }
  319. if opts.QueueCode != "" {
  320. cond = cond.And(builder.Eq{"resource_queue.queue_code": opts.QueueCode})
  321. }
  322. if opts.SourceSpecId != "" {
  323. cond = cond.And(builder.Eq{"resource_specification.source_spec_id": opts.SourceSpecId})
  324. }
  325. if opts.UseAccCardsNum {
  326. cond = cond.And(builder.Eq{"resource_specification.acc_cards_num": opts.AccCardsNum})
  327. }
  328. if opts.AccCardType != "" {
  329. cond = cond.And(builder.Eq{"resource_queue.acc_card_type": opts.AccCardType})
  330. }
  331. if opts.UseCpuCores {
  332. cond = cond.And(builder.Eq{"resource_specification.cpu_cores": opts.CpuCores})
  333. }
  334. if opts.UseMemGiB {
  335. cond = cond.And(builder.Eq{"resource_specification.mem_gi_b": opts.MemGiB})
  336. }
  337. if opts.UseGPUMemGiB {
  338. cond = cond.And(builder.Eq{"resource_specification.gpu_mem_gi_b": opts.GPUMemGiB})
  339. }
  340. if opts.UseShareMemGiB {
  341. cond = cond.And(builder.Eq{"resource_specification.share_mem_gi_b": opts.ShareMemGiB})
  342. }
  343. r := make([]*Specification, 0)
  344. s := x.Where(cond).
  345. Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id")
  346. if !opts.RequestAll {
  347. s = s.Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id").
  348. Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id")
  349. }
  350. err := s.OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc,resource_specification.cpu_cores asc").
  351. Unscoped().Find(&r)
  352. if err != nil {
  353. return nil, err
  354. }
  355. return r, nil
  356. }
  357. func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specification, error) {
  358. sess := x.NewSession()
  359. defer sess.Close()
  360. sess.Begin()
  361. param := ResourceQueue{
  362. QueueCode: queue.QueueCode,
  363. Cluster: queue.Cluster,
  364. AiCenterCode: queue.AiCenterCode,
  365. ComputeResource: queue.ComputeResource,
  366. AccCardType: queue.AccCardType,
  367. }
  368. _, err := sess.Get(&param)
  369. if err != nil {
  370. sess.Rollback()
  371. return nil, err
  372. }
  373. if param.ID == 0 {
  374. _, err = sess.InsertOne(&queue)
  375. if err != nil {
  376. sess.Rollback()
  377. return nil, err
  378. }
  379. } else {
  380. queue = param
  381. }
  382. spec.QueueId = queue.ID
  383. _, err = sess.InsertOne(&spec)
  384. if err != nil {
  385. sess.Rollback()
  386. return nil, err
  387. }
  388. sess.Commit()
  389. return BuildSpecification(queue, spec), nil
  390. }
  391. func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification {
  392. return &Specification{
  393. ID: spec.ID,
  394. SourceSpecId: spec.SourceSpecId,
  395. AccCardsNum: spec.AccCardsNum,
  396. AccCardType: queue.AccCardType,
  397. CpuCores: spec.CpuCores,
  398. MemGiB: spec.MemGiB,
  399. GPUMemGiB: spec.GPUMemGiB,
  400. ShareMemGiB: spec.ShareMemGiB,
  401. ComputeResource: queue.ComputeResource,
  402. UnitPrice: spec.UnitPrice,
  403. QueueId: queue.ID,
  404. QueueCode: queue.QueueCode,
  405. Cluster: queue.Cluster,
  406. AiCenterCode: queue.AiCenterCode,
  407. AiCenterName: queue.AiCenterName,
  408. }
  409. }
  410. func GetCloudbrainOneAccCardType(queueCode string) string {
  411. switch queueCode {
  412. case "a100":
  413. return "A100"
  414. case "openidebug":
  415. return "T4"
  416. case "openidgx":
  417. return "V100"
  418. }
  419. return ""
  420. }
  421. var cloudbrainTwoSpecsInitFlag = false
  422. var cloudbrainTwoSpecs map[string]*Specification
  423. func GetCloudbrainTwoSpecs() (map[string]*Specification, error) {
  424. if !cloudbrainTwoSpecsInitFlag {
  425. r, err := InitCloudbrainTwoSpecs()
  426. if err != nil {
  427. return nil, err
  428. }
  429. cloudbrainTwoSpecsInitFlag = true
  430. cloudbrainTwoSpecs = r
  431. }
  432. return cloudbrainTwoSpecs, nil
  433. }
  434. func InitCloudbrainTwoSpecs() (map[string]*Specification, error) {
  435. r := make(map[string]*Specification, 0)
  436. queue, err := GetResourceQueue(&ResourceQueue{QueueCode: "openisupport"})
  437. if err != nil {
  438. return nil, err
  439. }
  440. if queue == nil {
  441. queue = &ResourceQueue{
  442. QueueCode: "openisupport",
  443. Cluster: OpenICluster,
  444. AiCenterCode: AICenterOfCloudBrainTwo,
  445. AiCenterName: "云脑二",
  446. ComputeResource: NPU,
  447. AccCardType: "ASCEND910",
  448. Remark: "处理历史云脑任务时自动生成",
  449. }
  450. _, err = x.InsertOne(queue)
  451. if err != nil {
  452. return nil, err
  453. }
  454. }
  455. for i := 1; i <= 8; i = i * 2 {
  456. sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(i)
  457. spec, err := GetResourceSpecification(&ResourceSpecification{
  458. SourceSpecId: sourceSpecId,
  459. QueueId: queue.ID,
  460. })
  461. if err != nil {
  462. return nil, err
  463. }
  464. if spec == nil {
  465. spec = &ResourceSpecification{
  466. QueueId: queue.ID,
  467. SourceSpecId: sourceSpecId,
  468. AccCardsNum: i,
  469. CpuCores: i * 24,
  470. MemGiB: float32(i * 256),
  471. GPUMemGiB: float32(32),
  472. Status: SpecOffShelf,
  473. }
  474. _, err = x.Insert(spec)
  475. if err != nil {
  476. return nil, err
  477. }
  478. }
  479. r[sourceSpecId] = BuildSpecification(*queue, *spec)
  480. }
  481. return r, nil
  482. }
  483. var grampusSpecsInitFlag = false
  484. var grampusSpecs map[string]*Specification
  485. func GetGrampusSpecs() (map[string]*Specification, error) {
  486. if !grampusSpecsInitFlag {
  487. specMap := make(map[string]*Specification, 0)
  488. r, err := FindSpecs(FindSpecsOptions{
  489. Cluster: C2NetCluster,
  490. RequestAll: true,
  491. })
  492. if err != nil {
  493. return nil, err
  494. }
  495. for _, spec := range r {
  496. specMap[spec.SourceSpecId] = spec
  497. specMap[spec.SourceSpecId+"_"+spec.AiCenterCode] = spec
  498. }
  499. grampusSpecsInitFlag = true
  500. grampusSpecs = specMap
  501. }
  502. return grampusSpecs, nil
  503. }