You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

resource_specification.go 15 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago

  1. package models
  2. import (
  3. "code.gitea.io/gitea/modules/timeutil"
  4. "fmt"
  5. "xorm.io/builder"
  6. )
  7. const (
  8. SpecNotVerified int = iota + 1
  9. SpecOnShelf
  10. SpecOffShelf
  11. )
// ResourceSpecification is the xorm model for one resource specification
// row: a fixed bundle of accelerator cards, CPU cores and memory that
// belongs to a resource queue.
type ResourceSpecification struct {
	// ID is the auto-incremented primary key.
	ID int64 `xorm:"pk autoincr"`
	// QueueId references the owning resource_queue row (joined on
	// resource_queue.id by the queries in this file).
	QueueId int64 `xorm:"INDEX"`
	// SourceSpecId is the identifier of the spec in its source system,
	// e.g. "modelarts.bm.910.arm.public.1".
	SourceSpecId string `xorm:"INDEX"`
	// AccCardsNum is the number of accelerator cards.
	AccCardsNum int
	// CpuCores is the number of CPU cores.
	CpuCores int
	// MemGiB, GPUMemGiB and ShareMemGiB are memory sizes; the field names
	// indicate GiB.
	MemGiB      float32
	GPUMemGiB   float32
	ShareMemGiB float32
	// UnitPrice is the price of the spec; the unit is not visible here.
	UnitPrice int
	// Status holds one of the Spec* status constants.
	Status int
	// IsAvailable is cleared by SyncGrampusSpecs for specs that are no
	// longer in the list of existing ids.
	IsAvailable bool
	// IsAutomaticSync presumably marks specs maintained by automatic
	// synchronisation — TODO confirm against callers.
	IsAutomaticSync bool
	// CreatedTime is filled in by xorm on insert.
	CreatedTime timeutil.TimeStamp `xorm:"created"`
	// CreatedBy is the id of the creating user (set from CreatorId in ToDTO).
	CreatedBy int64
	// UpdatedTime is refreshed by xorm on update.
	UpdatedTime timeutil.TimeStamp `xorm:"updated"`
	// UpdatedBy is the id of the last updating user.
	UpdatedBy int64
}
  30. func (r ResourceSpecification) ConvertToRes() *ResourceSpecificationRes {
  31. return &ResourceSpecificationRes{
  32. ID: r.ID,
  33. SourceSpecId: r.SourceSpecId,
  34. AccCardsNum: r.AccCardsNum,
  35. CpuCores: r.CpuCores,
  36. MemGiB: r.MemGiB,
  37. ShareMemGiB: r.ShareMemGiB,
  38. GPUMemGiB: r.GPUMemGiB,
  39. UnitPrice: r.UnitPrice,
  40. Status: r.Status,
  41. IsAvailable: r.IsAvailable,
  42. UpdatedTime: r.UpdatedTime,
  43. }
  44. }
// ResourceSpecificationReq is the request payload describing a resource
// specification. ToDTO converts it into a ResourceSpecification.
type ResourceSpecificationReq struct {
	// QueueId is mandatory (binding:"Required").
	QueueId int64 `binding:"Required"`
	// SourceSpecId is the identifier of the spec in its source system.
	SourceSpecId string
	// AccCardsNum is the number of accelerator cards.
	AccCardsNum int
	// CpuCores is the number of CPU cores.
	CpuCores int
	// MemGiB, GPUMemGiB and ShareMemGiB are memory sizes; the field names
	// indicate GiB.
	MemGiB      float32
	GPUMemGiB   float32
	ShareMemGiB float32
	// UnitPrice is the price of the spec; the unit is not visible here.
	UnitPrice int
	// Status holds one of the Spec* status constants.
	Status int
	// IsAutomaticSync is copied unchanged onto the model by ToDTO.
	IsAutomaticSync bool
	// CreatorId becomes both CreatedBy and UpdatedBy on the model.
	CreatorId int64
}
  58. func (r ResourceSpecificationReq) ToDTO() ResourceSpecification {
  59. return ResourceSpecification{
  60. QueueId: r.QueueId,
  61. SourceSpecId: r.SourceSpecId,
  62. AccCardsNum: r.AccCardsNum,
  63. CpuCores: r.CpuCores,
  64. MemGiB: r.MemGiB,
  65. GPUMemGiB: r.GPUMemGiB,
  66. ShareMemGiB: r.ShareMemGiB,
  67. UnitPrice: r.UnitPrice,
  68. Status: r.Status,
  69. IsAutomaticSync: r.IsAutomaticSync,
  70. CreatedBy: r.CreatorId,
  71. UpdatedBy: r.CreatorId,
  72. IsAvailable: true,
  73. }
  74. }
// SearchResourceSpecificationOptions holds the paging and filter settings
// consumed by SearchResourceSpecification.
type SearchResourceSpecificationOptions struct {
	// ListOptions supplies Page and PageSize.
	ListOptions
	// QueueId restricts the result to one queue when greater than zero.
	QueueId int64
	// Status restricts the result to one spec status when greater than zero.
	Status int
	// Cluster restricts the result to one queue cluster when non-empty.
	Cluster string
	// AvailableCode: 1 selects available specs only, 2 selects unavailable
	// specs only, any other value applies no availability filter.
	AvailableCode int
}
// SearchResourceBriefSpecificationOptions carries a queue id and a cluster
// name as search filters. It is not used by the code visible in this
// listing.
type SearchResourceBriefSpecificationOptions struct {
	QueueId int64
	Cluster string
}
// ResourceSpecAndQueueListRes is a list of spec/queue pairs plus a total
// count, as assembled by NewResourceSpecAndQueueListRes.
type ResourceSpecAndQueueListRes struct {
	// TotalSize is the total passed in by the caller; it is independent of
	// len(List).
	TotalSize int64
	// List holds the converted entries.
	List []*ResourceSpecAndQueueRes
}
  90. func NewResourceSpecAndQueueListRes(totalSize int64, list []ResourceSpecAndQueue) *ResourceSpecAndQueueListRes {
  91. resList := make([]*ResourceSpecAndQueueRes, len(list))
  92. for i, v := range list {
  93. resList[i] = v.ConvertToRes()
  94. }
  95. return &ResourceSpecAndQueueListRes{
  96. TotalSize: totalSize,
  97. List: resList,
  98. }
  99. }
// ResourceSpecificationRes is the response view of a ResourceSpecification.
// It is produced by ResourceSpecification.ConvertToRes and maps to the
// resource_specification table through its TableName method.
type ResourceSpecificationRes struct {
	// ID is the specification id.
	ID int64
	// SourceSpecId is the identifier of the spec in its source system.
	SourceSpecId string
	// AccCardsNum is the number of accelerator cards.
	AccCardsNum int
	// CpuCores is the number of CPU cores.
	CpuCores int
	// MemGiB, GPUMemGiB and ShareMemGiB are memory sizes; the field names
	// indicate GiB.
	MemGiB      float32
	GPUMemGiB   float32
	ShareMemGiB float32
	// UnitPrice is the price of the spec; the unit is not visible here.
	UnitPrice int
	// Status holds one of the Spec* status constants.
	Status int
	// IsAvailable mirrors ResourceSpecification.IsAvailable.
	IsAvailable bool
	// UpdatedTime is the last update timestamp of the row.
	UpdatedTime timeutil.TimeStamp
}
  113. func (ResourceSpecificationRes) TableName() string {
  114. return "resource_specification"
  115. }
// ResourceSpecAndQueueRes pairs the response view of a specification with
// the response view of the queue it belongs to.
type ResourceSpecAndQueueRes struct {
	Spec  *ResourceSpecificationRes
	Queue *ResourceQueueRes
}
// ResourceSpecAndQueue is the row shape used when resource_specification is
// joined with resource_queue: both models are embedded with xorm's
// "extends" tag.
type ResourceSpecAndQueue struct {
	ResourceSpecification `xorm:"extends"`
	ResourceQueue         `xorm:"extends"`
}
  124. func (*ResourceSpecAndQueue) TableName() string {
  125. return "resource_specification"
  126. }
  127. func (r ResourceSpecAndQueue) ConvertToRes() *ResourceSpecAndQueueRes {
  128. return &ResourceSpecAndQueueRes{
  129. Spec: r.ResourceSpecification.ConvertToRes(),
  130. Queue: r.ResourceQueue.ConvertToRes(),
  131. }
  132. }
// FindSpecsOptions lists the filters understood by FindSpecs. String
// filters apply when non-empty and SpecId / SpecStatus when greater than
// zero. Each numeric resource filter has a companion Use* flag so that a
// zero value can be matched explicitly.
type FindSpecsOptions struct {
	// JobType filters on resource_scene.job_type; it is ignored when
	// RequestAll is true.
	JobType JobType
	// ComputeResource filters on resource_queue.compute_resource.
	ComputeResource string
	// Cluster filters on resource_queue.cluster.
	Cluster string
	// AiCenterCode filters on resource_queue.ai_center_code.
	AiCenterCode string
	// SpecId filters on resource_specification.id.
	SpecId int64
	// QueueCode filters on resource_queue.queue_code.
	QueueCode string
	// SourceSpecId filters on resource_specification.source_spec_id.
	SourceSpecId string
	// AccCardsNum is applied only when UseAccCardsNum is true.
	AccCardsNum    int
	UseAccCardsNum bool
	// AccCardType filters on resource_queue.acc_card_type.
	AccCardType string
	// CpuCores is applied only when UseCpuCores is true.
	CpuCores    int
	UseCpuCores bool
	// MemGiB is applied only when UseMemGiB is true.
	MemGiB    float32
	UseMemGiB bool
	// GPUMemGiB is applied only when UseGPUMemGiB is true.
	GPUMemGiB    float32
	UseGPUMemGiB bool
	// ShareMemGiB is applied only when UseShareMemGiB is true.
	ShareMemGiB    float32
	UseShareMemGiB bool
	// RequestAll, when true, returns specs whether or not they are used in
	// a scene. When false, only specs attached to a scene are returned
	// (FindSpecs then inner-joins the scene tables).
	RequestAll bool
	// SpecStatus filters on resource_specification.status.
	SpecStatus int
}
// Specification is a flattened view that combines a resource specification
// with attributes of its queue. It is loaded by FindSpecs (from
// resource_specification joined with resource_queue) or assembled in memory
// by BuildSpecification.
type Specification struct {
	// ID is the specification id.
	ID int64
	// SourceSpecId is the identifier of the spec in its source system.
	SourceSpecId string
	// AccCardsNum is the number of accelerator cards.
	AccCardsNum int
	// AccCardType is the accelerator card type of the queue.
	AccCardType string
	// CpuCores is the number of CPU cores.
	CpuCores int
	// MemGiB, GPUMemGiB and ShareMemGiB are memory sizes; the field names
	// indicate GiB.
	MemGiB      float32
	GPUMemGiB   float32
	ShareMemGiB float32
	// ComputeResource is the compute resource kind of the queue.
	ComputeResource string
	// UnitPrice is the price of the spec; the unit is not visible here.
	UnitPrice int
	// QueueId, QueueCode, Cluster, AiCenterCode and AiCenterName identify
	// the queue the spec belongs to.
	QueueId      int64
	QueueCode    string
	Cluster      string
	AiCenterCode string
	AiCenterName string
	// IsExclusive and ExclusiveOrg are not set by BuildSpecification;
	// presumably they are filled from query columns — TODO confirm.
	IsExclusive  bool
	ExclusiveOrg string
}
  175. func (Specification) TableName() string {
  176. return "resource_specification"
  177. }
  178. func InsertResourceSpecification(r ResourceSpecification) (int64, error) {
  179. return x.Insert(&r)
  180. }
  181. func UpdateResourceSpecificationById(queueId int64, spec ResourceSpecification) (int64, error) {
  182. return x.ID(queueId).Update(&spec)
  183. }
  184. func UpdateSpecUnitPriceById(id int64, unitPrice int) error {
  185. _, err := x.Exec("update resource_specification set unit_price = ? ,updated_time = ? where id = ?", unitPrice, timeutil.TimeStampNow(), id)
  186. return err
  187. }
  188. func SearchResourceSpecification(opts SearchResourceSpecificationOptions) (int64, []ResourceSpecAndQueue, error) {
  189. var cond = builder.NewCond()
  190. if opts.Page <= 0 {
  191. opts.Page = 1
  192. }
  193. if opts.QueueId > 0 {
  194. cond = cond.And(builder.Eq{"resource_specification.queue_id": opts.QueueId})
  195. }
  196. if opts.Status > 0 {
  197. cond = cond.And(builder.Eq{"resource_specification.status": opts.Status})
  198. }
  199. if opts.Cluster != "" {
  200. cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster})
  201. }
  202. if opts.AvailableCode == 1 {
  203. cond = cond.And(builder.Eq{"resource_specification.is_available": true})
  204. } else if opts.AvailableCode == 2 {
  205. cond = cond.And(builder.Eq{"resource_specification.is_available": false})
  206. }
  207. //cond = cond.And(builder.Or(builder.Eq{"resource_queue.deleted_time": 0}).Or(builder.IsNull{"resource_queue.deleted_time"}))
  208. n, err := x.Where(cond).Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id").
  209. Unscoped().Count(&ResourceSpecAndQueue{})
  210. if err != nil {
  211. return 0, nil, err
  212. }
  213. r := make([]ResourceSpecAndQueue, 0)
  214. err = x.Where(cond).
  215. Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id").
  216. Desc("resource_specification.id").
  217. Limit(opts.PageSize, (opts.Page-1)*opts.PageSize).
  218. Unscoped().Find(&r)
  219. if err != nil {
  220. return 0, nil, err
  221. }
  222. return n, r, nil
  223. }
  224. func GetSpecScenes(specId int64) ([]ResourceSceneBriefRes, error) {
  225. r := make([]ResourceSceneBriefRes, 0)
  226. err := x.Where("resource_scene_spec.spec_id = ?", specId).
  227. Join("INNER", "resource_scene_spec", "resource_scene_spec.scene_id = resource_scene.id").
  228. Find(&r)
  229. if err != nil {
  230. return nil, err
  231. }
  232. return r, nil
  233. }
  234. func ResourceSpecOnShelf(id int64, unitPrice int) error {
  235. _, err := x.Exec("update resource_specification set unit_price = ?,updated_time = ?,status = ? where id = ?", unitPrice, timeutil.TimeStampNow(), SpecOnShelf, id)
  236. return err
  237. }
  238. func ResourceSpecOffShelf(id int64) (int64, error) {
  239. sess := x.NewSession()
  240. var err error
  241. defer func() {
  242. if err != nil {
  243. sess.Rollback()
  244. }
  245. sess.Close()
  246. }()
  247. param := ResourceSpecification{
  248. Status: SpecOffShelf,
  249. }
  250. n, err := sess.Where("id = ? and status = ?", id, SpecOnShelf).Update(&param)
  251. if err != nil {
  252. return 0, err
  253. }
  254. sess.Commit()
  255. return n, err
  256. }
  257. func GetResourceSpecification(r *ResourceSpecification) (*ResourceSpecification, error) {
  258. has, err := x.Get(r)
  259. if err != nil {
  260. return nil, err
  261. } else if !has {
  262. return nil, nil
  263. }
  264. return r, nil
  265. }
  266. func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceSpecification, existIds []int64) error {
  267. sess := x.NewSession()
  268. var err error
  269. defer func() {
  270. if err != nil {
  271. sess.Rollback()
  272. }
  273. sess.Close()
  274. }()
  275. //delete specs and scene that no longer exists
  276. deleteIds := make([]int64, 0)
  277. cond := builder.NewCond()
  278. cond = cond.And(builder.NotIn("resource_specification.id", existIds)).And(builder.Eq{"resource_queue.cluster": C2NetCluster})
  279. if err := sess.Cols("resource_specification.id").Table("resource_specification").
  280. Where(cond).Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id").
  281. Find(&deleteIds); err != nil {
  282. return err
  283. }
  284. if len(deleteIds) > 0 {
  285. if _, err = sess.Cols("status", "is_available").In("id", deleteIds).Update(&ResourceSpecification{Status: SpecOffShelf, IsAvailable: false}); err != nil {
  286. return err
  287. }
  288. }
  289. //update exists specs
  290. if len(updateList) > 0 {
  291. for _, v := range updateList {
  292. if _, err = sess.ID(v.ID).UseBool("is_available").Update(&v); err != nil {
  293. return err
  294. }
  295. }
  296. }
  297. //insert new specs
  298. if len(insertList) > 0 {
  299. if _, err = sess.Insert(insertList); err != nil {
  300. return err
  301. }
  302. }
  303. return sess.Commit()
  304. }
  305. //FindSpecs
  306. func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) {
  307. var cond = builder.NewCond()
  308. if !opts.RequestAll && opts.JobType != "" {
  309. cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType})
  310. }
  311. if opts.ComputeResource != "" {
  312. cond = cond.And(builder.Eq{"resource_queue.compute_resource": opts.ComputeResource})
  313. }
  314. if opts.Cluster != "" {
  315. cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster})
  316. }
  317. if opts.AiCenterCode != "" {
  318. cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode})
  319. }
  320. if opts.SpecId > 0 {
  321. cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId})
  322. }
  323. if opts.QueueCode != "" {
  324. cond = cond.And(builder.Eq{"resource_queue.queue_code": opts.QueueCode})
  325. }
  326. if opts.SourceSpecId != "" {
  327. cond = cond.And(builder.Eq{"resource_specification.source_spec_id": opts.SourceSpecId})
  328. }
  329. if opts.UseAccCardsNum {
  330. cond = cond.And(builder.Eq{"resource_specification.acc_cards_num": opts.AccCardsNum})
  331. }
  332. if opts.AccCardType != "" {
  333. cond = cond.And(builder.Eq{"resource_queue.acc_card_type": opts.AccCardType})
  334. }
  335. if opts.UseCpuCores {
  336. cond = cond.And(builder.Eq{"resource_specification.cpu_cores": opts.CpuCores})
  337. }
  338. if opts.UseMemGiB {
  339. cond = cond.And(builder.Eq{"resource_specification.mem_gi_b": opts.MemGiB})
  340. }
  341. if opts.UseGPUMemGiB {
  342. cond = cond.And(builder.Eq{"resource_specification.gpu_mem_gi_b": opts.GPUMemGiB})
  343. }
  344. if opts.UseShareMemGiB {
  345. cond = cond.And(builder.Eq{"resource_specification.share_mem_gi_b": opts.ShareMemGiB})
  346. }
  347. if opts.SpecStatus > 0 {
  348. cond = cond.And(builder.Eq{"resource_specification.status": opts.SpecStatus})
  349. }
  350. r := make([]*Specification, 0)
  351. s := x.Where(cond).
  352. Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id")
  353. if !opts.RequestAll {
  354. s = s.Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id").
  355. Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id")
  356. }
  357. err := s.OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc,resource_specification.cpu_cores asc,resource_specification.mem_gi_b asc,resource_specification.share_mem_gi_b asc").
  358. Unscoped().Find(&r)
  359. if err != nil {
  360. return nil, err
  361. }
  362. return r, nil
  363. }
  364. func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specification, error) {
  365. sess := x.NewSession()
  366. defer sess.Close()
  367. sess.Begin()
  368. param := ResourceQueue{
  369. QueueCode: queue.QueueCode,
  370. Cluster: queue.Cluster,
  371. AiCenterCode: queue.AiCenterCode,
  372. ComputeResource: queue.ComputeResource,
  373. AccCardType: queue.AccCardType,
  374. }
  375. _, err := sess.Get(&param)
  376. if err != nil {
  377. sess.Rollback()
  378. return nil, err
  379. }
  380. if param.ID == 0 {
  381. _, err = sess.InsertOne(&queue)
  382. if err != nil {
  383. sess.Rollback()
  384. return nil, err
  385. }
  386. } else {
  387. queue = param
  388. }
  389. spec.QueueId = queue.ID
  390. _, err = sess.InsertOne(&spec)
  391. if err != nil {
  392. sess.Rollback()
  393. return nil, err
  394. }
  395. sess.Commit()
  396. return BuildSpecification(queue, spec), nil
  397. }
  398. func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification {
  399. return &Specification{
  400. ID: spec.ID,
  401. SourceSpecId: spec.SourceSpecId,
  402. AccCardsNum: spec.AccCardsNum,
  403. AccCardType: queue.AccCardType,
  404. CpuCores: spec.CpuCores,
  405. MemGiB: spec.MemGiB,
  406. GPUMemGiB: spec.GPUMemGiB,
  407. ShareMemGiB: spec.ShareMemGiB,
  408. ComputeResource: queue.ComputeResource,
  409. UnitPrice: spec.UnitPrice,
  410. QueueId: queue.ID,
  411. QueueCode: queue.QueueCode,
  412. Cluster: queue.Cluster,
  413. AiCenterCode: queue.AiCenterCode,
  414. AiCenterName: queue.AiCenterName,
  415. }
  416. }
  417. func GetCloudbrainOneAccCardType(queueCode string) string {
  418. switch queueCode {
  419. case "a100":
  420. return "A100"
  421. case "openidebug":
  422. return "T4"
  423. case "openidgx":
  424. return "V100"
  425. }
  426. return ""
  427. }
  428. var cloudbrainTwoSpecsInitFlag = false
  429. var cloudbrainTwoSpecs map[string]*Specification
  430. func GetCloudbrainTwoSpecs() (map[string]*Specification, error) {
  431. if !cloudbrainTwoSpecsInitFlag {
  432. r, err := InitCloudbrainTwoSpecs()
  433. if err != nil {
  434. return nil, err
  435. }
  436. cloudbrainTwoSpecsInitFlag = true
  437. cloudbrainTwoSpecs = r
  438. }
  439. return cloudbrainTwoSpecs, nil
  440. }
  441. func InitCloudbrainTwoSpecs() (map[string]*Specification, error) {
  442. r := make(map[string]*Specification, 0)
  443. queue, err := GetResourceQueue(&ResourceQueue{QueueCode: "openisupport"})
  444. if err != nil {
  445. return nil, err
  446. }
  447. if queue == nil {
  448. queue = &ResourceQueue{
  449. QueueCode: "openisupport",
  450. Cluster: OpenICluster,
  451. AiCenterCode: AICenterOfCloudBrainTwo,
  452. AiCenterName: "云脑二",
  453. ComputeResource: NPU,
  454. AccCardType: "ASCEND910",
  455. Remark: "处理历史云脑任务时自动生成",
  456. }
  457. _, err = x.InsertOne(queue)
  458. if err != nil {
  459. return nil, err
  460. }
  461. }
  462. for i := 1; i <= 8; i = i * 2 {
  463. sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(i)
  464. spec, err := GetResourceSpecification(&ResourceSpecification{
  465. SourceSpecId: sourceSpecId,
  466. QueueId: queue.ID,
  467. })
  468. if err != nil {
  469. return nil, err
  470. }
  471. if spec == nil {
  472. spec = &ResourceSpecification{
  473. QueueId: queue.ID,
  474. SourceSpecId: sourceSpecId,
  475. AccCardsNum: i,
  476. CpuCores: i * 24,
  477. MemGiB: float32(i * 256),
  478. GPUMemGiB: float32(32),
  479. Status: SpecOffShelf,
  480. IsAvailable: true,
  481. }
  482. _, err = x.Insert(spec)
  483. if err != nil {
  484. return nil, err
  485. }
  486. }
  487. r[sourceSpecId] = BuildSpecification(*queue, *spec)
  488. }
  489. return r, nil
  490. }
  491. var grampusSpecsInitFlag = false
  492. var grampusSpecs map[string]*Specification
  493. func GetGrampusSpecs() (map[string]*Specification, error) {
  494. if !grampusSpecsInitFlag {
  495. specMap := make(map[string]*Specification, 0)
  496. r, err := FindSpecs(FindSpecsOptions{
  497. Cluster: C2NetCluster,
  498. RequestAll: true,
  499. })
  500. if err != nil {
  501. return nil, err
  502. }
  503. for _, spec := range r {
  504. specMap[spec.SourceSpecId] = spec
  505. specMap[spec.SourceSpecId+"_"+spec.AiCenterCode] = spec
  506. }
  507. grampusSpecsInitFlag = true
  508. grampusSpecs = specMap
  509. }
  510. return grampusSpecs, nil
  511. }