You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

check_rep_count.go 7.4 kB

2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. package event
  2. /*
  3. // TODO 可作为新逻辑的参考
  4. import (
  5. "fmt"
  6. "math"
  7. "github.com/samber/lo"
  8. "gitlink.org.cn/cloudream/common/pkgs/logger"
  9. mymath "gitlink.org.cn/cloudream/common/utils/math"
  10. mysort "gitlink.org.cn/cloudream/common/utils/sort"
  11. "gitlink.org.cn/cloudream/storage/common/consts"
  12. "gitlink.org.cn/cloudream/storage/common/pkgs/distlock/reqbuilder"
  13. "gitlink.org.cn/cloudream/storage/scanner/internal/config"
  14. "gitlink.org.cn/cloudream/storage/common/pkgs/db/model"
  15. scevt "gitlink.org.cn/cloudream/storage/common/pkgs/mq/scanner/event"
  16. )
// CheckRepCount is a scanner event that verifies the replica count of the
// given file hashes and adds or retires cached replicas to meet the target.
// It embeds the MQ message type so the event carries the same payload.
type CheckRepCount struct {
*scevt.CheckRepCount
}
  20. func NewCheckRepCount(fileHashes []string) *CheckRepCount {
  21. return &CheckRepCount{
  22. CheckRepCount: scevt.NewCheckRepCount(fileHashes),
  23. }
  24. }
  25. func (t *CheckRepCount) TryMerge(other Event) bool {
  26. event, ok := other.(*CheckRepCount)
  27. if !ok {
  28. return false
  29. }
  30. t.FileHashes = lo.Union(t.FileHashes, event.FileHashes)
  31. return true
  32. }
  33. func (t *CheckRepCount) Execute(execCtx ExecuteContext) {
  34. log := logger.WithType[CheckRepCount]("Event")
  35. log.Debugf("begin with %v", logger.FormatStruct(t.CheckRepCount))
  36. defer log.Debugf("end")
  37. mutex, err := reqbuilder.NewBuilder().
  38. Metadata().
  39. // 读取某个FileHash的备份数设定
  40. ObjectRep().ReadAny().
  41. // 读取某个FileHash是否被Block引用
  42. ObjectBlock().ReadAny().
  43. // 获取所有可用的节点
  44. Node().ReadAny().
  45. // 增加或修改FileHash关联的Cache记录
  46. Cache().WriteAny().
  47. MutexLock(execCtx.Args.DistLock)
  48. if err != nil {
  49. log.Warnf("acquire locks failed, err: %s", err.Error())
  50. return
  51. }
  52. defer mutex.Unlock()
  53. updatedNodeAndHashes := make(map[int64][]string)
  54. for _, fileHash := range t.FileHashes {
  55. updatedNodeIDs, err := t.checkOneRepCount(fileHash, execCtx)
  56. if err != nil {
  57. log.WithField("FileHash", fileHash).Warnf("check file rep count failed, err: %s", err.Error())
  58. continue
  59. }
  60. for _, id := range updatedNodeIDs {
  61. hashes := updatedNodeAndHashes[id]
  62. updatedNodeAndHashes[id] = append(hashes, fileHash)
  63. }
  64. }
  65. for nodeID, hashes := range updatedNodeAndHashes {
  66. // 新任务继承本任务的执行设定(紧急任务依然保持紧急任务)
  67. execCtx.Executor.Post(NewAgentCheckCache(nodeID, hashes), execCtx.Option)
  68. }
  69. }
  70. func (t *CheckRepCount) checkOneRepCount(fileHash string, execCtx ExecuteContext) ([]int64, error) {
  71. log := logger.WithType[CheckRepCount]("Event")
  72. sqlCtx := execCtx.Args.DB.SQLCtx()
  73. var updatedNodeIDs []int64
  74. // 计算所需的最少备份数:
  75. // 1. ObjectRep中期望备份数的最大值
  76. // 2. 如果ObjectBlock存在对此文件的引用,则至少为1
  77. repMaxCnt, err := execCtx.Args.DB.ObjectRep().GetFileMaxRepCount(sqlCtx, fileHash)
  78. if err != nil {
  79. return nil, fmt.Errorf("get file max rep count failed, err: %w", err)
  80. }
  81. blkCnt, err := execCtx.Args.DB.ObjectBlock().CountBlockWithHash(sqlCtx, fileHash)
  82. if err != nil {
  83. return nil, fmt.Errorf("count block with hash failed, err: %w", err)
  84. }
  85. needRepCount := mymath.Max(repMaxCnt, mymath.Min(1, blkCnt))
  86. repNodes, err := execCtx.Args.DB.Cache().GetCachingFileNodes(sqlCtx, fileHash)
  87. if err != nil {
  88. return nil, fmt.Errorf("get caching file nodes failed, err: %w", err)
  89. }
  90. allNodes, err := execCtx.Args.DB.Node().GetAllNodes(sqlCtx)
  91. if err != nil {
  92. return nil, fmt.Errorf("get all nodes failed, err: %w", err)
  93. }
  94. var normalNodes, unavaiNodes []model.Node
  95. for _, node := range repNodes {
  96. if node.State == consts.NodeStateNormal {
  97. normalNodes = append(normalNodes, node)
  98. } else if node.State == consts.NodeStateUnavailable {
  99. unavaiNodes = append(unavaiNodes, node)
  100. }
  101. }
  102. // 如果Available的备份数超过期望备份数,则让一些节点退出
  103. if len(normalNodes) > needRepCount {
  104. delNodes := chooseDeleteAvaiRepNodes(allNodes, normalNodes, len(normalNodes)-needRepCount)
  105. for _, node := range delNodes {
  106. err := execCtx.Args.DB.Cache().SetTemp(sqlCtx, fileHash, node.NodeID)
  107. if err != nil {
  108. return nil, fmt.Errorf("change cache state failed, err: %w", err)
  109. }
  110. updatedNodeIDs = append(updatedNodeIDs, node.NodeID)
  111. }
  112. return updatedNodeIDs, nil
  113. }
  114. // 因为总备份数不够,而需要增加的备份数
  115. add1 := mymath.Max(0, needRepCount-len(repNodes))
  116. // 因为Available的备份数占比过少,而需要增加的备份数
  117. minAvaiNodeCnt := int(math.Ceil(float64(config.Cfg().MinAvailableRepProportion) * float64(needRepCount)))
  118. add2 := mymath.Max(0, minAvaiNodeCnt-len(normalNodes))
  119. // 最终需要增加的备份数,是以上两种情况的最大值
  120. finalAddCount := mymath.Max(add1, add2)
  121. if finalAddCount > 0 {
  122. newNodes := chooseNewRepNodes(allNodes, repNodes, finalAddCount)
  123. if len(newNodes) < finalAddCount {
  124. log.WithField("FileHash", fileHash).Warnf("need %d more rep nodes, but get only %d nodes", finalAddCount, len(newNodes))
  125. // TODO 节点数不够,进行一个告警
  126. }
  127. for _, node := range newNodes {
  128. err := execCtx.Args.DB.Cache().CreatePinned(sqlCtx, fileHash, node.NodeID, 0)
  129. if err != nil {
  130. return nil, fmt.Errorf("create cache failed, err: %w", err)
  131. }
  132. updatedNodeIDs = append(updatedNodeIDs, node.NodeID)
  133. }
  134. }
  135. return updatedNodeIDs, err
  136. }
  137. func chooseNewRepNodes(allNodes []model.Node, curRepNodes []model.Node, newCount int) []model.Node {
  138. noRepNodes := lo.Reject(allNodes, func(node model.Node, index int) bool {
  139. return lo.ContainsBy(curRepNodes, func(n model.Node) bool { return node.NodeID == n.NodeID }) ||
  140. node.State != consts.NodeStateNormal
  141. })
  142. repNodeLocationIDs := make(map[int64]bool)
  143. for _, node := range curRepNodes {
  144. repNodeLocationIDs[node.LocationID] = true
  145. }
  146. mysort.Sort(noRepNodes, func(l, r model.Node) int {
  147. // LocationID不存在时为false,false - true < 0,所以LocationID不存在的会排在前面
  148. return mysort.CmpBool(repNodeLocationIDs[l.LocationID], repNodeLocationIDs[r.LocationID])
  149. })
  150. return noRepNodes[:mymath.Min(newCount, len(noRepNodes))]
  151. }
// chooseDeleteAvaiRepNodes picks up to delCount nodes, out of the nodes that
// currently hold an available replica, whose replicas should be retired.
// Nodes from the locations holding the most replicas are chosen first, so
// the surviving replicas stay spread across locations.
func chooseDeleteAvaiRepNodes(allNodes []model.Node, curAvaiRepNodes []model.Node, delCount int) []model.Node {
// group the nodes by location ID
locationGroupedNodes := make(map[int64][]model.Node)
for _, node := range curAvaiRepNodes {
nodes := locationGroupedNodes[node.LocationID]
nodes = append(nodes, node)
locationGroupedNodes[node.LocationID] = nodes
}
// Repeatedly take one element from each group into the result array,
// removing it from its group. The result is interleaved by location, e.g.
// ABCABCBCC, and the later rounds contain only elements from the larger
// groups. By filling the array back-to-front (instead of reversing it
// afterwards), the larger groups end up at the front, so the nodes to
// delete can simply be taken from the head.
// NOTE(review): map iteration order is random in Go, so the order within
// each round is non-deterministic; deleting and updating entries while
// ranging over the map is well-defined, and i moves exactly once per
// placed element, so every slot is filled exactly once.
alternatedNodes := make([]model.Node, len(curAvaiRepNodes))
for i := len(curAvaiRepNodes) - 1; i >= 0; {
for id, nodes := range locationGroupedNodes {
alternatedNodes[i] = nodes[0]
if len(nodes) == 1 {
delete(locationGroupedNodes, id)
} else {
locationGroupedNodes[id] = nodes[1:]
}
// advance the write position after placing each element
i--
}
}
return alternatedNodes[:mymath.Min(delCount, len(alternatedNodes))]
}
// init registers a convertor that turns the incoming MQ message into the
// local CheckRepCount event type.
func init() {
RegisterMessageConvertor(func(msg *scevt.CheckRepCount) Event { return NewCheckRepCount(msg.FileHashes) })
}
  181. */

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。