|
- package event
-
- import (
- "fmt"
- "math"
-
- "github.com/samber/lo"
- "gitlink.org.cn/cloudream/common/pkgs/logger"
- mymath "gitlink.org.cn/cloudream/common/utils/math"
- mysort "gitlink.org.cn/cloudream/common/utils/sort"
- "gitlink.org.cn/cloudream/storage/common/consts"
- "gitlink.org.cn/cloudream/storage/common/pkgs/distlock/reqbuilder"
- "gitlink.org.cn/cloudream/storage/scanner/internal/config"
-
- "gitlink.org.cn/cloudream/storage/common/pkgs/db/model"
- scevt "gitlink.org.cn/cloudream/storage/common/pkgs/mq/scanner/event"
- )
-
- type CheckRepCount struct {
- *scevt.CheckRepCount
- }
-
- func NewCheckRepCount(fileHashes []string) *CheckRepCount {
- return &CheckRepCount{
- CheckRepCount: scevt.NewCheckRepCount(fileHashes),
- }
- }
-
- func (t *CheckRepCount) TryMerge(other Event) bool {
- event, ok := other.(*CheckRepCount)
- if !ok {
- return false
- }
-
- t.FileHashes = lo.Union(t.FileHashes, event.FileHashes)
- return true
- }
-
- func (t *CheckRepCount) Execute(execCtx ExecuteContext) {
- log := logger.WithType[CheckRepCount]("Event")
- log.Debugf("begin with %v", logger.FormatStruct(t))
- defer log.Debugf("end")
-
- mutex, err := reqbuilder.NewBuilder().
- Metadata().
- // 读取某个FileHash的备份数设定
- ObjectRep().ReadAny().
- // 读取某个FileHash是否被Block引用
- ObjectBlock().ReadAny().
- // 获取所有可用的节点
- Node().ReadAny().
- // 增加或修改FileHash关联的Cache记录
- Cache().WriteAny().
- MutexLock(execCtx.Args.DistLock)
- if err != nil {
- log.Warnf("acquire locks failed, err: %s", err.Error())
- return
- }
- defer mutex.Unlock()
-
- updatedNodeAndHashes := make(map[int64][]string)
-
- for _, fileHash := range t.FileHashes {
- updatedNodeIDs, err := t.checkOneRepCount(fileHash, execCtx)
- if err != nil {
- log.WithField("FileHash", fileHash).Warnf("check file rep count failed, err: %s", err.Error())
- continue
- }
-
- for _, id := range updatedNodeIDs {
- hashes := updatedNodeAndHashes[id]
- updatedNodeAndHashes[id] = append(hashes, fileHash)
- }
- }
-
- for nodeID, hashes := range updatedNodeAndHashes {
- // 新任务继承本任务的执行设定(紧急任务依然保持紧急任务)
- execCtx.Executor.Post(NewAgentCheckCache(nodeID, hashes), execCtx.Option)
- }
- }
-
- func (t *CheckRepCount) checkOneRepCount(fileHash string, execCtx ExecuteContext) ([]int64, error) {
- log := logger.WithType[CheckRepCount]("Event")
- sqlCtx := execCtx.Args.DB.SQLCtx()
-
- var updatedNodeIDs []int64
- // 计算所需的最少备份数:
- // 1. ObjectRep中期望备份数的最大值
- // 2. 如果ObjectBlock存在对此文件的引用,则至少为1
- repMaxCnt, err := execCtx.Args.DB.ObjectRep().GetFileMaxRepCount(sqlCtx, fileHash)
- if err != nil {
- return nil, fmt.Errorf("get file max rep count failed, err: %w", err)
- }
-
- blkCnt, err := execCtx.Args.DB.ObjectBlock().CountBlockWithHash(sqlCtx, fileHash)
- if err != nil {
- return nil, fmt.Errorf("count block with hash failed, err: %w", err)
- }
-
- needRepCount := mymath.Max(repMaxCnt, mymath.Min(1, blkCnt))
-
- repNodes, err := execCtx.Args.DB.Cache().GetCachingFileNodes(sqlCtx, fileHash)
- if err != nil {
- return nil, fmt.Errorf("get caching file nodes failed, err: %w", err)
- }
-
- allNodes, err := execCtx.Args.DB.Node().GetAllNodes(sqlCtx)
- if err != nil {
- return nil, fmt.Errorf("get all nodes failed, err: %w", err)
- }
-
- var normalNodes, unavaiNodes []model.Node
- for _, node := range repNodes {
- if node.State == consts.NodeStateNormal {
- normalNodes = append(normalNodes, node)
- } else if node.State == consts.NodeStateUnavailable {
- unavaiNodes = append(unavaiNodes, node)
- }
- }
-
- // 如果Available的备份数超过期望备份数,则让一些节点退出
- if len(normalNodes) > needRepCount {
- delNodes := chooseDeleteAvaiRepNodes(allNodes, normalNodes, len(normalNodes)-needRepCount)
- for _, node := range delNodes {
- err := execCtx.Args.DB.Cache().SetTemp(sqlCtx, fileHash, node.NodeID)
- if err != nil {
- return nil, fmt.Errorf("change cache state failed, err: %w", err)
- }
- updatedNodeIDs = append(updatedNodeIDs, node.NodeID)
- }
- return updatedNodeIDs, nil
- }
-
- // 因为总备份数不够,而需要增加的备份数
- add1 := mymath.Max(0, needRepCount-len(repNodes))
-
- // 因为Available的备份数占比过少,而需要增加的备份数
- minAvaiNodeCnt := int(math.Ceil(float64(config.Cfg().MinAvailableRepProportion) * float64(needRepCount)))
- add2 := mymath.Max(0, minAvaiNodeCnt-len(normalNodes))
-
- // 最终需要增加的备份数,是以上两种情况的最大值
- finalAddCount := mymath.Max(add1, add2)
-
- if finalAddCount > 0 {
- newNodes := chooseNewRepNodes(allNodes, repNodes, finalAddCount)
- if len(newNodes) < finalAddCount {
- log.WithField("FileHash", fileHash).Warnf("need %d more rep nodes, but get only %d nodes", finalAddCount, len(newNodes))
- // TODO 节点数不够,进行一个告警
- }
-
- for _, node := range newNodes {
- err := execCtx.Args.DB.Cache().CreatePinned(sqlCtx, fileHash, node.NodeID, 0)
- if err != nil {
- return nil, fmt.Errorf("create cache failed, err: %w", err)
- }
- updatedNodeIDs = append(updatedNodeIDs, node.NodeID)
- }
- }
-
- return updatedNodeIDs, err
- }
-
- func chooseNewRepNodes(allNodes []model.Node, curRepNodes []model.Node, newCount int) []model.Node {
- noRepNodes := lo.Reject(allNodes, func(node model.Node, index int) bool {
- return lo.ContainsBy(curRepNodes, func(n model.Node) bool { return node.NodeID == n.NodeID }) ||
- node.State != consts.NodeStateNormal
- })
-
- repNodeLocationIDs := make(map[int64]bool)
- for _, node := range curRepNodes {
- repNodeLocationIDs[node.LocationID] = true
- }
-
- mysort.Sort(noRepNodes, func(l, r model.Node) int {
- // LocationID不存在时为false,false - true < 0,所以LocationID不存在的会排在前面
- return mysort.CmpBool(repNodeLocationIDs[l.LocationID], repNodeLocationIDs[r.LocationID])
- })
-
- return noRepNodes[:mymath.Min(newCount, len(noRepNodes))]
- }
-
- func chooseDeleteAvaiRepNodes(allNodes []model.Node, curAvaiRepNodes []model.Node, delCount int) []model.Node {
- // 按照地域ID分组
- locationGroupedNodes := make(map[int64][]model.Node)
- for _, node := range curAvaiRepNodes {
- nodes := locationGroupedNodes[node.LocationID]
- nodes = append(nodes, node)
- locationGroupedNodes[node.LocationID] = nodes
- }
-
- // 每次从每个分组中取出一个元素放入结果数组,并将这个元素从分组中删除
- // 最后结果数组中的元素会按照地域交错循环排列,比如:ABCABCBCC。同时还有一个特征:靠后的循环节中的元素都来自于元素数多的分组
- // 将结果数组反转(此处是用存放时就逆序的形式实现),就把元素数多的分组提前了,此时从头部取出要删除的节点即可
- alternatedNodes := make([]model.Node, len(curAvaiRepNodes))
- for i := len(curAvaiRepNodes) - 1; i >= 0; {
- for id, nodes := range locationGroupedNodes {
- alternatedNodes[i] = nodes[0]
-
- if len(nodes) == 1 {
- delete(locationGroupedNodes, id)
- } else {
- locationGroupedNodes[id] = nodes[1:]
- }
-
- // 放置一个元素就移动一下下一个存放点
- i--
- }
- }
-
- return alternatedNodes[:mymath.Min(delCount, len(alternatedNodes))]
- }
-
- func init() {
- RegisterMessageConvertor(func(msg *scevt.CheckRepCount) Event { return NewCheckRepCount(msg.FileHashes) })
- }
|