// JointCloud / JCS-pub

package task

import (
	"database/sql"
	"fmt"
	"math"

	"github.com/jmoiron/sqlx"
	"github.com/samber/lo"
	"gitlink.org.cn/cloudream/common/consts"
	"gitlink.org.cn/cloudream/scanner/internal/config"
	log "gitlink.org.cn/cloudream/utils/logger"
	mymath "gitlink.org.cn/cloudream/utils/math"
	mysort "gitlink.org.cn/cloudream/utils/sort"

	"gitlink.org.cn/cloudream/db/model"
	mysql "gitlink.org.cn/cloudream/db/sql"
)

// CheckRepCountTask checks, for every file hash it carries, whether the
// number of nodes caching that file matches the number of replicas required,
// and adjusts the cache records accordingly.
type CheckRepCountTask struct {
	// FileHashes lists the hashes of the files whose replica count is checked.
	FileHashes []string
}

// NewCheckRepCountTask builds a task that checks the replica count of every
// file in fileHashes. The slice is stored as given; it is not copied.
func NewCheckRepCountTask(fileHashes []string) *CheckRepCountTask {
	task := new(CheckRepCountTask)
	task.FileHashes = fileHashes
	return task
}

// TryMerge folds other into t when other is also a *CheckRepCountTask.
//
// On success t.FileHashes becomes the union of both tasks' hashes and true is
// returned. For any other task type t is left untouched and false is returned.
func (t *CheckRepCountTask) TryMerge(other Task) bool {
	if incoming, isSameKind := other.(*CheckRepCountTask); isSameKind {
		t.FileHashes = lo.Union(t.FileHashes, incoming.FileHashes)
		return true
	}

	return false
}

// Execute checks the replica count of every file in t.FileHashes and then
// posts an AgentCheckCacheTask covering the nodes whose cache records were
// touched.
//
// A file whose check fails is logged and skipped; it does not stop the
// remaining files from being processed.
func (t *CheckRepCountTask) Execute(execCtx *ExecuteContext, myOpts ExecuteOption) {
	// Node ID -> hashes of the files whose cache record on that node changed.
	hashesByNode := map[int][]string{}

	for _, hash := range t.FileHashes {
		nodeIDs, err := t.checkOneRepCount(hash, execCtx)
		if err != nil {
			log.WithField("FileHash", hash).Warnf("check file rep count failed, err: %s", err.Error())
			continue
		}

		for _, nodeID := range nodeIDs {
			hashesByNode[nodeID] = append(hashesByNode[nodeID], hash)
		}
	}

	var entries []AgentCheckCacheTaskEntry
	for nodeID, hashes := range hashesByNode {
		entry := NewAgentCheckCacheTaskEntry(nodeID, hashes)
		entries = append(entries, entry)
	}

	// The follow-up task inherits this task's execution options, so an urgent
	// task stays urgent.
	followUp := NewAgentCheckCacheTask(entries)
	execCtx.Executor.Post(followUp, myOpts)
}

// checkOneRepCount reconciles the cache records of fileHash with the number
// of replicas the file needs. All database work runs inside one
// execCtx.DB.DoTx call at serializable isolation level.
//
// The required replica count is the larger of:
//   - the value returned by ObjectRep.GetFileMaxRepCount, and
//   - 1 if ObjectBlock.CountBlockWithHash reports any reference, else 0.
//
// If more nodes in NORMAL state cache the file than required, the surplus
// nodes get their cache state changed to CACHE_STATE_TEMP. Otherwise, cache
// records are created on additional nodes when either the total number of
// caching nodes is below the required count, or the number of NORMAL caching
// nodes is below ceil(MinAvailableRepProportion * required).
//
// It returns the IDs of the nodes whose cache record was changed or created.
// If any step fails, the error is returned and the ID list is nil.
func (t *CheckRepCountTask) checkOneRepCount(fileHash string, execCtx *ExecuteContext) ([]int, error) {
	var updatedNodeIDs []int
	err := execCtx.DB.DoTx(sql.LevelSerializable, func(tx *sqlx.Tx) error {
		repMaxCnt, err := mysql.ObjectRep.GetFileMaxRepCount(tx, fileHash)
		if err != nil {
			return fmt.Errorf("get file max rep count failed, err: %w", err)
		}

		blkCnt, err := mysql.ObjectBlock.CountBlockWithHash(tx, fileHash)
		if err != nil {
			return fmt.Errorf("count block with hash failed, err: %w", err)
		}

		// Minimum number of replicas required:
		//   - the maximum expected replica count recorded in ObjectRep;
		//   - at least 1 if any ObjectBlock references this file.
		// Min(1, blkCnt) turns the reference count into a 0/1 flag, so the
		// requirement does not grow with the number of referencing blocks
		// and an unreferenced file needs no replica.
		needRepCount := mymath.Max(repMaxCnt, mymath.Min(1, blkCnt))

		repNodes, err := mysql.Cache.GetCachingFileNodes(tx, fileHash)
		if err != nil {
			return fmt.Errorf("get caching file nodes failed, err: %w", err)
		}

		allNodes, err := mysql.Node.GetAllNodes(tx)
		if err != nil {
			return fmt.Errorf("get all nodes failed, err: %w", err)
		}

		// Only nodes in NORMAL state count as available replicas.
		var normalNodes []model.Node
		for _, node := range repNodes {
			if node.State == consts.NODE_STATE_NORMAL {
				normalNodes = append(normalNodes, node)
			}
		}

		// More available replicas than required: retire the surplus.
		if len(normalNodes) > needRepCount {
			// The surplus is a positive count; a negative value would make
			// chooseDeleteAvaiRepNodes slice with a negative bound.
			surplus := len(normalNodes) - needRepCount
			delNodes := chooseDeleteAvaiRepNodes(allNodes, normalNodes, surplus)
			for _, node := range delNodes {
				if err := mysql.Cache.ChangeState(tx, fileHash, node.NodeID, consts.CACHE_STATE_TEMP); err != nil {
					return fmt.Errorf("change cache state failed, err: %w", err)
				}
				updatedNodeIDs = append(updatedNodeIDs, node.NodeID)
			}
			return nil
		}

		minAvaiNodeCnt := int(math.Ceil(float64(config.Cfg().MinAvailableRepProportion) * float64(needRepCount)))

		// Replicas to add because the total number of replicas is too low.
		add1 := mymath.Max(0, needRepCount-len(repNodes))

		// Replicas to add because too small a share of replicas is available.
		add2 := mymath.Max(0, minAvaiNodeCnt-len(normalNodes))

		// The final number to add is the larger of the two.
		finalAddCount := mymath.Max(add1, add2)
		if finalAddCount <= 0 {
			return nil
		}

		newNodes := chooseNewRepNodes(allNodes, repNodes, finalAddCount)
		if len(newNodes) < finalAddCount {
			log.WithField("FileHash", fileHash).Warnf("need %d more rep nodes, but get only %d nodes", finalAddCount, len(newNodes))
			// TODO: not enough nodes; raise an alert.
		}

		for _, node := range newNodes {
			if err := mysql.Cache.Create(tx, fileHash, node.NodeID); err != nil {
				return fmt.Errorf("create cache failed, err: %w", err)
			}
			updatedNodeIDs = append(updatedNodeIDs, node.NodeID)
		}

		return nil
	})
	if err != nil {
		return nil, err
	}

	return updatedNodeIDs, nil
}

// chooseNewRepNodes picks up to newCount nodes from allNodes to receive a new
// replica.
//
// A node is a candidate only if it is in NORMAL state and is not already in
// curRepNodes. Candidates are sorted so that nodes whose LocationID is not
// used by any current replica node come first, and the first newCount of them
// are returned. Fewer are returned when there are not enough candidates.
func chooseNewRepNodes(allNodes []model.Node, curRepNodes []model.Node, newCount int) []model.Node {
	// Index the current replica holders by node ID and by location.
	holderIDs := make(map[int]bool, len(curRepNodes))
	usedLocations := make(map[int]bool, len(curRepNodes))
	for _, holder := range curRepNodes {
		holderIDs[holder.NodeID] = true
		usedLocations[holder.LocationID] = true
	}

	candidates := lo.Reject(allNodes, func(node model.Node, _ int) bool {
		return node.State != consts.NODE_STATE_NORMAL || holderIDs[node.NodeID]
	})

	mysort.Sort(candidates, func(l, r model.Node) int {
		// A missing LocationID reads as false, and false - true < 0, so nodes
		// in locations without a replica sort to the front.
		return mysort.CmpBool(usedLocations[l.LocationID], usedLocations[r.LocationID])
	})

	limit := mymath.Min(newCount, len(candidates))
	return candidates[:limit]
}

// chooseDeleteAvaiRepNodes picks up to delCount nodes from curAvaiRepNodes
// whose replica should be retired, preferring nodes in the locations that
// hold the most replicas.
//
// allNodes is currently unused. A delCount of zero or less yields an empty
// result; a delCount larger than len(curAvaiRepNodes) yields every node.
// Groups are visited in map iteration order, so which node is picked among
// equally sized locations is not deterministic.
func chooseDeleteAvaiRepNodes(allNodes []model.Node, curAvaiRepNodes []model.Node, delCount int) []model.Node {
	// Group the nodes by location ID.
	locationGroupedNodes := make(map[int][]model.Node)
	for _, node := range curAvaiRepNodes {
		locationGroupedNodes[node.LocationID] = append(locationGroupedNodes[node.LocationID], node)
	}

	// In each round, take one node from every remaining group, put it into
	// the result and drop it from its group. Filled front to back this would
	// interleave the locations, e.g. ABCABCBCC, with the later rounds made up
	// only of nodes from the larger groups. The result is filled back to
	// front instead, which is the reverse of that order, so the nodes of the
	// larger groups end up at the head and the nodes to delete can simply be
	// taken from the front.
	alternatedNodes := make([]model.Node, len(curAvaiRepNodes))
	next := len(alternatedNodes) - 1
	for len(locationGroupedNodes) > 0 {
		for id, nodes := range locationGroupedNodes {
			alternatedNodes[next] = nodes[0]
			// Each placed node moves the write position one slot forward.
			next--

			if len(nodes) == 1 {
				delete(locationGroupedNodes, id)
			} else {
				locationGroupedNodes[id] = nodes[1:]
			}
		}
	}

	// Clamp to [0, len] so that a negative delCount cannot produce a negative
	// slice bound.
	count := mymath.Max(0, mymath.Min(delCount, len(alternatedNodes)))
	return alternatedNodes[:count]
}