You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

redundancy_shrink.go 31 kB

5 months ago
6 months ago
6 months ago
6 months ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970
  1. package ticktock
  2. import (
  3. "context"
  4. "fmt"
  5. "math"
  6. "math/rand"
  7. "sync"
  8. "github.com/samber/lo"
  9. "gitlink.org.cn/cloudream/common/pkgs/bitmap"
  10. "gitlink.org.cn/cloudream/common/pkgs/logger"
  11. "gitlink.org.cn/cloudream/common/utils/lo2"
  12. "gitlink.org.cn/cloudream/common/utils/math2"
  13. "gitlink.org.cn/cloudream/common/utils/sort2"
  14. "gitlink.org.cn/cloudream/jcs-pub/client/internal/db"
  15. "gitlink.org.cn/cloudream/jcs-pub/common/consts"
  16. "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/ioswitch/exec"
  17. "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/ioswitch2"
  18. "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/ioswitch2/ops2"
  19. "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/ioswitch2/parser"
  20. "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/publock"
  21. "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/publock/reqbuilder"
  22. jcstypes "gitlink.org.cn/cloudream/jcs-pub/common/types"
  23. "gitlink.org.cn/cloudream/jcs-pub/common/types/datamap"
  24. )
  25. func (t *ChangeRedundancy) doRedundancyShrink(execCtx *changeRedundancyContext, pkg jcstypes.PackageDetail, objs []jcstypes.ObjectDetail, reen *publock.Reentrant) ([]db.UpdatingObjectRedundancy, []datamap.SysEventBody, error) {
  26. log := logger.WithType[ChangeRedundancy]("TickTock")
  27. var readerStgIDs []jcstypes.UserSpaceID
  28. for _, space := range execCtx.allUserSpaces {
  29. // TODO 可以考虑做成配置
  30. if space.AccessAmount >= float64(pkg.ObjectCount/2) {
  31. readerStgIDs = append(readerStgIDs, space.UserSpace.UserSpace.UserSpaceID)
  32. }
  33. }
  34. // 只对ec和rep对象进行处理
  35. var ecObjects []jcstypes.ObjectDetail
  36. var repObjects []jcstypes.ObjectDetail
  37. for _, obj := range objs {
  38. if _, ok := obj.Object.Redundancy.(*jcstypes.ECRedundancy); ok {
  39. ecObjects = append(ecObjects, obj)
  40. } else if _, ok := obj.Object.Redundancy.(*jcstypes.RepRedundancy); ok {
  41. repObjects = append(repObjects, obj)
  42. }
  43. }
  44. planningStgIDs := make(map[jcstypes.UserSpaceID]bool)
  45. var sysEvents []datamap.SysEventBody
  46. // 对于rep对象,统计出所有对象块分布最多的两个节点,用这两个节点代表所有rep对象块的分布,去进行退火算法
  47. var repObjectsUpdating []db.UpdatingObjectRedundancy
  48. repMostHubIDs := t.summaryRepObjectBlockNodes(repObjects)
  49. solu := t.startAnnealing(execCtx, readerStgIDs, annealingObject{
  50. totalBlockCount: 1,
  51. minBlockCnt: 1,
  52. pinnedAt: repMostHubIDs,
  53. blocks: nil,
  54. })
  55. iRepObj := 0
  56. for iRepObj < len(repObjects) {
  57. planBld := exec.NewPlanBuilder()
  58. for c := 0; c < 10 && iRepObj < len(repObjects); c++ {
  59. repObjectsUpdating = append(repObjectsUpdating, t.makePlansForRepObject(execCtx, solu, repObjects[iRepObj], planBld, planningStgIDs))
  60. sysEvents = append(sysEvents, t.generateSysEventForRepObject(solu, repObjects[iRepObj])...)
  61. iRepObj++
  62. }
  63. _, err := t.executePlans(execCtx, planBld, planningStgIDs, reen)
  64. if err != nil {
  65. log.Warn(err.Error())
  66. return nil, nil, fmt.Errorf("execute plans: %w", err)
  67. }
  68. }
  69. // 对于ec对象,则每个对象单独进行退火算法
  70. var ecObjectsUpdating []db.UpdatingObjectRedundancy
  71. for i, obj := range ecObjects {
  72. ecRed := obj.Object.Redundancy.(*jcstypes.ECRedundancy)
  73. solu := t.startAnnealing(execCtx, readerStgIDs, annealingObject{
  74. totalBlockCount: ecRed.N,
  75. minBlockCnt: ecRed.K,
  76. pinnedAt: obj.PinnedAt,
  77. blocks: obj.Blocks,
  78. })
  79. planBld := exec.NewPlanBuilder()
  80. ecObjectsUpdating = append(ecObjectsUpdating, t.makePlansForECObject(execCtx, solu, obj, planBld, planningStgIDs))
  81. sysEvents = append(sysEvents, t.generateSysEventForECObject(solu, obj)...)
  82. ioSwRets, err := t.executePlans(execCtx, planBld, planningStgIDs, reen)
  83. if err != nil {
  84. log.Warn(err.Error())
  85. return nil, nil, fmt.Errorf("execute plans: %w", err)
  86. }
  87. // 根据按照方案进行调整的结果,填充更新元数据的命令
  88. t.populateECObjectEntry(&ecObjectsUpdating[i], obj, ioSwRets)
  89. }
  90. return append(repObjectsUpdating, ecObjectsUpdating...), sysEvents, nil
  91. }
  92. func (t *ChangeRedundancy) summaryRepObjectBlockNodes(objs []jcstypes.ObjectDetail) []jcstypes.UserSpaceID {
  93. type stgBlocks struct {
  94. UserSpaceID jcstypes.UserSpaceID
  95. Count int
  96. }
  97. stgBlocksMap := make(map[jcstypes.UserSpaceID]*stgBlocks)
  98. for _, obj := range objs {
  99. cacheBlockStgs := make(map[jcstypes.UserSpaceID]bool)
  100. for _, block := range obj.Blocks {
  101. if _, ok := stgBlocksMap[block.UserSpaceID]; !ok {
  102. stgBlocksMap[block.UserSpaceID] = &stgBlocks{
  103. UserSpaceID: block.UserSpaceID,
  104. Count: 0,
  105. }
  106. }
  107. stgBlocksMap[block.UserSpaceID].Count++
  108. cacheBlockStgs[block.UserSpaceID] = true
  109. }
  110. for _, hubID := range obj.PinnedAt {
  111. if cacheBlockStgs[hubID] {
  112. continue
  113. }
  114. if _, ok := stgBlocksMap[hubID]; !ok {
  115. stgBlocksMap[hubID] = &stgBlocks{
  116. UserSpaceID: hubID,
  117. Count: 0,
  118. }
  119. }
  120. stgBlocksMap[hubID].Count++
  121. }
  122. }
  123. stgs := lo.Values(stgBlocksMap)
  124. sort2.Sort(stgs, func(left *stgBlocks, right *stgBlocks) int {
  125. return right.Count - left.Count
  126. })
  127. // 只选出块数超过一半的节点,但要保证至少有两个节点
  128. for i := 2; i < len(stgs); i++ {
  129. if stgs[i].Count < len(objs)/2 {
  130. stgs = stgs[:i]
  131. break
  132. }
  133. }
  134. return lo.Map(stgs, func(item *stgBlocks, idx int) jcstypes.UserSpaceID { return item.UserSpaceID })
  135. }
  136. type annealingState struct {
  137. ctx *changeRedundancyContext
  138. readerStgIDs []jcstypes.UserSpaceID // 近期可能访问此对象的节点
  139. stgsSortedByReader map[jcstypes.UserSpaceID][]stgDist // 拥有数据的节点到每个可能访问对象的节点按距离排序
  140. object annealingObject // 进行退火的对象
  141. blockList []objectBlock // 排序后的块分布情况
  142. stgBlockBitmaps map[jcstypes.UserSpaceID]*bitmap.Bitmap64 // 用位图的形式表示每一个节点上有哪些块
  143. stgCombTree combinatorialTree // 节点组合树,用于加速计算容灾度
  144. maxScore float64 // 搜索过程中得到过的最大分数
  145. maxScoreRmBlocks []bool // 最大分数对应的删除方案
  146. rmBlocks []bool // 当前删除方案
  147. inversedIndex int // 当前删除方案是从上一次的方案改动哪个flag而来的
  148. lastDisasterTolerance float64 // 上一次方案的容灾度
  149. lastSpaceCost float64 // 上一次方案的冗余度
  150. lastMinAccessCost float64 // 上一次方案的最小访问费用
  151. lastScore float64 // 上一次方案的分数
  152. }
  153. type objectBlock struct {
  154. Index int
  155. UserSpaceID jcstypes.UserSpaceID
  156. HasEntity bool // 节点拥有实际的文件数据块
  157. HasShadow bool // 如果节点拥有完整文件数据,那么认为这个节点拥有所有块,这些块被称为影子块
  158. FileHash jcstypes.FileHash // 只有在拥有实际文件数据块时,这个字段才有值
  159. Size int64 // 块大小
  160. }
  161. type stgDist struct {
  162. UserSpaceID jcstypes.UserSpaceID
  163. Distance float64
  164. }
  165. type combinatorialTree struct {
  166. nodes []combinatorialTreeNode
  167. blocksMaps map[int]bitmap.Bitmap64
  168. stgIDToLocalStgID map[jcstypes.UserSpaceID]int
  169. localStgIDToStgID []jcstypes.UserSpaceID
  170. }
  171. type annealingObject struct {
  172. totalBlockCount int
  173. minBlockCnt int
  174. pinnedAt []jcstypes.UserSpaceID
  175. blocks []jcstypes.ObjectBlock
  176. }
  // Actions a tree-iteration callback can return to steer the traversal.
  const (
  	iterActionNone  = iota // continue into the node's children
  	iterActionSkip         // do not visit the node's children
  	iterActionBreak        // stop the whole traversal
  )
  182. func newCombinatorialTree(stgBlocksMaps map[jcstypes.UserSpaceID]*bitmap.Bitmap64) combinatorialTree {
  183. tree := combinatorialTree{
  184. blocksMaps: make(map[int]bitmap.Bitmap64),
  185. stgIDToLocalStgID: make(map[jcstypes.UserSpaceID]int),
  186. }
  187. tree.nodes = make([]combinatorialTreeNode, (1 << len(stgBlocksMaps)))
  188. for id, mp := range stgBlocksMaps {
  189. tree.stgIDToLocalStgID[id] = len(tree.localStgIDToStgID)
  190. tree.blocksMaps[len(tree.localStgIDToStgID)] = *mp
  191. tree.localStgIDToStgID = append(tree.localStgIDToStgID, id)
  192. }
  193. tree.nodes[0].localHubID = -1
  194. index := 1
  195. tree.initNode(0, &tree.nodes[0], &index)
  196. return tree
  197. }
  198. func (t *combinatorialTree) initNode(minAvaiLocalHubID int, parent *combinatorialTreeNode, index *int) {
  199. for i := minAvaiLocalHubID; i < len(t.stgIDToLocalStgID); i++ {
  200. curIndex := *index
  201. *index++
  202. bitMp := t.blocksMaps[i]
  203. bitMp.Or(&parent.blocksBitmap)
  204. t.nodes[curIndex] = combinatorialTreeNode{
  205. localHubID: i,
  206. parent: parent,
  207. blocksBitmap: bitMp,
  208. }
  209. t.initNode(i+1, &t.nodes[curIndex], index)
  210. }
  211. }
  212. // 获得索引指定的节点所在的层
  213. func (t *combinatorialTree) GetDepth(index int) int {
  214. depth := 0
  215. // 反复判断节点在哪个子树。从左到右,子树节点的数量呈现8 4 2的变化,由此可以得到每个子树的索引值的范围
  216. subTreeCount := 1 << len(t.stgIDToLocalStgID)
  217. for index > 0 {
  218. if index < subTreeCount {
  219. // 定位到一个子树后,深度+1,然后进入这个子树,使用同样的方法再进行定位。
  220. // 进入子树后需要将索引值-1,因为要去掉子树的根节点
  221. index--
  222. depth++
  223. } else {
  224. // 如果索引值不在这个子树范围内,则将值减去子树的节点数量,
  225. // 这样每一次都可以视为使用同样的逻辑对不同大小的树进行判断。
  226. index -= subTreeCount
  227. }
  228. subTreeCount >>= 1
  229. }
  230. return depth
  231. }
  232. // 更新某一个算力中心节点的块分布位图,同时更新它对应组合树节点的所有子节点。
  233. // 如果更新到某个节点时,已有K个块,那么就不会再更新它的子节点
  234. func (t *combinatorialTree) UpdateBitmap(stgID jcstypes.UserSpaceID, mp bitmap.Bitmap64, k int) {
  235. t.blocksMaps[t.stgIDToLocalStgID[stgID]] = mp
  236. // 首先定义两种遍历树节点时的移动方式:
  237. // 1. 竖直移动(深度增加):从一个节点移动到它最左边的子节点。每移动一步,index+1
  238. // 2. 水平移动:从一个节点移动到它右边的兄弟节点。每移动一步,根据它所在的深度,index+8,+4,+2
  239. // LocalID从0开始,将其+1后得到移动步数steps。
  240. // 将移动步数拆成多部分,分配到上述的两种移动方式上,并进行任意组合,且保证第一次为至少进行一次的竖直移动,移动之后的节点都会是同一个计算中心节点。
  241. steps := t.stgIDToLocalStgID[stgID] + 1
  242. for d := 1; d <= steps; d++ {
  243. t.iterCombBits(len(t.stgIDToLocalStgID)-1, steps-d, 0, func(i int) {
  244. index := d + i
  245. node := &t.nodes[index]
  246. newMp := t.blocksMaps[node.localHubID]
  247. newMp.Or(&node.parent.blocksBitmap)
  248. node.blocksBitmap = newMp
  249. if newMp.Weight() >= k {
  250. return
  251. }
  252. t.iterChildren(index, func(index, parentIndex, depth int) int {
  253. curNode := &t.nodes[index]
  254. parentNode := t.nodes[parentIndex]
  255. newMp := t.blocksMaps[curNode.localHubID]
  256. newMp.Or(&parentNode.blocksBitmap)
  257. curNode.blocksBitmap = newMp
  258. if newMp.Weight() >= k {
  259. return iterActionSkip
  260. }
  261. return iterActionNone
  262. })
  263. })
  264. }
  265. }
  266. // 遍历树,找到至少拥有K个块的树节点的最大深度
  267. func (t *combinatorialTree) FindKBlocksMaxDepth(k int) int {
  268. maxDepth := -1
  269. t.iterChildren(0, func(index, parentIndex, depth int) int {
  270. if t.nodes[index].blocksBitmap.Weight() >= k {
  271. if maxDepth < depth {
  272. maxDepth = depth
  273. }
  274. return iterActionSkip
  275. }
  276. // 如果到了叶子节点,还没有找到K个块,那就认为要满足K个块,至少需要再多一个节点,即深度+1。
  277. // 由于遍历时采用的是深度优先的算法,因此遍历到这个叶子节点时,叶子节点再加一个节点的组合已经在前面搜索过,
  278. // 所以用当前叶子节点深度+1来作为当前分支的结果就可以,即使当前情况下增加任意一个节点依然不够K块,
  279. // 可以使用同样的思路去递推到当前叶子节点增加两个块的情况。
  280. if t.nodes[index].localHubID == len(t.stgIDToLocalStgID)-1 {
  281. if maxDepth < depth+1 {
  282. maxDepth = depth + 1
  283. }
  284. }
  285. return iterActionNone
  286. })
  287. if maxDepth == -1 || maxDepth > len(t.stgIDToLocalStgID) {
  288. return len(t.stgIDToLocalStgID)
  289. }
  290. return maxDepth
  291. }
  292. func (t *combinatorialTree) iterCombBits(width int, count int, offset int, callback func(int)) {
  293. if count == 0 {
  294. callback(offset)
  295. return
  296. }
  297. for b := width; b >= count; b-- {
  298. t.iterCombBits(b-1, count-1, offset+(1<<b), callback)
  299. }
  300. }
  301. func (t *combinatorialTree) iterChildren(index int, do func(index int, parentIndex int, depth int) int) {
  302. curNode := &t.nodes[index]
  303. childIndex := index + 1
  304. curDepth := t.GetDepth(index)
  305. childCounts := len(t.stgIDToLocalStgID) - 1 - curNode.localHubID
  306. if childCounts == 0 {
  307. return
  308. }
  309. childTreeNodeCnt := 1 << (childCounts - 1)
  310. for c := 0; c < childCounts; c++ {
  311. act := t.itering(childIndex, index, curDepth+1, do)
  312. if act == iterActionBreak {
  313. return
  314. }
  315. childIndex += childTreeNodeCnt
  316. childTreeNodeCnt >>= 1
  317. }
  318. }
  319. func (t *combinatorialTree) itering(index int, parentIndex int, depth int, do func(index int, parentIndex int, depth int) int) int {
  320. act := do(index, parentIndex, depth)
  321. if act == iterActionBreak {
  322. return act
  323. }
  324. if act == iterActionSkip {
  325. return iterActionNone
  326. }
  327. curNode := &t.nodes[index]
  328. childIndex := index + 1
  329. childCounts := len(t.stgIDToLocalStgID) - 1 - curNode.localHubID
  330. if childCounts == 0 {
  331. return iterActionNone
  332. }
  333. childTreeNodeCnt := 1 << (childCounts - 1)
  334. for c := 0; c < childCounts; c++ {
  335. act = t.itering(childIndex, index, depth+1, do)
  336. if act == iterActionBreak {
  337. return act
  338. }
  339. childIndex += childTreeNodeCnt
  340. childTreeNodeCnt >>= 1
  341. }
  342. return iterActionNone
  343. }
  344. type combinatorialTreeNode struct {
  345. localHubID int
  346. parent *combinatorialTreeNode
  347. blocksBitmap bitmap.Bitmap64 // 选择了这个中心之后,所有中心一共包含多少种块
  348. }
  349. type annealingSolution struct {
  350. blockList []objectBlock // 所有节点的块分布情况
  351. rmBlocks []bool // 要删除哪些块
  352. disasterTolerance float64 // 本方案的容灾度
  353. spaceCost float64 // 本方案的冗余度
  354. minAccessCost float64 // 本方案的最小访问费用
  355. }
  356. func (t *ChangeRedundancy) startAnnealing(ctx *changeRedundancyContext, readerStgIDs []jcstypes.UserSpaceID, object annealingObject) annealingSolution {
  357. state := &annealingState{
  358. ctx: ctx,
  359. readerStgIDs: readerStgIDs,
  360. stgsSortedByReader: make(map[jcstypes.UserSpaceID][]stgDist),
  361. object: object,
  362. stgBlockBitmaps: make(map[jcstypes.UserSpaceID]*bitmap.Bitmap64),
  363. }
  364. t.initBlockList(state)
  365. if state.blockList == nil {
  366. return annealingSolution{}
  367. }
  368. t.initNodeBlockBitmap(state)
  369. t.sortNodeByReaderDistance(state)
  370. state.rmBlocks = make([]bool, len(state.blockList))
  371. state.inversedIndex = -1
  372. state.stgCombTree = newCombinatorialTree(state.stgBlockBitmaps)
  373. state.lastScore = t.calcScore(state)
  374. state.maxScore = state.lastScore
  375. state.maxScoreRmBlocks = lo2.ArrayClone(state.rmBlocks)
  376. // 模拟退火算法的温度
  377. curTemp := state.lastScore
  378. // 结束温度
  379. finalTemp := curTemp * 0.2
  380. // 冷却率
  381. coolingRate := 0.95
  382. for curTemp > finalTemp {
  383. state.inversedIndex = rand.Intn(len(state.rmBlocks))
  384. block := state.blockList[state.inversedIndex]
  385. state.rmBlocks[state.inversedIndex] = !state.rmBlocks[state.inversedIndex]
  386. state.stgBlockBitmaps[block.UserSpaceID].Set(block.Index, !state.rmBlocks[state.inversedIndex])
  387. state.stgCombTree.UpdateBitmap(block.UserSpaceID, *state.stgBlockBitmaps[block.UserSpaceID], state.object.minBlockCnt)
  388. curScore := t.calcScore(state)
  389. dScore := curScore - state.lastScore
  390. // 如果新方案比旧方案得分低,且没有要求强制接受新方案,那么就将变化改回去
  391. if curScore == 0 || (dScore < 0 && !t.alwaysAccept(curTemp, dScore, coolingRate)) {
  392. state.rmBlocks[state.inversedIndex] = !state.rmBlocks[state.inversedIndex]
  393. state.stgBlockBitmaps[block.UserSpaceID].Set(block.Index, !state.rmBlocks[state.inversedIndex])
  394. state.stgCombTree.UpdateBitmap(block.UserSpaceID, *state.stgBlockBitmaps[block.UserSpaceID], state.object.minBlockCnt)
  395. // fmt.Printf("\n")
  396. } else {
  397. // fmt.Printf(" accept!\n")
  398. state.lastScore = curScore
  399. if state.maxScore < curScore {
  400. state.maxScore = state.lastScore
  401. state.maxScoreRmBlocks = lo2.ArrayClone(state.rmBlocks)
  402. }
  403. }
  404. curTemp *= coolingRate
  405. }
  406. // fmt.Printf("final: %v\n", state.maxScoreRmBlocks)
  407. return annealingSolution{
  408. blockList: state.blockList,
  409. rmBlocks: state.maxScoreRmBlocks,
  410. disasterTolerance: state.lastDisasterTolerance,
  411. spaceCost: state.lastSpaceCost,
  412. minAccessCost: state.lastMinAccessCost,
  413. }
  414. }
  415. func (t *ChangeRedundancy) initBlockList(ctx *annealingState) {
  416. blocksMap := make(map[jcstypes.UserSpaceID][]objectBlock)
  417. // 先生成所有的影子块
  418. for _, pinned := range ctx.object.pinnedAt {
  419. blocks := make([]objectBlock, 0, ctx.object.totalBlockCount)
  420. for i := 0; i < ctx.object.totalBlockCount; i++ {
  421. blocks = append(blocks, objectBlock{
  422. Index: i,
  423. UserSpaceID: pinned,
  424. HasShadow: true,
  425. })
  426. }
  427. blocksMap[pinned] = blocks
  428. }
  429. // 再填充实际块
  430. for _, b := range ctx.object.blocks {
  431. blocks := blocksMap[b.UserSpaceID]
  432. has := false
  433. for i := range blocks {
  434. if blocks[i].Index == b.Index {
  435. blocks[i].HasEntity = true
  436. blocks[i].FileHash = b.FileHash
  437. has = true
  438. break
  439. }
  440. }
  441. if has {
  442. continue
  443. }
  444. blocks = append(blocks, objectBlock{
  445. Index: b.Index,
  446. UserSpaceID: b.UserSpaceID,
  447. HasEntity: true,
  448. FileHash: b.FileHash,
  449. Size: b.Size,
  450. })
  451. blocksMap[b.UserSpaceID] = blocks
  452. }
  453. var sortedBlocks []objectBlock
  454. for _, bs := range blocksMap {
  455. sortedBlocks = append(sortedBlocks, bs...)
  456. }
  457. sortedBlocks = sort2.Sort(sortedBlocks, func(left objectBlock, right objectBlock) int {
  458. d := left.UserSpaceID - right.UserSpaceID
  459. if d != 0 {
  460. return int(d)
  461. }
  462. return left.Index - right.Index
  463. })
  464. ctx.blockList = sortedBlocks
  465. }
  466. func (t *ChangeRedundancy) initNodeBlockBitmap(state *annealingState) {
  467. for _, b := range state.blockList {
  468. mp, ok := state.stgBlockBitmaps[b.UserSpaceID]
  469. if !ok {
  470. nb := bitmap.Bitmap64(0)
  471. mp = &nb
  472. state.stgBlockBitmaps[b.UserSpaceID] = mp
  473. }
  474. mp.Set(b.Index, true)
  475. }
  476. }
  477. func (t *ChangeRedundancy) sortNodeByReaderDistance(state *annealingState) {
  478. for _, r := range state.readerStgIDs {
  479. var nodeDists []stgDist
  480. for n := range state.stgBlockBitmaps {
  481. if r == n {
  482. // 同节点时距离视为0.1
  483. nodeDists = append(nodeDists, stgDist{
  484. UserSpaceID: n,
  485. Distance: consts.StorageDistanceSameStorage,
  486. })
  487. } else if state.ctx.allUserSpaces[r].UserSpace.UserSpace.Storage.GetLocation() == state.ctx.allUserSpaces[n].UserSpace.UserSpace.Storage.GetLocation() {
  488. // 同地区时距离视为1
  489. nodeDists = append(nodeDists, stgDist{
  490. UserSpaceID: n,
  491. Distance: consts.StorageDistanceSameLocation,
  492. })
  493. } else {
  494. // 不同地区时距离视为5
  495. nodeDists = append(nodeDists, stgDist{
  496. UserSpaceID: n,
  497. Distance: consts.StorageDistanceOther,
  498. })
  499. }
  500. }
  501. state.stgsSortedByReader[r] = sort2.Sort(nodeDists, func(left, right stgDist) int { return sort2.Cmp(left.Distance, right.Distance) })
  502. }
  503. }
  504. func (t *ChangeRedundancy) calcScore(state *annealingState) float64 {
  505. dt := t.calcDisasterTolerance(state)
  506. ac := t.calcMinAccessCost(state)
  507. sc := t.calcSpaceCost(state)
  508. state.lastDisasterTolerance = dt
  509. state.lastMinAccessCost = ac
  510. state.lastSpaceCost = sc
  511. dtSc := 1.0
  512. if dt < 1 {
  513. dtSc = 0
  514. } else if dt >= 2 {
  515. dtSc = 1.5
  516. }
  517. newSc := 0.0
  518. if dt == 0 || ac == 0 {
  519. newSc = 0
  520. } else {
  521. newSc = dtSc / (sc * ac)
  522. }
  523. // fmt.Printf("solu: %v, cur: %v, dt: %v, ac: %v, sc: %v \n", state.rmBlocks, newSc, dt, ac, sc)
  524. return newSc
  525. }
  526. // 计算容灾度
  527. func (t *ChangeRedundancy) calcDisasterTolerance(state *annealingState) float64 {
  528. if state.inversedIndex != -1 {
  529. node := state.blockList[state.inversedIndex]
  530. state.stgCombTree.UpdateBitmap(node.UserSpaceID, *state.stgBlockBitmaps[node.UserSpaceID], state.object.minBlockCnt)
  531. }
  532. return float64(len(state.stgBlockBitmaps) - state.stgCombTree.FindKBlocksMaxDepth(state.object.minBlockCnt))
  533. }
  534. // 计算最小访问数据的代价
  535. func (t *ChangeRedundancy) calcMinAccessCost(state *annealingState) float64 {
  536. cost := math.MaxFloat64
  537. for _, reader := range state.readerStgIDs {
  538. tarNodes := state.stgsSortedByReader[reader]
  539. gotBlocks := bitmap.Bitmap64(0)
  540. thisCost := 0.0
  541. for _, tar := range tarNodes {
  542. tarNodeMp := state.stgBlockBitmaps[tar.UserSpaceID]
  543. // 只需要从目的节点上获得缺少的块
  544. curWeigth := gotBlocks.Weight()
  545. // 下面的if会在拿到k个块之后跳出循环,所以or多了块也没关系
  546. gotBlocks.Or(tarNodeMp)
  547. // 但是算读取块的消耗时,不能多算,最多算读了k个块的消耗
  548. willGetBlocks := math2.Min(gotBlocks.Weight()-curWeigth, state.object.minBlockCnt-curWeigth)
  549. thisCost += float64(willGetBlocks) * float64(tar.Distance)
  550. if gotBlocks.Weight() >= state.object.minBlockCnt {
  551. break
  552. }
  553. }
  554. if gotBlocks.Weight() >= state.object.minBlockCnt {
  555. cost = math.Min(cost, thisCost)
  556. }
  557. }
  558. return cost
  559. }
  560. // 计算冗余度
  561. func (t *ChangeRedundancy) calcSpaceCost(ctx *annealingState) float64 {
  562. blockCount := 0
  563. for i, b := range ctx.blockList {
  564. if ctx.rmBlocks[i] {
  565. continue
  566. }
  567. if b.HasEntity {
  568. blockCount++
  569. }
  570. if b.HasShadow {
  571. blockCount++
  572. }
  573. }
  574. // 所有算力中心上拥有的块的总数 / 一个对象被分成了几个块
  575. return float64(blockCount) / float64(ctx.object.minBlockCnt)
  576. }
  577. // 如果新方案得分比旧方案小,那么在一定概率内也接受新方案
  578. func (t *ChangeRedundancy) alwaysAccept(curTemp float64, dScore float64, coolingRate float64) bool {
  579. v := math.Exp(dScore / curTemp / coolingRate)
  580. // fmt.Printf(" -- chance: %v, temp: %v", v, curTemp)
  581. return v > rand.Float64()
  582. }
  583. func (t *ChangeRedundancy) makePlansForRepObject(ctx *changeRedundancyContext, solu annealingSolution, obj jcstypes.ObjectDetail, planBld *exec.PlanBuilder, planningHubIDs map[jcstypes.UserSpaceID]bool) db.UpdatingObjectRedundancy {
  584. entry := db.UpdatingObjectRedundancy{
  585. ObjectID: obj.Object.ObjectID,
  586. FileHash: obj.Object.FileHash,
  587. Size: obj.Object.Size,
  588. Redundancy: obj.Object.Redundancy,
  589. }
  590. ft := ioswitch2.NewFromTo()
  591. fromStg := ctx.allUserSpaces[obj.Blocks[0].UserSpaceID].UserSpace
  592. ft.AddFrom(ioswitch2.NewFromShardstore(obj.Object.FileHash, *fromStg, ioswitch2.RawStream()))
  593. for i, f := range solu.rmBlocks {
  594. hasCache := lo.ContainsBy(obj.Blocks, func(b jcstypes.ObjectBlock) bool { return b.UserSpaceID == solu.blockList[i].UserSpaceID }) ||
  595. lo.ContainsBy(obj.PinnedAt, func(n jcstypes.UserSpaceID) bool { return n == solu.blockList[i].UserSpaceID })
  596. willRm := f
  597. if !willRm {
  598. // 如果对象在退火后要保留副本的节点没有副本,则需要在这个节点创建副本
  599. if !hasCache {
  600. toStg := ctx.allUserSpaces[solu.blockList[i].UserSpaceID].UserSpace
  601. ft.AddTo(ioswitch2.NewToShardStore(*toStg, ioswitch2.RawStream(), fmt.Sprintf("%d.0", obj.Object.ObjectID)))
  602. planningHubIDs[solu.blockList[i].UserSpaceID] = true
  603. }
  604. entry.Blocks = append(entry.Blocks, jcstypes.ObjectBlock{
  605. ObjectID: obj.Object.ObjectID,
  606. Index: solu.blockList[i].Index,
  607. UserSpaceID: solu.blockList[i].UserSpaceID,
  608. FileHash: obj.Object.FileHash,
  609. Size: solu.blockList[i].Size,
  610. })
  611. }
  612. }
  613. err := parser.Parse(ft, planBld)
  614. if err != nil {
  615. // TODO 错误处理
  616. }
  617. return entry
  618. }
  619. func (t *ChangeRedundancy) generateSysEventForRepObject(solu annealingSolution, obj jcstypes.ObjectDetail) []datamap.SysEventBody {
  620. var blockChgs []datamap.BlockChange
  621. for i, f := range solu.rmBlocks {
  622. hasCache := lo.ContainsBy(obj.Blocks, func(b jcstypes.ObjectBlock) bool { return b.UserSpaceID == solu.blockList[i].UserSpaceID }) ||
  623. lo.ContainsBy(obj.PinnedAt, func(n jcstypes.UserSpaceID) bool { return n == solu.blockList[i].UserSpaceID })
  624. willRm := f
  625. if !willRm {
  626. // 如果对象在退火后要保留副本的节点没有副本,则需要在这个节点创建副本
  627. if !hasCache {
  628. blockChgs = append(blockChgs, &datamap.BlockChangeClone{
  629. BlockType: datamap.BlockTypeRaw,
  630. SourceUserSpaceID: obj.Blocks[0].UserSpaceID,
  631. TargetUserSpaceID: solu.blockList[i].UserSpaceID,
  632. })
  633. }
  634. } else {
  635. blockChgs = append(blockChgs, &datamap.BlockChangeDeleted{
  636. Index: 0,
  637. UserSpaceID: solu.blockList[i].UserSpaceID,
  638. })
  639. }
  640. }
  641. transEvt := &datamap.BodyBlockTransfer{
  642. ObjectID: obj.Object.ObjectID,
  643. PackageID: obj.Object.PackageID,
  644. BlockChanges: blockChgs,
  645. }
  646. var blockDist []datamap.BlockDistributionObjectInfo
  647. for i, f := range solu.rmBlocks {
  648. if !f {
  649. blockDist = append(blockDist, datamap.BlockDistributionObjectInfo{
  650. BlockType: datamap.BlockTypeRaw,
  651. Index: 0,
  652. UserSpaceID: solu.blockList[i].UserSpaceID,
  653. })
  654. }
  655. }
  656. distEvt := &datamap.BodyBlockDistribution{
  657. ObjectID: obj.Object.ObjectID,
  658. PackageID: obj.Object.PackageID,
  659. Path: obj.Object.Path,
  660. Size: obj.Object.Size,
  661. FileHash: obj.Object.FileHash,
  662. FaultTolerance: solu.disasterTolerance,
  663. Redundancy: solu.spaceCost,
  664. AvgAccessCost: 0, // TODO 计算平均访问代价,从日常访问数据中统计
  665. BlockDistribution: blockDist,
  666. // TODO 不好计算传输量
  667. }
  668. return []datamap.SysEventBody{transEvt, distEvt}
  669. }
  670. func (t *ChangeRedundancy) makePlansForECObject(ctx *changeRedundancyContext, solu annealingSolution, obj jcstypes.ObjectDetail, planBld *exec.PlanBuilder, planningHubIDs map[jcstypes.UserSpaceID]bool) db.UpdatingObjectRedundancy {
  671. entry := db.UpdatingObjectRedundancy{
  672. ObjectID: obj.Object.ObjectID,
  673. FileHash: obj.Object.FileHash,
  674. Size: obj.Object.Size,
  675. Redundancy: obj.Object.Redundancy,
  676. }
  677. reconstrct := make(map[jcstypes.UserSpaceID]*[]int)
  678. for i, f := range solu.rmBlocks {
  679. block := solu.blockList[i]
  680. if !f {
  681. entry.Blocks = append(entry.Blocks, jcstypes.ObjectBlock{
  682. ObjectID: obj.Object.ObjectID,
  683. Index: block.Index,
  684. UserSpaceID: block.UserSpaceID,
  685. FileHash: block.FileHash,
  686. Size: block.Size,
  687. })
  688. // 如果这个块是影子块,那么就要从完整对象里重建这个块
  689. if !block.HasEntity {
  690. re, ok := reconstrct[block.UserSpaceID]
  691. if !ok {
  692. re = &[]int{}
  693. reconstrct[block.UserSpaceID] = re
  694. }
  695. *re = append(*re, block.Index)
  696. }
  697. }
  698. }
  699. ecRed := obj.Object.Redundancy.(*jcstypes.ECRedundancy)
  700. for id, idxs := range reconstrct {
  701. // 依次生成每个节点上的执行计划,因为如果放到一个计划里一起生成,不能保证每个节点上的块用的都是本节点上的副本
  702. ft := ioswitch2.NewFromTo()
  703. ft.ECParam = ecRed
  704. ft.AddFrom(ioswitch2.NewFromShardstore(obj.Object.FileHash, *ctx.allUserSpaces[id].UserSpace, ioswitch2.RawStream()))
  705. for _, i := range *idxs {
  706. ft.AddTo(ioswitch2.NewToShardStore(*ctx.allUserSpaces[id].UserSpace, ioswitch2.ECStream(i), fmt.Sprintf("%d.%d", obj.Object.ObjectID, i)))
  707. }
  708. err := parser.Parse(ft, planBld)
  709. if err != nil {
  710. // TODO 错误处理
  711. continue
  712. }
  713. planningHubIDs[id] = true
  714. }
  715. return entry
  716. }
  717. func (t *ChangeRedundancy) generateSysEventForECObject(solu annealingSolution, obj jcstypes.ObjectDetail) []datamap.SysEventBody {
  718. var blockChgs []datamap.BlockChange
  719. reconstrct := make(map[jcstypes.UserSpaceID]*[]int)
  720. for i, f := range solu.rmBlocks {
  721. block := solu.blockList[i]
  722. if !f {
  723. // 如果这个块是影子块,那么就要从完整对象里重建这个块
  724. if !block.HasEntity {
  725. re, ok := reconstrct[block.UserSpaceID]
  726. if !ok {
  727. re = &[]int{}
  728. reconstrct[block.UserSpaceID] = re
  729. }
  730. *re = append(*re, block.Index)
  731. }
  732. } else {
  733. blockChgs = append(blockChgs, &datamap.BlockChangeDeleted{
  734. Index: block.Index,
  735. UserSpaceID: block.UserSpaceID,
  736. })
  737. }
  738. }
  739. // 由于每一个需要被重建的块都是从同中心的副本里构建出来的,所以对于每一个中心都要产生一个BlockChangeEnDecode
  740. for id, idxs := range reconstrct {
  741. var tarBlocks []datamap.Block
  742. for _, idx := range *idxs {
  743. tarBlocks = append(tarBlocks, datamap.Block{
  744. BlockType: datamap.BlockTypeEC,
  745. Index: idx,
  746. UserSpaceID: id,
  747. })
  748. }
  749. blockChgs = append(blockChgs, &datamap.BlockChangeEnDecode{
  750. SourceBlocks: []datamap.Block{{
  751. BlockType: datamap.BlockTypeRaw,
  752. Index: 0,
  753. UserSpaceID: id, // 影子块的原始对象就在同一个节点上
  754. }},
  755. TargetBlocks: tarBlocks,
  756. // 传输量为0
  757. })
  758. }
  759. transEvt := &datamap.BodyBlockTransfer{
  760. ObjectID: obj.Object.ObjectID,
  761. PackageID: obj.Object.PackageID,
  762. BlockChanges: blockChgs,
  763. }
  764. var blockDist []datamap.BlockDistributionObjectInfo
  765. for i, f := range solu.rmBlocks {
  766. if !f {
  767. blockDist = append(blockDist, datamap.BlockDistributionObjectInfo{
  768. BlockType: datamap.BlockTypeEC,
  769. Index: solu.blockList[i].Index,
  770. UserSpaceID: solu.blockList[i].UserSpaceID,
  771. })
  772. }
  773. }
  774. distEvt := &datamap.BodyBlockDistribution{
  775. ObjectID: obj.Object.ObjectID,
  776. PackageID: obj.Object.PackageID,
  777. Path: obj.Object.Path,
  778. Size: obj.Object.Size,
  779. FileHash: obj.Object.FileHash,
  780. FaultTolerance: solu.disasterTolerance,
  781. Redundancy: solu.spaceCost,
  782. AvgAccessCost: 0, // TODO 计算平均访问代价,从日常访问数据中统计
  783. BlockDistribution: blockDist,
  784. // TODO 不好计算传输量
  785. }
  786. return []datamap.SysEventBody{transEvt, distEvt}
  787. }
  788. func (t *ChangeRedundancy) executePlans(ctx *changeRedundancyContext, planBld *exec.PlanBuilder, planningSpaceIDs map[jcstypes.UserSpaceID]bool, reen *publock.Reentrant) (exec.PlanResult, error) {
  789. reqBlder := reqbuilder.NewBuilder()
  790. for id, _ := range planningSpaceIDs {
  791. reqBlder.UserSpace().Buzy(id)
  792. }
  793. err := reen.Lock(reqBlder.Build())
  794. if err != nil {
  795. return exec.PlanResult{}, fmt.Errorf("locking shard resources: %w", err)
  796. }
  797. wg := sync.WaitGroup{}
  798. // 执行IO计划
  799. var ioSwRets exec.PlanResult
  800. var ioSwErr error
  801. wg.Add(1)
  802. go func() {
  803. defer wg.Done()
  804. execCtx := exec.NewExecContext()
  805. exec.SetValueByType(execCtx, ctx.ticktock.stgPool)
  806. ret, err := planBld.Execute(execCtx).Wait(context.TODO())
  807. if err != nil {
  808. ioSwErr = fmt.Errorf("executing io switch plan: %w", err)
  809. return
  810. }
  811. ioSwRets = ret
  812. }()
  813. wg.Wait()
  814. if ioSwErr != nil {
  815. return exec.PlanResult{}, ioSwErr
  816. }
  817. return ioSwRets, nil
  818. }
  819. func (t *ChangeRedundancy) populateECObjectEntry(entry *db.UpdatingObjectRedundancy, obj jcstypes.ObjectDetail, ioRets exec.PlanResult) {
  820. for i := range entry.Blocks {
  821. if entry.Blocks[i].FileHash != "" {
  822. continue
  823. }
  824. key := fmt.Sprintf("%d.%d", obj.Object.ObjectID, entry.Blocks[i].Index)
  825. // 不应该出现key不存在的情况
  826. r := ioRets.Get(key).(*ops2.FileInfoValue)
  827. entry.Blocks[i].FileHash = r.Hash
  828. entry.Blocks[i].Size = r.Size
  829. }
  830. }

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。