You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

clean_pinned.go 33 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043
  1. package event
  2. import (
  3. "context"
  4. "fmt"
  5. "math"
  6. "math/rand"
  7. "sync"
  8. "time"
  9. "github.com/samber/lo"
  10. "gitlink.org.cn/cloudream/common/pkgs/bitmap"
  11. "gitlink.org.cn/cloudream/common/pkgs/ioswitch/exec"
  12. "gitlink.org.cn/cloudream/common/pkgs/logger"
  13. cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
  14. "gitlink.org.cn/cloudream/common/utils/lo2"
  15. "gitlink.org.cn/cloudream/common/utils/math2"
  16. "gitlink.org.cn/cloudream/common/utils/sort2"
  17. "gitlink.org.cn/cloudream/storage/common/consts"
  18. stgglb "gitlink.org.cn/cloudream/storage/common/globals"
  19. stgmod "gitlink.org.cn/cloudream/storage/common/models"
  20. "gitlink.org.cn/cloudream/storage/common/pkgs/distlock/reqbuilder"
  21. "gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2"
  22. "gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2/ops2"
  23. "gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2/parser"
  24. coormq "gitlink.org.cn/cloudream/storage/common/pkgs/mq/coordinator"
  25. scevt "gitlink.org.cn/cloudream/storage/common/pkgs/mq/scanner/event"
  26. )
  27. type CleanPinned struct {
  28. *scevt.CleanPinned
  29. }
  30. func NewCleanPinned(evt *scevt.CleanPinned) *CleanPinned {
  31. return &CleanPinned{
  32. CleanPinned: evt,
  33. }
  34. }
  35. func (t *CleanPinned) TryMerge(other Event) bool {
  36. event, ok := other.(*CleanPinned)
  37. if !ok {
  38. return false
  39. }
  40. return t.PackageID == event.PackageID
  41. }
  42. func (t *CleanPinned) Execute(execCtx ExecuteContext) {
  43. log := logger.WithType[CleanPinned]("Event")
  44. startTime := time.Now()
  45. log.Debugf("begin with %v", logger.FormatStruct(t.CleanPinned))
  46. defer func() {
  47. log.Debugf("end, time: %v", time.Since(startTime))
  48. }()
  49. // TODO 应该与其他event一样,直接访问数据库
  50. coorCli, err := stgglb.CoordinatorMQPool.Acquire()
  51. if err != nil {
  52. log.Warnf("new coordinator client: %s", err.Error())
  53. return
  54. }
  55. defer stgglb.CoordinatorMQPool.Release(coorCli)
  56. getObjs, err := coorCli.GetPackageObjectDetails(coormq.ReqGetPackageObjectDetails(t.PackageID))
  57. if err != nil {
  58. log.Warnf("getting package objects: %s", err.Error())
  59. return
  60. }
  61. stats, err := execCtx.Args.DB.PackageAccessStat().GetByPackageID(execCtx.Args.DB.DefCtx(), t.PackageID)
  62. if err != nil {
  63. log.Warnf("getting package access stat: %s", err.Error())
  64. return
  65. }
  66. var readerStgIDs []cdssdk.StorageID
  67. for _, item := range stats {
  68. // TODO 可以考虑做成配置
  69. if item.Amount >= float64(len(getObjs.Objects)/2) {
  70. readerStgIDs = append(readerStgIDs, item.StorageID)
  71. }
  72. }
  73. // 注意!需要保证allStgID包含所有之后可能用到的节点ID
  74. // TOOD 可以考虑设计Cache机制
  75. var allStgID []cdssdk.StorageID
  76. for _, obj := range getObjs.Objects {
  77. for _, block := range obj.Blocks {
  78. allStgID = append(allStgID, block.StorageID)
  79. }
  80. allStgID = append(allStgID, obj.PinnedAt...)
  81. }
  82. allStgID = append(allStgID, readerStgIDs...)
  83. getStgResp, err := coorCli.GetStorageDetails(coormq.ReqGetStorageDetails(lo.Union(allStgID)))
  84. if err != nil {
  85. log.Warnf("getting nodes: %s", err.Error())
  86. return
  87. }
  88. allStgInfos := make(map[cdssdk.StorageID]*stgmod.StorageDetail)
  89. for _, stg := range getStgResp.Storages {
  90. allStgInfos[stg.Storage.StorageID] = stg
  91. }
  92. // 只对ec和rep对象进行处理
  93. var ecObjects []stgmod.ObjectDetail
  94. var repObjects []stgmod.ObjectDetail
  95. for _, obj := range getObjs.Objects {
  96. if _, ok := obj.Object.Redundancy.(*cdssdk.ECRedundancy); ok {
  97. ecObjects = append(ecObjects, obj)
  98. } else if _, ok := obj.Object.Redundancy.(*cdssdk.RepRedundancy); ok {
  99. repObjects = append(repObjects, obj)
  100. }
  101. }
  102. planBld := exec.NewPlanBuilder()
  103. planningStgIDs := make(map[cdssdk.StorageID]bool)
  104. var sysEvents []stgmod.SysEventBody
  105. // 对于rep对象,统计出所有对象块分布最多的两个节点,用这两个节点代表所有rep对象块的分布,去进行退火算法
  106. var repObjectsUpdating []coormq.UpdatingObjectRedundancy
  107. repMostHubIDs := t.summaryRepObjectBlockNodes(repObjects)
  108. solu := t.startAnnealing(allStgInfos, readerStgIDs, annealingObject{
  109. totalBlockCount: 1,
  110. minBlockCnt: 1,
  111. pinnedAt: repMostHubIDs,
  112. blocks: nil,
  113. })
  114. for _, obj := range repObjects {
  115. repObjectsUpdating = append(repObjectsUpdating, t.makePlansForRepObject(allStgInfos, solu, obj, planBld, planningStgIDs))
  116. sysEvents = append(sysEvents, t.generateSysEventForRepObject(solu, obj)...)
  117. }
  118. // 对于ec对象,则每个对象单独进行退火算法
  119. var ecObjectsUpdating []coormq.UpdatingObjectRedundancy
  120. for _, obj := range ecObjects {
  121. ecRed := obj.Object.Redundancy.(*cdssdk.ECRedundancy)
  122. solu := t.startAnnealing(allStgInfos, readerStgIDs, annealingObject{
  123. totalBlockCount: ecRed.N,
  124. minBlockCnt: ecRed.K,
  125. pinnedAt: obj.PinnedAt,
  126. blocks: obj.Blocks,
  127. })
  128. ecObjectsUpdating = append(ecObjectsUpdating, t.makePlansForECObject(allStgInfos, solu, obj, planBld, planningStgIDs))
  129. sysEvents = append(sysEvents, t.generateSysEventForECObject(solu, obj)...)
  130. }
  131. ioSwRets, err := t.executePlans(execCtx, planBld, planningStgIDs)
  132. if err != nil {
  133. log.Warn(err.Error())
  134. return
  135. }
  136. // 根据按照方案进行调整的结果,填充更新元数据的命令
  137. for i := range ecObjectsUpdating {
  138. t.populateECObjectEntry(&ecObjectsUpdating[i], ecObjects[i], ioSwRets)
  139. }
  140. finalEntries := append(repObjectsUpdating, ecObjectsUpdating...)
  141. if len(finalEntries) > 0 {
  142. _, err = coorCli.UpdateObjectRedundancy(coormq.ReqUpdateObjectRedundancy(finalEntries))
  143. if err != nil {
  144. log.Warnf("changing object redundancy: %s", err.Error())
  145. return
  146. }
  147. for _, e := range sysEvents {
  148. execCtx.Args.EvtPub.Publish(e)
  149. }
  150. }
  151. }
  152. func (t *CleanPinned) summaryRepObjectBlockNodes(objs []stgmod.ObjectDetail) []cdssdk.StorageID {
  153. type stgBlocks struct {
  154. StorageID cdssdk.StorageID
  155. Count int
  156. }
  157. stgBlocksMap := make(map[cdssdk.StorageID]*stgBlocks)
  158. for _, obj := range objs {
  159. cacheBlockStgs := make(map[cdssdk.StorageID]bool)
  160. for _, block := range obj.Blocks {
  161. if _, ok := stgBlocksMap[block.StorageID]; !ok {
  162. stgBlocksMap[block.StorageID] = &stgBlocks{
  163. StorageID: block.StorageID,
  164. Count: 0,
  165. }
  166. }
  167. stgBlocksMap[block.StorageID].Count++
  168. cacheBlockStgs[block.StorageID] = true
  169. }
  170. for _, hubID := range obj.PinnedAt {
  171. if cacheBlockStgs[hubID] {
  172. continue
  173. }
  174. if _, ok := stgBlocksMap[hubID]; !ok {
  175. stgBlocksMap[hubID] = &stgBlocks{
  176. StorageID: hubID,
  177. Count: 0,
  178. }
  179. }
  180. stgBlocksMap[hubID].Count++
  181. }
  182. }
  183. stgs := lo.Values(stgBlocksMap)
  184. sort2.Sort(stgs, func(left *stgBlocks, right *stgBlocks) int {
  185. return right.Count - left.Count
  186. })
  187. // 只选出块数超过一半的节点,但要保证至少有两个节点
  188. for i := 2; i < len(stgs); i++ {
  189. if stgs[i].Count < len(objs)/2 {
  190. stgs = stgs[:i]
  191. break
  192. }
  193. }
  194. return lo.Map(stgs, func(item *stgBlocks, idx int) cdssdk.StorageID { return item.StorageID })
  195. }
  196. type annealingState struct {
  197. allStgInfos map[cdssdk.StorageID]*stgmod.StorageDetail // 所有节点的信息
  198. readerStgIDs []cdssdk.StorageID // 近期可能访问此对象的节点
  199. stgsSortedByReader map[cdssdk.StorageID][]stgDist // 拥有数据的节点到每个可能访问对象的节点按距离排序
  200. object annealingObject // 进行退火的对象
  201. blockList []objectBlock // 排序后的块分布情况
  202. stgBlockBitmaps map[cdssdk.StorageID]*bitmap.Bitmap64 // 用位图的形式表示每一个节点上有哪些块
  203. stgCombTree combinatorialTree // 节点组合树,用于加速计算容灾度
  204. maxScore float64 // 搜索过程中得到过的最大分数
  205. maxScoreRmBlocks []bool // 最大分数对应的删除方案
  206. rmBlocks []bool // 当前删除方案
  207. inversedIndex int // 当前删除方案是从上一次的方案改动哪个flag而来的
  208. lastDisasterTolerance float64 // 上一次方案的容灾度
  209. lastSpaceCost float64 // 上一次方案的冗余度
  210. lastMinAccessCost float64 // 上一次方案的最小访问费用
  211. lastScore float64 // 上一次方案的分数
  212. }
  213. type objectBlock struct {
  214. Index int
  215. StorageID cdssdk.StorageID
  216. HasEntity bool // 节点拥有实际的文件数据块
  217. HasShadow bool // 如果节点拥有完整文件数据,那么认为这个节点拥有所有块,这些块被称为影子块
  218. FileHash cdssdk.FileHash // 只有在拥有实际文件数据块时,这个字段才有值
  219. Size int64 // 块大小
  220. }
  221. type stgDist struct {
  222. StorageID cdssdk.StorageID
  223. Distance float64
  224. }
  225. type combinatorialTree struct {
  226. nodes []combinatorialTreeNode
  227. blocksMaps map[int]bitmap.Bitmap64
  228. stgIDToLocalStgID map[cdssdk.StorageID]int
  229. localStgIDToStgID []cdssdk.StorageID
  230. }
  231. type annealingObject struct {
  232. totalBlockCount int
  233. minBlockCnt int
  234. pinnedAt []cdssdk.StorageID
  235. blocks []stgmod.ObjectBlock
  236. }
  237. const (
  238. iterActionNone = 0
  239. iterActionSkip = 1
  240. iterActionBreak = 2
  241. )
  242. func newCombinatorialTree(stgBlocksMaps map[cdssdk.StorageID]*bitmap.Bitmap64) combinatorialTree {
  243. tree := combinatorialTree{
  244. blocksMaps: make(map[int]bitmap.Bitmap64),
  245. stgIDToLocalStgID: make(map[cdssdk.StorageID]int),
  246. }
  247. tree.nodes = make([]combinatorialTreeNode, (1 << len(stgBlocksMaps)))
  248. for id, mp := range stgBlocksMaps {
  249. tree.stgIDToLocalStgID[id] = len(tree.localStgIDToStgID)
  250. tree.blocksMaps[len(tree.localStgIDToStgID)] = *mp
  251. tree.localStgIDToStgID = append(tree.localStgIDToStgID, id)
  252. }
  253. tree.nodes[0].localHubID = -1
  254. index := 1
  255. tree.initNode(0, &tree.nodes[0], &index)
  256. return tree
  257. }
  258. func (t *combinatorialTree) initNode(minAvaiLocalHubID int, parent *combinatorialTreeNode, index *int) {
  259. for i := minAvaiLocalHubID; i < len(t.stgIDToLocalStgID); i++ {
  260. curIndex := *index
  261. *index++
  262. bitMp := t.blocksMaps[i]
  263. bitMp.Or(&parent.blocksBitmap)
  264. t.nodes[curIndex] = combinatorialTreeNode{
  265. localHubID: i,
  266. parent: parent,
  267. blocksBitmap: bitMp,
  268. }
  269. t.initNode(i+1, &t.nodes[curIndex], index)
  270. }
  271. }
  272. // 获得索引指定的节点所在的层
  273. func (t *combinatorialTree) GetDepth(index int) int {
  274. depth := 0
  275. // 反复判断节点在哪个子树。从左到右,子树节点的数量呈现8 4 2的变化,由此可以得到每个子树的索引值的范围
  276. subTreeCount := 1 << len(t.stgIDToLocalStgID)
  277. for index > 0 {
  278. if index < subTreeCount {
  279. // 定位到一个子树后,深度+1,然后进入这个子树,使用同样的方法再进行定位。
  280. // 进入子树后需要将索引值-1,因为要去掉子树的根节点
  281. index--
  282. depth++
  283. } else {
  284. // 如果索引值不在这个子树范围内,则将值减去子树的节点数量,
  285. // 这样每一次都可以视为使用同样的逻辑对不同大小的树进行判断。
  286. index -= subTreeCount
  287. }
  288. subTreeCount >>= 1
  289. }
  290. return depth
  291. }
  292. // 更新某一个算力中心节点的块分布位图,同时更新它对应组合树节点的所有子节点。
  293. // 如果更新到某个节点时,已有K个块,那么就不会再更新它的子节点
  294. func (t *combinatorialTree) UpdateBitmap(stgID cdssdk.StorageID, mp bitmap.Bitmap64, k int) {
  295. t.blocksMaps[t.stgIDToLocalStgID[stgID]] = mp
  296. // 首先定义两种遍历树节点时的移动方式:
  297. // 1. 竖直移动(深度增加):从一个节点移动到它最左边的子节点。每移动一步,index+1
  298. // 2. 水平移动:从一个节点移动到它右边的兄弟节点。每移动一步,根据它所在的深度,index+8,+4,+2
  299. // LocalID从0开始,将其+1后得到移动步数steps。
  300. // 将移动步数拆成多部分,分配到上述的两种移动方式上,并进行任意组合,且保证第一次为至少进行一次的竖直移动,移动之后的节点都会是同一个计算中心节点。
  301. steps := t.stgIDToLocalStgID[stgID] + 1
  302. for d := 1; d <= steps; d++ {
  303. t.iterCombBits(len(t.stgIDToLocalStgID)-1, steps-d, 0, func(i int) {
  304. index := d + i
  305. node := &t.nodes[index]
  306. newMp := t.blocksMaps[node.localHubID]
  307. newMp.Or(&node.parent.blocksBitmap)
  308. node.blocksBitmap = newMp
  309. if newMp.Weight() >= k {
  310. return
  311. }
  312. t.iterChildren(index, func(index, parentIndex, depth int) int {
  313. curNode := &t.nodes[index]
  314. parentNode := t.nodes[parentIndex]
  315. newMp := t.blocksMaps[curNode.localHubID]
  316. newMp.Or(&parentNode.blocksBitmap)
  317. curNode.blocksBitmap = newMp
  318. if newMp.Weight() >= k {
  319. return iterActionSkip
  320. }
  321. return iterActionNone
  322. })
  323. })
  324. }
  325. }
  326. // 遍历树,找到至少拥有K个块的树节点的最大深度
  327. func (t *combinatorialTree) FindKBlocksMaxDepth(k int) int {
  328. maxDepth := -1
  329. t.iterChildren(0, func(index, parentIndex, depth int) int {
  330. if t.nodes[index].blocksBitmap.Weight() >= k {
  331. if maxDepth < depth {
  332. maxDepth = depth
  333. }
  334. return iterActionSkip
  335. }
  336. // 如果到了叶子节点,还没有找到K个块,那就认为要满足K个块,至少需要再多一个节点,即深度+1。
  337. // 由于遍历时采用的是深度优先的算法,因此遍历到这个叶子节点时,叶子节点再加一个节点的组合已经在前面搜索过,
  338. // 所以用当前叶子节点深度+1来作为当前分支的结果就可以,即使当前情况下增加任意一个节点依然不够K块,
  339. // 可以使用同样的思路去递推到当前叶子节点增加两个块的情况。
  340. if t.nodes[index].localHubID == len(t.stgIDToLocalStgID)-1 {
  341. if maxDepth < depth+1 {
  342. maxDepth = depth + 1
  343. }
  344. }
  345. return iterActionNone
  346. })
  347. if maxDepth == -1 || maxDepth > len(t.stgIDToLocalStgID) {
  348. return len(t.stgIDToLocalStgID)
  349. }
  350. return maxDepth
  351. }
  352. func (t *combinatorialTree) iterCombBits(width int, count int, offset int, callback func(int)) {
  353. if count == 0 {
  354. callback(offset)
  355. return
  356. }
  357. for b := width; b >= count; b-- {
  358. t.iterCombBits(b-1, count-1, offset+(1<<b), callback)
  359. }
  360. }
  361. func (t *combinatorialTree) iterChildren(index int, do func(index int, parentIndex int, depth int) int) {
  362. curNode := &t.nodes[index]
  363. childIndex := index + 1
  364. curDepth := t.GetDepth(index)
  365. childCounts := len(t.stgIDToLocalStgID) - 1 - curNode.localHubID
  366. if childCounts == 0 {
  367. return
  368. }
  369. childTreeNodeCnt := 1 << (childCounts - 1)
  370. for c := 0; c < childCounts; c++ {
  371. act := t.itering(childIndex, index, curDepth+1, do)
  372. if act == iterActionBreak {
  373. return
  374. }
  375. childIndex += childTreeNodeCnt
  376. childTreeNodeCnt >>= 1
  377. }
  378. }
  379. func (t *combinatorialTree) itering(index int, parentIndex int, depth int, do func(index int, parentIndex int, depth int) int) int {
  380. act := do(index, parentIndex, depth)
  381. if act == iterActionBreak {
  382. return act
  383. }
  384. if act == iterActionSkip {
  385. return iterActionNone
  386. }
  387. curNode := &t.nodes[index]
  388. childIndex := index + 1
  389. childCounts := len(t.stgIDToLocalStgID) - 1 - curNode.localHubID
  390. if childCounts == 0 {
  391. return iterActionNone
  392. }
  393. childTreeNodeCnt := 1 << (childCounts - 1)
  394. for c := 0; c < childCounts; c++ {
  395. act = t.itering(childIndex, index, depth+1, do)
  396. if act == iterActionBreak {
  397. return act
  398. }
  399. childIndex += childTreeNodeCnt
  400. childTreeNodeCnt >>= 1
  401. }
  402. return iterActionNone
  403. }
  404. type combinatorialTreeNode struct {
  405. localHubID int
  406. parent *combinatorialTreeNode
  407. blocksBitmap bitmap.Bitmap64 // 选择了这个中心之后,所有中心一共包含多少种块
  408. }
  409. type annealingSolution struct {
  410. blockList []objectBlock // 所有节点的块分布情况
  411. rmBlocks []bool // 要删除哪些块
  412. disasterTolerance float64 // 本方案的容灾度
  413. spaceCost float64 // 本方案的冗余度
  414. minAccessCost float64 // 本方案的最小访问费用
  415. }
  416. func (t *CleanPinned) startAnnealing(allStgInfos map[cdssdk.StorageID]*stgmod.StorageDetail, readerStgIDs []cdssdk.StorageID, object annealingObject) annealingSolution {
  417. state := &annealingState{
  418. allStgInfos: allStgInfos,
  419. readerStgIDs: readerStgIDs,
  420. stgsSortedByReader: make(map[cdssdk.StorageID][]stgDist),
  421. object: object,
  422. stgBlockBitmaps: make(map[cdssdk.StorageID]*bitmap.Bitmap64),
  423. }
  424. t.initBlockList(state)
  425. if state.blockList == nil {
  426. return annealingSolution{}
  427. }
  428. t.initNodeBlockBitmap(state)
  429. t.sortNodeByReaderDistance(state)
  430. state.rmBlocks = make([]bool, len(state.blockList))
  431. state.inversedIndex = -1
  432. state.stgCombTree = newCombinatorialTree(state.stgBlockBitmaps)
  433. state.lastScore = t.calcScore(state)
  434. state.maxScore = state.lastScore
  435. state.maxScoreRmBlocks = lo2.ArrayClone(state.rmBlocks)
  436. // 模拟退火算法的温度
  437. curTemp := state.lastScore
  438. // 结束温度
  439. finalTemp := curTemp * 0.2
  440. // 冷却率
  441. coolingRate := 0.95
  442. for curTemp > finalTemp {
  443. state.inversedIndex = rand.Intn(len(state.rmBlocks))
  444. block := state.blockList[state.inversedIndex]
  445. state.rmBlocks[state.inversedIndex] = !state.rmBlocks[state.inversedIndex]
  446. state.stgBlockBitmaps[block.StorageID].Set(block.Index, !state.rmBlocks[state.inversedIndex])
  447. state.stgCombTree.UpdateBitmap(block.StorageID, *state.stgBlockBitmaps[block.StorageID], state.object.minBlockCnt)
  448. curScore := t.calcScore(state)
  449. dScore := curScore - state.lastScore
  450. // 如果新方案比旧方案得分低,且没有要求强制接受新方案,那么就将变化改回去
  451. if curScore == 0 || (dScore < 0 && !t.alwaysAccept(curTemp, dScore, coolingRate)) {
  452. state.rmBlocks[state.inversedIndex] = !state.rmBlocks[state.inversedIndex]
  453. state.stgBlockBitmaps[block.StorageID].Set(block.Index, !state.rmBlocks[state.inversedIndex])
  454. state.stgCombTree.UpdateBitmap(block.StorageID, *state.stgBlockBitmaps[block.StorageID], state.object.minBlockCnt)
  455. // fmt.Printf("\n")
  456. } else {
  457. // fmt.Printf(" accept!\n")
  458. state.lastScore = curScore
  459. if state.maxScore < curScore {
  460. state.maxScore = state.lastScore
  461. state.maxScoreRmBlocks = lo2.ArrayClone(state.rmBlocks)
  462. }
  463. }
  464. curTemp *= coolingRate
  465. }
  466. // fmt.Printf("final: %v\n", state.maxScoreRmBlocks)
  467. return annealingSolution{
  468. blockList: state.blockList,
  469. rmBlocks: state.maxScoreRmBlocks,
  470. disasterTolerance: state.lastDisasterTolerance,
  471. spaceCost: state.lastSpaceCost,
  472. minAccessCost: state.lastMinAccessCost,
  473. }
  474. }
  475. func (t *CleanPinned) initBlockList(ctx *annealingState) {
  476. blocksMap := make(map[cdssdk.StorageID][]objectBlock)
  477. // 先生成所有的影子块
  478. for _, pinned := range ctx.object.pinnedAt {
  479. blocks := make([]objectBlock, 0, ctx.object.totalBlockCount)
  480. for i := 0; i < ctx.object.totalBlockCount; i++ {
  481. blocks = append(blocks, objectBlock{
  482. Index: i,
  483. StorageID: pinned,
  484. HasShadow: true,
  485. })
  486. }
  487. blocksMap[pinned] = blocks
  488. }
  489. // 再填充实际块
  490. for _, b := range ctx.object.blocks {
  491. blocks := blocksMap[b.StorageID]
  492. has := false
  493. for i := range blocks {
  494. if blocks[i].Index == b.Index {
  495. blocks[i].HasEntity = true
  496. blocks[i].FileHash = b.FileHash
  497. has = true
  498. break
  499. }
  500. }
  501. if has {
  502. continue
  503. }
  504. blocks = append(blocks, objectBlock{
  505. Index: b.Index,
  506. StorageID: b.StorageID,
  507. HasEntity: true,
  508. FileHash: b.FileHash,
  509. Size: b.Size,
  510. })
  511. blocksMap[b.StorageID] = blocks
  512. }
  513. var sortedBlocks []objectBlock
  514. for _, bs := range blocksMap {
  515. sortedBlocks = append(sortedBlocks, bs...)
  516. }
  517. sortedBlocks = sort2.Sort(sortedBlocks, func(left objectBlock, right objectBlock) int {
  518. d := left.StorageID - right.StorageID
  519. if d != 0 {
  520. return int(d)
  521. }
  522. return left.Index - right.Index
  523. })
  524. ctx.blockList = sortedBlocks
  525. }
  526. func (t *CleanPinned) initNodeBlockBitmap(state *annealingState) {
  527. for _, b := range state.blockList {
  528. mp, ok := state.stgBlockBitmaps[b.StorageID]
  529. if !ok {
  530. nb := bitmap.Bitmap64(0)
  531. mp = &nb
  532. state.stgBlockBitmaps[b.StorageID] = mp
  533. }
  534. mp.Set(b.Index, true)
  535. }
  536. }
  537. func (t *CleanPinned) sortNodeByReaderDistance(state *annealingState) {
  538. for _, r := range state.readerStgIDs {
  539. var nodeDists []stgDist
  540. for n := range state.stgBlockBitmaps {
  541. if r == n {
  542. // 同节点时距离视为0.1
  543. nodeDists = append(nodeDists, stgDist{
  544. StorageID: n,
  545. Distance: consts.StorageDistanceSameStorage,
  546. })
  547. } else if state.allStgInfos[r].MasterHub.LocationID == state.allStgInfos[n].MasterHub.LocationID {
  548. // 同地区时距离视为1
  549. nodeDists = append(nodeDists, stgDist{
  550. StorageID: n,
  551. Distance: consts.StorageDistanceSameLocation,
  552. })
  553. } else {
  554. // 不同地区时距离视为5
  555. nodeDists = append(nodeDists, stgDist{
  556. StorageID: n,
  557. Distance: consts.StorageDistanceOther,
  558. })
  559. }
  560. }
  561. state.stgsSortedByReader[r] = sort2.Sort(nodeDists, func(left, right stgDist) int { return sort2.Cmp(left.Distance, right.Distance) })
  562. }
  563. }
  564. func (t *CleanPinned) calcScore(state *annealingState) float64 {
  565. dt := t.calcDisasterTolerance(state)
  566. ac := t.calcMinAccessCost(state)
  567. sc := t.calcSpaceCost(state)
  568. state.lastDisasterTolerance = dt
  569. state.lastMinAccessCost = ac
  570. state.lastSpaceCost = sc
  571. dtSc := 1.0
  572. if dt < 1 {
  573. dtSc = 0
  574. } else if dt >= 2 {
  575. dtSc = 1.5
  576. }
  577. newSc := 0.0
  578. if dt == 0 || ac == 0 {
  579. newSc = 0
  580. } else {
  581. newSc = dtSc / (sc * ac)
  582. }
  583. // fmt.Printf("solu: %v, cur: %v, dt: %v, ac: %v, sc: %v \n", state.rmBlocks, newSc, dt, ac, sc)
  584. return newSc
  585. }
  586. // 计算容灾度
  587. func (t *CleanPinned) calcDisasterTolerance(state *annealingState) float64 {
  588. if state.inversedIndex != -1 {
  589. node := state.blockList[state.inversedIndex]
  590. state.stgCombTree.UpdateBitmap(node.StorageID, *state.stgBlockBitmaps[node.StorageID], state.object.minBlockCnt)
  591. }
  592. return float64(len(state.stgBlockBitmaps) - state.stgCombTree.FindKBlocksMaxDepth(state.object.minBlockCnt))
  593. }
  594. // 计算最小访问数据的代价
  595. func (t *CleanPinned) calcMinAccessCost(state *annealingState) float64 {
  596. cost := math.MaxFloat64
  597. for _, reader := range state.readerStgIDs {
  598. tarNodes := state.stgsSortedByReader[reader]
  599. gotBlocks := bitmap.Bitmap64(0)
  600. thisCost := 0.0
  601. for _, tar := range tarNodes {
  602. tarNodeMp := state.stgBlockBitmaps[tar.StorageID]
  603. // 只需要从目的节点上获得缺少的块
  604. curWeigth := gotBlocks.Weight()
  605. // 下面的if会在拿到k个块之后跳出循环,所以or多了块也没关系
  606. gotBlocks.Or(tarNodeMp)
  607. // 但是算读取块的消耗时,不能多算,最多算读了k个块的消耗
  608. willGetBlocks := math2.Min(gotBlocks.Weight()-curWeigth, state.object.minBlockCnt-curWeigth)
  609. thisCost += float64(willGetBlocks) * float64(tar.Distance)
  610. if gotBlocks.Weight() >= state.object.minBlockCnt {
  611. break
  612. }
  613. }
  614. if gotBlocks.Weight() >= state.object.minBlockCnt {
  615. cost = math.Min(cost, thisCost)
  616. }
  617. }
  618. return cost
  619. }
  620. // 计算冗余度
  621. func (t *CleanPinned) calcSpaceCost(ctx *annealingState) float64 {
  622. blockCount := 0
  623. for i, b := range ctx.blockList {
  624. if ctx.rmBlocks[i] {
  625. continue
  626. }
  627. if b.HasEntity {
  628. blockCount++
  629. }
  630. if b.HasShadow {
  631. blockCount++
  632. }
  633. }
  634. // 所有算力中心上拥有的块的总数 / 一个对象被分成了几个块
  635. return float64(blockCount) / float64(ctx.object.minBlockCnt)
  636. }
  637. // 如果新方案得分比旧方案小,那么在一定概率内也接受新方案
  638. func (t *CleanPinned) alwaysAccept(curTemp float64, dScore float64, coolingRate float64) bool {
  639. v := math.Exp(dScore / curTemp / coolingRate)
  640. // fmt.Printf(" -- chance: %v, temp: %v", v, curTemp)
  641. return v > rand.Float64()
  642. }
  643. func (t *CleanPinned) makePlansForRepObject(allStgInfos map[cdssdk.StorageID]*stgmod.StorageDetail, solu annealingSolution, obj stgmod.ObjectDetail, planBld *exec.PlanBuilder, planningHubIDs map[cdssdk.StorageID]bool) coormq.UpdatingObjectRedundancy {
  644. entry := coormq.UpdatingObjectRedundancy{
  645. ObjectID: obj.Object.ObjectID,
  646. FileHash: obj.Object.FileHash,
  647. Size: obj.Object.Size,
  648. Redundancy: obj.Object.Redundancy,
  649. }
  650. ft := ioswitch2.NewFromTo()
  651. fromStg := allStgInfos[obj.Blocks[0].StorageID]
  652. ft.AddFrom(ioswitch2.NewFromShardstore(obj.Object.FileHash, *fromStg.MasterHub, *fromStg, ioswitch2.RawStream()))
  653. for i, f := range solu.rmBlocks {
  654. hasCache := lo.ContainsBy(obj.Blocks, func(b stgmod.ObjectBlock) bool { return b.StorageID == solu.blockList[i].StorageID }) ||
  655. lo.ContainsBy(obj.PinnedAt, func(n cdssdk.StorageID) bool { return n == solu.blockList[i].StorageID })
  656. willRm := f
  657. if !willRm {
  658. // 如果对象在退火后要保留副本的节点没有副本,则需要在这个节点创建副本
  659. if !hasCache {
  660. toStg := allStgInfos[solu.blockList[i].StorageID]
  661. ft.AddTo(ioswitch2.NewToShardStore(*toStg.MasterHub, *toStg, ioswitch2.RawStream(), fmt.Sprintf("%d.0", obj.Object.ObjectID)))
  662. planningHubIDs[solu.blockList[i].StorageID] = true
  663. }
  664. entry.Blocks = append(entry.Blocks, stgmod.ObjectBlock{
  665. ObjectID: obj.Object.ObjectID,
  666. Index: solu.blockList[i].Index,
  667. StorageID: solu.blockList[i].StorageID,
  668. FileHash: obj.Object.FileHash,
  669. Size: solu.blockList[i].Size,
  670. })
  671. }
  672. }
  673. err := parser.Parse(ft, planBld)
  674. if err != nil {
  675. // TODO 错误处理
  676. }
  677. return entry
  678. }
  679. func (t *CleanPinned) generateSysEventForRepObject(solu annealingSolution, obj stgmod.ObjectDetail) []stgmod.SysEventBody {
  680. var blockChgs []stgmod.BlockChange
  681. for i, f := range solu.rmBlocks {
  682. hasCache := lo.ContainsBy(obj.Blocks, func(b stgmod.ObjectBlock) bool { return b.StorageID == solu.blockList[i].StorageID }) ||
  683. lo.ContainsBy(obj.PinnedAt, func(n cdssdk.StorageID) bool { return n == solu.blockList[i].StorageID })
  684. willRm := f
  685. if !willRm {
  686. // 如果对象在退火后要保留副本的节点没有副本,则需要在这个节点创建副本
  687. if !hasCache {
  688. blockChgs = append(blockChgs, &stgmod.BlockChangeClone{
  689. BlockType: stgmod.BlockTypeRaw,
  690. SourceStorageID: obj.Blocks[0].StorageID,
  691. TargetStorageID: solu.blockList[i].StorageID,
  692. })
  693. }
  694. } else {
  695. blockChgs = append(blockChgs, &stgmod.BlockChangeDeleted{
  696. Index: 0,
  697. StorageID: solu.blockList[i].StorageID,
  698. })
  699. }
  700. }
  701. transEvt := &stgmod.BodyBlockTransfer{
  702. ObjectID: obj.Object.ObjectID,
  703. PackageID: obj.Object.PackageID,
  704. BlockChanges: blockChgs,
  705. }
  706. var blockDist []stgmod.BlockDistributionObjectInfo
  707. for i, f := range solu.rmBlocks {
  708. if !f {
  709. blockDist = append(blockDist, stgmod.BlockDistributionObjectInfo{
  710. BlockType: stgmod.BlockTypeRaw,
  711. Index: 0,
  712. StorageID: solu.blockList[i].StorageID,
  713. })
  714. }
  715. }
  716. distEvt := &stgmod.BodyBlockDistribution{
  717. ObjectID: obj.Object.ObjectID,
  718. PackageID: obj.Object.PackageID,
  719. Path: obj.Object.Path,
  720. Size: obj.Object.Size,
  721. FileHash: obj.Object.FileHash,
  722. FaultTolerance: solu.disasterTolerance,
  723. Redundancy: solu.spaceCost,
  724. AvgAccessCost: 0, // TODO 计算平均访问代价,从日常访问数据中统计
  725. BlockDistribution: blockDist,
  726. // TODO 不好计算传输量
  727. }
  728. return []stgmod.SysEventBody{transEvt, distEvt}
  729. }
  730. func (t *CleanPinned) makePlansForECObject(allStgInfos map[cdssdk.StorageID]*stgmod.StorageDetail, solu annealingSolution, obj stgmod.ObjectDetail, planBld *exec.PlanBuilder, planningHubIDs map[cdssdk.StorageID]bool) coormq.UpdatingObjectRedundancy {
  731. entry := coormq.UpdatingObjectRedundancy{
  732. ObjectID: obj.Object.ObjectID,
  733. FileHash: obj.Object.FileHash,
  734. Size: obj.Object.Size,
  735. Redundancy: obj.Object.Redundancy,
  736. }
  737. reconstrct := make(map[cdssdk.StorageID]*[]int)
  738. for i, f := range solu.rmBlocks {
  739. block := solu.blockList[i]
  740. if !f {
  741. entry.Blocks = append(entry.Blocks, stgmod.ObjectBlock{
  742. ObjectID: obj.Object.ObjectID,
  743. Index: block.Index,
  744. StorageID: block.StorageID,
  745. FileHash: block.FileHash,
  746. Size: block.Size,
  747. })
  748. // 如果这个块是影子块,那么就要从完整对象里重建这个块
  749. if !block.HasEntity {
  750. re, ok := reconstrct[block.StorageID]
  751. if !ok {
  752. re = &[]int{}
  753. reconstrct[block.StorageID] = re
  754. }
  755. *re = append(*re, block.Index)
  756. }
  757. }
  758. }
  759. ecRed := obj.Object.Redundancy.(*cdssdk.ECRedundancy)
  760. for id, idxs := range reconstrct {
  761. // 依次生成每个节点上的执行计划,因为如果放到一个计划里一起生成,不能保证每个节点上的块用的都是本节点上的副本
  762. ft := ioswitch2.NewFromTo()
  763. ft.ECParam = ecRed
  764. ft.AddFrom(ioswitch2.NewFromShardstore(obj.Object.FileHash, *allStgInfos[id].MasterHub, *allStgInfos[id], ioswitch2.RawStream()))
  765. for _, i := range *idxs {
  766. ft.AddTo(ioswitch2.NewToShardStore(*allStgInfos[id].MasterHub, *allStgInfos[id], ioswitch2.ECStream(i), fmt.Sprintf("%d.%d", obj.Object.ObjectID, i)))
  767. }
  768. err := parser.Parse(ft, planBld)
  769. if err != nil {
  770. // TODO 错误处理
  771. continue
  772. }
  773. planningHubIDs[id] = true
  774. }
  775. return entry
  776. }
  777. func (t *CleanPinned) generateSysEventForECObject(solu annealingSolution, obj stgmod.ObjectDetail) []stgmod.SysEventBody {
  778. var blockChgs []stgmod.BlockChange
  779. reconstrct := make(map[cdssdk.StorageID]*[]int)
  780. for i, f := range solu.rmBlocks {
  781. block := solu.blockList[i]
  782. if !f {
  783. // 如果这个块是影子块,那么就要从完整对象里重建这个块
  784. if !block.HasEntity {
  785. re, ok := reconstrct[block.StorageID]
  786. if !ok {
  787. re = &[]int{}
  788. reconstrct[block.StorageID] = re
  789. }
  790. *re = append(*re, block.Index)
  791. }
  792. } else {
  793. blockChgs = append(blockChgs, &stgmod.BlockChangeDeleted{
  794. Index: block.Index,
  795. StorageID: block.StorageID,
  796. })
  797. }
  798. }
  799. // 由于每一个需要被重建的块都是从同中心的副本里构建出来的,所以对于每一个中心都要产生一个BlockChangeEnDecode
  800. for id, idxs := range reconstrct {
  801. var tarBlocks []stgmod.Block
  802. for _, idx := range *idxs {
  803. tarBlocks = append(tarBlocks, stgmod.Block{
  804. BlockType: stgmod.BlockTypeEC,
  805. Index: idx,
  806. StorageID: id,
  807. })
  808. }
  809. blockChgs = append(blockChgs, &stgmod.BlockChangeEnDecode{
  810. SourceBlocks: []stgmod.Block{{
  811. BlockType: stgmod.BlockTypeRaw,
  812. Index: 0,
  813. StorageID: id, // 影子块的原始对象就在同一个节点上
  814. }},
  815. TargetBlocks: tarBlocks,
  816. // 传输量为0
  817. })
  818. }
  819. transEvt := &stgmod.BodyBlockTransfer{
  820. ObjectID: obj.Object.ObjectID,
  821. PackageID: obj.Object.PackageID,
  822. BlockChanges: blockChgs,
  823. }
  824. var blockDist []stgmod.BlockDistributionObjectInfo
  825. for i, f := range solu.rmBlocks {
  826. if !f {
  827. blockDist = append(blockDist, stgmod.BlockDistributionObjectInfo{
  828. BlockType: stgmod.BlockTypeEC,
  829. Index: solu.blockList[i].Index,
  830. StorageID: solu.blockList[i].StorageID,
  831. })
  832. }
  833. }
  834. distEvt := &stgmod.BodyBlockDistribution{
  835. ObjectID: obj.Object.ObjectID,
  836. PackageID: obj.Object.PackageID,
  837. Path: obj.Object.Path,
  838. Size: obj.Object.Size,
  839. FileHash: obj.Object.FileHash,
  840. FaultTolerance: solu.disasterTolerance,
  841. Redundancy: solu.spaceCost,
  842. AvgAccessCost: 0, // TODO 计算平均访问代价,从日常访问数据中统计
  843. BlockDistribution: blockDist,
  844. // TODO 不好计算传输量
  845. }
  846. return []stgmod.SysEventBody{transEvt, distEvt}
  847. }
  848. func (t *CleanPinned) executePlans(ctx ExecuteContext, planBld *exec.PlanBuilder, planningStgIDs map[cdssdk.StorageID]bool) (map[string]exec.VarValue, error) {
  849. // 统一加锁,有重复也没关系
  850. lockBld := reqbuilder.NewBuilder()
  851. for id := range planningStgIDs {
  852. lockBld.Shard().Buzy(id)
  853. }
  854. lock, err := lockBld.MutexLock(ctx.Args.DistLock)
  855. if err != nil {
  856. return nil, fmt.Errorf("acquiring distlock: %w", err)
  857. }
  858. defer lock.Unlock()
  859. wg := sync.WaitGroup{}
  860. // 执行IO计划
  861. var ioSwRets map[string]exec.VarValue
  862. var ioSwErr error
  863. wg.Add(1)
  864. go func() {
  865. defer wg.Done()
  866. execCtx := exec.NewExecContext()
  867. exec.SetValueByType(execCtx, ctx.Args.StgMgr)
  868. ret, err := planBld.Execute(execCtx).Wait(context.TODO())
  869. if err != nil {
  870. ioSwErr = fmt.Errorf("executing io switch plan: %w", err)
  871. return
  872. }
  873. ioSwRets = ret
  874. }()
  875. wg.Wait()
  876. if ioSwErr != nil {
  877. return nil, ioSwErr
  878. }
  879. return ioSwRets, nil
  880. }
  881. func (t *CleanPinned) populateECObjectEntry(entry *coormq.UpdatingObjectRedundancy, obj stgmod.ObjectDetail, ioRets map[string]exec.VarValue) {
  882. for i := range entry.Blocks {
  883. if entry.Blocks[i].FileHash != "" {
  884. continue
  885. }
  886. key := fmt.Sprintf("%d.%d", obj.Object.ObjectID, entry.Blocks[i].Index)
  887. // 不应该出现key不存在的情况
  888. r := ioRets[key].(*ops2.ShardInfoValue)
  889. entry.Blocks[i].FileHash = r.Hash
  890. entry.Blocks[i].Size = r.Size
  891. }
  892. }
  893. func init() {
  894. RegisterMessageConvertor(NewCleanPinned)
  895. }

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。